audiopod 1.2.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,212 +1,210 @@
1
1
  """
2
- Transcription Service - Speech-to-text operations
3
- """
2
+ Transcription Service - Speech-to-text
4
3
 
5
- from typing import List, Optional, Dict, Any, Union
4
+ API Routes:
5
+ - POST /api/v1/transcription/transcribe - Transcribe from URL
6
+ - POST /api/v1/transcription/transcribe-upload - Transcribe from file upload
7
+ - GET /api/v1/transcription/jobs/{id} - Get job details
8
+ - GET /api/v1/transcription/jobs - List jobs
9
+ - DELETE /api/v1/transcription/jobs/{id} - Delete job
10
+ """
6
11
 
12
+ from typing import Optional, Dict, Any, List
7
13
  from .base import BaseService
8
- from ..models import Job, TranscriptionResult
9
- from ..exceptions import ValidationError
10
14
 
11
15
 
12
16
  class TranscriptionService(BaseService):
13
- """Service for audio transcription operations"""
14
-
15
- def transcribe_audio(
17
+ """Service for speech-to-text transcription."""
18
+
19
+ def create(
16
20
  self,
17
- audio_file: str,
21
+ file: Optional[str] = None,
22
+ url: Optional[str] = None,
18
23
  language: Optional[str] = None,
19
- model_type: str = "whisperx",
20
- enable_speaker_diarization: bool = False,
21
- enable_word_timestamps: bool = True,
24
+ speaker_diarization: bool = False,
22
25
  wait_for_completion: bool = False,
23
- timeout: int = 600
24
- ) -> Union[Job, TranscriptionResult]:
26
+ timeout: int = 600,
27
+ ) -> Dict[str, Any]:
28
+ """Alias for transcribe() - matches Node.js SDK."""
29
+ return self.transcribe(
30
+ audio_file=file,
31
+ url=url,
32
+ language=language,
33
+ speaker_diarization=speaker_diarization,
34
+ wait_for_completion=wait_for_completion,
35
+ timeout=timeout,
36
+ )
37
+
38
+ def transcribe(
39
+ self,
40
+ audio_file: Optional[str] = None,
41
+ url: Optional[str] = None,
42
+ language: Optional[str] = None,
43
+ speaker_diarization: bool = False,
44
+ wait_for_completion: bool = False,
45
+ timeout: int = 600,
46
+ ) -> Dict[str, Any]:
25
47
  """
26
- Transcribe audio to text
27
-
48
+ Transcribe audio to text.
49
+
28
50
  Args:
29
- audio_file: Path to audio file
30
- language: Language code (auto-detect if None)
31
- model_type: Model to use ('whisperx', 'faster-whisper')
32
- enable_speaker_diarization: Enable speaker identification
33
- enable_word_timestamps: Include word-level timestamps
34
- wait_for_completion: Whether to wait for completion
35
- timeout: Maximum time to wait
36
-
51
+ audio_file: Path to local audio file
52
+ url: URL of audio file (or list of URLs)
53
+ language: Language code (auto-detected if not provided)
54
+ speaker_diarization: Enable speaker separation
55
+ wait_for_completion: Wait for completion
56
+ timeout: Max wait time in seconds
57
+
37
58
  Returns:
38
- Job object or transcription result
59
+ Job dict with transcript when completed
39
60
  """
40
- # Validate inputs
41
- if language:
42
- language = self._validate_language_code(language)
43
- if model_type not in ["whisperx", "faster-whisper"]:
44
- raise ValidationError("Model type must be 'whisperx' or 'faster-whisper'")
45
-
46
- # Prepare file upload
47
- files = self._prepare_file_upload(audio_file, "files")
48
-
49
- # Prepare form data
50
- data = {
51
- "model_type": model_type,
52
- "enable_speaker_diarization": enable_speaker_diarization,
53
- "enable_word_timestamps": enable_word_timestamps
54
- }
55
- if language:
56
- data["language"] = language
57
-
58
- # Make request
59
- if self.async_mode:
60
- return self._async_transcribe_audio(files, data, wait_for_completion, timeout)
61
+ if audio_file:
62
+ # File upload endpoint
63
+ data = {
64
+ "enable_speaker_diarization": speaker_diarization,
65
+ }
66
+ if language:
67
+ data["language"] = language
68
+
69
+ files = self._prepare_file_upload(audio_file, "files")
70
+
71
+ if self.async_mode:
72
+ return self._async_transcribe_upload(data, files, wait_for_completion, timeout)
73
+
74
+ response = self.client.request(
75
+ "POST", "/api/v1/transcription/transcribe-upload", data=data, files=files
76
+ )
61
77
  else:
78
+ # URL-based endpoint
79
+ data = {
80
+ "source_urls": [url] if isinstance(url, str) else url,
81
+ "enable_speaker_diarization": speaker_diarization,
82
+ }
83
+ if language:
84
+ data["language"] = language
85
+
86
+ if self.async_mode:
87
+ return self._async_transcribe(data, wait_for_completion, timeout)
88
+
62
89
  response = self.client.request(
63
- "POST",
64
- "/api/v1/transcription/transcribe-upload",
65
- data=data,
66
- files=files
90
+ "POST", "/api/v1/transcription/transcribe", json_data=data
67
91
  )
68
-
69
- job = Job.from_dict(response)
70
-
71
- if wait_for_completion:
72
- completed_job = self._wait_for_completion(job.id, timeout)
73
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
74
-
75
- return job
76
-
77
- async def _async_transcribe_audio(
78
- self,
79
- files: Dict[str, Any],
80
- data: Dict[str, Any],
81
- wait_for_completion: bool,
82
- timeout: int
83
- ) -> Union[Job, TranscriptionResult]:
84
- """Async version of transcribe_audio"""
92
+
93
+ if wait_for_completion:
94
+ job_id = response.get("id") or response.get("job_id")
95
+ return self._wait_for_transcription(job_id, timeout)
96
+ return response
97
+
98
+ async def _async_transcribe(
99
+ self, data: Dict, wait_for_completion: bool, timeout: int
100
+ ) -> Dict[str, Any]:
85
101
  response = await self.client.request(
86
- "POST",
87
- "/api/v1/transcription/transcribe-upload",
88
- data=data,
89
- files=files
102
+ "POST", "/api/v1/transcription/transcribe", json_data=data
90
103
  )
91
-
92
- job = Job.from_dict(response)
93
-
94
104
  if wait_for_completion:
95
- completed_job = await self._async_wait_for_completion(job.id, timeout)
96
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
97
-
98
- return job
99
-
100
- def transcribe_url(
101
- self,
102
- url: str,
103
- language: Optional[str] = None,
104
- model_type: str = "whisperx",
105
- enable_speaker_diarization: bool = False,
106
- wait_for_completion: bool = False,
107
- timeout: int = 600
108
- ) -> Union[Job, TranscriptionResult]:
109
- """
110
- Transcribe audio from URL (YouTube, etc.)
111
-
112
- Args:
113
- url: URL to audio/video content
114
- language: Language code
115
- model_type: Model to use
116
- enable_speaker_diarization: Enable speaker identification
117
- wait_for_completion: Whether to wait for completion
118
- timeout: Maximum time to wait
119
-
120
- Returns:
121
- Job object or transcription result
122
- """
123
- if language:
124
- language = self._validate_language_code(language)
125
-
126
- data = {
127
- "source_urls": [url],
128
- "model_type": model_type,
129
- "enable_speaker_diarization": enable_speaker_diarization
130
- }
131
- if language:
132
- data["language"] = language
133
-
134
- if self.async_mode:
135
- return self._async_transcribe_url(data, wait_for_completion, timeout)
136
- else:
137
- response = self.client.request("POST", "/api/v1/transcription/transcribe", data=data)
138
- job = Job.from_dict(response)
139
-
140
- if wait_for_completion:
141
- completed_job = self._wait_for_completion(job.id, timeout)
142
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
143
-
144
- return job
145
-
146
- async def _async_transcribe_url(
147
- self,
148
- data: Dict[str, Any],
149
- wait_for_completion: bool,
150
- timeout: int
151
- ) -> Union[Job, TranscriptionResult]:
152
- """Async version of transcribe_url"""
153
- response = await self.client.request("POST", "/api/v1/transcription/transcribe", data=data)
154
- job = Job.from_dict(response)
155
-
105
+ job_id = response.get("id") or response.get("job_id")
106
+ return await self._async_wait_for_transcription(job_id, timeout)
107
+ return response
108
+
109
+ async def _async_transcribe_upload(
110
+ self, data: Dict, files: Dict, wait_for_completion: bool, timeout: int
111
+ ) -> Dict[str, Any]:
112
+ response = await self.client.request(
113
+ "POST", "/api/v1/transcription/transcribe-upload", data=data, files=files
114
+ )
156
115
  if wait_for_completion:
157
- completed_job = await self._async_wait_for_completion(job.id, timeout)
158
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
159
-
160
- return job
161
-
162
- def get_transcription_job(self, job_id: int) -> TranscriptionResult:
163
- """Get transcription job details"""
116
+ job_id = response.get("id") or response.get("job_id")
117
+ return await self._async_wait_for_transcription(job_id, timeout)
118
+ return response
119
+
120
+ def get_job(self, job_id: int) -> Dict[str, Any]:
121
+ """Get transcription job details and status."""
164
122
  if self.async_mode:
165
- return self._async_get_transcription_job(job_id)
166
- else:
167
- response = self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
168
- return TranscriptionResult.from_dict(response)
169
-
170
- async def _async_get_transcription_job(self, job_id: int) -> TranscriptionResult:
171
- """Async version of get_transcription_job"""
172
- response = await self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
173
- return TranscriptionResult.from_dict(response)
174
-
175
- def download_transcript(
176
- self,
177
- job_id: int,
178
- format: str = "json"
179
- ) -> str:
123
+ return self._async_get_job(job_id)
124
+ return self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
125
+
126
+ async def _async_get_job(self, job_id: int) -> Dict[str, Any]:
127
+ return await self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
128
+
129
+ def list_jobs(self, skip: int = 0, limit: int = 50) -> List[Dict[str, Any]]:
130
+ """List transcription jobs."""
131
+ if self.async_mode:
132
+ return self._async_list_jobs(skip, limit)
133
+ return self.client.request(
134
+ "GET", "/api/v1/transcription/jobs", params={"skip": skip, "limit": limit}
135
+ )
136
+
137
+ async def _async_list_jobs(self, skip: int, limit: int) -> List[Dict[str, Any]]:
138
+ return await self.client.request(
139
+ "GET", "/api/v1/transcription/jobs", params={"skip": skip, "limit": limit}
140
+ )
141
+
142
+ def delete_job(self, job_id: int) -> Dict[str, str]:
143
+ """Delete a transcription job."""
144
+ if self.async_mode:
145
+ return self._async_delete_job(job_id)
146
+ return self.client.request("DELETE", f"/api/v1/transcription/jobs/{job_id}")
147
+
148
+ async def _async_delete_job(self, job_id: int) -> Dict[str, str]:
149
+ return await self.client.request("DELETE", f"/api/v1/transcription/jobs/{job_id}")
150
+
151
+ def get_transcript(self, job_id: int, format: str = "json") -> Any:
180
152
  """
181
- Download transcript in specified format
153
+ Get transcript content.
182
154
 
183
155
  Args:
184
- job_id: Transcription job ID
185
- format: Output format ('json', 'txt', 'srt', 'vtt', 'pdf')
186
-
187
- Returns:
188
- Transcript content
156
+ job_id: Job ID
157
+ format: Output format - 'json', 'txt', 'srt', 'vtt'
189
158
  """
190
- if format not in ["json", "txt", "srt", "vtt", "pdf", "docx", "html"]:
191
- raise ValidationError("Format must be one of: json, txt, srt, vtt, pdf, docx, html")
192
-
193
- params = {"format": format}
194
-
195
159
  if self.async_mode:
196
- return self._async_download_transcript(job_id, params)
197
- else:
198
- response = self.client.request(
199
- "GET",
200
- f"/api/v1/transcription/jobs/{job_id}/transcript",
201
- params=params
202
- )
203
- return response
204
-
205
- async def _async_download_transcript(self, job_id: int, params: Dict[str, str]) -> str:
206
- """Async version of download_transcript"""
207
- response = await self.client.request(
208
- "GET",
209
- f"/api/v1/transcription/jobs/{job_id}/transcript",
210
- params=params
160
+ return self._async_get_transcript(job_id, format)
161
+ return self.client.request(
162
+ "GET", f"/api/v1/transcription/jobs/{job_id}/transcript", params={"format": format}
211
163
  )
212
- return response
164
+
165
+ async def _async_get_transcript(self, job_id: int, format: str) -> Any:
166
+ return await self.client.request(
167
+ "GET", f"/api/v1/transcription/jobs/{job_id}/transcript", params={"format": format}
168
+ )
169
+
170
+ def wait_for_completion(self, job_id: int, timeout: int = 600) -> Dict[str, Any]:
171
+ """Wait for transcription job completion (matches Node.js SDK)."""
172
+ return self._wait_for_transcription(job_id, timeout)
173
+
174
+ def _wait_for_transcription(self, job_id: int, timeout: int) -> Dict[str, Any]:
175
+ """Wait for transcription job completion."""
176
+ import time
177
+ start_time = time.time()
178
+
179
+ while time.time() - start_time < timeout:
180
+ job = self.get_job(job_id)
181
+ status = job.get("status", "").upper()
182
+
183
+ if status == "COMPLETED":
184
+ return job
185
+ elif status in ("FAILED", "ERROR", "CANCELLED"):
186
+ raise Exception(f"Transcription failed: {job.get('error_message', 'Unknown error')}")
187
+
188
+ time.sleep(3)
189
+
190
+ raise TimeoutError(f"Transcription {job_id} did not complete within {timeout} seconds")
191
+
192
+ async def _async_wait_for_transcription(self, job_id: int, timeout: int) -> Dict[str, Any]:
193
+ """Async wait for transcription job completion."""
194
+ import asyncio
195
+ import time
196
+ start_time = time.time()
197
+
198
+ while time.time() - start_time < timeout:
199
+ job = await self.get_job(job_id)
200
+ status = job.get("status", "").upper()
201
+
202
+ if status == "COMPLETED":
203
+ return job
204
+ elif status in ("FAILED", "ERROR", "CANCELLED"):
205
+ raise Exception(f"Transcription failed: {job.get('error_message', 'Unknown error')}")
206
+
207
+ await asyncio.sleep(3)
208
+
209
+ raise TimeoutError(f"Transcription {job_id} did not complete within {timeout} seconds")
210
+