audiopod 1.1.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,212 +1,187 @@
1
1
  """
2
- Transcription Service - Speech-to-text operations
3
- """
2
+ Transcription Service - Speech-to-text
4
3
 
5
- from typing import List, Optional, Dict, Any, Union
4
+ API Routes:
5
+ - POST /api/v1/transcription/transcribe - Transcribe from URL
6
+ - POST /api/v1/transcription/transcribe-upload - Transcribe from file upload
7
+ - GET /api/v1/transcription/jobs/{id} - Get job details
8
+ - GET /api/v1/transcription/jobs - List jobs
9
+ - DELETE /api/v1/transcription/jobs/{id} - Delete job
10
+ """
6
11
 
12
+ from typing import Optional, Dict, Any, List
7
13
  from .base import BaseService
8
- from ..models import Job, TranscriptionResult
9
- from ..exceptions import ValidationError
10
14
 
11
15
 
12
16
  class TranscriptionService(BaseService):
13
- """Service for audio transcription operations"""
14
-
15
- def transcribe_audio(
17
+ """Service for speech-to-text transcription."""
18
+
19
+ def transcribe(
16
20
  self,
17
- audio_file: str,
21
+ audio_file: Optional[str] = None,
22
+ url: Optional[str] = None,
18
23
  language: Optional[str] = None,
19
- model_type: str = "whisperx",
20
- enable_speaker_diarization: bool = False,
21
- enable_word_timestamps: bool = True,
24
+ speaker_diarization: bool = False,
22
25
  wait_for_completion: bool = False,
23
- timeout: int = 600
24
- ) -> Union[Job, TranscriptionResult]:
26
+ timeout: int = 600,
27
+ ) -> Dict[str, Any]:
25
28
  """
26
- Transcribe audio to text
27
-
29
+ Transcribe audio to text.
30
+
28
31
  Args:
29
- audio_file: Path to audio file
30
- language: Language code (auto-detect if None)
31
- model_type: Model to use ('whisperx', 'faster-whisper')
32
- enable_speaker_diarization: Enable speaker identification
33
- enable_word_timestamps: Include word-level timestamps
34
- wait_for_completion: Whether to wait for completion
35
- timeout: Maximum time to wait
36
-
32
+ audio_file: Path to local audio file
33
+ url: URL of audio file (or list of URLs)
34
+ language: Language code (auto-detected if not provided)
35
+ speaker_diarization: Enable speaker separation
36
+ wait_for_completion: Wait for completion
37
+ timeout: Max wait time in seconds
38
+
37
39
  Returns:
38
- Job object or transcription result
40
+ Job dict with transcript when completed
39
41
  """
40
- # Validate inputs
41
- if language:
42
- language = self._validate_language_code(language)
43
- if model_type not in ["whisperx", "faster-whisper"]:
44
- raise ValidationError("Model type must be 'whisperx' or 'faster-whisper'")
45
-
46
- # Prepare file upload
47
- files = self._prepare_file_upload(audio_file, "files")
48
-
49
- # Prepare form data
50
- data = {
51
- "model_type": model_type,
52
- "enable_speaker_diarization": enable_speaker_diarization,
53
- "enable_word_timestamps": enable_word_timestamps
54
- }
55
- if language:
56
- data["language"] = language
57
-
58
- # Make request
59
- if self.async_mode:
60
- return self._async_transcribe_audio(files, data, wait_for_completion, timeout)
42
+ if audio_file:
43
+ # File upload endpoint
44
+ data = {
45
+ "enable_speaker_diarization": speaker_diarization,
46
+ }
47
+ if language:
48
+ data["language"] = language
49
+
50
+ files = self._prepare_file_upload(audio_file, "files")
51
+
52
+ if self.async_mode:
53
+ return self._async_transcribe_upload(data, files, wait_for_completion, timeout)
54
+
55
+ response = self.client.request(
56
+ "POST", "/api/v1/transcription/transcribe-upload", data=data, files=files
57
+ )
61
58
  else:
59
+ # URL-based endpoint
60
+ data = {
61
+ "source_urls": [url] if isinstance(url, str) else url,
62
+ "enable_speaker_diarization": speaker_diarization,
63
+ }
64
+ if language:
65
+ data["language"] = language
66
+
67
+ if self.async_mode:
68
+ return self._async_transcribe(data, wait_for_completion, timeout)
69
+
62
70
  response = self.client.request(
63
- "POST",
64
- "/api/v1/transcription/transcribe-upload",
65
- data=data,
66
- files=files
71
+ "POST", "/api/v1/transcription/transcribe", json_data=data
67
72
  )
68
-
69
- job = Job.from_dict(response)
70
-
71
- if wait_for_completion:
72
- completed_job = self._wait_for_completion(job.id, timeout)
73
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
74
-
75
- return job
76
-
77
- async def _async_transcribe_audio(
78
- self,
79
- files: Dict[str, Any],
80
- data: Dict[str, Any],
81
- wait_for_completion: bool,
82
- timeout: int
83
- ) -> Union[Job, TranscriptionResult]:
84
- """Async version of transcribe_audio"""
73
+
74
+ if wait_for_completion:
75
+ job_id = response.get("id") or response.get("job_id")
76
+ return self._wait_for_transcription(job_id, timeout)
77
+ return response
78
+
79
+ async def _async_transcribe(
80
+ self, data: Dict, wait_for_completion: bool, timeout: int
81
+ ) -> Dict[str, Any]:
85
82
  response = await self.client.request(
86
- "POST",
87
- "/api/v1/transcription/transcribe-upload",
88
- data=data,
89
- files=files
83
+ "POST", "/api/v1/transcription/transcribe", json_data=data
90
84
  )
91
-
92
- job = Job.from_dict(response)
93
-
94
85
  if wait_for_completion:
95
- completed_job = await self._async_wait_for_completion(job.id, timeout)
96
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
97
-
98
- return job
99
-
100
- def transcribe_url(
101
- self,
102
- url: str,
103
- language: Optional[str] = None,
104
- model_type: str = "whisperx",
105
- enable_speaker_diarization: bool = False,
106
- wait_for_completion: bool = False,
107
- timeout: int = 600
108
- ) -> Union[Job, TranscriptionResult]:
109
- """
110
- Transcribe audio from URL (YouTube, etc.)
111
-
112
- Args:
113
- url: URL to audio/video content
114
- language: Language code
115
- model_type: Model to use
116
- enable_speaker_diarization: Enable speaker identification
117
- wait_for_completion: Whether to wait for completion
118
- timeout: Maximum time to wait
119
-
120
- Returns:
121
- Job object or transcription result
122
- """
123
- if language:
124
- language = self._validate_language_code(language)
125
-
126
- data = {
127
- "source_urls": [url],
128
- "model_type": model_type,
129
- "enable_speaker_diarization": enable_speaker_diarization
130
- }
131
- if language:
132
- data["language"] = language
133
-
134
- if self.async_mode:
135
- return self._async_transcribe_url(data, wait_for_completion, timeout)
136
- else:
137
- response = self.client.request("POST", "/api/v1/transcription/transcribe", data=data)
138
- job = Job.from_dict(response)
139
-
140
- if wait_for_completion:
141
- completed_job = self._wait_for_completion(job.id, timeout)
142
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
143
-
144
- return job
145
-
146
- async def _async_transcribe_url(
147
- self,
148
- data: Dict[str, Any],
149
- wait_for_completion: bool,
150
- timeout: int
151
- ) -> Union[Job, TranscriptionResult]:
152
- """Async version of transcribe_url"""
153
- response = await self.client.request("POST", "/api/v1/transcription/transcribe", data=data)
154
- job = Job.from_dict(response)
155
-
86
+ job_id = response.get("id") or response.get("job_id")
87
+ return await self._async_wait_for_transcription(job_id, timeout)
88
+ return response
89
+
90
+ async def _async_transcribe_upload(
91
+ self, data: Dict, files: Dict, wait_for_completion: bool, timeout: int
92
+ ) -> Dict[str, Any]:
93
+ response = await self.client.request(
94
+ "POST", "/api/v1/transcription/transcribe-upload", data=data, files=files
95
+ )
156
96
  if wait_for_completion:
157
- completed_job = await self._async_wait_for_completion(job.id, timeout)
158
- return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
159
-
160
- return job
161
-
162
- def get_transcription_job(self, job_id: int) -> TranscriptionResult:
163
- """Get transcription job details"""
97
+ job_id = response.get("id") or response.get("job_id")
98
+ return await self._async_wait_for_transcription(job_id, timeout)
99
+ return response
100
+
101
+ def get_job(self, job_id: int) -> Dict[str, Any]:
102
+ """Get transcription job details and status."""
164
103
  if self.async_mode:
165
- return self._async_get_transcription_job(job_id)
166
- else:
167
- response = self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
168
- return TranscriptionResult.from_dict(response)
169
-
170
- async def _async_get_transcription_job(self, job_id: int) -> TranscriptionResult:
171
- """Async version of get_transcription_job"""
172
- response = await self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
173
- return TranscriptionResult.from_dict(response)
174
-
175
- def download_transcript(
176
- self,
177
- job_id: int,
178
- format: str = "json"
179
- ) -> str:
104
+ return self._async_get_job(job_id)
105
+ return self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
106
+
107
+ async def _async_get_job(self, job_id: int) -> Dict[str, Any]:
108
+ return await self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
109
+
110
+ def list_jobs(self, skip: int = 0, limit: int = 50) -> List[Dict[str, Any]]:
111
+ """List transcription jobs."""
112
+ if self.async_mode:
113
+ return self._async_list_jobs(skip, limit)
114
+ return self.client.request(
115
+ "GET", "/api/v1/transcription/jobs", params={"skip": skip, "limit": limit}
116
+ )
117
+
118
+ async def _async_list_jobs(self, skip: int, limit: int) -> List[Dict[str, Any]]:
119
+ return await self.client.request(
120
+ "GET", "/api/v1/transcription/jobs", params={"skip": skip, "limit": limit}
121
+ )
122
+
123
+ def delete_job(self, job_id: int) -> Dict[str, str]:
124
+ """Delete a transcription job."""
125
+ if self.async_mode:
126
+ return self._async_delete_job(job_id)
127
+ return self.client.request("DELETE", f"/api/v1/transcription/jobs/{job_id}")
128
+
129
+ async def _async_delete_job(self, job_id: int) -> Dict[str, str]:
130
+ return await self.client.request("DELETE", f"/api/v1/transcription/jobs/{job_id}")
131
+
132
+ def get_transcript(self, job_id: int, format: str = "json") -> Any:
180
133
  """
181
- Download transcript in specified format
134
+ Get transcript content.
182
135
 
183
136
  Args:
184
- job_id: Transcription job ID
185
- format: Output format ('json', 'txt', 'srt', 'vtt', 'pdf')
186
-
187
- Returns:
188
- Transcript content
137
+ job_id: Job ID
138
+ format: Output format - 'json', 'txt', 'srt', 'vtt'
189
139
  """
190
- if format not in ["json", "txt", "srt", "vtt", "pdf", "docx", "html"]:
191
- raise ValidationError("Format must be one of: json, txt, srt, vtt, pdf, docx, html")
192
-
193
- params = {"format": format}
194
-
195
140
  if self.async_mode:
196
- return self._async_download_transcript(job_id, params)
197
- else:
198
- response = self.client.request(
199
- "GET",
200
- f"/api/v1/transcription/jobs/{job_id}/transcript",
201
- params=params
202
- )
203
- return response
204
-
205
- async def _async_download_transcript(self, job_id: int, params: Dict[str, str]) -> str:
206
- """Async version of download_transcript"""
207
- response = await self.client.request(
208
- "GET",
209
- f"/api/v1/transcription/jobs/{job_id}/transcript",
210
- params=params
141
+ return self._async_get_transcript(job_id, format)
142
+ return self.client.request(
143
+ "GET", f"/api/v1/transcription/jobs/{job_id}/transcript", params={"format": format}
211
144
  )
212
- return response
145
+
146
+ async def _async_get_transcript(self, job_id: int, format: str) -> Any:
147
+ return await self.client.request(
148
+ "GET", f"/api/v1/transcription/jobs/{job_id}/transcript", params={"format": format}
149
+ )
150
+
151
+ def _wait_for_transcription(self, job_id: int, timeout: int) -> Dict[str, Any]:
152
+ """Wait for transcription job completion."""
153
+ import time
154
+ start_time = time.time()
155
+
156
+ while time.time() - start_time < timeout:
157
+ job = self.get_job(job_id)
158
+ status = job.get("status", "").upper()
159
+
160
+ if status == "COMPLETED":
161
+ return job
162
+ elif status in ("FAILED", "ERROR", "CANCELLED"):
163
+ raise Exception(f"Transcription failed: {job.get('error_message', 'Unknown error')}")
164
+
165
+ time.sleep(3)
166
+
167
+ raise TimeoutError(f"Transcription {job_id} did not complete within {timeout} seconds")
168
+
169
+ async def _async_wait_for_transcription(self, job_id: int, timeout: int) -> Dict[str, Any]:
170
+ """Async wait for transcription job completion."""
171
+ import asyncio
172
+ import time
173
+ start_time = time.time()
174
+
175
+ while time.time() - start_time < timeout:
176
+ job = await self.get_job(job_id)
177
+ status = job.get("status", "").upper()
178
+
179
+ if status == "COMPLETED":
180
+ return job
181
+ elif status in ("FAILED", "ERROR", "CANCELLED"):
182
+ raise Exception(f"Transcription failed: {job.get('error_message', 'Unknown error')}")
183
+
184
+ await asyncio.sleep(3)
185
+
186
+ raise TimeoutError(f"Transcription {job_id} did not complete within {timeout} seconds")
187
+