audiopod 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,212 @@
1
+ """
2
+ Transcription Service - Speech-to-text operations
3
+ """
4
+
5
+ from typing import List, Optional, Dict, Any, Union
6
+
7
+ from .base import BaseService
8
+ from ..models import Job, TranscriptionResult
9
+ from ..exceptions import ValidationError
10
+
11
+
12
class TranscriptionService(BaseService):
    """Service for audio transcription operations.

    When ``self.async_mode`` is truthy, each public method returns the
    coroutine produced by its ``_async_*`` counterpart instead of a result,
    so the caller must ``await`` it. Input validation (and, for uploads,
    file preparation) still runs eagerly, before the coroutine is created.
    """

    # Values accepted by the client-side validators below.
    _MODEL_TYPES = ("whisperx", "faster-whisper")
    _TRANSCRIPT_FORMATS = ("json", "txt", "srt", "vtt", "pdf", "docx", "html")

    # Endpoints shared by the sync and async code paths.
    _UPLOAD_ENDPOINT = "/api/v1/transcription/transcribe-upload"
    _URL_ENDPOINT = "/api/v1/transcription/transcribe"
    _JOBS_ENDPOINT = "/api/v1/transcription/jobs"

    @staticmethod
    def _result_from_job(completed_job: Any) -> TranscriptionResult:
        """Convert a finished job into a TranscriptionResult.

        The job's ``result`` payload is used when it is truthy; otherwise the
        job's own attribute dict is passed to ``TranscriptionResult.from_dict``.
        """
        return TranscriptionResult.from_dict(
            completed_job.result or completed_job.__dict__
        )

    def transcribe_audio(
        self,
        audio_file: str,
        language: Optional[str] = None,
        model_type: str = "whisperx",
        enable_speaker_diarization: bool = False,
        enable_word_timestamps: bool = True,
        wait_for_completion: bool = False,
        timeout: int = 600
    ) -> Union[Job, TranscriptionResult]:
        """
        Transcribe audio to text

        Args:
            audio_file: Path to audio file
            language: Language code (auto-detect if None); the field is left
                out of the request when falsy
            model_type: Model to use ('whisperx', 'faster-whisper')
            enable_speaker_diarization: Enable speaker identification
            enable_word_timestamps: Include word-level timestamps
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait

        Returns:
            Job object, or the transcription result when
            wait_for_completion is True. In async mode a coroutine
            resolving to one of those is returned instead.

        Raises:
            ValidationError: If model_type is not a supported value.
        """
        # Validate inputs
        if language:
            language = self._validate_language_code(language)
        if model_type not in self._MODEL_TYPES:
            raise ValidationError("Model type must be 'whisperx' or 'faster-whisper'")

        # Prepare file upload
        files = self._prepare_file_upload(audio_file, "files")

        # Prepare form data
        data = {
            "model_type": model_type,
            "enable_speaker_diarization": enable_speaker_diarization,
            "enable_word_timestamps": enable_word_timestamps
        }
        if language:
            data["language"] = language

        # Make request
        if self.async_mode:
            return self._async_transcribe_audio(files, data, wait_for_completion, timeout)

        response = self.client.request(
            "POST",
            self._UPLOAD_ENDPOINT,
            data=data,
            files=files
        )
        job = Job.from_dict(response)

        if wait_for_completion:
            completed_job = self._wait_for_completion(job.id, timeout)
            return self._result_from_job(completed_job)

        return job

    async def _async_transcribe_audio(
        self,
        files: Dict[str, Any],
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, TranscriptionResult]:
        """Async version of transcribe_audio; expects already-validated inputs."""
        response = await self.client.request(
            "POST",
            self._UPLOAD_ENDPOINT,
            data=data,
            files=files
        )
        job = Job.from_dict(response)

        if wait_for_completion:
            completed_job = await self._async_wait_for_completion(job.id, timeout)
            return self._result_from_job(completed_job)

        return job

    def transcribe_url(
        self,
        url: str,
        language: Optional[str] = None,
        model_type: str = "whisperx",
        enable_speaker_diarization: bool = False,
        wait_for_completion: bool = False,
        timeout: int = 600
    ) -> Union[Job, TranscriptionResult]:
        """
        Transcribe audio from URL (YouTube, etc.)

        Args:
            url: URL to audio/video content; sent as a one-element
                "source_urls" list
            language: Language code; left out of the request when falsy
            model_type: Model to use
            enable_speaker_diarization: Enable speaker identification
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait

        Returns:
            Job object, or the transcription result when
            wait_for_completion is True. In async mode a coroutine
            resolving to one of those is returned instead.
        """
        if language:
            language = self._validate_language_code(language)

        # NOTE(review): unlike transcribe_audio, model_type is forwarded here
        # without client-side validation - confirm whether that is intended.
        data = {
            "source_urls": [url],
            "model_type": model_type,
            "enable_speaker_diarization": enable_speaker_diarization
        }
        if language:
            data["language"] = language

        if self.async_mode:
            return self._async_transcribe_url(data, wait_for_completion, timeout)

        response = self.client.request("POST", self._URL_ENDPOINT, data=data)
        job = Job.from_dict(response)

        if wait_for_completion:
            completed_job = self._wait_for_completion(job.id, timeout)
            return self._result_from_job(completed_job)

        return job

    async def _async_transcribe_url(
        self,
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, TranscriptionResult]:
        """Async version of transcribe_url; expects already-validated inputs."""
        response = await self.client.request("POST", self._URL_ENDPOINT, data=data)
        job = Job.from_dict(response)

        if wait_for_completion:
            completed_job = await self._async_wait_for_completion(job.id, timeout)
            return self._result_from_job(completed_job)

        return job

    def get_transcription_job(self, job_id: int) -> TranscriptionResult:
        """Get transcription job details (a coroutine in async mode)."""
        if self.async_mode:
            return self._async_get_transcription_job(job_id)

        response = self.client.request("GET", f"{self._JOBS_ENDPOINT}/{job_id}")
        return TranscriptionResult.from_dict(response)

    async def _async_get_transcription_job(self, job_id: int) -> TranscriptionResult:
        """Async version of get_transcription_job"""
        response = await self.client.request("GET", f"{self._JOBS_ENDPOINT}/{job_id}")
        return TranscriptionResult.from_dict(response)

    def download_transcript(
        self,
        job_id: int,
        format: str = "json"
    ) -> str:
        """
        Download transcript in specified format

        Args:
            job_id: Transcription job ID
            format: Output format ('json', 'txt', 'srt', 'vtt', 'pdf',
                'docx', 'html')

        Returns:
            Transcript content as returned by the client request
            (a coroutine resolving to it in async mode)

        Raises:
            ValidationError: If format is not one of the supported values.
        """
        if format not in self._TRANSCRIPT_FORMATS:
            raise ValidationError("Format must be one of: json, txt, srt, vtt, pdf, docx, html")

        params = {"format": format}

        if self.async_mode:
            return self._async_download_transcript(job_id, params)

        return self.client.request(
            "GET",
            f"{self._JOBS_ENDPOINT}/{job_id}/transcript",
            params=params
        )

    async def _async_download_transcript(self, job_id: int, params: Dict[str, str]) -> str:
        """Async version of download_transcript"""
        return await self.client.request(
            "GET",
            f"{self._JOBS_ENDPOINT}/{job_id}/transcript",
            params=params
        )
@@ -0,0 +1,81 @@
1
+ """
2
+ Translation Service - Audio/video translation operations
3
+ """
4
+
5
+ from typing import Optional, Union
6
+ from .base import BaseService
7
+ from ..models import Job, TranslationResult
8
+
9
+
10
class TranslationService(BaseService):
    """Client-side wrapper for the audio and video translation endpoints.

    With ``self.async_mode`` truthy the public methods hand back the
    coroutine of their ``_async_*`` twin rather than a finished value.
    """

    def translate_audio(
        self,
        audio_file: str,
        target_language: str,
        source_language: Optional[str] = None,
        wait_for_completion: bool = False,
        timeout: int = 900
    ) -> Union[Job, TranslationResult]:
        """
        Submit an audio file for translation into another language.

        Args:
            audio_file: Path to audio file
            target_language: Target language code
            source_language: Source language (auto-detect if None); only
                sent when truthy
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait

        Returns:
            Job object, or the translation result when waiting was requested
        """
        # Validation order matters: target first, then the optional source.
        checked_target = self._validate_language_code(target_language)
        checked_source = (
            self._validate_language_code(source_language)
            if source_language
            else source_language
        )

        upload = self._prepare_file_upload(audio_file, "file")
        form = {"target_language": checked_target}
        if checked_source:
            form["source_language"] = checked_source

        if self.async_mode:
            return self._async_translate_audio(upload, form, wait_for_completion, timeout)

        payload = self.client.request(
            "POST", "/api/v1/translation/translate", data=form, files=upload
        )
        submitted = Job.from_dict(payload)
        if not wait_for_completion:
            return submitted

        finished = self._wait_for_completion(submitted.id, timeout)
        return TranslationResult.from_dict(finished.result or finished.__dict__)

    async def _async_translate_audio(self, files, data, wait_for_completion, timeout):
        """Coroutine twin of translate_audio; receives prepared files and form data."""
        payload = await self.client.request(
            "POST", "/api/v1/translation/translate", data=data, files=files
        )
        submitted = Job.from_dict(payload)
        if not wait_for_completion:
            return submitted

        finished = await self._async_wait_for_completion(submitted.id, timeout)
        return TranslationResult.from_dict(finished.result or finished.__dict__)

    def get_translation_job(self, job_id: int) -> TranslationResult:
        """Fetch the details of a translation job by its ID."""
        if self.async_mode:
            return self._async_get_translation_job(job_id)

        payload = self.client.request("GET", f"/api/v1/translation/translations/{job_id}")
        return TranslationResult.from_dict(payload)

    async def _async_get_translation_job(self, job_id: int) -> TranslationResult:
        """Coroutine twin of get_translation_job."""
        payload = await self.client.request("GET", f"/api/v1/translation/translations/{job_id}")
        return TranslationResult.from_dict(payload)
@@ -0,0 +1,376 @@
1
+ """
2
+ Voice Service - Voice cloning and TTS operations
3
+ """
4
+
5
+ from typing import List, Optional, Dict, Any, Union
6
+ from pathlib import Path
7
+
8
+ from .base import BaseService
9
+ from ..models import Job, VoiceProfile, JobStatus
10
+ from ..exceptions import ValidationError
11
+
12
+
13
class VoiceService(BaseService):
    """Service for voice cloning and text-to-speech operations.

    When ``self.async_mode`` is truthy, each public method returns the
    coroutine produced by its ``_async_*`` counterpart instead of a result,
    so the caller must ``await`` it. Input validation still runs eagerly.
    """

    # Seconds to pause between voice-profile status polls.
    _PROFILE_POLL_INTERVAL = 5

    @staticmethod
    def _profile_if_finished(voice_data: Dict[str, Any]) -> Optional[VoiceProfile]:
        """Return the profile once its status is "completed", None while pending.

        Raises:
            ValidationError: If the polled status is "failed".
        """
        status = voice_data["status"]
        if status == "completed":
            return VoiceProfile.from_dict(voice_data)
        if status == "failed":
            raise ValidationError(f"Voice profile creation failed: {voice_data.get('error_message')}")
        return None

    def clone_voice(
        self,
        voice_file: str,
        text: str,
        language: Optional[str] = None,
        speed: float = 1.0,
        wait_for_completion: bool = False,
        timeout: int = 300
    ) -> Union[Job, Dict[str, Any]]:
        """
        Clone a voice from an audio file

        Args:
            voice_file: Path to audio file containing voice to clone
            text: Text to generate with the cloned voice
            language: Target language code (e.g., 'en', 'es'); sent as
                "target_language" only when truthy
            speed: Speech speed (0.5 to 2.0)
            wait_for_completion: Whether to wait for job completion
            timeout: Maximum time to wait if wait_for_completion=True

        Returns:
            Job object if wait_for_completion=False; otherwise the finished
            job's result, or the finished job itself when its result is falsy

        Raises:
            ValidationError: If speed is outside 0.5-2.0.
        """
        # Validate inputs
        text = self._validate_text_input(text)
        if language:
            language = self._validate_language_code(language)
        if not 0.5 <= speed <= 2.0:
            raise ValidationError("Speed must be between 0.5 and 2.0")

        # Prepare file upload
        files = self._prepare_file_upload(voice_file, "file")

        # Prepare form data
        data = {
            "input_text": text,
            "speed": speed
        }
        if language:
            data["target_language"] = language

        # Make request
        if self.async_mode:
            return self._async_clone_voice(files, data, wait_for_completion, timeout)

        response = self.client.request(
            "POST",
            "/api/v1/voice/voice-clone",
            data=data,
            files=files
        )
        job = Job.from_dict(response)

        if wait_for_completion:
            job = self._wait_for_completion(job.id, timeout)
            return job.result if job.result else job

        return job

    async def _async_clone_voice(
        self,
        files: Dict[str, Any],
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, Dict[str, Any]]:
        """Async version of clone_voice; expects already-validated inputs."""
        response = await self.client.request(
            "POST",
            "/api/v1/voice/voice-clone",
            data=data,
            files=files
        )
        job = Job.from_dict(response)

        if wait_for_completion:
            job = await self._async_wait_for_completion(job.id, timeout)
            return job.result if job.result else job

        return job

    def create_voice_profile(
        self,
        name: str,
        voice_file: str,
        description: Optional[str] = None,
        is_public: bool = False,
        wait_for_completion: bool = False,
        timeout: int = 600
    ) -> Union[Job, VoiceProfile]:
        """
        Create a reusable voice profile

        Args:
            name: Name for the voice profile (surrounding whitespace is
                stripped before validation and sending)
            voice_file: Path to audio file containing voice sample
            description: Optional description (stripped; sent only if truthy)
            is_public: Whether to make the voice profile public
            wait_for_completion: Whether to wait for processing completion
            timeout: Maximum time in seconds to poll if wait_for_completion=True

        Returns:
            VoiceProfile built from the creation response, or from the polled
            profile once its status is "completed" when waiting

        Raises:
            ValidationError: If the name is empty or longer than 100
                characters, if the polled status is "failed", or if polling
                exceeds the timeout.
        """
        # Validate the value that is actually sent (the stripped name).
        clean_name = name.strip() if name else ""
        if not clean_name:
            raise ValidationError("Voice profile name cannot be empty")
        if len(clean_name) > 100:
            raise ValidationError("Voice profile name too long (max 100 characters)")

        # Prepare file upload
        files = self._prepare_file_upload(voice_file, "file")

        # Prepare form data
        data = {
            "name": clean_name,
            "is_public": is_public
        }
        if description:
            data["description"] = description.strip()

        # Make request
        if self.async_mode:
            return self._async_create_voice_profile(files, data, wait_for_completion, timeout)

        response = self.client.request(
            "POST",
            "/api/v1/voice/voice-profiles",
            data=data,
            files=files
        )

        if not wait_for_completion:
            return VoiceProfile.from_dict(response)

        import time

        voice_id = response["id"]
        # Poll for completion; monotonic clock so wall-clock changes cannot
        # shorten or extend the timeout.
        start_time = time.monotonic()
        while time.monotonic() - start_time < timeout:
            voice_data = self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
            profile = self._profile_if_finished(voice_data)
            if profile is not None:
                return profile
            time.sleep(self._PROFILE_POLL_INTERVAL)
        raise ValidationError("Voice profile creation timed out")

    async def _async_create_voice_profile(
        self,
        files: Dict[str, Any],
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, VoiceProfile]:
        """Async version of create_voice_profile; expects already-validated inputs."""
        response = await self.client.request(
            "POST",
            "/api/v1/voice/voice-profiles",
            data=data,
            files=files
        )

        if not wait_for_completion:
            return VoiceProfile.from_dict(response)

        # Both modules are needed here: the original imported only asyncio,
        # so the time.time() calls raised NameError on this path.
        import asyncio
        import time

        voice_id = response["id"]
        # Poll for completion
        start_time = time.monotonic()
        while time.monotonic() - start_time < timeout:
            voice_data = await self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
            profile = self._profile_if_finished(voice_data)
            if profile is not None:
                return profile
            await asyncio.sleep(self._PROFILE_POLL_INTERVAL)
        raise ValidationError("Voice profile creation timed out")

    def generate_speech(
        self,
        voice_id: Union[int, str],
        text: str,
        language: Optional[str] = None,
        speed: float = 1.0,
        audio_format: str = "mp3",
        wait_for_completion: bool = False,
        timeout: int = 300
    ) -> Union[Job, Dict[str, Any]]:
        """
        Generate speech using an existing voice profile

        Args:
            voice_id: ID or UUID of the voice profile
            text: Text to generate speech for
            language: Target language code; sent as "language" only when truthy
            speed: Speech speed (0.5 to 2.0)
            audio_format: Output audio format (mp3, wav)
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait

        Returns:
            If the response contains "job_id": a Job, or when waiting, the
            finished job's result (the job itself if the result is falsy).
            Otherwise the raw response is returned unchanged.

        Raises:
            ValidationError: If speed is outside 0.5-2.0 or audio_format is
                not 'mp3' or 'wav'.
        """
        # Validate inputs
        text = self._validate_text_input(text)
        if language:
            language = self._validate_language_code(language)
        if not 0.5 <= speed <= 2.0:
            raise ValidationError("Speed must be between 0.5 and 2.0")
        if audio_format not in ("mp3", "wav"):
            raise ValidationError("Audio format must be 'mp3' or 'wav'")

        # Prepare form data
        data = {
            "input_text": text,
            "speed": speed,
            "audio_format": audio_format
        }
        if language:
            data["language"] = language

        # Make request
        endpoint = f"/api/v1/voice/voices/{voice_id}/generate"

        if self.async_mode:
            return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)

        response = self.client.request("POST", endpoint, data=data)

        if "job_id" not in response:
            # Direct response with audio URL
            return response

        job = Job.from_dict(response)
        if wait_for_completion:
            job = self._wait_for_completion(job.id, timeout)
            return job.result if job.result else job
        return job

    async def _async_generate_speech(
        self,
        endpoint: str,
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, Dict[str, Any]]:
        """Async version of generate_speech; expects already-validated inputs."""
        response = await self.client.request("POST", endpoint, data=data)

        if "job_id" not in response:
            return response

        job = Job.from_dict(response)
        if wait_for_completion:
            job = await self._async_wait_for_completion(job.id, timeout)
            return job.result if job.result else job
        return job

    def list_voice_profiles(
        self,
        voice_type: Optional[str] = None,
        is_public: Optional[bool] = None,
        include_public: bool = True,
        limit: int = 50
    ) -> List[VoiceProfile]:
        """
        List available voice profiles

        Args:
            voice_type: Filter by voice type ('custom', 'standard'); sent
                only when truthy
            is_public: Filter by public status; sent only when not None
            include_public: Include public voices
            limit: Maximum number of results

        Returns:
            List of voice profiles, one per item of the response
        """
        params = {
            "limit": limit,
            "include_public": include_public
        }
        if voice_type:
            params["voice_type"] = voice_type
        if is_public is not None:
            params["is_public"] = is_public

        if self.async_mode:
            return self._async_list_voice_profiles(params)

        response = self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
        return [VoiceProfile.from_dict(voice_data) for voice_data in response]

    async def _async_list_voice_profiles(self, params: Dict[str, Any]) -> List[VoiceProfile]:
        """Async version of list_voice_profiles"""
        response = await self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
        return [VoiceProfile.from_dict(voice_data) for voice_data in response]

    def get_voice_profile(self, voice_id: Union[int, str]) -> VoiceProfile:
        """
        Get details of a specific voice profile

        Args:
            voice_id: ID or UUID of the voice profile

        Returns:
            Voice profile details
        """
        if self.async_mode:
            return self._async_get_voice_profile(voice_id)

        response = self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
        return VoiceProfile.from_dict(response)

    async def _async_get_voice_profile(self, voice_id: Union[int, str]) -> VoiceProfile:
        """Async version of get_voice_profile"""
        response = await self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
        return VoiceProfile.from_dict(response)

    def delete_voice_profile(self, voice_id: Union[int, str]) -> Dict[str, str]:
        """
        Delete a voice profile

        Args:
            voice_id: ID or UUID of the voice profile

        Returns:
            Deletion confirmation (the raw response of the DELETE request)
        """
        # NOTE(review): deletion targets /voices/{id} while reads use
        # /voice-profiles/{id} - confirm both paths are intended.
        if self.async_mode:
            return self._async_delete_voice_profile(voice_id)

        return self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")

    async def _async_delete_voice_profile(self, voice_id: Union[int, str]) -> Dict[str, str]:
        """Async version of delete_voice_profile"""
        return await self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")

    def get_job_status(self, job_id: int) -> Job:
        """
        Get status of a voice processing job

        Args:
            job_id: ID of the job

        Returns:
            Job status and details
        """
        if self.async_mode:
            return self._async_get_job_status(job_id)

        response = self.client.request("GET", f"/api/v1/voice/clone/{job_id}/status")
        return Job.from_dict(response)

    async def _async_get_job_status(self, job_id: int) -> Job:
        """Async version of get_job_status"""
        response = await self.client.request("GET", f"/api/v1/voice/clone/{job_id}/status")
        return Job.from_dict(response)