audiopod 1.2.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,470 +1,187 @@
1
1
  """
2
- Voice Service - Voice cloning and TTS operations
3
- """
2
+ Voice Service - Voice cloning and text-to-speech
4
3
 
5
- import time
6
- from typing import List, Optional, Dict, Any, Union
7
- from pathlib import Path
4
+ API Routes:
5
+ - GET /api/v1/voice/voice-profiles - List all voices
6
+ - GET /api/v1/voice/voices/{id}/status - Get voice details
7
+ - POST /api/v1/voice/voice-profiles - Create voice clone
8
+ - DELETE /api/v1/voice/voices/{id} - Delete voice
9
+ - POST /api/v1/voice/voices/{id}/generate - Generate TTS
10
+ - GET /api/v1/voice/tts-jobs/{id}/status - Get TTS job status
11
+ """
8
12
 
13
+ from typing import Optional, Dict, Any, List, Union
9
14
  from .base import BaseService
10
- from ..models import Job, VoiceProfile, JobStatus
11
- from ..exceptions import ValidationError
12
15
 
13
16
 
14
17
  class VoiceService(BaseService):
15
- """Service for voice cloning and text-to-speech operations"""
16
-
17
- def generate_voice(
18
- self,
19
- text: str,
20
- voice_file: Optional[str] = None,
21
- voice_id: Optional[Union[int, str]] = None,
22
- language: Optional[str] = None,
23
- speed: float = 1.0,
24
- audio_format: str = "mp3",
25
- generation_params: Optional[Dict[str, Any]] = None,
26
- wait_for_completion: bool = False,
27
- timeout: int = 300
28
- ) -> Union[Job, Dict[str, Any]]:
29
- """
30
- Generate speech using either a voice file (for cloning) or existing voice profile
31
-
32
- This unified method handles both voice cloning and text-to-speech generation:
33
- - For voice cloning: Provide voice_file parameter
34
- - For TTS with existing voice: Provide voice_id parameter
35
-
36
- Args:
37
- text: Text to generate speech for
38
- voice_file: Path to audio file for voice cloning (mutually exclusive with voice_id)
39
- voice_id: ID/UUID of existing voice profile (mutually exclusive with voice_file)
40
- language: Target language code (e.g., 'en', 'es')
41
- speed: Speech speed (0.25 to 4.0, provider dependent)
42
- audio_format: Output audio format ('mp3', 'wav', 'ogg')
43
- generation_params: Provider-specific parameters (speed, temperature, etc.)
44
- wait_for_completion: Whether to wait for job completion
45
- timeout: Maximum time to wait if wait_for_completion=True
46
-
47
- Returns:
48
- Job object if wait_for_completion=False, otherwise job result
49
-
50
- Raises:
51
- ValidationError: If both or neither voice_file and voice_id are provided
52
- """
53
- # Validate inputs
54
- if not voice_file and not voice_id:
55
- raise ValidationError("Either voice_file (for cloning) or voice_id (for TTS) must be provided")
56
- if voice_file and voice_id:
57
- raise ValidationError("Provide either voice_file or voice_id, not both")
58
-
59
- text = self._validate_text_input(text)
60
- if language:
61
- language = self._validate_language_code(language)
62
- if not 0.25 <= speed <= 4.0:
63
- raise ValidationError("Speed must be between 0.25 and 4.0")
64
- if audio_format not in ["mp3", "wav", "ogg"]:
65
- raise ValidationError("Audio format must be 'mp3', 'wav', or 'ogg'")
66
-
67
- # For voice cloning, we need to create a temporary voice first, then generate
68
- if voice_file:
69
- return self._generate_with_voice_file(
70
- voice_file, text, language, speed, audio_format,
71
- generation_params, wait_for_completion, timeout
72
- )
73
- else:
74
- # Use existing voice profile with unified endpoint
75
- return self._generate_with_voice_id(
76
- voice_id, text, language, speed, audio_format,
77
- generation_params, wait_for_completion, timeout
78
- )
79
-
80
- def _generate_with_voice_file(
18
+ """Service for voice cloning and text-to-speech."""
19
+
20
+ def list_voices(
81
21
  self,
82
- voice_file: str,
83
- text: str,
84
- language: Optional[str],
85
- speed: float,
86
- audio_format: str,
87
- generation_params: Optional[Dict[str, Any]],
88
- wait_for_completion: bool,
89
- timeout: int
90
- ) -> Union[Job, Dict[str, Any]]:
91
- """Generate speech with voice cloning using unified endpoint"""
92
- # For voice cloning, we use the "clone" identifier with the unified endpoint
93
- files = self._prepare_file_upload(voice_file, "file")
94
-
95
- # Prepare form data for unified endpoint
96
- data = {
97
- "input_text": text,
98
- "speed": speed,
99
- "audio_format": audio_format
22
+ skip: int = 0,
23
+ limit: int = 100,
24
+ include_public: bool = True,
25
+ ) -> List[Dict[str, Any]]:
26
+ """List available voices (both custom and public)."""
27
+ params = {
28
+ "skip": skip,
29
+ "limit": limit,
30
+ "include_public": str(include_public).lower(),
100
31
  }
101
- if language:
102
- data["language"] = language
103
- if generation_params:
104
- # Add generation parameters
105
- data.update(generation_params)
106
-
107
32
  if self.async_mode:
108
- return self._async_generate_voice("clone", data, files, wait_for_completion, timeout)
109
- else:
110
- # Use unified voice generation endpoint with "clone" identifier
111
- response = self.client.request(
112
- "POST",
113
- "/api/v1/voice/voices/clone/generate",
114
- data=data,
115
- files=files
116
- )
117
-
118
- job = Job.from_dict(response)
119
-
120
- if wait_for_completion:
121
- job = self._wait_for_completion(job.id, timeout)
122
- return job.result if job.result else job
123
-
124
- return job
125
-
126
- def _generate_with_voice_id(
127
- self,
128
- voice_id: Union[int, str],
129
- text: str,
130
- language: Optional[str],
131
- speed: float,
132
- audio_format: str,
133
- generation_params: Optional[Dict[str, Any]],
134
- wait_for_completion: bool,
135
- timeout: int
136
- ) -> Union[Job, Dict[str, Any]]:
137
- """Generate speech with existing voice profile using unified endpoint"""
138
- # Prepare form data for unified endpoint
139
- data = {
140
- "input_text": text,
141
- "speed": speed,
142
- "audio_format": audio_format
143
- }
144
- if language:
145
- data["language"] = language
146
- if generation_params:
147
- # Add generation parameters
148
- data.update(generation_params)
149
-
33
+ return self._async_list_voices(params)
34
+ return self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
35
+
36
+ async def _async_list_voices(self, params: Dict) -> List[Dict[str, Any]]:
37
+ return await self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
38
+
39
+ def get_voice(self, voice_id: Union[int, str]) -> Dict[str, Any]:
40
+ """Get voice details by ID or UUID."""
150
41
  if self.async_mode:
151
- return self._async_generate_voice(voice_id, data, None, wait_for_completion, timeout)
152
- else:
153
- # Use unified voice generation endpoint
154
- response = self.client.request(
155
- "POST",
156
- f"/api/v1/voice/voices/{voice_id}/generate",
157
- data=data
158
- )
159
-
160
- job = Job.from_dict(response)
161
-
162
- if wait_for_completion:
163
- job = self._wait_for_completion(job.id, timeout)
164
- return job.result if job.result else job
165
-
166
- return job
167
-
168
- async def _async_generate_voice(
169
- self,
170
- voice_identifier: Union[int, str],
171
- data: Dict[str, Any],
172
- files: Optional[Dict[str, Any]],
173
- wait_for_completion: bool,
174
- timeout: int
175
- ) -> Union[Job, Dict[str, Any]]:
176
- """Async version of unified voice generation"""
177
- response = await self.client.request(
178
- "POST",
179
- f"/api/v1/voice/voices/{voice_identifier}/generate",
180
- data=data,
181
- files=files
182
- )
183
-
184
- job = Job.from_dict(response)
185
-
186
- if wait_for_completion:
187
- job = await self._async_wait_for_completion(job.id, timeout)
188
- return job.result if job.result else job
189
-
190
- return job
191
-
192
- def create_voice_profile(
42
+ return self._async_get_voice(voice_id)
43
+ return self.client.request("GET", f"/api/v1/voice/voices/{voice_id}/status")
44
+
45
+ async def _async_get_voice(self, voice_id: Union[int, str]) -> Dict[str, Any]:
46
+ return await self.client.request("GET", f"/api/v1/voice/voices/{voice_id}/status")
47
+
48
+ def create_voice(
193
49
  self,
194
50
  name: str,
195
- voice_file: str,
51
+ audio_file: str,
196
52
  description: Optional[str] = None,
197
- is_public: bool = False,
198
- wait_for_completion: bool = False,
199
- timeout: int = 600
200
- ) -> Union[Job, VoiceProfile]:
201
- """
202
- Create a reusable voice profile
203
-
204
- Args:
205
- name: Name for the voice profile
206
- voice_file: Path to audio file containing voice sample
207
- description: Optional description
208
- is_public: Whether to make the voice profile public
209
- wait_for_completion: Whether to wait for processing completion
210
- timeout: Maximum time to wait if wait_for_completion=True
211
-
212
- Returns:
213
- Job object if wait_for_completion=False, otherwise VoiceProfile
214
- """
215
- # Validate inputs
216
- if not name or len(name.strip()) < 1:
217
- raise ValidationError("Voice profile name cannot be empty")
218
- if len(name) > 100:
219
- raise ValidationError("Voice profile name too long (max 100 characters)")
220
-
221
- # Prepare file upload
222
- files = self._prepare_file_upload(voice_file, "file")
223
-
224
- # Prepare form data
225
- data = {
226
- "name": name.strip(),
227
- "is_public": is_public
228
- }
53
+ ) -> Dict[str, Any]:
54
+ """Create a new voice clone from an audio file."""
55
+ files = self._prepare_file_upload(audio_file, "file")
56
+ data = {"name": name}
229
57
  if description:
230
- data["description"] = description.strip()
231
-
232
- # Make request
58
+ data["description"] = description
59
+
233
60
  if self.async_mode:
234
- return self._async_create_voice_profile(files, data, wait_for_completion, timeout)
235
- else:
236
- response = self.client.request(
237
- "POST",
238
- "/api/v1/voice/voice-profiles",
239
- data=data,
240
- files=files
241
- )
242
-
243
- if wait_for_completion:
244
- voice_id = response["id"]
245
- # Poll for completion
246
- import time
247
- start_time = time.time()
248
- while time.time() - start_time < timeout:
249
- voice_data = self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
250
- if voice_data["status"] == "completed":
251
- return VoiceProfile.from_dict(voice_data)
252
- elif voice_data["status"] == "failed":
253
- raise ValidationError(f"Voice profile creation failed: {voice_data.get('error_message')}")
254
- time.sleep(5)
255
- raise ValidationError("Voice profile creation timed out")
256
- else:
257
- return VoiceProfile.from_dict(response)
258
-
259
- async def _async_create_voice_profile(
260
- self,
261
- files: Dict[str, Any],
262
- data: Dict[str, Any],
263
- wait_for_completion: bool,
264
- timeout: int
265
- ) -> Union[Job, VoiceProfile]:
266
- """Async version of create_voice_profile"""
267
- import asyncio
268
-
269
- response = await self.client.request(
270
- "POST",
271
- "/api/v1/voice/voice-profiles",
272
- data=data,
273
- files=files
274
- )
275
-
276
- if wait_for_completion:
277
- voice_id = response["id"]
278
- # Poll for completion
279
- start_time = time.time()
280
- while time.time() - start_time < timeout:
281
- voice_data = await self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
282
- if voice_data["status"] == "completed":
283
- return VoiceProfile.from_dict(voice_data)
284
- elif voice_data["status"] == "failed":
285
- raise ValidationError(f"Voice profile creation failed: {voice_data.get('error_message')}")
286
- await asyncio.sleep(5)
287
- raise ValidationError("Voice profile creation timed out")
288
- else:
289
- return VoiceProfile.from_dict(response)
290
-
61
+ return self._async_create_voice(data, files)
62
+ return self.client.request("POST", "/api/v1/voice/voice-profiles", data=data, files=files)
63
+
64
+ async def _async_create_voice(self, data: Dict, files: Dict) -> Dict[str, Any]:
65
+ return await self.client.request("POST", "/api/v1/voice/voice-profiles", data=data, files=files)
66
+
67
+ def delete_voice(self, voice_id: Union[int, str]) -> Dict[str, str]:
68
+ """Delete a voice by ID or UUID."""
69
+ if self.async_mode:
70
+ return self._async_delete_voice(voice_id)
71
+ return self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")
72
+
73
+ async def _async_delete_voice(self, voice_id: Union[int, str]) -> Dict[str, str]:
74
+ return await self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")
75
+
291
76
  def generate_speech(
292
77
  self,
293
78
  voice_id: Union[int, str],
294
79
  text: str,
295
- language: Optional[str] = None,
296
80
  speed: float = 1.0,
81
+ language: str = "en",
297
82
  audio_format: str = "mp3",
298
- generation_params: Optional[Dict[str, Any]] = None,
299
83
  wait_for_completion: bool = False,
300
- timeout: int = 300
301
- ) -> Union[Job, Dict[str, Any]]:
84
+ timeout: int = 300,
85
+ ) -> Dict[str, Any]:
302
86
  """
303
- Generate speech using an existing voice profile (unified endpoint)
304
-
305
- This method now uses the unified voice generation endpoint for consistency.
87
+ Generate speech from text using a voice.
306
88
 
307
89
  Args:
308
- voice_id: ID or UUID of the voice profile
309
- text: Text to generate speech for
310
- language: Target language code
311
- speed: Speech speed (0.25 to 4.0, provider dependent)
312
- audio_format: Output audio format ('mp3', 'wav', 'ogg')
313
- generation_params: Additional generation parameters
314
- wait_for_completion: Whether to wait for completion
315
- timeout: Maximum time to wait
90
+ voice_id: Voice ID (int) or UUID (str) to use for generation
91
+ text: Text to convert to speech
92
+ speed: Speech speed (0.25 to 4.0, default 1.0)
93
+ language: Language code (default "en")
94
+ audio_format: Output format - mp3, wav, ogg (default "mp3")
95
+ wait_for_completion: If True, poll until job completes
96
+ timeout: Max seconds to wait for completion
316
97
 
317
98
  Returns:
318
- Job object or generation result
99
+ Job info dict with job_id, status, etc.
100
+ If wait_for_completion=True, includes output_url when done.
319
101
  """
320
- # Use unified voice generation method
321
- return self.generate_voice(
322
- text=text,
323
- voice_id=voice_id,
324
- language=language,
325
- speed=speed,
326
- audio_format=audio_format,
327
- generation_params=generation_params,
328
- wait_for_completion=wait_for_completion,
329
- timeout=timeout
102
+ data = {
103
+ "input_text": text,
104
+ "speed": speed,
105
+ "language": language,
106
+ "audio_format": audio_format,
107
+ }
108
+
109
+ if self.async_mode:
110
+ return self._async_generate_speech(voice_id, data, wait_for_completion, timeout)
111
+
112
+ response = self.client.request(
113
+ "POST",
114
+ f"/api/v1/voice/voices/{voice_id}/generate",
115
+ data=data,
330
116
  )
331
-
332
- def clone_voice(
333
- self,
334
- voice_file: str,
335
- text: str,
336
- language: Optional[str] = None,
337
- speed: float = 1.0,
338
- generation_params: Optional[Dict[str, Any]] = None,
339
- wait_for_completion: bool = False,
340
- timeout: int = 300
341
- ) -> Union[Job, Dict[str, Any]]:
342
- """
343
- Clone a voice from an audio file (backward compatibility)
344
-
345
- This method is now a wrapper around the unified generate_voice method.
346
- For new code, consider using generate_voice() directly with voice_file parameter.
347
-
348
- Args:
349
- voice_file: Path to audio file containing voice to clone
350
- text: Text to generate with the cloned voice
351
- language: Target language code (e.g., 'en', 'es')
352
- speed: Speech speed (0.25 to 4.0, provider dependent)
353
- generation_params: Additional generation parameters
354
- wait_for_completion: Whether to wait for job completion
355
- timeout: Maximum time to wait if wait_for_completion=True
356
-
357
- Returns:
358
- Job object if wait_for_completion=False, otherwise job result
359
- """
360
- # Use unified voice generation method
361
- return self.generate_voice(
362
- text=text,
363
- voice_file=voice_file,
364
- language=language,
365
- speed=speed,
366
- audio_format="mp3", # Default format for backward compatibility
367
- generation_params=generation_params,
368
- wait_for_completion=wait_for_completion,
369
- timeout=timeout
117
+
118
+ if wait_for_completion:
119
+ job_id = response.get("job_id") or response.get("id")
120
+ return self._wait_for_job_completion(job_id, timeout)
121
+ return response
122
+
123
+ async def _async_generate_speech(
124
+ self, voice_id: Union[int, str], data: Dict, wait_for_completion: bool, timeout: int
125
+ ) -> Dict[str, Any]:
126
+ response = await self.client.request(
127
+ "POST",
128
+ f"/api/v1/voice/voices/{voice_id}/generate",
129
+ data=data,
370
130
  )
371
-
372
- def list_voice_profiles(
373
- self,
374
- voice_type: Optional[str] = None,
375
- is_public: Optional[bool] = None,
376
- include_public: bool = True,
377
- limit: int = 50
378
- ) -> List[VoiceProfile]:
131
+ if wait_for_completion:
132
+ job_id = response.get("job_id") or response.get("id")
133
+ return await self._async_wait_for_job_completion(job_id, timeout)
134
+ return response
135
+
136
+ def get_job_status(self, job_id: int) -> Dict[str, Any]:
379
137
  """
380
- List available voice profiles
138
+ Get TTS job status.
381
139
 
382
140
  Args:
383
- voice_type: Filter by voice type ('custom', 'standard')
384
- is_public: Filter by public status
385
- include_public: Include public voices
386
- limit: Maximum number of results
141
+ job_id: The job ID returned from generate_speech
387
142
 
388
143
  Returns:
389
- List of voice profiles
144
+ Job status dict with status, progress, output_url (when completed), etc.
390
145
  """
391
- params = {
392
- "limit": limit,
393
- "include_public": include_public
394
- }
395
- if voice_type:
396
- params["voice_type"] = voice_type
397
- if is_public is not None:
398
- params["is_public"] = is_public
399
-
400
146
  if self.async_mode:
401
- return self._async_list_voice_profiles(params)
402
- else:
403
- response = self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
404
- return [VoiceProfile.from_dict(voice_data) for voice_data in response]
405
-
406
- async def _async_list_voice_profiles(self, params: Dict[str, Any]) -> List[VoiceProfile]:
407
- """Async version of list_voice_profiles"""
408
- response = await self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
409
- return [VoiceProfile.from_dict(voice_data) for voice_data in response]
410
-
411
- def get_voice_profile(self, voice_id: Union[int, str]) -> VoiceProfile:
412
- """
413
- Get details of a specific voice profile
147
+ return self._async_get_job_status(job_id)
148
+ return self.client.request("GET", f"/api/v1/voice/tts-jobs/{job_id}/status")
149
+
150
+ async def _async_get_job_status(self, job_id: int) -> Dict[str, Any]:
151
+ return await self.client.request("GET", f"/api/v1/voice/tts-jobs/{job_id}/status")
152
+
153
+ def _wait_for_job_completion(self, job_id: int, timeout: int) -> Dict[str, Any]:
154
+ """Poll job status until completion or timeout."""
155
+ import time
156
+ start_time = time.time()
414
157
 
415
- Args:
416
- voice_id: ID or UUID of the voice profile
158
+ while time.time() - start_time < timeout:
159
+ status = self.get_job_status(job_id)
417
160
 
418
- Returns:
419
- Voice profile details
420
- """
421
- if self.async_mode:
422
- return self._async_get_voice_profile(voice_id)
423
- else:
424
- response = self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
425
- return VoiceProfile.from_dict(response)
161
+ if status.get("status") in ("completed", "COMPLETED"):
162
+ return status
163
+ elif status.get("status") in ("failed", "FAILED", "error", "ERROR"):
164
+ raise Exception(f"Job failed: {status.get('error_message', 'Unknown error')}")
426
165
 
427
- async def _async_get_voice_profile(self, voice_id: Union[int, str]) -> VoiceProfile:
428
- """Async version of get_voice_profile"""
429
- response = await self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
430
- return VoiceProfile.from_dict(response)
166
+ time.sleep(2)
431
167
 
432
- def delete_voice_profile(self, voice_id: Union[int, str]) -> Dict[str, str]:
433
- """
434
- Delete a voice profile
168
+ raise TimeoutError(f"Job {job_id} did not complete within {timeout} seconds")
169
+
170
+ async def _async_wait_for_job_completion(self, job_id: int, timeout: int) -> Dict[str, Any]:
171
+ """Async poll job status until completion or timeout."""
172
+ import asyncio
173
+ import time
174
+ start_time = time.time()
435
175
 
436
- Args:
437
- voice_id: ID or UUID of the voice profile
176
+ while time.time() - start_time < timeout:
177
+ status = await self.get_job_status(job_id)
438
178
 
439
- Returns:
440
- Deletion confirmation
441
- """
442
- if self.async_mode:
443
- return self._async_delete_voice_profile(voice_id)
444
- else:
445
- return self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")
179
+ if status.get("status") in ("completed", "COMPLETED"):
180
+ return status
181
+ elif status.get("status") in ("failed", "FAILED", "error", "ERROR"):
182
+ raise Exception(f"Job failed: {status.get('error_message', 'Unknown error')}")
446
183
 
447
- async def _async_delete_voice_profile(self, voice_id: Union[int, str]) -> Dict[str, str]:
448
- """Async version of delete_voice_profile"""
449
- return await self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")
450
-
451
- def get_job_status(self, job_id: int) -> Job:
452
- """
453
- Get status of a voice processing job
184
+ await asyncio.sleep(2)
454
185
 
455
- Args:
456
- job_id: ID of the job
457
-
458
- Returns:
459
- Job status and details
460
- """
461
- if self.async_mode:
462
- return self._async_get_job_status(job_id)
463
- else:
464
- response = self.client.request("GET", f"/api/v1/voice/clone/{job_id}/status")
465
- return Job.from_dict(response)
466
-
467
- async def _async_get_job_status(self, job_id: int) -> Job:
468
- """Async version of get_job_status"""
469
- response = await self.client.request("GET", f"/api/v1/voice/clone/{job_id}/status")
470
- return Job.from_dict(response)
186
+ raise TimeoutError(f"Job {job_id} did not complete within {timeout} seconds")
187
+