audiopod 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
audiopod/__init__.py CHANGED
@@ -47,7 +47,7 @@ from .models import (
47
47
  TranslationResult
48
48
  )
49
49
 
50
- __version__ = "1.1.1"
50
+ __version__ = "1.2.0"
51
51
  __author__ = "AudioPod AI"
52
52
  __email__ = "support@audiopod.ai"
53
53
  __license__ = "MIT"
@@ -2,6 +2,7 @@
2
2
  Voice Service - Voice cloning and TTS operations
3
3
  """
4
4
 
5
+ import time
5
6
  from typing import List, Optional, Dict, Any, Union
6
7
  from pathlib import Path
7
8
 
@@ -13,54 +14,103 @@ from ..exceptions import ValidationError
13
14
  class VoiceService(BaseService):
14
15
  """Service for voice cloning and text-to-speech operations"""
15
16
 
16
- def clone_voice(
17
+ def generate_voice(
17
18
  self,
18
- voice_file: str,
19
19
  text: str,
20
+ voice_file: Optional[str] = None,
21
+ voice_id: Optional[Union[int, str]] = None,
20
22
  language: Optional[str] = None,
21
23
  speed: float = 1.0,
24
+ audio_format: str = "mp3",
25
+ generation_params: Optional[Dict[str, Any]] = None,
22
26
  wait_for_completion: bool = False,
23
27
  timeout: int = 300
24
28
  ) -> Union[Job, Dict[str, Any]]:
25
29
  """
26
- Clone a voice from an audio file
30
+ Generate speech using either a voice file (for cloning) or existing voice profile
31
+
32
+ This unified method handles both voice cloning and text-to-speech generation:
33
+ - For voice cloning: Provide voice_file parameter
34
+ - For TTS with existing voice: Provide voice_id parameter
27
35
 
28
36
  Args:
29
- voice_file: Path to audio file containing voice to clone
30
- text: Text to generate with the cloned voice
37
+ text: Text to generate speech for
38
+ voice_file: Path to audio file for voice cloning (mutually exclusive with voice_id)
39
+ voice_id: ID/UUID of existing voice profile (mutually exclusive with voice_file)
31
40
  language: Target language code (e.g., 'en', 'es')
32
- speed: Speech speed (0.5 to 2.0)
41
+ speed: Speech speed (0.25 to 4.0, provider dependent)
42
+ audio_format: Output audio format ('mp3', 'wav', 'ogg')
43
+ generation_params: Provider-specific parameters (e.g. temperature) merged into the request form data; keys here override same-named fields such as speed
33
44
  wait_for_completion: Whether to wait for job completion
34
45
  timeout: Maximum time to wait if wait_for_completion=True
35
46
 
36
47
  Returns:
37
48
  Job object if wait_for_completion=False, otherwise job result
49
+
50
+ Raises:
51
+ ValidationError: If both voice_file and voice_id are provided, or if neither is
38
52
  """
39
53
  # Validate inputs
54
+ if not voice_file and not voice_id:
55
+ raise ValidationError("Either voice_file (for cloning) or voice_id (for TTS) must be provided")
56
+ if voice_file and voice_id:
57
+ raise ValidationError("Provide either voice_file or voice_id, not both")
58
+
40
59
  text = self._validate_text_input(text)
41
60
  if language:
42
61
  language = self._validate_language_code(language)
43
- if not 0.5 <= speed <= 2.0:
44
- raise ValidationError("Speed must be between 0.5 and 2.0")
62
+ if not 0.25 <= speed <= 4.0:
63
+ raise ValidationError("Speed must be between 0.25 and 4.0")
64
+ if audio_format not in ["mp3", "wav", "ogg"]:
65
+ raise ValidationError("Audio format must be 'mp3', 'wav', or 'ogg'")
45
66
 
46
- # Prepare file upload
67
+ # For voice cloning, we need to create a temporary voice first, then generate
68
+ if voice_file:
69
+ return self._generate_with_voice_file(
70
+ voice_file, text, language, speed, audio_format,
71
+ generation_params, wait_for_completion, timeout
72
+ )
73
+ else:
74
+ # Use existing voice profile with unified endpoint
75
+ return self._generate_with_voice_id(
76
+ voice_id, text, language, speed, audio_format,
77
+ generation_params, wait_for_completion, timeout
78
+ )
79
+
80
+ def _generate_with_voice_file(
81
+ self,
82
+ voice_file: str,
83
+ text: str,
84
+ language: Optional[str],
85
+ speed: float,
86
+ audio_format: str,
87
+ generation_params: Optional[Dict[str, Any]],
88
+ wait_for_completion: bool,
89
+ timeout: int
90
+ ) -> Union[Job, Dict[str, Any]]:
91
+ """Generate speech with voice cloning using unified endpoint"""
92
+ # For voice cloning, we use the "clone" identifier with the unified endpoint
47
93
  files = self._prepare_file_upload(voice_file, "file")
48
94
 
49
- # Prepare form data
95
+ # Prepare form data for unified endpoint
50
96
  data = {
51
97
  "input_text": text,
52
- "speed": speed
98
+ "speed": speed,
99
+ "audio_format": audio_format
53
100
  }
54
101
  if language:
55
- data["target_language"] = language
102
+ data["language"] = language
103
+ if generation_params:
104
+ # Add generation parameters
105
+ data.update(generation_params)
56
106
 
57
- # Make request
58
107
  if self.async_mode:
59
- return self._async_clone_voice(files, data, wait_for_completion, timeout)
108
+ return self._async_generate_voice("clone", data, files, wait_for_completion, timeout)
60
109
  else:
110
+ # Use unified voice generation endpoint with "clone" identifier
61
111
  response = self.client.request(
62
112
  "POST",
63
- "/api/v1/voice/voice-clone",
113
+ "/api/v1/voice/voices/clone/generate",
64
114
  data=data,
65
115
  files=files
66
116
  )
@@ -73,17 +123,60 @@ class VoiceService(BaseService):
73
123
 
74
124
  return job
75
125
 
76
- async def _async_clone_voice(
126
+ def _generate_with_voice_id(
77
127
  self,
78
- files: Dict[str, Any],
128
+ voice_id: Union[int, str],
129
+ text: str,
130
+ language: Optional[str],
131
+ speed: float,
132
+ audio_format: str,
133
+ generation_params: Optional[Dict[str, Any]],
134
+ wait_for_completion: bool,
135
+ timeout: int
136
+ ) -> Union[Job, Dict[str, Any]]:
137
+ """Generate speech with existing voice profile using unified endpoint"""
138
+ # Prepare form data for unified endpoint
139
+ data = {
140
+ "input_text": text,
141
+ "speed": speed,
142
+ "audio_format": audio_format
143
+ }
144
+ if language:
145
+ data["language"] = language
146
+ if generation_params:
147
+ # Add generation parameters
148
+ data.update(generation_params)
149
+
150
+ if self.async_mode:
151
+ return self._async_generate_voice(voice_id, data, None, wait_for_completion, timeout)
152
+ else:
153
+ # Use unified voice generation endpoint
154
+ response = self.client.request(
155
+ "POST",
156
+ f"/api/v1/voice/voices/{voice_id}/generate",
157
+ data=data
158
+ )
159
+
160
+ job = Job.from_dict(response)
161
+
162
+ if wait_for_completion:
163
+ job = self._wait_for_completion(job.id, timeout)
164
+ return job.result if job.result else job
165
+
166
+ return job
167
+
168
+ async def _async_generate_voice(
169
+ self,
170
+ voice_identifier: Union[int, str],
79
171
  data: Dict[str, Any],
172
+ files: Optional[Dict[str, Any]],
80
173
  wait_for_completion: bool,
81
174
  timeout: int
82
175
  ) -> Union[Job, Dict[str, Any]]:
83
- """Async version of clone_voice"""
176
+ """Async version of unified voice generation"""
84
177
  response = await self.client.request(
85
- "POST",
86
- "/api/v1/voice/voice-clone",
178
+ "POST",
179
+ f"/api/v1/voice/voices/{voice_identifier}/generate",
87
180
  data=data,
88
181
  files=files
89
182
  )
@@ -93,7 +186,7 @@ class VoiceService(BaseService):
93
186
  if wait_for_completion:
94
187
  job = await self._async_wait_for_completion(job.id, timeout)
95
188
  return job.result if job.result else job
96
-
189
+
97
190
  return job
98
191
 
99
192
  def create_voice_profile(
@@ -202,78 +295,79 @@ class VoiceService(BaseService):
202
295
  language: Optional[str] = None,
203
296
  speed: float = 1.0,
204
297
  audio_format: str = "mp3",
298
+ generation_params: Optional[Dict[str, Any]] = None,
205
299
  wait_for_completion: bool = False,
206
300
  timeout: int = 300
207
301
  ) -> Union[Job, Dict[str, Any]]:
208
302
  """
209
- Generate speech using an existing voice profile
303
+ Generate speech using an existing voice profile (unified endpoint)
304
+
305
+ This method now uses the unified voice generation endpoint for consistency.
210
306
 
211
307
  Args:
212
308
  voice_id: ID or UUID of the voice profile
213
309
  text: Text to generate speech for
214
310
  language: Target language code
215
- speed: Speech speed (0.5 to 2.0)
216
- audio_format: Output audio format (mp3, wav)
311
+ speed: Speech speed (0.25 to 4.0, provider dependent)
312
+ audio_format: Output audio format ('mp3', 'wav', 'ogg')
313
+ generation_params: Additional generation parameters
217
314
  wait_for_completion: Whether to wait for completion
218
315
  timeout: Maximum time to wait
219
316
 
220
317
  Returns:
221
318
  Job object or generation result
222
319
  """
223
- # Validate inputs
224
- text = self._validate_text_input(text)
225
- if language:
226
- language = self._validate_language_code(language)
227
- if not 0.5 <= speed <= 2.0:
228
- raise ValidationError("Speed must be between 0.5 and 2.0")
229
- if audio_format not in ["mp3", "wav"]:
230
- raise ValidationError("Audio format must be 'mp3' or 'wav'")
231
-
232
- # Prepare form data
233
- data = {
234
- "input_text": text,
235
- "speed": speed,
236
- "audio_format": audio_format
237
- }
238
- if language:
239
- data["language"] = language
240
-
241
- # Make request
242
- endpoint = f"/api/v1/voice/voices/{voice_id}/generate"
243
-
244
- if self.async_mode:
245
- return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)
246
- else:
247
- response = self.client.request("POST", endpoint, data=data)
248
-
249
- if "job_id" in response:
250
- job = Job.from_dict(response)
251
- if wait_for_completion:
252
- job = self._wait_for_completion(job.id, timeout)
253
- return job.result if job.result else job
254
- return job
255
- else:
256
- # Direct response with audio URL
257
- return response
258
-
259
- async def _async_generate_speech(
320
+ # Use unified voice generation method
321
+ return self.generate_voice(
322
+ text=text,
323
+ voice_id=voice_id,
324
+ language=language,
325
+ speed=speed,
326
+ audio_format=audio_format,
327
+ generation_params=generation_params,
328
+ wait_for_completion=wait_for_completion,
329
+ timeout=timeout
330
+ )
331
+
332
+ def clone_voice(
260
333
  self,
261
- endpoint: str,
262
- data: Dict[str, Any],
263
- wait_for_completion: bool,
264
- timeout: int
334
+ voice_file: str,
335
+ text: str,
336
+ language: Optional[str] = None,
337
+ speed: float = 1.0,
338
+ generation_params: Optional[Dict[str, Any]] = None,
339
+ wait_for_completion: bool = False,
340
+ timeout: int = 300
265
341
  ) -> Union[Job, Dict[str, Any]]:
266
- """Async version of generate_speech"""
267
- response = await self.client.request("POST", endpoint, data=data)
342
+ """
343
+ Clone a voice from an audio file (backward compatibility)
268
344
 
269
- if "job_id" in response:
270
- job = Job.from_dict(response)
271
- if wait_for_completion:
272
- job = await self._async_wait_for_completion(job.id, timeout)
273
- return job.result if job.result else job
274
- return job
275
- else:
276
- return response
345
+ This method is now a wrapper around the unified generate_voice method.
346
+ For new code, consider using generate_voice() directly with voice_file parameter.
347
+
348
+ Args:
349
+ voice_file: Path to audio file containing voice to clone
350
+ text: Text to generate with the cloned voice
351
+ language: Target language code (e.g., 'en', 'es')
352
+ speed: Speech speed (0.25 to 4.0, provider dependent)
353
+ generation_params: Additional generation parameters
354
+ wait_for_completion: Whether to wait for job completion
355
+ timeout: Maximum time to wait if wait_for_completion=True
356
+
357
+ Returns:
358
+ Job object if wait_for_completion=False, otherwise job result
359
+ """
360
+ # Use unified voice generation method
361
+ return self.generate_voice(
362
+ text=text,
363
+ voice_file=voice_file,
364
+ language=language,
365
+ speed=speed,
366
+ audio_format="mp3", # Default format for backward compatibility
367
+ generation_params=generation_params,
368
+ wait_for_completion=wait_for_completion,
369
+ timeout=timeout
370
+ )
277
371
 
278
372
  def list_voice_profiles(
279
373
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: audiopod
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: Professional Audio Processing API Client for Python
5
5
  Home-page: https://github.com/audiopod-ai/audiopod-python
6
6
  Author: AudioPod AI
@@ -95,7 +95,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
95
95
 
96
96
  ### Basic Usage
97
97
 
98
- #### Voice Cloning
98
+ #### Voice Generation (Unified TTS & Cloning)
99
99
 
100
100
  ```python
101
101
  import audiopod
@@ -103,15 +103,39 @@ import audiopod
103
103
  # Initialize client
104
104
  client = audiopod.Client()
105
105
 
106
- # Clone a voice and generate speech
107
- job = client.voice.clone_voice(
106
+ # Generate voice using file cloning (unified approach)
107
+ job = client.voice.generate_voice(
108
+ text="Hello! This is voice generation using a cloned voice.",
109
+ voice_file="path/to/voice_sample.wav", # For voice cloning
110
+ language="en",
111
+ audio_format="mp3",
112
+ generation_params={
113
+ "speed": 1.0
114
+ },
115
+ wait_for_completion=True
116
+ )
117
+
118
+ print(f"Generated audio URL: {job.output_url}")
119
+
120
+ # Generate speech with existing voice profile (unified approach)
121
+ speech = client.voice.generate_voice(
122
+ text="Hello from my voice profile!",
123
+ voice_id="voice-profile-id", # For existing voice profiles
124
+ language="en",
125
+ audio_format="mp3",
126
+ generation_params={
127
+ "speed": 1.0
128
+ },
129
+ wait_for_completion=True
130
+ )
131
+
132
+ # Backward compatibility methods (deprecated - use generate_voice instead)
133
+ legacy_clone = client.voice.clone_voice(
108
134
  voice_file="path/to/voice_sample.wav",
109
135
  text="Hello! This is a cloned voice speaking.",
110
136
  language="en",
111
137
  wait_for_completion=True
112
138
  )
113
-
114
- print(f"Generated audio URL: {job['output_url']}")
115
139
  ```
116
140
 
117
141
  #### Music Generation
@@ -197,10 +221,22 @@ voice_profile = client.voice.create_voice_profile(
197
221
  wait_for_completion=True
198
222
  )
199
223
 
200
- # Use the voice profile for speech generation
201
- speech = client.voice.generate_speech(
224
+ # Use the voice profile for speech generation (unified approach - recommended)
225
+ speech = client.voice.generate_voice(
226
+ text="This uses my custom voice profile with the unified method!",
202
227
  voice_id=voice_profile.id,
203
- text="This uses my custom voice profile!",
228
+ language="en",
229
+ audio_format="mp3",
230
+ generation_params={
231
+ "speed": 1.0
232
+ },
233
+ wait_for_completion=True
234
+ )
235
+
236
+ # Legacy method (still works - uses generate_voice internally)
237
+ legacy_speech = client.voice.generate_speech(
238
+ voice_id=voice_profile.id,
239
+ text="This uses the legacy method.",
204
240
  wait_for_completion=True
205
241
  )
206
242
  ```
@@ -343,7 +379,7 @@ client = audiopod.Client(
343
379
 
344
380
  ### Services
345
381
 
346
- - `client.voice`: Voice cloning and TTS operations
382
+ - `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
347
383
  - `client.music`: Music generation and editing
348
384
  - `client.transcription`: Speech-to-text transcription
349
385
  - `client.translation`: Audio/video translation
@@ -352,6 +388,20 @@ client = audiopod.Client(
352
388
  - `client.karaoke`: Karaoke video generation
353
389
  - `client.credits`: Credit management and usage tracking
354
390
 
391
+ #### Voice Service Methods
392
+
393
+ **Recommended (Unified Approach):**
394
+ - `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
395
+
396
+ **Legacy Methods (Backward Compatibility):**
397
+ - `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
398
+ - `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
399
+
400
+ **Voice Management:**
401
+ - `client.voice.create_voice_profile()` - Create reusable voice profiles
402
+ - `client.voice.list_voice_profiles()` - List available voice profiles
403
+ - `client.voice.delete_voice_profile()` - Delete voice profiles
404
+
355
405
  ### Models
356
406
 
357
407
  - `Job`: Base job information and status
@@ -1,4 +1,4 @@
1
- audiopod/__init__.py,sha256=UsB5ET6nUy1Upx8wCiB17mMErdF3RvEHfAr51-pPPGQ,1790
1
+ audiopod/__init__.py,sha256=jQtsXMdmCZArJ1pOWG9huymdE_-WlyW8Rk8q0kcQn-c,1790
2
2
  audiopod/cli.py,sha256=ZYzAQ3UpoYuOEWivMwMneJUf2z8DGGYTx1Nb6yRfdVY,9339
3
3
  audiopod/client.py,sha256=67oPSInSNssJpTR00ZuYSdk9lbx5KiRnDQw8UYKNVsA,11742
4
4
  audiopod/config.py,sha256=fuGtbuES4tXdHwqQqoZa5izCH6nVfFRP06D8eK1Cg10,1683
@@ -15,10 +15,10 @@ audiopod/services/speaker.py,sha256=OPSOwArfrGXVzRgciS13n1QsCJSK1PB-Mz6VgwxuHAA,
15
15
  audiopod/services/stem_extraction.py,sha256=3ibMFKFR25xKHpVs3WGMNriZ88sB5PriFNa_s2Bvon4,6026
16
16
  audiopod/services/transcription.py,sha256=HyH6WpGWZsggYxIvt2dhB6_5UHaigk3XwXsVgarWzcE,7565
17
17
  audiopod/services/translation.py,sha256=oUU82c61CeAt13lzlWx8S-9xEgYlskwX8bLMbQw2Ni8,7396
18
- audiopod/services/voice.py,sha256=_IHv3zU3k184kfijxr1QRBenrIpmhhPOBS96DddZ8yw,13456
19
- audiopod-1.1.1.dist-info/licenses/LICENSE,sha256=hqEjnOaGNbnLSBxbtbC7WQVREU2vQI8FmwecCiZlMfA,1068
20
- audiopod-1.1.1.dist-info/METADATA,sha256=UG_csntfzscrZTjy-2v-lCTlS7-VSEM6WEwUw26hSYc,11217
21
- audiopod-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
- audiopod-1.1.1.dist-info/entry_points.txt,sha256=uLcNDzXuOXnJAz9j91TDGayVjjZ7-ZiHBGDydqNUErU,47
23
- audiopod-1.1.1.dist-info/top_level.txt,sha256=M6yyOFFNpLdH4i1AMRqJZLRIgfpg1NvrQVmnPd8A6N8,9
24
- audiopod-1.1.1.dist-info/RECORD,,
18
+ audiopod/services/voice.py,sha256=t0-4yjVrzWXJorfQCGbBSNRGE2wZfjoRQ76elJu1BvU,17748
19
+ audiopod-1.2.0.dist-info/licenses/LICENSE,sha256=hqEjnOaGNbnLSBxbtbC7WQVREU2vQI8FmwecCiZlMfA,1068
20
+ audiopod-1.2.0.dist-info/METADATA,sha256=mLpXkYWYxvd3k3LrDtH6Cr68C7Xk7JmpoBL4vwYPiOU,13048
21
+ audiopod-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
+ audiopod-1.2.0.dist-info/entry_points.txt,sha256=uLcNDzXuOXnJAz9j91TDGayVjjZ7-ZiHBGDydqNUErU,47
23
+ audiopod-1.2.0.dist-info/top_level.txt,sha256=M6yyOFFNpLdH4i1AMRqJZLRIgfpg1NvrQVmnPd8A6N8,9
24
+ audiopod-1.2.0.dist-info/RECORD,,