audiopod-1.1.0-py3-none-any.whl → audiopod-1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
audiopod/__init__.py CHANGED
@@ -47,7 +47,7 @@ from .models import (
      TranslationResult
  )

- __version__ = "1.1.0"
+ __version__ = "1.2.0"
  __author__ = "AudioPod AI"
  __email__ = "support@audiopod.ai"
  __license__ = "MIT"
audiopod/models.py CHANGED
@@ -151,13 +151,18 @@ class MusicGenerationResult:

  @dataclass
  class TranslationResult:
-     """Translation job result"""
+     """Speech translation job result"""
      job: Job
      source_language: Optional[str] = None
      target_language: Optional[str] = None
-     audio_output_url: Optional[str] = None
-     video_output_url: Optional[str] = None
+     display_name: Optional[str] = None
+     audio_output_path: Optional[str] = None
+     video_output_path: Optional[str] = None
      transcript_path: Optional[str] = None
+     translated_audio_url: Optional[str] = None
+     video_output_url: Optional[str] = None
+     transcript_urls: Optional[Dict[str, str]] = None
+     is_video: bool = False

      @classmethod
      def from_dict(cls, data: Dict[str, Any]) -> 'TranslationResult':
@@ -166,10 +171,20 @@ class TranslationResult:
              job=Job.from_dict(data),
              source_language=data.get('source_language'),
              target_language=data.get('target_language'),
-             audio_output_url=data.get('audio_output_path'),
-             video_output_url=data.get('video_output_path'),
-             transcript_path=data.get('transcript_path')
+             display_name=data.get('display_name'),
+             audio_output_path=data.get('audio_output_path'),
+             video_output_path=data.get('video_output_path'),
+             transcript_path=data.get('transcript_path'),
+             translated_audio_url=data.get('translated_audio_url'),
+             video_output_url=data.get('video_output_url'),
+             transcript_urls=data.get('transcript_urls'),
+             is_video=data.get('is_video', False)
          )
+
+     @property
+     def audio_output_url(self) -> Optional[str]:
+         """Backward compatibility property - returns translated_audio_url"""
+         return self.translated_audio_url


  @dataclass
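The reworked `TranslationResult` exposes the new `translated_audio_url`, `video_output_url`, `transcript_urls`, and `is_video` fields while keeping the old `audio_output_url` attribute working as a read-only property. A minimal sketch of that backward-compatible behaviour; the sample payload below is illustrative, not a real API response:

```python
from audiopod.models import TranslationResult

# Illustrative payload shape (assumed); real data comes from the translation endpoints
payload = {
    "id": 1,
    "status": "completed",
    "source_language": "en",
    "target_language": "es",
    "translated_audio_url": "https://example.com/translated.mp3",
    "is_video": False,
}

result = TranslationResult.from_dict(payload)
print(result.translated_audio_url)  # new field
print(result.audio_output_url)      # legacy property, forwards to translated_audio_url
```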
audiopod/services/translation.py CHANGED
@@ -1,42 +1,59 @@
  """
- Translation Service - Audio/video translation operations
+ Translation Service - Speech-to-speech translation operations
  """

  from typing import Optional, Union
  from .base import BaseService
  from ..models import Job, TranslationResult
+ from ..exceptions import ValidationError


  class TranslationService(BaseService):
-     """Service for audio and video translation operations"""
+     """Service for speech-to-speech translation operations"""

      def translate_audio(
          self,
-         audio_file: str,
-         target_language: str,
+         audio_file: Optional[str] = None,
+         url: Optional[str] = None,
+         target_language: str = "en",
          source_language: Optional[str] = None,
          wait_for_completion: bool = False,
          timeout: int = 900
      ) -> Union[Job, TranslationResult]:
          """
-         Translate audio to another language
+         Translate speech from audio/video file to another language while preserving voice characteristics

          Args:
-             audio_file: Path to audio file
-             target_language: Target language code
-             source_language: Source language (auto-detect if None)
+             audio_file: Path to audio/video file (required if no URL)
+             url: Direct media URL (required if no file)
+             target_language: Target language code (ISO 639-1, e.g., 'es' for Spanish)
+             source_language: Source language code (auto-detect if None)
              wait_for_completion: Whether to wait for completion
              timeout: Maximum time to wait

          Returns:
              Job object or translation result
          """
+         if not audio_file and not url:
+             raise ValidationError("Either audio_file or url must be provided")
+
+         if audio_file and url:
+             raise ValidationError("Provide either audio_file or url, not both")
+
          target_language = self._validate_language_code(target_language)
          if source_language:
              source_language = self._validate_language_code(source_language)

-         files = self._prepare_file_upload(audio_file, "file")
+         # Prepare request data
+         files = {}
          data = {"target_language": target_language}
+
+         if audio_file:
+             files = self._prepare_file_upload(audio_file, "file")
+
+         if url:
+             data["url"] = url
+
          if source_language:
              data["source_language"] = source_language

@@ -44,7 +61,10 @@ class TranslationService(BaseService):
              return self._async_translate_audio(files, data, wait_for_completion, timeout)
          else:
              response = self.client.request(
-                 "POST", "/api/v1/translation/translate", data=data, files=files
+                 "POST",
+                 "/api/v1/translation/translate/speech",  # FIXED: Use correct speech-to-speech endpoint
+                 data=data,
+                 files=files if files else None
              )
              job = Job.from_dict(response)

@@ -53,11 +73,35 @@ class TranslationService(BaseService):
              return TranslationResult.from_dict(completed_job.result or completed_job.__dict__)

          return job
+
+     def translate_speech(
+         self,
+         audio_file: Optional[str] = None,
+         url: Optional[str] = None,
+         target_language: str = "en",
+         source_language: Optional[str] = None,
+         wait_for_completion: bool = False,
+         timeout: int = 900
+     ) -> Union[Job, TranslationResult]:
+         """
+         Alias for translate_audio - more descriptive method name for speech translation
+         """
+         return self.translate_audio(
+             audio_file=audio_file,
+             url=url,
+             target_language=target_language,
+             source_language=source_language,
+             wait_for_completion=wait_for_completion,
+             timeout=timeout
+         )

      async def _async_translate_audio(self, files, data, wait_for_completion, timeout):
          """Async version of translate_audio"""
          response = await self.client.request(
-             "POST", "/api/v1/translation/translate", data=data, files=files
+             "POST",
+             "/api/v1/translation/translate/speech",  # FIXED: Use correct speech-to-speech endpoint
+             data=data,
+             files=files if files else None
          )
          job = Job.from_dict(response)

@@ -79,3 +123,74 @@ class TranslationService(BaseService):
          """Async version of get_translation_job"""
          response = await self.client.request("GET", f"/api/v1/translation/translations/{job_id}")
          return TranslationResult.from_dict(response)
+
+     def list_translation_jobs(
+         self,
+         skip: int = 0,
+         limit: int = 50
+     ) -> list:
+         """
+         List translation jobs
+
+         Args:
+             skip: Number of jobs to skip (pagination offset)
+             limit: Maximum number of jobs to return (max 100)
+
+         Returns:
+             List of translation jobs
+         """
+         params = {
+             "skip": skip,
+             "limit": min(limit, 100)  # API max is 100
+         }
+
+         if self.async_mode:
+             return self._async_list_translation_jobs(params)
+         else:
+             response = self.client.request("GET", "/api/v1/translation/translations", params=params)
+             return [TranslationResult.from_dict(job_data) for job_data in response]
+
+     async def _async_list_translation_jobs(self, params: dict) -> list:
+         """Async version of list_translation_jobs"""
+         response = await self.client.request("GET", "/api/v1/translation/translations", params=params)
+         return [TranslationResult.from_dict(job_data) for job_data in response]
+
+     def retry_translation(self, job_id: int) -> Job:
+         """
+         Retry a failed translation job
+
+         Args:
+             job_id: ID of the failed translation job to retry
+
+         Returns:
+             New job object for the retry attempt
+         """
+         if self.async_mode:
+             return self._async_retry_translation(job_id)
+         else:
+             response = self.client.request("POST", f"/api/v1/translation/translations/{job_id}/retry")
+             return Job.from_dict(response)
+
+     async def _async_retry_translation(self, job_id: int) -> Job:
+         """Async version of retry_translation"""
+         response = await self.client.request("POST", f"/api/v1/translation/translations/{job_id}/retry")
+         return Job.from_dict(response)
+
+     def delete_translation_job(self, job_id: int) -> dict:
+         """
+         Delete a translation job
+
+         Args:
+             job_id: ID of the translation job to delete
+
+         Returns:
+             Deletion confirmation
+         """
+         if self.async_mode:
+             return self._async_delete_translation_job(job_id)
+         else:
+             return self.client.request("DELETE", f"/api/v1/translation/translations/{job_id}")
+
+     async def _async_delete_translation_job(self, job_id: int) -> dict:
+         """Async version of delete_translation_job"""
+         return await self.client.request("DELETE", f"/api/v1/translation/translations/{job_id}")
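Beyond the corrected speech-to-speech endpoint, `TranslationService` gains job-management helpers (`list_translation_jobs`, `retry_translation`, `delete_translation_job`). A short usage sketch, assuming a configured synchronous client; the job IDs below are placeholders:

```python
import audiopod

client = audiopod.Client(api_key="ap_your_api_key_here")

# Page through recent translation jobs (the service caps limit at 100)
jobs = client.translation.list_translation_jobs(skip=0, limit=20)
for job in jobs:
    print(job.target_language, job.translated_audio_url)

# Retry a failed job and delete one that is no longer needed (placeholder IDs)
client.translation.retry_translation(job_id=123)
client.translation.delete_translation_job(job_id=456)
```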
audiopod/services/voice.py CHANGED
@@ -2,6 +2,7 @@
  Voice Service - Voice cloning and TTS operations
  """

+ import time
  from typing import List, Optional, Dict, Any, Union
  from pathlib import Path

@@ -13,54 +14,103 @@ from ..exceptions import ValidationError

  class VoiceService(BaseService):
      """Service for voice cloning and text-to-speech operations"""

-     def clone_voice(
+     def generate_voice(
          self,
-         voice_file: str,
          text: str,
+         voice_file: Optional[str] = None,
+         voice_id: Optional[Union[int, str]] = None,
          language: Optional[str] = None,
          speed: float = 1.0,
+         audio_format: str = "mp3",
+         generation_params: Optional[Dict[str, Any]] = None,
          wait_for_completion: bool = False,
          timeout: int = 300
      ) -> Union[Job, Dict[str, Any]]:
          """
-         Clone a voice from an audio file
+         Generate speech using either a voice file (for cloning) or existing voice profile
+
+         This unified method handles both voice cloning and text-to-speech generation:
+         - For voice cloning: Provide voice_file parameter
+         - For TTS with existing voice: Provide voice_id parameter

          Args:
-             voice_file: Path to audio file containing voice to clone
-             text: Text to generate with the cloned voice
+             text: Text to generate speech for
+             voice_file: Path to audio file for voice cloning (mutually exclusive with voice_id)
+             voice_id: ID/UUID of existing voice profile (mutually exclusive with voice_file)
              language: Target language code (e.g., 'en', 'es')
-             speed: Speech speed (0.5 to 2.0)
+             speed: Speech speed (0.25 to 4.0, provider dependent)
+             audio_format: Output audio format ('mp3', 'wav', 'ogg')
+             generation_params: Provider-specific parameters (speed, temperature, etc.)
              wait_for_completion: Whether to wait for job completion
              timeout: Maximum time to wait if wait_for_completion=True

          Returns:
              Job object if wait_for_completion=False, otherwise job result
+
+         Raises:
+             ValidationError: If both or neither voice_file and voice_id are provided
          """
          # Validate inputs
+         if not voice_file and not voice_id:
+             raise ValidationError("Either voice_file (for cloning) or voice_id (for TTS) must be provided")
+         if voice_file and voice_id:
+             raise ValidationError("Provide either voice_file or voice_id, not both")
+
          text = self._validate_text_input(text)
          if language:
              language = self._validate_language_code(language)
-         if not 0.5 <= speed <= 2.0:
-             raise ValidationError("Speed must be between 0.5 and 2.0")
+         if not 0.25 <= speed <= 4.0:
+             raise ValidationError("Speed must be between 0.25 and 4.0")
+         if audio_format not in ["mp3", "wav", "ogg"]:
+             raise ValidationError("Audio format must be 'mp3', 'wav', or 'ogg'")

-         # Prepare file upload
+         # For voice cloning, we need to create a temporary voice first, then generate
+         if voice_file:
+             return self._generate_with_voice_file(
+                 voice_file, text, language, speed, audio_format,
+                 generation_params, wait_for_completion, timeout
+             )
+         else:
+             # Use existing voice profile with unified endpoint
+             return self._generate_with_voice_id(
+                 voice_id, text, language, speed, audio_format,
+                 generation_params, wait_for_completion, timeout
+             )
+
+     def _generate_with_voice_file(
+         self,
+         voice_file: str,
+         text: str,
+         language: Optional[str],
+         speed: float,
+         audio_format: str,
+         generation_params: Optional[Dict[str, Any]],
+         wait_for_completion: bool,
+         timeout: int
+     ) -> Union[Job, Dict[str, Any]]:
+         """Generate speech with voice cloning using unified endpoint"""
+         # For voice cloning, we use the "clone" identifier with the unified endpoint
          files = self._prepare_file_upload(voice_file, "file")

-         # Prepare form data
+         # Prepare form data for unified endpoint
          data = {
              "input_text": text,
-             "speed": speed
+             "speed": speed,
+             "audio_format": audio_format
          }
          if language:
-             data["target_language"] = language
+             data["language"] = language
+         if generation_params:
+             # Add generation parameters
+             data.update(generation_params)

-         # Make request
          if self.async_mode:
-             return self._async_clone_voice(files, data, wait_for_completion, timeout)
+             return self._async_generate_voice("clone", data, files, wait_for_completion, timeout)
          else:
+             # Use unified voice generation endpoint with "clone" identifier
              response = self.client.request(
                  "POST",
-                 "/api/v1/voice/voice-clone",
+                 "/api/v1/voice/voices/clone/generate",
                  data=data,
                  files=files
              )
@@ -73,17 +123,60 @@ class VoiceService(BaseService):

          return job

-     async def _async_clone_voice(
+     def _generate_with_voice_id(
          self,
-         files: Dict[str, Any],
+         voice_id: Union[int, str],
+         text: str,
+         language: Optional[str],
+         speed: float,
+         audio_format: str,
+         generation_params: Optional[Dict[str, Any]],
+         wait_for_completion: bool,
+         timeout: int
+     ) -> Union[Job, Dict[str, Any]]:
+         """Generate speech with existing voice profile using unified endpoint"""
+         # Prepare form data for unified endpoint
+         data = {
+             "input_text": text,
+             "speed": speed,
+             "audio_format": audio_format
+         }
+         if language:
+             data["language"] = language
+         if generation_params:
+             # Add generation parameters
+             data.update(generation_params)
+
+         if self.async_mode:
+             return self._async_generate_voice(voice_id, data, None, wait_for_completion, timeout)
+         else:
+             # Use unified voice generation endpoint
+             response = self.client.request(
+                 "POST",
+                 f"/api/v1/voice/voices/{voice_id}/generate",
+                 data=data
+             )
+
+         job = Job.from_dict(response)
+
+         if wait_for_completion:
+             job = self._wait_for_completion(job.id, timeout)
+             return job.result if job.result else job
+
+         return job
+
+     async def _async_generate_voice(
+         self,
+         voice_identifier: Union[int, str],
          data: Dict[str, Any],
+         files: Optional[Dict[str, Any]],
          wait_for_completion: bool,
          timeout: int
      ) -> Union[Job, Dict[str, Any]]:
-         """Async version of clone_voice"""
+         """Async version of unified voice generation"""
          response = await self.client.request(
-             "POST",
-             "/api/v1/voice/voice-clone",
+             "POST",
+             f"/api/v1/voice/voices/{voice_identifier}/generate",
              data=data,
              files=files
          )
@@ -93,7 +186,7 @@ class VoiceService(BaseService):
          if wait_for_completion:
              job = await self._async_wait_for_completion(job.id, timeout)
              return job.result if job.result else job
-
+
          return job

      def create_voice_profile(
@@ -202,78 +295,79 @@ class VoiceService(BaseService):
          language: Optional[str] = None,
          speed: float = 1.0,
          audio_format: str = "mp3",
+         generation_params: Optional[Dict[str, Any]] = None,
          wait_for_completion: bool = False,
          timeout: int = 300
      ) -> Union[Job, Dict[str, Any]]:
          """
-         Generate speech using an existing voice profile
+         Generate speech using an existing voice profile (unified endpoint)
+
+         This method now uses the unified voice generation endpoint for consistency.

          Args:
              voice_id: ID or UUID of the voice profile
              text: Text to generate speech for
              language: Target language code
-             speed: Speech speed (0.5 to 2.0)
-             audio_format: Output audio format (mp3, wav)
+             speed: Speech speed (0.25 to 4.0, provider dependent)
+             audio_format: Output audio format ('mp3', 'wav', 'ogg')
+             generation_params: Additional generation parameters
              wait_for_completion: Whether to wait for completion
              timeout: Maximum time to wait

          Returns:
              Job object or generation result
          """
-         # Validate inputs
-         text = self._validate_text_input(text)
-         if language:
-             language = self._validate_language_code(language)
-         if not 0.5 <= speed <= 2.0:
-             raise ValidationError("Speed must be between 0.5 and 2.0")
-         if audio_format not in ["mp3", "wav"]:
-             raise ValidationError("Audio format must be 'mp3' or 'wav'")
-
-         # Prepare form data
-         data = {
-             "input_text": text,
-             "speed": speed,
-             "audio_format": audio_format
-         }
-         if language:
-             data["language"] = language
-
-         # Make request
-         endpoint = f"/api/v1/voice/voices/{voice_id}/generate"
-
-         if self.async_mode:
-             return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)
-         else:
-             response = self.client.request("POST", endpoint, data=data)
-
-             if "job_id" in response:
-                 job = Job.from_dict(response)
-                 if wait_for_completion:
-                     job = self._wait_for_completion(job.id, timeout)
-                     return job.result if job.result else job
-                 return job
-             else:
-                 # Direct response with audio URL
-                 return response
-
-     async def _async_generate_speech(
+         # Use unified voice generation method
+         return self.generate_voice(
+             text=text,
+             voice_id=voice_id,
+             language=language,
+             speed=speed,
+             audio_format=audio_format,
+             generation_params=generation_params,
+             wait_for_completion=wait_for_completion,
+             timeout=timeout
+         )
+
+     def clone_voice(
          self,
-         endpoint: str,
-         data: Dict[str, Any],
-         wait_for_completion: bool,
-         timeout: int
+         voice_file: str,
+         text: str,
+         language: Optional[str] = None,
+         speed: float = 1.0,
+         generation_params: Optional[Dict[str, Any]] = None,
+         wait_for_completion: bool = False,
+         timeout: int = 300
      ) -> Union[Job, Dict[str, Any]]:
-         """Async version of generate_speech"""
-         response = await self.client.request("POST", endpoint, data=data)
+         """
+         Clone a voice from an audio file (backward compatibility)

-         if "job_id" in response:
-             job = Job.from_dict(response)
-             if wait_for_completion:
-                 job = await self._async_wait_for_completion(job.id, timeout)
-                 return job.result if job.result else job
-             return job
-         else:
-             return response
+         This method is now a wrapper around the unified generate_voice method.
+         For new code, consider using generate_voice() directly with voice_file parameter.
+
+         Args:
+             voice_file: Path to audio file containing voice to clone
+             text: Text to generate with the cloned voice
+             language: Target language code (e.g., 'en', 'es')
+             speed: Speech speed (0.25 to 4.0, provider dependent)
+             generation_params: Additional generation parameters
+             wait_for_completion: Whether to wait for job completion
+             timeout: Maximum time to wait if wait_for_completion=True
+
+         Returns:
+             Job object if wait_for_completion=False, otherwise job result
+         """
+         # Use unified voice generation method
+         return self.generate_voice(
+             text=text,
+             voice_file=voice_file,
+             language=language,
+             speed=speed,
+             audio_format="mp3",  # Default format for backward compatibility
+             generation_params=generation_params,
+             wait_for_completion=wait_for_completion,
+             timeout=timeout
+         )

      def list_voice_profiles(
          self,
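`VoiceService` now routes both cloning and profile-based TTS through `generate_voice()`, which requires exactly one of `voice_file` or `voice_id`. A small sketch of that validation and of the cloning path, assuming a configured client and a local sample file:

```python
import audiopod
from audiopod.exceptions import ValidationError

client = audiopod.Client()

try:
    # Passing both inputs is rejected before any request is sent
    client.voice.generate_voice(
        text="Hello",
        voice_file="voice_sample.wav",
        voice_id="voice-profile-id",
    )
except ValidationError as exc:
    print(exc)  # "Provide either voice_file or voice_id, not both"

# Cloning path: voice_file only; returns a Job unless wait_for_completion=True
job = client.voice.generate_voice(
    text="Hello from a cloned voice.",
    voice_file="voice_sample.wav",
    audio_format="wav",
)
print(job.id)
```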
audiopod-1.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: audiopod
- Version: 1.1.0
+ Version: 1.2.0
  Summary: Professional Audio Processing API Client for Python
  Home-page: https://github.com/audiopod-ai/audiopod-python
  Author: AudioPod AI
@@ -95,7 +95,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")

  ### Basic Usage

- #### Voice Cloning
+ #### Voice Generation (Unified TTS & Cloning)

  ```python
  import audiopod
@@ -103,15 +103,39 @@ import audiopod
  # Initialize client
  client = audiopod.Client()

- # Clone a voice and generate speech
- job = client.voice.clone_voice(
+ # Generate voice using file cloning (unified approach)
+ job = client.voice.generate_voice(
+     text="Hello! This is voice generation using a cloned voice.",
+     voice_file="path/to/voice_sample.wav",  # For voice cloning
+     language="en",
+     audio_format="mp3",
+     generation_params={
+         "speed": 1.0
+     },
+     wait_for_completion=True
+ )
+
+ print(f"Generated audio URL: {job.output_url}")
+
+ # Generate speech with existing voice profile (unified approach)
+ speech = client.voice.generate_voice(
+     text="Hello from my voice profile!",
+     voice_id="voice-profile-id",  # For existing voice profiles
+     language="en",
+     audio_format="mp3",
+     generation_params={
+         "speed": 1.0
+     },
+     wait_for_completion=True
+ )
+
+ # Backward compatibility methods (deprecated - use generate_voice instead)
+ legacy_clone = client.voice.clone_voice(
      voice_file="path/to/voice_sample.wav",
      text="Hello! This is a cloned voice speaking.",
      language="en",
      wait_for_completion=True
  )
-
- print(f"Generated audio URL: {job['output_url']}")
  ```

  #### Music Generation
@@ -142,17 +166,25 @@ print(f"Transcript: {transcript.transcript}")
  print(f"Detected {len(transcript.segments)} speakers")
  ```

- #### Audio Translation
+ #### Speech-to-Speech Translation

  ```python
- # Translate audio to another language
- translation = client.translation.translate_audio(
+ # Translate speech while preserving voice characteristics
+ translation = client.translation.translate_speech(
      audio_file="path/to/english_audio.wav",
      target_language="es",  # Spanish
+     source_language="en",  # English (optional - auto-detect)
      wait_for_completion=True
  )

- print(f"Translated audio URL: {translation.audio_output_url}")
+ print(f"Translated audio URL: {translation.translated_audio_url}")
+
+ # Or translate from URL
+ url_translation = client.translation.translate_speech(
+     url="https://example.com/audio.mp3",
+     target_language="fr",  # French
+     wait_for_completion=True
+ )
  ```

  ### Async Support
@@ -189,10 +221,22 @@ voice_profile = client.voice.create_voice_profile(
      wait_for_completion=True
  )

- # Use the voice profile for speech generation
- speech = client.voice.generate_speech(
+ # Use the voice profile for speech generation (unified approach - recommended)
+ speech = client.voice.generate_voice(
+     text="This uses my custom voice profile with the unified method!",
      voice_id=voice_profile.id,
-     text="This uses my custom voice profile!",
+     language="en",
+     audio_format="mp3",
+     generation_params={
+         "speed": 1.0
+     },
+     wait_for_completion=True
+ )
+
+ # Legacy method (still works - uses generate_voice internally)
+ legacy_speech = client.voice.generate_speech(
+     voice_id=voice_profile.id,
+     text="This uses the legacy method.",
      wait_for_completion=True
  )
  ```
@@ -335,7 +379,7 @@ client = audiopod.Client(

  ### Services

- - `client.voice`: Voice cloning and TTS operations
+ - `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
  - `client.music`: Music generation and editing
  - `client.transcription`: Speech-to-text transcription
  - `client.translation`: Audio/video translation
@@ -344,6 +388,20 @@ client = audiopod.Client(
  - `client.karaoke`: Karaoke video generation
  - `client.credits`: Credit management and usage tracking

+ #### Voice Service Methods
+
+ **Recommended (Unified Approach):**
+ - `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
+
+ **Legacy Methods (Backward Compatibility):**
+ - `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
+ - `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
+
+ **Voice Management:**
+ - `client.voice.create_voice_profile()` - Create reusable voice profiles
+ - `client.voice.list_voice_profiles()` - List available voice profiles
+ - `client.voice.delete_voice_profile()` - Delete voice profiles
+
  ### Models

  - `Job`: Base job information and status
audiopod-1.2.0.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
- audiopod/__init__.py,sha256=U12jbLmXps3-NP3yXcucGMQbr8b6VCoWAXZSitK1pp4,1790
+ audiopod/__init__.py,sha256=jQtsXMdmCZArJ1pOWG9huymdE_-WlyW8Rk8q0kcQn-c,1790
  audiopod/cli.py,sha256=ZYzAQ3UpoYuOEWivMwMneJUf2z8DGGYTx1Nb6yRfdVY,9339
  audiopod/client.py,sha256=67oPSInSNssJpTR00ZuYSdk9lbx5KiRnDQw8UYKNVsA,11742
  audiopod/config.py,sha256=fuGtbuES4tXdHwqQqoZa5izCH6nVfFRP06D8eK1Cg10,1683
  audiopod/exceptions.py,sha256=c3Ym2tWyRE1kemVkXDaXFcfP3h6AokhKcUcCBImwGes,2386
- audiopod/models.py,sha256=gAfQkufA_hZdYBkniFJ_EXETy9ts-wr-soUEbT7ZKFM,7827
+ audiopod/models.py,sha256=R70iMqKDZfLtTB9FQ7KrFLBi-bFA5-FrS-5eMOtfK1o,8517
  audiopod/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
  audiopod/services/__init__.py,sha256=9Ycl9VVscwwY42joBCSL67v8DrITW2T2QyuaokbpehM,653
  audiopod/services/base.py,sha256=mNbziYy2KsWWZrdHFlTl9pKLvoQUW-ZkkJuVWfCVP74,6731
@@ -14,11 +14,11 @@ audiopod/services/music.py,sha256=hDPjTSj-gAeEWBVB7PRPQrptMHJyQTz8e9p-p7yaRPI,20
  audiopod/services/speaker.py,sha256=OPSOwArfrGXVzRgciS13n1QsCJSK1PB-Mz6VgwxuHAA,1866
  audiopod/services/stem_extraction.py,sha256=3ibMFKFR25xKHpVs3WGMNriZ88sB5PriFNa_s2Bvon4,6026
  audiopod/services/transcription.py,sha256=HyH6WpGWZsggYxIvt2dhB6_5UHaigk3XwXsVgarWzcE,7565
- audiopod/services/translation.py,sha256=Gpxmom-ZSLMXBwSQlOL5PyqxQCwRYNT2IUp2ZD5QhYc,3177
- audiopod/services/voice.py,sha256=_IHv3zU3k184kfijxr1QRBenrIpmhhPOBS96DddZ8yw,13456
- audiopod-1.1.0.dist-info/licenses/LICENSE,sha256=hqEjnOaGNbnLSBxbtbC7WQVREU2vQI8FmwecCiZlMfA,1068
- audiopod-1.1.0.dist-info/METADATA,sha256=ee2Zg4iwkglAgnk3xwNuYg11LFZbKJQeUcxgG8Z_Dw0,10931
- audiopod-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- audiopod-1.1.0.dist-info/entry_points.txt,sha256=uLcNDzXuOXnJAz9j91TDGayVjjZ7-ZiHBGDydqNUErU,47
- audiopod-1.1.0.dist-info/top_level.txt,sha256=M6yyOFFNpLdH4i1AMRqJZLRIgfpg1NvrQVmnPd8A6N8,9
- audiopod-1.1.0.dist-info/RECORD,,
+ audiopod/services/translation.py,sha256=oUU82c61CeAt13lzlWx8S-9xEgYlskwX8bLMbQw2Ni8,7396
+ audiopod/services/voice.py,sha256=t0-4yjVrzWXJorfQCGbBSNRGE2wZfjoRQ76elJu1BvU,17748
+ audiopod-1.2.0.dist-info/licenses/LICENSE,sha256=hqEjnOaGNbnLSBxbtbC7WQVREU2vQI8FmwecCiZlMfA,1068
+ audiopod-1.2.0.dist-info/METADATA,sha256=mLpXkYWYxvd3k3LrDtH6Cr68C7Xk7JmpoBL4vwYPiOU,13048
+ audiopod-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ audiopod-1.2.0.dist-info/entry_points.txt,sha256=uLcNDzXuOXnJAz9j91TDGayVjjZ7-ZiHBGDydqNUErU,47
+ audiopod-1.2.0.dist-info/top_level.txt,sha256=M6yyOFFNpLdH4i1AMRqJZLRIgfpg1NvrQVmnPd8A6N8,9
+ audiopod-1.2.0.dist-info/RECORD,,