audiopod 1.1.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {audiopod-1.1.1 → audiopod-1.2.0}/CHANGELOG.md +72 -0
  2. {audiopod-1.1.1 → audiopod-1.2.0}/PKG-INFO +60 -10
  3. {audiopod-1.1.1 → audiopod-1.2.0}/README.md +59 -9
  4. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/__init__.py +1 -1
  5. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/voice.py +169 -75
  6. {audiopod-1.1.1 → audiopod-1.2.0}/examples/basic_usage.py +78 -13
  7. {audiopod-1.1.1 → audiopod-1.2.0}/pyproject.toml +1 -1
  8. {audiopod-1.1.1 → audiopod-1.2.0}/setup.py +1 -1
  9. {audiopod-1.1.1 → audiopod-1.2.0}/LICENSE +0 -0
  10. {audiopod-1.1.1 → audiopod-1.2.0}/MANIFEST.in +0 -0
  11. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/cli.py +0 -0
  12. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/client.py +0 -0
  13. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/config.py +0 -0
  14. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/exceptions.py +0 -0
  15. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/models.py +0 -0
  16. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/py.typed +0 -0
  17. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/__init__.py +0 -0
  18. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/base.py +0 -0
  19. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/credits.py +0 -0
  20. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/denoiser.py +0 -0
  21. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/karaoke.py +0 -0
  22. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/music.py +0 -0
  23. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/speaker.py +0 -0
  24. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/stem_extraction.py +0 -0
  25. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/transcription.py +0 -0
  26. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/translation.py +0 -0
  27. {audiopod-1.1.1 → audiopod-1.2.0}/audiopod.egg-info/SOURCES.txt +0 -0
  28. {audiopod-1.1.1 → audiopod-1.2.0}/examples/README.md +0 -0
  29. {audiopod-1.1.1 → audiopod-1.2.0}/requirements.txt +0 -0
  30. {audiopod-1.1.1 → audiopod-1.2.0}/setup.cfg +0 -0
  31. {audiopod-1.1.1 → audiopod-1.2.0}/tests/test_end_to_end_integration.py +0 -0
  32. {audiopod-1.1.1 → audiopod-1.2.0}/tests/test_sdk_api_compatibility.py +0 -0
@@ -5,6 +5,78 @@ All notable changes to the AudioPod Python SDK will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.2.0] - 2024-12-15
9
+
10
+ ### 🎤 Unified Voice Generation
11
+
12
+ This release introduces a major architectural improvement with a unified voice generation endpoint, consolidating voice cloning and text-to-speech into a single, consistent API.
13
+
14
+ ### ✨ Added
15
+
16
+ - **Unified Voice Generation Method**: New `generate_voice()` method handles both voice cloning and TTS
17
+ - Single endpoint `/api/v1/voice/voices/{voice_identifier}/generate` for all voice operations
18
+ - Supports both voice file upload (cloning) and voice ID (existing profiles)
19
+ - Enhanced parameter support: `audio_format`, `generation_params`, extended speed range (0.25-4.0)
20
+
21
+ - **Enhanced Voice Generation Parameters**:
22
+ - `audio_format`: Support for 'mp3', 'wav', 'ogg' output formats
23
+ - `generation_params`: Provider-specific parameters (speed, temperature, pitch, etc.)
24
+ - Extended speed range: 0.25x to 4.0x (provider dependent)
25
+
26
+ ### 🔧 Fixed
27
+
28
+ - **Removed Legacy Clone Endpoint**: No longer uses deprecated `/api/v1/voice/voice-clone`
29
+ - **Unified API Architecture**: All voice generation now uses consistent endpoint structure
30
+ - **Improved Error Handling**: Better validation for mutually exclusive parameters
31
+
32
+ ### 🏗️ Improved
33
+
34
+ - **Backward Compatibility**: Existing `clone_voice()` and `generate_speech()` methods continue to work
35
+ - Legacy methods now internally use the unified `generate_voice()` approach
36
+ - No breaking changes for existing code
37
+ - Clear migration path with deprecation warnings in documentation
38
+
39
+ - **Enhanced Documentation**:
40
+ - Updated examples to showcase unified approach
41
+ - Clear distinction between recommended and legacy methods
42
+ - Comprehensive migration guide
43
+
44
+ ### 🚀 Usage Examples
45
+
46
+ #### New Unified Approach (Recommended)
47
+ ```python
48
+ # Voice cloning
49
+ result = client.voice.generate_voice(
50
+ text="Hello world!",
51
+ voice_file="voice.wav", # For cloning
52
+ language="en",
53
+ audio_format="mp3"
54
+ )
55
+
56
+ # TTS with existing voice
57
+ result = client.voice.generate_voice(
58
+ text="Hello world!",
59
+ voice_id="profile-id", # For existing voices
60
+ language="en",
61
+ audio_format="mp3"
62
+ )
63
+ ```
64
+
65
+ #### Backward Compatibility (Legacy methods still work)
66
+ ```python
67
+ # These continue to work unchanged
68
+ result = client.voice.clone_voice(voice_file="voice.wav", text="Hello")
69
+ result = client.voice.generate_speech(voice_id="profile-id", text="Hello")
70
+ ```
71
+
72
+ ### 🔄 Migration Notes
73
+
74
+ - **No Breaking Changes**: All existing code continues to work without modification
75
+ - **Recommended**: Migrate to `generate_voice()` for new development
76
+ - **Performance**: Unified endpoint provides better consistency and reliability
77
+
78
+ ---
79
+
8
80
  ## [1.1.1] - 2024-12-15
9
81
 
10
82
  ### 🔧 Translation Service Fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: audiopod
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: Professional Audio Processing API Client for Python
5
5
  Home-page: https://github.com/audiopod-ai/audiopod-python
6
6
  Author: AudioPod AI
@@ -95,7 +95,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
95
95
 
96
96
  ### Basic Usage
97
97
 
98
- #### Voice Cloning
98
+ #### Voice Generation (Unified TTS & Cloning)
99
99
 
100
100
  ```python
101
101
  import audiopod
@@ -103,15 +103,39 @@ import audiopod
103
103
  # Initialize client
104
104
  client = audiopod.Client()
105
105
 
106
- # Clone a voice and generate speech
107
- job = client.voice.clone_voice(
106
+ # Generate voice using file cloning (unified approach)
107
+ job = client.voice.generate_voice(
108
+ text="Hello! This is voice generation using a cloned voice.",
109
+ voice_file="path/to/voice_sample.wav", # For voice cloning
110
+ language="en",
111
+ audio_format="mp3",
112
+ generation_params={
113
+ "speed": 1.0
114
+ },
115
+ wait_for_completion=True
116
+ )
117
+
118
+ print(f"Generated audio URL: {job.output_url}")
119
+
120
+ # Generate speech with existing voice profile (unified approach)
121
+ speech = client.voice.generate_voice(
122
+ text="Hello from my voice profile!",
123
+ voice_id="voice-profile-id", # For existing voice profiles
124
+ language="en",
125
+ audio_format="mp3",
126
+ generation_params={
127
+ "speed": 1.0
128
+ },
129
+ wait_for_completion=True
130
+ )
131
+
132
+ # Backward compatibility methods (deprecated - use generate_voice instead)
133
+ legacy_clone = client.voice.clone_voice(
108
134
  voice_file="path/to/voice_sample.wav",
109
135
  text="Hello! This is a cloned voice speaking.",
110
136
  language="en",
111
137
  wait_for_completion=True
112
138
  )
113
-
114
- print(f"Generated audio URL: {job['output_url']}")
115
139
  ```
116
140
 
117
141
  #### Music Generation
@@ -197,10 +221,22 @@ voice_profile = client.voice.create_voice_profile(
197
221
  wait_for_completion=True
198
222
  )
199
223
 
200
- # Use the voice profile for speech generation
201
- speech = client.voice.generate_speech(
224
+ # Use the voice profile for speech generation (unified approach - recommended)
225
+ speech = client.voice.generate_voice(
226
+ text="This uses my custom voice profile with the unified method!",
202
227
  voice_id=voice_profile.id,
203
- text="This uses my custom voice profile!",
228
+ language="en",
229
+ audio_format="mp3",
230
+ generation_params={
231
+ "speed": 1.0
232
+ },
233
+ wait_for_completion=True
234
+ )
235
+
236
+ # Legacy method (still works - uses generate_voice internally)
237
+ legacy_speech = client.voice.generate_speech(
238
+ voice_id=voice_profile.id,
239
+ text="This uses the legacy method.",
204
240
  wait_for_completion=True
205
241
  )
206
242
  ```
@@ -343,7 +379,7 @@ client = audiopod.Client(
343
379
 
344
380
  ### Services
345
381
 
346
- - `client.voice`: Voice cloning and TTS operations
382
+ - `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
347
383
  - `client.music`: Music generation and editing
348
384
  - `client.transcription`: Speech-to-text transcription
349
385
  - `client.translation`: Audio/video translation
@@ -352,6 +388,20 @@ client = audiopod.Client(
352
388
  - `client.karaoke`: Karaoke video generation
353
389
  - `client.credits`: Credit management and usage tracking
354
390
 
391
+ #### Voice Service Methods
392
+
393
+ **Recommended (Unified Approach):**
394
+ - `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
395
+
396
+ **Legacy Methods (Backward Compatibility):**
397
+ - `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
398
+ - `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
399
+
400
+ **Voice Management:**
401
+ - `client.voice.create_voice_profile()` - Create reusable voice profiles
402
+ - `client.voice.list_voice_profiles()` - List available voice profiles
403
+ - `client.voice.delete_voice_profile()` - Delete voice profiles
404
+
355
405
  ### Models
356
406
 
357
407
  - `Job`: Base job information and status
@@ -43,7 +43,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
43
43
 
44
44
  ### Basic Usage
45
45
 
46
- #### Voice Cloning
46
+ #### Voice Generation (Unified TTS & Cloning)
47
47
 
48
48
  ```python
49
49
  import audiopod
@@ -51,15 +51,39 @@ import audiopod
51
51
  # Initialize client
52
52
  client = audiopod.Client()
53
53
 
54
- # Clone a voice and generate speech
55
- job = client.voice.clone_voice(
54
+ # Generate voice using file cloning (unified approach)
55
+ job = client.voice.generate_voice(
56
+ text="Hello! This is voice generation using a cloned voice.",
57
+ voice_file="path/to/voice_sample.wav", # For voice cloning
58
+ language="en",
59
+ audio_format="mp3",
60
+ generation_params={
61
+ "speed": 1.0
62
+ },
63
+ wait_for_completion=True
64
+ )
65
+
66
+ print(f"Generated audio URL: {job.output_url}")
67
+
68
+ # Generate speech with existing voice profile (unified approach)
69
+ speech = client.voice.generate_voice(
70
+ text="Hello from my voice profile!",
71
+ voice_id="voice-profile-id", # For existing voice profiles
72
+ language="en",
73
+ audio_format="mp3",
74
+ generation_params={
75
+ "speed": 1.0
76
+ },
77
+ wait_for_completion=True
78
+ )
79
+
80
+ # Backward compatibility methods (deprecated - use generate_voice instead)
81
+ legacy_clone = client.voice.clone_voice(
56
82
  voice_file="path/to/voice_sample.wav",
57
83
  text="Hello! This is a cloned voice speaking.",
58
84
  language="en",
59
85
  wait_for_completion=True
60
86
  )
61
-
62
- print(f"Generated audio URL: {job['output_url']}")
63
87
  ```
64
88
 
65
89
  #### Music Generation
@@ -145,10 +169,22 @@ voice_profile = client.voice.create_voice_profile(
145
169
  wait_for_completion=True
146
170
  )
147
171
 
148
- # Use the voice profile for speech generation
149
- speech = client.voice.generate_speech(
172
+ # Use the voice profile for speech generation (unified approach - recommended)
173
+ speech = client.voice.generate_voice(
174
+ text="This uses my custom voice profile with the unified method!",
150
175
  voice_id=voice_profile.id,
151
- text="This uses my custom voice profile!",
176
+ language="en",
177
+ audio_format="mp3",
178
+ generation_params={
179
+ "speed": 1.0
180
+ },
181
+ wait_for_completion=True
182
+ )
183
+
184
+ # Legacy method (still works - uses generate_voice internally)
185
+ legacy_speech = client.voice.generate_speech(
186
+ voice_id=voice_profile.id,
187
+ text="This uses the legacy method.",
152
188
  wait_for_completion=True
153
189
  )
154
190
  ```
@@ -291,7 +327,7 @@ client = audiopod.Client(
291
327
 
292
328
  ### Services
293
329
 
294
- - `client.voice`: Voice cloning and TTS operations
330
+ - `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
295
331
  - `client.music`: Music generation and editing
296
332
  - `client.transcription`: Speech-to-text transcription
297
333
  - `client.translation`: Audio/video translation
@@ -300,6 +336,20 @@ client = audiopod.Client(
300
336
  - `client.karaoke`: Karaoke video generation
301
337
  - `client.credits`: Credit management and usage tracking
302
338
 
339
+ #### Voice Service Methods
340
+
341
+ **Recommended (Unified Approach):**
342
+ - `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
343
+
344
+ **Legacy Methods (Backward Compatibility):**
345
+ - `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
346
+ - `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
347
+
348
+ **Voice Management:**
349
+ - `client.voice.create_voice_profile()` - Create reusable voice profiles
350
+ - `client.voice.list_voice_profiles()` - List available voice profiles
351
+ - `client.voice.delete_voice_profile()` - Delete voice profiles
352
+
303
353
  ### Models
304
354
 
305
355
  - `Job`: Base job information and status
@@ -47,7 +47,7 @@ from .models import (
47
47
  TranslationResult
48
48
  )
49
49
 
50
- __version__ = "1.1.1"
50
+ __version__ = "1.2.0"
51
51
  __author__ = "AudioPod AI"
52
52
  __email__ = "support@audiopod.ai"
53
53
  __license__ = "MIT"
@@ -2,6 +2,7 @@
2
2
  Voice Service - Voice cloning and TTS operations
3
3
  """
4
4
 
5
+ import time
5
6
  from typing import List, Optional, Dict, Any, Union
6
7
  from pathlib import Path
7
8
 
@@ -13,54 +14,103 @@ from ..exceptions import ValidationError
13
14
  class VoiceService(BaseService):
14
15
  """Service for voice cloning and text-to-speech operations"""
15
16
 
16
- def clone_voice(
17
+ def generate_voice(
17
18
  self,
18
- voice_file: str,
19
19
  text: str,
20
+ voice_file: Optional[str] = None,
21
+ voice_id: Optional[Union[int, str]] = None,
20
22
  language: Optional[str] = None,
21
23
  speed: float = 1.0,
24
+ audio_format: str = "mp3",
25
+ generation_params: Optional[Dict[str, Any]] = None,
22
26
  wait_for_completion: bool = False,
23
27
  timeout: int = 300
24
28
  ) -> Union[Job, Dict[str, Any]]:
25
29
  """
26
- Clone a voice from an audio file
30
+ Generate speech using either a voice file (for cloning) or existing voice profile
31
+
32
+ This unified method handles both voice cloning and text-to-speech generation:
33
+ - For voice cloning: Provide voice_file parameter
34
+ - For TTS with existing voice: Provide voice_id parameter
27
35
 
28
36
  Args:
29
- voice_file: Path to audio file containing voice to clone
30
- text: Text to generate with the cloned voice
37
+ text: Text to generate speech for
38
+ voice_file: Path to audio file for voice cloning (mutually exclusive with voice_id)
39
+ voice_id: ID/UUID of existing voice profile (mutually exclusive with voice_file)
31
40
  language: Target language code (e.g., 'en', 'es')
32
- speed: Speech speed (0.5 to 2.0)
41
+ speed: Speech speed (0.25 to 4.0, provider dependent)
42
+ audio_format: Output audio format ('mp3', 'wav', 'ogg')
43
+ generation_params: Provider-specific parameters (speed, temperature, etc.)
33
44
  wait_for_completion: Whether to wait for job completion
34
45
  timeout: Maximum time to wait if wait_for_completion=True
35
46
 
36
47
  Returns:
37
48
  Job object if wait_for_completion=False, otherwise job result
49
+
50
+ Raises:
51
+ ValidationError: If both or neither voice_file and voice_id are provided
38
52
  """
39
53
  # Validate inputs
54
+ if not voice_file and not voice_id:
55
+ raise ValidationError("Either voice_file (for cloning) or voice_id (for TTS) must be provided")
56
+ if voice_file and voice_id:
57
+ raise ValidationError("Provide either voice_file or voice_id, not both")
58
+
40
59
  text = self._validate_text_input(text)
41
60
  if language:
42
61
  language = self._validate_language_code(language)
43
- if not 0.5 <= speed <= 2.0:
44
- raise ValidationError("Speed must be between 0.5 and 2.0")
62
+ if not 0.25 <= speed <= 4.0:
63
+ raise ValidationError("Speed must be between 0.25 and 4.0")
64
+ if audio_format not in ["mp3", "wav", "ogg"]:
65
+ raise ValidationError("Audio format must be 'mp3', 'wav', or 'ogg'")
45
66
 
46
- # Prepare file upload
67
+ # For voice cloning, we need to create a temporary voice first, then generate
68
+ if voice_file:
69
+ return self._generate_with_voice_file(
70
+ voice_file, text, language, speed, audio_format,
71
+ generation_params, wait_for_completion, timeout
72
+ )
73
+ else:
74
+ # Use existing voice profile with unified endpoint
75
+ return self._generate_with_voice_id(
76
+ voice_id, text, language, speed, audio_format,
77
+ generation_params, wait_for_completion, timeout
78
+ )
79
+
80
+ def _generate_with_voice_file(
81
+ self,
82
+ voice_file: str,
83
+ text: str,
84
+ language: Optional[str],
85
+ speed: float,
86
+ audio_format: str,
87
+ generation_params: Optional[Dict[str, Any]],
88
+ wait_for_completion: bool,
89
+ timeout: int
90
+ ) -> Union[Job, Dict[str, Any]]:
91
+ """Generate speech with voice cloning using unified endpoint"""
92
+ # For voice cloning, we use the "clone" identifier with the unified endpoint
47
93
  files = self._prepare_file_upload(voice_file, "file")
48
94
 
49
- # Prepare form data
95
+ # Prepare form data for unified endpoint
50
96
  data = {
51
97
  "input_text": text,
52
- "speed": speed
98
+ "speed": speed,
99
+ "audio_format": audio_format
53
100
  }
54
101
  if language:
55
- data["target_language"] = language
102
+ data["language"] = language
103
+ if generation_params:
104
+ # Add generation parameters
105
+ data.update(generation_params)
56
106
 
57
- # Make request
58
107
  if self.async_mode:
59
- return self._async_clone_voice(files, data, wait_for_completion, timeout)
108
+ return self._async_generate_voice("clone", data, files, wait_for_completion, timeout)
60
109
  else:
110
+ # Use unified voice generation endpoint with "clone" identifier
61
111
  response = self.client.request(
62
112
  "POST",
63
- "/api/v1/voice/voice-clone",
113
+ "/api/v1/voice/voices/clone/generate",
64
114
  data=data,
65
115
  files=files
66
116
  )
@@ -73,17 +123,60 @@ class VoiceService(BaseService):
73
123
 
74
124
  return job
75
125
 
76
- async def _async_clone_voice(
126
+ def _generate_with_voice_id(
77
127
  self,
78
- files: Dict[str, Any],
128
+ voice_id: Union[int, str],
129
+ text: str,
130
+ language: Optional[str],
131
+ speed: float,
132
+ audio_format: str,
133
+ generation_params: Optional[Dict[str, Any]],
134
+ wait_for_completion: bool,
135
+ timeout: int
136
+ ) -> Union[Job, Dict[str, Any]]:
137
+ """Generate speech with existing voice profile using unified endpoint"""
138
+ # Prepare form data for unified endpoint
139
+ data = {
140
+ "input_text": text,
141
+ "speed": speed,
142
+ "audio_format": audio_format
143
+ }
144
+ if language:
145
+ data["language"] = language
146
+ if generation_params:
147
+ # Add generation parameters
148
+ data.update(generation_params)
149
+
150
+ if self.async_mode:
151
+ return self._async_generate_voice(voice_id, data, None, wait_for_completion, timeout)
152
+ else:
153
+ # Use unified voice generation endpoint
154
+ response = self.client.request(
155
+ "POST",
156
+ f"/api/v1/voice/voices/{voice_id}/generate",
157
+ data=data
158
+ )
159
+
160
+ job = Job.from_dict(response)
161
+
162
+ if wait_for_completion:
163
+ job = self._wait_for_completion(job.id, timeout)
164
+ return job.result if job.result else job
165
+
166
+ return job
167
+
168
+ async def _async_generate_voice(
169
+ self,
170
+ voice_identifier: Union[int, str],
79
171
  data: Dict[str, Any],
172
+ files: Optional[Dict[str, Any]],
80
173
  wait_for_completion: bool,
81
174
  timeout: int
82
175
  ) -> Union[Job, Dict[str, Any]]:
83
- """Async version of clone_voice"""
176
+ """Async version of unified voice generation"""
84
177
  response = await self.client.request(
85
- "POST",
86
- "/api/v1/voice/voice-clone",
178
+ "POST",
179
+ f"/api/v1/voice/voices/{voice_identifier}/generate",
87
180
  data=data,
88
181
  files=files
89
182
  )
@@ -93,7 +186,7 @@ class VoiceService(BaseService):
93
186
  if wait_for_completion:
94
187
  job = await self._async_wait_for_completion(job.id, timeout)
95
188
  return job.result if job.result else job
96
-
189
+
97
190
  return job
98
191
 
99
192
  def create_voice_profile(
@@ -202,78 +295,79 @@ class VoiceService(BaseService):
202
295
  language: Optional[str] = None,
203
296
  speed: float = 1.0,
204
297
  audio_format: str = "mp3",
298
+ generation_params: Optional[Dict[str, Any]] = None,
205
299
  wait_for_completion: bool = False,
206
300
  timeout: int = 300
207
301
  ) -> Union[Job, Dict[str, Any]]:
208
302
  """
209
- Generate speech using an existing voice profile
303
+ Generate speech using an existing voice profile (unified endpoint)
304
+
305
+ This method now uses the unified voice generation endpoint for consistency.
210
306
 
211
307
  Args:
212
308
  voice_id: ID or UUID of the voice profile
213
309
  text: Text to generate speech for
214
310
  language: Target language code
215
- speed: Speech speed (0.5 to 2.0)
216
- audio_format: Output audio format (mp3, wav)
311
+ speed: Speech speed (0.25 to 4.0, provider dependent)
312
+ audio_format: Output audio format ('mp3', 'wav', 'ogg')
313
+ generation_params: Additional generation parameters
217
314
  wait_for_completion: Whether to wait for completion
218
315
  timeout: Maximum time to wait
219
316
 
220
317
  Returns:
221
318
  Job object or generation result
222
319
  """
223
- # Validate inputs
224
- text = self._validate_text_input(text)
225
- if language:
226
- language = self._validate_language_code(language)
227
- if not 0.5 <= speed <= 2.0:
228
- raise ValidationError("Speed must be between 0.5 and 2.0")
229
- if audio_format not in ["mp3", "wav"]:
230
- raise ValidationError("Audio format must be 'mp3' or 'wav'")
231
-
232
- # Prepare form data
233
- data = {
234
- "input_text": text,
235
- "speed": speed,
236
- "audio_format": audio_format
237
- }
238
- if language:
239
- data["language"] = language
240
-
241
- # Make request
242
- endpoint = f"/api/v1/voice/voices/{voice_id}/generate"
243
-
244
- if self.async_mode:
245
- return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)
246
- else:
247
- response = self.client.request("POST", endpoint, data=data)
248
-
249
- if "job_id" in response:
250
- job = Job.from_dict(response)
251
- if wait_for_completion:
252
- job = self._wait_for_completion(job.id, timeout)
253
- return job.result if job.result else job
254
- return job
255
- else:
256
- # Direct response with audio URL
257
- return response
258
-
259
- async def _async_generate_speech(
320
+ # Use unified voice generation method
321
+ return self.generate_voice(
322
+ text=text,
323
+ voice_id=voice_id,
324
+ language=language,
325
+ speed=speed,
326
+ audio_format=audio_format,
327
+ generation_params=generation_params,
328
+ wait_for_completion=wait_for_completion,
329
+ timeout=timeout
330
+ )
331
+
332
+ def clone_voice(
260
333
  self,
261
- endpoint: str,
262
- data: Dict[str, Any],
263
- wait_for_completion: bool,
264
- timeout: int
334
+ voice_file: str,
335
+ text: str,
336
+ language: Optional[str] = None,
337
+ speed: float = 1.0,
338
+ generation_params: Optional[Dict[str, Any]] = None,
339
+ wait_for_completion: bool = False,
340
+ timeout: int = 300
265
341
  ) -> Union[Job, Dict[str, Any]]:
266
- """Async version of generate_speech"""
267
- response = await self.client.request("POST", endpoint, data=data)
342
+ """
343
+ Clone a voice from an audio file (backward compatibility)
268
344
 
269
- if "job_id" in response:
270
- job = Job.from_dict(response)
271
- if wait_for_completion:
272
- job = await self._async_wait_for_completion(job.id, timeout)
273
- return job.result if job.result else job
274
- return job
275
- else:
276
- return response
345
+ This method is now a wrapper around the unified generate_voice method.
346
+ For new code, consider using generate_voice() directly with voice_file parameter.
347
+
348
+ Args:
349
+ voice_file: Path to audio file containing voice to clone
350
+ text: Text to generate with the cloned voice
351
+ language: Target language code (e.g., 'en', 'es')
352
+ speed: Speech speed (0.25 to 4.0, provider dependent)
353
+ generation_params: Additional generation parameters
354
+ wait_for_completion: Whether to wait for job completion
355
+ timeout: Maximum time to wait if wait_for_completion=True
356
+
357
+ Returns:
358
+ Job object if wait_for_completion=False, otherwise job result
359
+ """
360
+ # Use unified voice generation method
361
+ return self.generate_voice(
362
+ text=text,
363
+ voice_file=voice_file,
364
+ language=language,
365
+ speed=speed,
366
+ audio_format="mp3", # Default format for backward compatibility
367
+ generation_params=generation_params,
368
+ wait_for_completion=wait_for_completion,
369
+ timeout=timeout
370
+ )
277
371
 
278
372
  def list_voice_profiles(
279
373
  self,
@@ -61,9 +61,50 @@ def check_credits(client):
61
61
  return False
62
62
 
63
63
 
64
+ def voice_generation_example(client):
65
+ """Demonstrate unified voice generation functionality"""
66
+ print("\n🎤 Voice Generation Example (Unified Approach)")
67
+ print("=" * 50)
68
+
69
+ # For this example, you'll need a voice sample file
70
+ # Replace with path to your audio file
71
+ voice_file = "examples/voice_sample.wav"
72
+
73
+ if not Path(voice_file).exists():
74
+ print(f"⚠️ Voice sample file not found: {voice_file}")
75
+ print(" Please provide a voice sample (wav, mp3, etc.) to test voice generation")
76
+ return
77
+
78
+ try:
79
+ print(f"🔄 Generating voice using: {voice_file}")
80
+
81
+ # Generate voice using unified method (for voice cloning)
82
+ job = client.voice.generate_voice(
83
+ text="Hello! This is an example of voice generation using the AudioPod API.",
84
+ voice_file=voice_file, # For voice cloning
85
+ language="en",
86
+ audio_format="mp3",
87
+ generation_params={
88
+ "speed": 1.0
89
+ },
90
+ wait_for_completion=True,
91
+ timeout=300
92
+ )
93
+
94
+ print("✅ Voice generation completed!")
95
+ if hasattr(job, 'output_url') and job.output_url:
96
+ print(f"🎵 Generated audio: {job.output_url}")
97
+ elif isinstance(job, dict) and 'output_url' in job:
98
+ print(f"🎵 Generated audio: {job['output_url']}")
99
+
100
+ except ProcessingError as e:
101
+ print(f"❌ Voice generation failed: {e.message}")
102
+ except AudioPodError as e:
103
+ print(f"❌ API Error: {e.message}")
104
+
64
105
  def voice_cloning_example(client):
65
- """Demonstrate voice cloning functionality"""
66
- print("\n🎤 Voice Cloning Example")
106
+ """Demonstrate voice cloning functionality (backward compatibility)"""
107
+ print("\n🔄 Voice Cloning Example (Legacy - uses generate_voice internally)")
67
108
  print("=" * 50)
68
109
 
69
110
  # For this example, you'll need a voice sample file
@@ -76,12 +117,12 @@ def voice_cloning_example(client):
76
117
  return
77
118
 
78
119
  try:
79
- print(f"🔄 Cloning voice from: {voice_file}")
120
+ print(f"🔄 Cloning voice from: {voice_file} (legacy method)")
80
121
 
81
- # Clone voice with sample text
122
+ # Clone voice with sample text (legacy method - now uses generate_voice internally)
82
123
  job = client.voice.clone_voice(
83
124
  voice_file=voice_file,
84
- text="Hello! This is an example of voice cloning using the AudioPod API.",
125
+ text="Hello! This is an example using the legacy clone_voice method.",
85
126
  language="en",
86
127
  speed=1.0,
87
128
  wait_for_completion=True,
@@ -89,7 +130,9 @@ def voice_cloning_example(client):
89
130
  )
90
131
 
91
132
  print("✅ Voice cloning completed!")
92
- if 'output_url' in job:
133
+ if hasattr(job, 'output_url') and job.output_url:
134
+ print(f"🎵 Generated audio: {job.output_url}")
135
+ elif isinstance(job, dict) and 'output_url' in job:
93
136
  print(f"🎵 Generated audio: {job['output_url']}")
94
137
 
95
138
  except ProcessingError as e:
@@ -122,19 +165,40 @@ def voice_profile_example(client):
122
165
 
123
166
  print(f"✅ Voice profile created: {voice_profile.name} (ID: {voice_profile.id})")
124
167
 
125
- # Use the voice profile for speech generation
126
- print("🔄 Generating speech with voice profile...")
127
- speech = client.voice.generate_speech(
128
- voice_id=voice_profile.id,
129
- text="This speech was generated using my custom voice profile!",
168
+ # Use the voice profile for speech generation (unified method - recommended)
169
+ print("🔄 Generating speech with voice profile using unified method...")
170
+ speech = client.voice.generate_voice(
171
+ text="This speech was generated using my custom voice profile with the unified method!",
172
+ voice_id=voice_profile.id, # For existing voice profile
130
173
  language="en",
174
+ audio_format="mp3",
175
+ generation_params={
176
+ "speed": 1.0
177
+ },
131
178
  wait_for_completion=True
132
179
  )
133
180
 
134
181
  print("✅ Speech generation completed!")
135
- if 'output_url' in speech:
182
+ if hasattr(speech, 'output_url') and speech.output_url:
183
+ print(f"🎵 Generated speech: {speech.output_url}")
184
+ elif isinstance(speech, dict) and 'output_url' in speech:
136
185
  print(f"🎵 Generated speech: {speech['output_url']}")
137
186
 
187
+ # Also demonstrate legacy method for comparison
188
+ print("🔄 Generating speech with voice profile using legacy method...")
189
+ legacy_speech = client.voice.generate_speech(
190
+ voice_id=voice_profile.id,
191
+ text="This speech was generated using the legacy generate_speech method.",
192
+ language="en",
193
+ wait_for_completion=True
194
+ )
195
+
196
+ print("✅ Legacy speech generation completed!")
197
+ if hasattr(legacy_speech, 'output_url') and legacy_speech.output_url:
198
+ print(f"🎵 Generated speech (legacy): {legacy_speech.output_url}")
199
+ elif isinstance(legacy_speech, dict) and 'output_url' in legacy_speech:
200
+ print(f"🎵 Generated speech (legacy): {legacy_speech['output_url']}")
201
+
138
202
  # List all voice profiles
139
203
  print("\n📋 Your voice profiles:")
140
204
  voices = client.voice.list_voice_profiles(limit=10)
@@ -443,7 +507,8 @@ def main():
443
507
 
444
508
  # Run examples
445
509
  try:
446
- voice_cloning_example(client)
510
+ voice_generation_example(client) # New unified method (recommended)
511
+ voice_cloning_example(client) # Legacy method for backward compatibility
447
512
  voice_profile_example(client)
448
513
  music_generation_example(client)
449
514
  transcription_example(client)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "audiopod"
7
- version = "1.1.1"
7
+ version = "1.2.0"
8
8
  authors = [
9
9
  {name = "AudioPod AI", email = "support@audiopod.ai"},
10
10
  ]
@@ -31,7 +31,7 @@ def read_requirements():
31
31
 
32
32
  setup(
33
33
  name="audiopod",
34
- version="1.1.0",
34
+ version="1.2.0",
35
35
  author="AudioPod AI",
36
36
  author_email="support@audiopod.ai",
37
37
  description="Professional Audio Processing API Client for Python",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes