audiopod 1.1.1__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {audiopod-1.1.1 → audiopod-1.2.0}/CHANGELOG.md +72 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/PKG-INFO +60 -10
- {audiopod-1.1.1 → audiopod-1.2.0}/README.md +59 -9
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/__init__.py +1 -1
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/voice.py +169 -75
- {audiopod-1.1.1 → audiopod-1.2.0}/examples/basic_usage.py +78 -13
- {audiopod-1.1.1 → audiopod-1.2.0}/pyproject.toml +1 -1
- {audiopod-1.1.1 → audiopod-1.2.0}/setup.py +1 -1
- {audiopod-1.1.1 → audiopod-1.2.0}/LICENSE +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/MANIFEST.in +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/cli.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/client.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/config.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/exceptions.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/models.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/py.typed +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/__init__.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/base.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/credits.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/denoiser.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/karaoke.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/music.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/speaker.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/stem_extraction.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/transcription.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod/services/translation.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/audiopod.egg-info/SOURCES.txt +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/examples/README.md +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/requirements.txt +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/setup.cfg +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/tests/test_end_to_end_integration.py +0 -0
- {audiopod-1.1.1 → audiopod-1.2.0}/tests/test_sdk_api_compatibility.py +0 -0
|
@@ -5,6 +5,78 @@ All notable changes to the AudioPod Python SDK will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.2.0] - 2024-12-15
|
|
9
|
+
|
|
10
|
+
### 🎤 Unified Voice Generation
|
|
11
|
+
|
|
12
|
+
This release introduces a major architectural improvement: a unified voice generation endpoint that consolidates voice cloning and text-to-speech into a single, consistent API.
|
|
13
|
+
|
|
14
|
+
### ✨ Added
|
|
15
|
+
|
|
16
|
+
- **Unified Voice Generation Method**: New `generate_voice()` method handles both voice cloning and TTS
|
|
17
|
+
- Single endpoint `/api/v1/voice/voices/{voice_identifier}/generate` for all voice operations
|
|
18
|
+
- Supports both voice file upload (cloning) and voice ID (existing profiles)
|
|
19
|
+
- Enhanced parameter support: `audio_format`, `generation_params`, extended speed range (0.25-4.0)
|
|
20
|
+
|
|
21
|
+
- **Enhanced Voice Generation Parameters**:
|
|
22
|
+
- `audio_format`: Support for 'mp3', 'wav', 'ogg' output formats
|
|
23
|
+
- `generation_params`: Provider-specific parameters (speed, temperature, pitch, etc.)
|
|
24
|
+
- Extended speed range: 0.25x to 4.0x (provider dependent)
|
|
25
|
+
|
|
26
|
+
### 🔧 Fixed
|
|
27
|
+
|
|
28
|
+
- **Removed Legacy Clone Endpoint**: No longer uses deprecated `/api/v1/voice/voice-clone`
|
|
29
|
+
- **Unified API Architecture**: All voice generation now uses consistent endpoint structure
|
|
30
|
+
- **Improved Error Handling**: Better validation for mutually exclusive parameters
|
|
31
|
+
|
|
32
|
+
### 🏗️ Improved
|
|
33
|
+
|
|
34
|
+
- **Backward Compatibility**: Existing `clone_voice()` and `generate_speech()` methods continue to work
|
|
35
|
+
- Legacy methods now internally use the unified `generate_voice()` approach
|
|
36
|
+
- No breaking changes for existing code
|
|
37
|
+
- Clear migration path with deprecation warnings in documentation
|
|
38
|
+
|
|
39
|
+
- **Enhanced Documentation**:
|
|
40
|
+
- Updated examples to showcase unified approach
|
|
41
|
+
- Clear distinction between recommended and legacy methods
|
|
42
|
+
- Comprehensive migration guide
|
|
43
|
+
|
|
44
|
+
### 🚀 Usage Examples
|
|
45
|
+
|
|
46
|
+
#### New Unified Approach (Recommended)
|
|
47
|
+
```python
|
|
48
|
+
# Voice cloning
|
|
49
|
+
result = client.voice.generate_voice(
|
|
50
|
+
text="Hello world!",
|
|
51
|
+
voice_file="voice.wav", # For cloning
|
|
52
|
+
language="en",
|
|
53
|
+
audio_format="mp3"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# TTS with existing voice
|
|
57
|
+
result = client.voice.generate_voice(
|
|
58
|
+
text="Hello world!",
|
|
59
|
+
voice_id="profile-id", # For existing voices
|
|
60
|
+
language="en",
|
|
61
|
+
audio_format="mp3"
|
|
62
|
+
)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
#### Backward Compatibility (Legacy methods still work)
|
|
66
|
+
```python
|
|
67
|
+
# These continue to work unchanged
|
|
68
|
+
result = client.voice.clone_voice(voice_file="voice.wav", text="Hello")
|
|
69
|
+
result = client.voice.generate_speech(voice_id="profile-id", text="Hello")
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 🔄 Migration Notes
|
|
73
|
+
|
|
74
|
+
- **No Breaking Changes**: All existing code continues to work without modification
|
|
75
|
+
- **Recommended**: Migrate to `generate_voice()` for new development
|
|
76
|
+
- **Performance**: Unified endpoint provides better consistency and reliability
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
8
80
|
## [1.1.1] - 2024-12-15
|
|
9
81
|
|
|
10
82
|
### 🔧 Translation Service Fixes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: audiopod
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Professional Audio Processing API Client for Python
|
|
5
5
|
Home-page: https://github.com/audiopod-ai/audiopod-python
|
|
6
6
|
Author: AudioPod AI
|
|
@@ -95,7 +95,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
|
|
|
95
95
|
|
|
96
96
|
### Basic Usage
|
|
97
97
|
|
|
98
|
-
#### Voice Cloning
|
|
98
|
+
#### Voice Generation (Unified TTS & Cloning)
|
|
99
99
|
|
|
100
100
|
```python
|
|
101
101
|
import audiopod
|
|
@@ -103,15 +103,39 @@ import audiopod
|
|
|
103
103
|
# Initialize client
|
|
104
104
|
client = audiopod.Client()
|
|
105
105
|
|
|
106
|
-
#
|
|
107
|
-
job = client.voice.
|
|
106
|
+
# Generate voice using file cloning (unified approach)
|
|
107
|
+
job = client.voice.generate_voice(
|
|
108
|
+
text="Hello! This is voice generation using a cloned voice.",
|
|
109
|
+
voice_file="path/to/voice_sample.wav", # For voice cloning
|
|
110
|
+
language="en",
|
|
111
|
+
audio_format="mp3",
|
|
112
|
+
generation_params={
|
|
113
|
+
"speed": 1.0
|
|
114
|
+
},
|
|
115
|
+
wait_for_completion=True
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
print(f"Generated audio URL: {job.output_url}")
|
|
119
|
+
|
|
120
|
+
# Generate speech with existing voice profile (unified approach)
|
|
121
|
+
speech = client.voice.generate_voice(
|
|
122
|
+
text="Hello from my voice profile!",
|
|
123
|
+
voice_id="voice-profile-id", # For existing voice profiles
|
|
124
|
+
language="en",
|
|
125
|
+
audio_format="mp3",
|
|
126
|
+
generation_params={
|
|
127
|
+
"speed": 1.0
|
|
128
|
+
},
|
|
129
|
+
wait_for_completion=True
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# Backward compatibility methods (deprecated - use generate_voice instead)
|
|
133
|
+
legacy_clone = client.voice.clone_voice(
|
|
108
134
|
voice_file="path/to/voice_sample.wav",
|
|
109
135
|
text="Hello! This is a cloned voice speaking.",
|
|
110
136
|
language="en",
|
|
111
137
|
wait_for_completion=True
|
|
112
138
|
)
|
|
113
|
-
|
|
114
|
-
print(f"Generated audio URL: {job['output_url']}")
|
|
115
139
|
```
|
|
116
140
|
|
|
117
141
|
#### Music Generation
|
|
@@ -197,10 +221,22 @@ voice_profile = client.voice.create_voice_profile(
|
|
|
197
221
|
wait_for_completion=True
|
|
198
222
|
)
|
|
199
223
|
|
|
200
|
-
# Use the voice profile for speech generation
|
|
201
|
-
speech = client.voice.
|
|
224
|
+
# Use the voice profile for speech generation (unified approach - recommended)
|
|
225
|
+
speech = client.voice.generate_voice(
|
|
226
|
+
text="This uses my custom voice profile with the unified method!",
|
|
202
227
|
voice_id=voice_profile.id,
|
|
203
|
-
|
|
228
|
+
language="en",
|
|
229
|
+
audio_format="mp3",
|
|
230
|
+
generation_params={
|
|
231
|
+
"speed": 1.0
|
|
232
|
+
},
|
|
233
|
+
wait_for_completion=True
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
# Legacy method (still works - uses generate_voice internally)
|
|
237
|
+
legacy_speech = client.voice.generate_speech(
|
|
238
|
+
voice_id=voice_profile.id,
|
|
239
|
+
text="This uses the legacy method.",
|
|
204
240
|
wait_for_completion=True
|
|
205
241
|
)
|
|
206
242
|
```
|
|
@@ -343,7 +379,7 @@ client = audiopod.Client(
|
|
|
343
379
|
|
|
344
380
|
### Services
|
|
345
381
|
|
|
346
|
-
- `client.voice`: Voice
|
|
382
|
+
- `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
|
|
347
383
|
- `client.music`: Music generation and editing
|
|
348
384
|
- `client.transcription`: Speech-to-text transcription
|
|
349
385
|
- `client.translation`: Audio/video translation
|
|
@@ -352,6 +388,20 @@ client = audiopod.Client(
|
|
|
352
388
|
- `client.karaoke`: Karaoke video generation
|
|
353
389
|
- `client.credits`: Credit management and usage tracking
|
|
354
390
|
|
|
391
|
+
#### Voice Service Methods
|
|
392
|
+
|
|
393
|
+
**Recommended (Unified Approach):**
|
|
394
|
+
- `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
|
|
395
|
+
|
|
396
|
+
**Legacy Methods (Backward Compatibility):**
|
|
397
|
+
- `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
|
|
398
|
+
- `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
|
|
399
|
+
|
|
400
|
+
**Voice Management:**
|
|
401
|
+
- `client.voice.create_voice_profile()` - Create reusable voice profiles
|
|
402
|
+
- `client.voice.list_voice_profiles()` - List available voice profiles
|
|
403
|
+
- `client.voice.delete_voice_profile()` - Delete voice profiles
|
|
404
|
+
|
|
355
405
|
### Models
|
|
356
406
|
|
|
357
407
|
- `Job`: Base job information and status
|
|
@@ -43,7 +43,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
|
|
|
43
43
|
|
|
44
44
|
### Basic Usage
|
|
45
45
|
|
|
46
|
-
#### Voice Cloning
|
|
46
|
+
#### Voice Generation (Unified TTS & Cloning)
|
|
47
47
|
|
|
48
48
|
```python
|
|
49
49
|
import audiopod
|
|
@@ -51,15 +51,39 @@ import audiopod
|
|
|
51
51
|
# Initialize client
|
|
52
52
|
client = audiopod.Client()
|
|
53
53
|
|
|
54
|
-
#
|
|
55
|
-
job = client.voice.
|
|
54
|
+
# Generate voice using file cloning (unified approach)
|
|
55
|
+
job = client.voice.generate_voice(
|
|
56
|
+
text="Hello! This is voice generation using a cloned voice.",
|
|
57
|
+
voice_file="path/to/voice_sample.wav", # For voice cloning
|
|
58
|
+
language="en",
|
|
59
|
+
audio_format="mp3",
|
|
60
|
+
generation_params={
|
|
61
|
+
"speed": 1.0
|
|
62
|
+
},
|
|
63
|
+
wait_for_completion=True
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
print(f"Generated audio URL: {job.output_url}")
|
|
67
|
+
|
|
68
|
+
# Generate speech with existing voice profile (unified approach)
|
|
69
|
+
speech = client.voice.generate_voice(
|
|
70
|
+
text="Hello from my voice profile!",
|
|
71
|
+
voice_id="voice-profile-id", # For existing voice profiles
|
|
72
|
+
language="en",
|
|
73
|
+
audio_format="mp3",
|
|
74
|
+
generation_params={
|
|
75
|
+
"speed": 1.0
|
|
76
|
+
},
|
|
77
|
+
wait_for_completion=True
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# Backward compatibility methods (deprecated - use generate_voice instead)
|
|
81
|
+
legacy_clone = client.voice.clone_voice(
|
|
56
82
|
voice_file="path/to/voice_sample.wav",
|
|
57
83
|
text="Hello! This is a cloned voice speaking.",
|
|
58
84
|
language="en",
|
|
59
85
|
wait_for_completion=True
|
|
60
86
|
)
|
|
61
|
-
|
|
62
|
-
print(f"Generated audio URL: {job['output_url']}")
|
|
63
87
|
```
|
|
64
88
|
|
|
65
89
|
#### Music Generation
|
|
@@ -145,10 +169,22 @@ voice_profile = client.voice.create_voice_profile(
|
|
|
145
169
|
wait_for_completion=True
|
|
146
170
|
)
|
|
147
171
|
|
|
148
|
-
# Use the voice profile for speech generation
|
|
149
|
-
speech = client.voice.
|
|
172
|
+
# Use the voice profile for speech generation (unified approach - recommended)
|
|
173
|
+
speech = client.voice.generate_voice(
|
|
174
|
+
text="This uses my custom voice profile with the unified method!",
|
|
150
175
|
voice_id=voice_profile.id,
|
|
151
|
-
|
|
176
|
+
language="en",
|
|
177
|
+
audio_format="mp3",
|
|
178
|
+
generation_params={
|
|
179
|
+
"speed": 1.0
|
|
180
|
+
},
|
|
181
|
+
wait_for_completion=True
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
# Legacy method (still works - uses generate_voice internally)
|
|
185
|
+
legacy_speech = client.voice.generate_speech(
|
|
186
|
+
voice_id=voice_profile.id,
|
|
187
|
+
text="This uses the legacy method.",
|
|
152
188
|
wait_for_completion=True
|
|
153
189
|
)
|
|
154
190
|
```
|
|
@@ -291,7 +327,7 @@ client = audiopod.Client(
|
|
|
291
327
|
|
|
292
328
|
### Services
|
|
293
329
|
|
|
294
|
-
- `client.voice`: Voice
|
|
330
|
+
- `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
|
|
295
331
|
- `client.music`: Music generation and editing
|
|
296
332
|
- `client.transcription`: Speech-to-text transcription
|
|
297
333
|
- `client.translation`: Audio/video translation
|
|
@@ -300,6 +336,20 @@ client = audiopod.Client(
|
|
|
300
336
|
- `client.karaoke`: Karaoke video generation
|
|
301
337
|
- `client.credits`: Credit management and usage tracking
|
|
302
338
|
|
|
339
|
+
#### Voice Service Methods
|
|
340
|
+
|
|
341
|
+
**Recommended (Unified Approach):**
|
|
342
|
+
- `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
|
|
343
|
+
|
|
344
|
+
**Legacy Methods (Backward Compatibility):**
|
|
345
|
+
- `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
|
|
346
|
+
- `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
|
|
347
|
+
|
|
348
|
+
**Voice Management:**
|
|
349
|
+
- `client.voice.create_voice_profile()` - Create reusable voice profiles
|
|
350
|
+
- `client.voice.list_voice_profiles()` - List available voice profiles
|
|
351
|
+
- `client.voice.delete_voice_profile()` - Delete voice profiles
|
|
352
|
+
|
|
303
353
|
### Models
|
|
304
354
|
|
|
305
355
|
- `Job`: Base job information and status
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Voice Service - Voice cloning and TTS operations
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import time
|
|
5
6
|
from typing import List, Optional, Dict, Any, Union
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
|
|
@@ -13,54 +14,103 @@ from ..exceptions import ValidationError
|
|
|
13
14
|
class VoiceService(BaseService):
|
|
14
15
|
"""Service for voice cloning and text-to-speech operations"""
|
|
15
16
|
|
|
16
|
-
def
|
|
17
|
+
def generate_voice(
|
|
17
18
|
self,
|
|
18
|
-
voice_file: str,
|
|
19
19
|
text: str,
|
|
20
|
+
voice_file: Optional[str] = None,
|
|
21
|
+
voice_id: Optional[Union[int, str]] = None,
|
|
20
22
|
language: Optional[str] = None,
|
|
21
23
|
speed: float = 1.0,
|
|
24
|
+
audio_format: str = "mp3",
|
|
25
|
+
generation_params: Optional[Dict[str, Any]] = None,
|
|
22
26
|
wait_for_completion: bool = False,
|
|
23
27
|
timeout: int = 300
|
|
24
28
|
) -> Union[Job, Dict[str, Any]]:
|
|
25
29
|
"""
|
|
26
|
-
|
|
30
|
+
Generate speech using either a voice file (for cloning) or existing voice profile
|
|
31
|
+
|
|
32
|
+
This unified method handles both voice cloning and text-to-speech generation:
|
|
33
|
+
- For voice cloning: Provide voice_file parameter
|
|
34
|
+
- For TTS with existing voice: Provide voice_id parameter
|
|
27
35
|
|
|
28
36
|
Args:
|
|
29
|
-
|
|
30
|
-
|
|
37
|
+
text: Text to generate speech for
|
|
38
|
+
voice_file: Path to audio file for voice cloning (mutually exclusive with voice_id)
|
|
39
|
+
voice_id: ID/UUID of existing voice profile (mutually exclusive with voice_file)
|
|
31
40
|
language: Target language code (e.g., 'en', 'es')
|
|
32
|
-
speed: Speech speed (0.
|
|
41
|
+
speed: Speech speed (0.25 to 4.0, provider dependent)
|
|
42
|
+
audio_format: Output audio format ('mp3', 'wav', 'ogg')
|
|
43
|
+
generation_params: Provider-specific parameters (speed, temperature, etc.)
|
|
33
44
|
wait_for_completion: Whether to wait for job completion
|
|
34
45
|
timeout: Maximum time to wait if wait_for_completion=True
|
|
35
46
|
|
|
36
47
|
Returns:
|
|
37
48
|
Job object if wait_for_completion=False, otherwise job result
|
|
49
|
+
|
|
50
|
+
Raises:
|
|
51
|
+
ValidationError: If both voice_file and voice_id are provided, or if neither is provided
|
|
38
52
|
"""
|
|
39
53
|
# Validate inputs
|
|
54
|
+
if not voice_file and not voice_id:
|
|
55
|
+
raise ValidationError("Either voice_file (for cloning) or voice_id (for TTS) must be provided")
|
|
56
|
+
if voice_file and voice_id:
|
|
57
|
+
raise ValidationError("Provide either voice_file or voice_id, not both")
|
|
58
|
+
|
|
40
59
|
text = self._validate_text_input(text)
|
|
41
60
|
if language:
|
|
42
61
|
language = self._validate_language_code(language)
|
|
43
|
-
if not 0.
|
|
44
|
-
raise ValidationError("Speed must be between 0.
|
|
62
|
+
if not 0.25 <= speed <= 4.0:
|
|
63
|
+
raise ValidationError("Speed must be between 0.25 and 4.0")
|
|
64
|
+
if audio_format not in ["mp3", "wav", "ogg"]:
|
|
65
|
+
raise ValidationError("Audio format must be 'mp3', 'wav', or 'ogg'")
|
|
45
66
|
|
|
46
|
-
#
|
|
67
|
+
# For voice cloning, we need to create a temporary voice first, then generate
|
|
68
|
+
if voice_file:
|
|
69
|
+
return self._generate_with_voice_file(
|
|
70
|
+
voice_file, text, language, speed, audio_format,
|
|
71
|
+
generation_params, wait_for_completion, timeout
|
|
72
|
+
)
|
|
73
|
+
else:
|
|
74
|
+
# Use existing voice profile with unified endpoint
|
|
75
|
+
return self._generate_with_voice_id(
|
|
76
|
+
voice_id, text, language, speed, audio_format,
|
|
77
|
+
generation_params, wait_for_completion, timeout
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
def _generate_with_voice_file(
|
|
81
|
+
self,
|
|
82
|
+
voice_file: str,
|
|
83
|
+
text: str,
|
|
84
|
+
language: Optional[str],
|
|
85
|
+
speed: float,
|
|
86
|
+
audio_format: str,
|
|
87
|
+
generation_params: Optional[Dict[str, Any]],
|
|
88
|
+
wait_for_completion: bool,
|
|
89
|
+
timeout: int
|
|
90
|
+
) -> Union[Job, Dict[str, Any]]:
|
|
91
|
+
"""Generate speech with voice cloning using unified endpoint"""
|
|
92
|
+
# For voice cloning, we use the "clone" identifier with the unified endpoint
|
|
47
93
|
files = self._prepare_file_upload(voice_file, "file")
|
|
48
94
|
|
|
49
|
-
# Prepare form data
|
|
95
|
+
# Prepare form data for unified endpoint
|
|
50
96
|
data = {
|
|
51
97
|
"input_text": text,
|
|
52
|
-
"speed": speed
|
|
98
|
+
"speed": speed,
|
|
99
|
+
"audio_format": audio_format
|
|
53
100
|
}
|
|
54
101
|
if language:
|
|
55
|
-
data["
|
|
102
|
+
data["language"] = language
|
|
103
|
+
if generation_params:
|
|
104
|
+
# Add generation parameters
|
|
105
|
+
data.update(generation_params)
|
|
56
106
|
|
|
57
|
-
# Make request
|
|
58
107
|
if self.async_mode:
|
|
59
|
-
return self.
|
|
108
|
+
return self._async_generate_voice("clone", data, files, wait_for_completion, timeout)
|
|
60
109
|
else:
|
|
110
|
+
# Use unified voice generation endpoint with "clone" identifier
|
|
61
111
|
response = self.client.request(
|
|
62
112
|
"POST",
|
|
63
|
-
"/api/v1/voice/
|
|
113
|
+
"/api/v1/voice/voices/clone/generate",
|
|
64
114
|
data=data,
|
|
65
115
|
files=files
|
|
66
116
|
)
|
|
@@ -73,17 +123,60 @@ class VoiceService(BaseService):
|
|
|
73
123
|
|
|
74
124
|
return job
|
|
75
125
|
|
|
76
|
-
|
|
126
|
+
def _generate_with_voice_id(
|
|
77
127
|
self,
|
|
78
|
-
|
|
128
|
+
voice_id: Union[int, str],
|
|
129
|
+
text: str,
|
|
130
|
+
language: Optional[str],
|
|
131
|
+
speed: float,
|
|
132
|
+
audio_format: str,
|
|
133
|
+
generation_params: Optional[Dict[str, Any]],
|
|
134
|
+
wait_for_completion: bool,
|
|
135
|
+
timeout: int
|
|
136
|
+
) -> Union[Job, Dict[str, Any]]:
|
|
137
|
+
"""Generate speech with existing voice profile using unified endpoint"""
|
|
138
|
+
# Prepare form data for unified endpoint
|
|
139
|
+
data = {
|
|
140
|
+
"input_text": text,
|
|
141
|
+
"speed": speed,
|
|
142
|
+
"audio_format": audio_format
|
|
143
|
+
}
|
|
144
|
+
if language:
|
|
145
|
+
data["language"] = language
|
|
146
|
+
if generation_params:
|
|
147
|
+
# Add generation parameters
|
|
148
|
+
data.update(generation_params)
|
|
149
|
+
|
|
150
|
+
if self.async_mode:
|
|
151
|
+
return self._async_generate_voice(voice_id, data, None, wait_for_completion, timeout)
|
|
152
|
+
else:
|
|
153
|
+
# Use unified voice generation endpoint
|
|
154
|
+
response = self.client.request(
|
|
155
|
+
"POST",
|
|
156
|
+
f"/api/v1/voice/voices/{voice_id}/generate",
|
|
157
|
+
data=data
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
job = Job.from_dict(response)
|
|
161
|
+
|
|
162
|
+
if wait_for_completion:
|
|
163
|
+
job = self._wait_for_completion(job.id, timeout)
|
|
164
|
+
return job.result if job.result else job
|
|
165
|
+
|
|
166
|
+
return job
|
|
167
|
+
|
|
168
|
+
async def _async_generate_voice(
|
|
169
|
+
self,
|
|
170
|
+
voice_identifier: Union[int, str],
|
|
79
171
|
data: Dict[str, Any],
|
|
172
|
+
files: Optional[Dict[str, Any]],
|
|
80
173
|
wait_for_completion: bool,
|
|
81
174
|
timeout: int
|
|
82
175
|
) -> Union[Job, Dict[str, Any]]:
|
|
83
|
-
"""Async version of
|
|
176
|
+
"""Async version of unified voice generation"""
|
|
84
177
|
response = await self.client.request(
|
|
85
|
-
"POST",
|
|
86
|
-
"/api/v1/voice/
|
|
178
|
+
"POST",
|
|
179
|
+
f"/api/v1/voice/voices/{voice_identifier}/generate",
|
|
87
180
|
data=data,
|
|
88
181
|
files=files
|
|
89
182
|
)
|
|
@@ -93,7 +186,7 @@ class VoiceService(BaseService):
|
|
|
93
186
|
if wait_for_completion:
|
|
94
187
|
job = await self._async_wait_for_completion(job.id, timeout)
|
|
95
188
|
return job.result if job.result else job
|
|
96
|
-
|
|
189
|
+
|
|
97
190
|
return job
|
|
98
191
|
|
|
99
192
|
def create_voice_profile(
|
|
@@ -202,78 +295,79 @@ class VoiceService(BaseService):
|
|
|
202
295
|
language: Optional[str] = None,
|
|
203
296
|
speed: float = 1.0,
|
|
204
297
|
audio_format: str = "mp3",
|
|
298
|
+
generation_params: Optional[Dict[str, Any]] = None,
|
|
205
299
|
wait_for_completion: bool = False,
|
|
206
300
|
timeout: int = 300
|
|
207
301
|
) -> Union[Job, Dict[str, Any]]:
|
|
208
302
|
"""
|
|
209
|
-
Generate speech using an existing voice profile
|
|
303
|
+
Generate speech using an existing voice profile (unified endpoint)
|
|
304
|
+
|
|
305
|
+
This method now uses the unified voice generation endpoint for consistency.
|
|
210
306
|
|
|
211
307
|
Args:
|
|
212
308
|
voice_id: ID or UUID of the voice profile
|
|
213
309
|
text: Text to generate speech for
|
|
214
310
|
language: Target language code
|
|
215
|
-
speed: Speech speed (0.
|
|
216
|
-
audio_format: Output audio format (mp3, wav)
|
|
311
|
+
speed: Speech speed (0.25 to 4.0, provider dependent)
|
|
312
|
+
audio_format: Output audio format ('mp3', 'wav', 'ogg')
|
|
313
|
+
generation_params: Additional generation parameters
|
|
217
314
|
wait_for_completion: Whether to wait for completion
|
|
218
315
|
timeout: Maximum time to wait
|
|
219
316
|
|
|
220
317
|
Returns:
|
|
221
318
|
Job object or generation result
|
|
222
319
|
"""
|
|
223
|
-
#
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
"audio_format": audio_format
|
|
237
|
-
}
|
|
238
|
-
if language:
|
|
239
|
-
data["language"] = language
|
|
240
|
-
|
|
241
|
-
# Make request
|
|
242
|
-
endpoint = f"/api/v1/voice/voices/{voice_id}/generate"
|
|
243
|
-
|
|
244
|
-
if self.async_mode:
|
|
245
|
-
return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)
|
|
246
|
-
else:
|
|
247
|
-
response = self.client.request("POST", endpoint, data=data)
|
|
248
|
-
|
|
249
|
-
if "job_id" in response:
|
|
250
|
-
job = Job.from_dict(response)
|
|
251
|
-
if wait_for_completion:
|
|
252
|
-
job = self._wait_for_completion(job.id, timeout)
|
|
253
|
-
return job.result if job.result else job
|
|
254
|
-
return job
|
|
255
|
-
else:
|
|
256
|
-
# Direct response with audio URL
|
|
257
|
-
return response
|
|
258
|
-
|
|
259
|
-
async def _async_generate_speech(
|
|
320
|
+
# Use unified voice generation method
|
|
321
|
+
return self.generate_voice(
|
|
322
|
+
text=text,
|
|
323
|
+
voice_id=voice_id,
|
|
324
|
+
language=language,
|
|
325
|
+
speed=speed,
|
|
326
|
+
audio_format=audio_format,
|
|
327
|
+
generation_params=generation_params,
|
|
328
|
+
wait_for_completion=wait_for_completion,
|
|
329
|
+
timeout=timeout
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
def clone_voice(
|
|
260
333
|
self,
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
334
|
+
voice_file: str,
|
|
335
|
+
text: str,
|
|
336
|
+
language: Optional[str] = None,
|
|
337
|
+
speed: float = 1.0,
|
|
338
|
+
generation_params: Optional[Dict[str, Any]] = None,
|
|
339
|
+
wait_for_completion: bool = False,
|
|
340
|
+
timeout: int = 300
|
|
265
341
|
) -> Union[Job, Dict[str, Any]]:
|
|
266
|
-
"""
|
|
267
|
-
|
|
342
|
+
"""
|
|
343
|
+
Clone a voice from an audio file (backward compatibility)
|
|
268
344
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
345
|
+
This method is now a wrapper around the unified generate_voice method.
|
|
346
|
+
For new code, consider using generate_voice() directly with voice_file parameter.
|
|
347
|
+
|
|
348
|
+
Args:
|
|
349
|
+
voice_file: Path to audio file containing voice to clone
|
|
350
|
+
text: Text to generate with the cloned voice
|
|
351
|
+
language: Target language code (e.g., 'en', 'es')
|
|
352
|
+
speed: Speech speed (0.25 to 4.0, provider dependent)
|
|
353
|
+
generation_params: Additional generation parameters
|
|
354
|
+
wait_for_completion: Whether to wait for job completion
|
|
355
|
+
timeout: Maximum time to wait if wait_for_completion=True
|
|
356
|
+
|
|
357
|
+
Returns:
|
|
358
|
+
Job object if wait_for_completion=False, otherwise job result
|
|
359
|
+
"""
|
|
360
|
+
# Use unified voice generation method
|
|
361
|
+
return self.generate_voice(
|
|
362
|
+
text=text,
|
|
363
|
+
voice_file=voice_file,
|
|
364
|
+
language=language,
|
|
365
|
+
speed=speed,
|
|
366
|
+
audio_format="mp3", # Default format for backward compatibility
|
|
367
|
+
generation_params=generation_params,
|
|
368
|
+
wait_for_completion=wait_for_completion,
|
|
369
|
+
timeout=timeout
|
|
370
|
+
)
|
|
277
371
|
|
|
278
372
|
def list_voice_profiles(
|
|
279
373
|
self,
|
|
@@ -61,9 +61,50 @@ def check_credits(client):
|
|
|
61
61
|
return False
|
|
62
62
|
|
|
63
63
|
|
|
64
|
+
def voice_generation_example(client):
|
|
65
|
+
"""Demonstrate unified voice generation functionality"""
|
|
66
|
+
print("\n🎤 Voice Generation Example (Unified Approach)")
|
|
67
|
+
print("=" * 50)
|
|
68
|
+
|
|
69
|
+
# For this example, you'll need a voice sample file
|
|
70
|
+
# Replace with path to your audio file
|
|
71
|
+
voice_file = "examples/voice_sample.wav"
|
|
72
|
+
|
|
73
|
+
if not Path(voice_file).exists():
|
|
74
|
+
print(f"⚠️ Voice sample file not found: {voice_file}")
|
|
75
|
+
print(" Please provide a voice sample (wav, mp3, etc.) to test voice generation")
|
|
76
|
+
return
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
print(f"🔄 Generating voice using: {voice_file}")
|
|
80
|
+
|
|
81
|
+
# Generate voice using unified method (for voice cloning)
|
|
82
|
+
job = client.voice.generate_voice(
|
|
83
|
+
text="Hello! This is an example of voice generation using the AudioPod API.",
|
|
84
|
+
voice_file=voice_file, # For voice cloning
|
|
85
|
+
language="en",
|
|
86
|
+
audio_format="mp3",
|
|
87
|
+
generation_params={
|
|
88
|
+
"speed": 1.0
|
|
89
|
+
},
|
|
90
|
+
wait_for_completion=True,
|
|
91
|
+
timeout=300
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
print("✅ Voice generation completed!")
|
|
95
|
+
if hasattr(job, 'output_url') and job.output_url:
|
|
96
|
+
print(f"🎵 Generated audio: {job.output_url}")
|
|
97
|
+
elif isinstance(job, dict) and 'output_url' in job:
|
|
98
|
+
print(f"🎵 Generated audio: {job['output_url']}")
|
|
99
|
+
|
|
100
|
+
except ProcessingError as e:
|
|
101
|
+
print(f"❌ Voice generation failed: {e.message}")
|
|
102
|
+
except AudioPodError as e:
|
|
103
|
+
print(f"❌ API Error: {e.message}")
|
|
104
|
+
|
|
64
105
|
def voice_cloning_example(client):
|
|
65
|
-
"""Demonstrate voice cloning functionality"""
|
|
66
|
-
print("\n
|
|
106
|
+
"""Demonstrate voice cloning functionality (backward compatibility)"""
|
|
107
|
+
print("\n🔄 Voice Cloning Example (Legacy - uses generate_voice internally)")
|
|
67
108
|
print("=" * 50)
|
|
68
109
|
|
|
69
110
|
# For this example, you'll need a voice sample file
|
|
@@ -76,12 +117,12 @@ def voice_cloning_example(client):
|
|
|
76
117
|
return
|
|
77
118
|
|
|
78
119
|
try:
|
|
79
|
-
print(f"🔄 Cloning voice from: {voice_file}")
|
|
120
|
+
print(f"🔄 Cloning voice from: {voice_file} (legacy method)")
|
|
80
121
|
|
|
81
|
-
# Clone voice with sample text
|
|
122
|
+
# Clone voice with sample text (legacy method - now uses generate_voice internally)
|
|
82
123
|
job = client.voice.clone_voice(
|
|
83
124
|
voice_file=voice_file,
|
|
84
|
-
text="Hello! This is an example
|
|
125
|
+
text="Hello! This is an example using the legacy clone_voice method.",
|
|
85
126
|
language="en",
|
|
86
127
|
speed=1.0,
|
|
87
128
|
wait_for_completion=True,
|
|
@@ -89,7 +130,9 @@ def voice_cloning_example(client):
|
|
|
89
130
|
)
|
|
90
131
|
|
|
91
132
|
print("✅ Voice cloning completed!")
|
|
92
|
-
if 'output_url'
|
|
133
|
+
if hasattr(job, 'output_url') and job.output_url:
|
|
134
|
+
print(f"🎵 Generated audio: {job.output_url}")
|
|
135
|
+
elif isinstance(job, dict) and 'output_url' in job:
|
|
93
136
|
print(f"🎵 Generated audio: {job['output_url']}")
|
|
94
137
|
|
|
95
138
|
except ProcessingError as e:
|
|
@@ -122,19 +165,40 @@ def voice_profile_example(client):
|
|
|
122
165
|
|
|
123
166
|
print(f"✅ Voice profile created: {voice_profile.name} (ID: {voice_profile.id})")
|
|
124
167
|
|
|
125
|
-
# Use the voice profile for speech generation
|
|
126
|
-
print("🔄 Generating speech with voice profile...")
|
|
127
|
-
speech = client.voice.
|
|
128
|
-
|
|
129
|
-
|
|
168
|
+
# Use the voice profile for speech generation (unified method - recommended)
|
|
169
|
+
print("🔄 Generating speech with voice profile using unified method...")
|
|
170
|
+
speech = client.voice.generate_voice(
|
|
171
|
+
text="This speech was generated using my custom voice profile with the unified method!",
|
|
172
|
+
voice_id=voice_profile.id, # For existing voice profile
|
|
130
173
|
language="en",
|
|
174
|
+
audio_format="mp3",
|
|
175
|
+
generation_params={
|
|
176
|
+
"speed": 1.0
|
|
177
|
+
},
|
|
131
178
|
wait_for_completion=True
|
|
132
179
|
)
|
|
133
180
|
|
|
134
181
|
print("✅ Speech generation completed!")
|
|
135
|
-
if 'output_url'
|
|
182
|
+
if hasattr(speech, 'output_url') and speech.output_url:
|
|
183
|
+
print(f"🎵 Generated speech: {speech.output_url}")
|
|
184
|
+
elif isinstance(speech, dict) and 'output_url' in speech:
|
|
136
185
|
print(f"🎵 Generated speech: {speech['output_url']}")
|
|
137
186
|
|
|
187
|
+
# Also demonstrate legacy method for comparison
|
|
188
|
+
print("🔄 Generating speech with voice profile using legacy method...")
|
|
189
|
+
legacy_speech = client.voice.generate_speech(
|
|
190
|
+
voice_id=voice_profile.id,
|
|
191
|
+
text="This speech was generated using the legacy generate_speech method.",
|
|
192
|
+
language="en",
|
|
193
|
+
wait_for_completion=True
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
print("✅ Legacy speech generation completed!")
|
|
197
|
+
if hasattr(legacy_speech, 'output_url') and legacy_speech.output_url:
|
|
198
|
+
print(f"🎵 Generated speech (legacy): {legacy_speech.output_url}")
|
|
199
|
+
elif isinstance(legacy_speech, dict) and 'output_url' in legacy_speech:
|
|
200
|
+
print(f"🎵 Generated speech (legacy): {legacy_speech['output_url']}")
|
|
201
|
+
|
|
138
202
|
# List all voice profiles
|
|
139
203
|
print("\n📋 Your voice profiles:")
|
|
140
204
|
voices = client.voice.list_voice_profiles(limit=10)
|
|
@@ -443,7 +507,8 @@ def main():
|
|
|
443
507
|
|
|
444
508
|
# Run examples
|
|
445
509
|
try:
|
|
446
|
-
|
|
510
|
+
voice_generation_example(client) # New unified method (recommended)
|
|
511
|
+
voice_cloning_example(client) # Legacy method for backward compatibility
|
|
447
512
|
voice_profile_example(client)
|
|
448
513
|
music_generation_example(client)
|
|
449
514
|
transcription_example(client)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|