audiopod 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audiopod-1.2.0/CHANGELOG.md +279 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/PKG-INFO +72 -14
- {audiopod-1.1.0 → audiopod-1.2.0}/README.md +71 -13
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/__init__.py +1 -1
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/models.py +21 -6
- audiopod-1.2.0/audiopod/services/translation.py +196 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/voice.py +169 -75
- {audiopod-1.1.0 → audiopod-1.2.0}/examples/basic_usage.py +120 -19
- {audiopod-1.1.0 → audiopod-1.2.0}/pyproject.toml +1 -1
- {audiopod-1.1.0 → audiopod-1.2.0}/setup.py +1 -1
- audiopod-1.1.0/CHANGELOG.md +0 -128
- audiopod-1.1.0/audiopod/services/translation.py +0 -81
- {audiopod-1.1.0 → audiopod-1.2.0}/LICENSE +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/MANIFEST.in +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/cli.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/client.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/config.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/exceptions.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/py.typed +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/__init__.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/base.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/credits.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/denoiser.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/karaoke.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/music.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/speaker.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/stem_extraction.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod/services/transcription.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/audiopod.egg-info/SOURCES.txt +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/examples/README.md +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/requirements.txt +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/setup.cfg +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/tests/test_end_to_end_integration.py +0 -0
- {audiopod-1.1.0 → audiopod-1.2.0}/tests/test_sdk_api_compatibility.py +0 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to the AudioPod Python SDK will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [1.2.0] - 2024-12-15
|
|
9
|
+
|
|
10
|
+
### 🎤 Unified Voice Generation
|
|
11
|
+
|
|
12
|
+
This release introduces a major architectural improvement with a unified voice generation endpoint, consolidating voice cloning and text-to-speech into a single, consistent API.
|
|
13
|
+
|
|
14
|
+
### ✨ Added
|
|
15
|
+
|
|
16
|
+
- **Unified Voice Generation Method**: New `generate_voice()` method handles both voice cloning and TTS
|
|
17
|
+
- Single endpoint `/api/v1/voice/voices/{voice_identifier}/generate` for all voice operations
|
|
18
|
+
- Supports both voice file upload (cloning) and voice ID (existing profiles)
|
|
19
|
+
- Enhanced parameter support: `audio_format`, `generation_params`, extended speed range (0.25-4.0)
|
|
20
|
+
|
|
21
|
+
- **Enhanced Voice Generation Parameters**:
|
|
22
|
+
- `audio_format`: Support for 'mp3', 'wav', 'ogg' output formats
|
|
23
|
+
- `generation_params`: Provider-specific parameters (speed, temperature, pitch, etc.)
|
|
24
|
+
- Extended speed range: 0.25x to 4.0x (provider dependent)
|
|
25
|
+
|
|
26
|
+
### 🔧 Fixed
|
|
27
|
+
|
|
28
|
+
- **Removed Legacy Clone Endpoint**: No longer uses deprecated `/api/v1/voice/voice-clone`
|
|
29
|
+
- **Unified API Architecture**: All voice generation now uses consistent endpoint structure
|
|
30
|
+
- **Improved Error Handling**: Better validation for mutually exclusive parameters
|
|
31
|
+
|
|
32
|
+
### 🏗️ Improved
|
|
33
|
+
|
|
34
|
+
- **Backward Compatibility**: Existing `clone_voice()` and `generate_speech()` methods continue to work
|
|
35
|
+
- Legacy methods now internally use the unified `generate_voice()` approach
|
|
36
|
+
- No breaking changes for existing code
|
|
37
|
+
- Clear migration path with deprecation warnings in documentation
|
|
38
|
+
|
|
39
|
+
- **Enhanced Documentation**:
|
|
40
|
+
- Updated examples to showcase unified approach
|
|
41
|
+
- Clear distinction between recommended and legacy methods
|
|
42
|
+
- Comprehensive migration guide
|
|
43
|
+
|
|
44
|
+
### 🚀 Usage Examples
|
|
45
|
+
|
|
46
|
+
#### New Unified Approach (Recommended)
|
|
47
|
+
```python
|
|
48
|
+
# Voice cloning
|
|
49
|
+
result = client.voice.generate_voice(
|
|
50
|
+
text="Hello world!",
|
|
51
|
+
voice_file="voice.wav", # For cloning
|
|
52
|
+
language="en",
|
|
53
|
+
audio_format="mp3"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# TTS with existing voice
|
|
57
|
+
result = client.voice.generate_voice(
|
|
58
|
+
text="Hello world!",
|
|
59
|
+
voice_id="profile-id", # For existing voices
|
|
60
|
+
language="en",
|
|
61
|
+
audio_format="mp3"
|
|
62
|
+
)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
#### Backward Compatibility (Legacy methods still work)
|
|
66
|
+
```python
|
|
67
|
+
# These continue to work unchanged
|
|
68
|
+
result = client.voice.clone_voice(voice_file="voice.wav", text="Hello")
|
|
69
|
+
result = client.voice.generate_speech(voice_id="profile-id", text="Hello")
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 🔄 Migration Notes
|
|
73
|
+
|
|
74
|
+
- **No Breaking Changes**: All existing code continues to work without modification
|
|
75
|
+
- **Recommended**: Migrate to `generate_voice()` for new development
|
|
76
|
+
- **Performance**: Unified endpoint provides better consistency and reliability
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## [1.1.1] - 2024-12-15
|
|
81
|
+
|
|
82
|
+
### 🔧 Translation Service Fixes
|
|
83
|
+
|
|
84
|
+
This release fixes the translation service to use the proper speech-to-speech translation endpoint and adds enhanced functionality.
|
|
85
|
+
|
|
86
|
+
### ✨ Added
|
|
87
|
+
|
|
88
|
+
- **Speech-to-Speech Translation**: Now uses the correct `/api/v1/translation/translate/speech` endpoint
|
|
89
|
+
- Preserves original speaker voice characteristics during translation
|
|
90
|
+
- Supports both audio and video file translation
|
|
91
|
+
- Maintains speaker separation in multi-speaker content
|
|
92
|
+
|
|
93
|
+
- **URL-Based Translation**: Support for translating audio/video from URLs
|
|
94
|
+
- Direct media URL support (YouTube, audio links, etc.)
|
|
95
|
+
- No need to download files locally first
|
|
96
|
+
|
|
97
|
+
- **Enhanced Translation Job Management**:
|
|
98
|
+
- `list_translation_jobs()` - List translation history with pagination
|
|
99
|
+
- `retry_translation()` - Retry failed translation jobs
|
|
100
|
+
- `delete_translation_job()` - Delete translation jobs
|
|
101
|
+
- `translate_speech()` - Alias method for clearer API
|
|
102
|
+
|
|
103
|
+
### 🔧 Fixed
|
|
104
|
+
|
|
105
|
+
- **Translation Endpoint**: Changed from generic `/translate` to speech-specific `/translate/speech`
|
|
106
|
+
- **API Schema Alignment**: Request and response formats now match the actual API
|
|
107
|
+
- **Response Model**: Updated `TranslationResult` to include all API response fields:
|
|
108
|
+
- `translated_audio_url` - Direct URL to translated audio
|
|
109
|
+
- `video_output_url` - Translated video output (when applicable)
|
|
110
|
+
- `transcript_urls` - Transcript files in multiple formats
|
|
111
|
+
- `display_name` - Original file display name
|
|
112
|
+
- `is_video` - Whether the input was a video file
|
|
113
|
+
|
|
114
|
+
### 🏗️ Improved
|
|
115
|
+
|
|
116
|
+
- **Better Error Handling**: Enhanced validation for file vs URL inputs
|
|
117
|
+
- **Backward Compatibility**: Maintained `audio_output_url` property for existing code
|
|
118
|
+
- **Enhanced Examples**: Updated documentation and examples to show new features
|
|
119
|
+
- **Type Safety**: Improved type hints and validation
|
|
120
|
+
|
|
121
|
+
### 📚 Documentation
|
|
122
|
+
|
|
123
|
+
- **Updated Examples**: `basic_usage.py` now demonstrates speech-to-speech translation
|
|
124
|
+
- **README Updates**: Corrected API usage examples with proper endpoint usage
|
|
125
|
+
- **Method Documentation**: Enhanced docstrings with accurate parameter descriptions
|
|
126
|
+
|
|
127
|
+
### 🚀 Usage Examples
|
|
128
|
+
|
|
129
|
+
#### Fixed Speech Translation
|
|
130
|
+
```python
|
|
131
|
+
# Speech-to-speech translation (preserves voice characteristics)
|
|
132
|
+
translation = client.translation.translate_speech(
|
|
133
|
+
audio_file="english_speech.wav",
|
|
134
|
+
target_language="es", # Spanish
|
|
135
|
+
source_language="en", # Optional - auto-detect
|
|
136
|
+
wait_for_completion=True
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# URL-based translation
|
|
140
|
+
url_translation = client.translation.translate_speech(
|
|
141
|
+
url="https://example.com/audio.mp3",
|
|
142
|
+
target_language="fr", # French
|
|
143
|
+
wait_for_completion=True
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
# Job management
|
|
147
|
+
jobs = client.translation.list_translation_jobs(limit=10)
|
|
148
|
+
retry_job = client.translation.retry_translation(failed_job_id)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### 🔄 Migration Notes
|
|
152
|
+
|
|
153
|
+
- **No Breaking Changes**: Existing `translate_audio()` method continues to work
|
|
154
|
+
- **Enhanced Functionality**: Now uses proper speech-to-speech endpoint automatically
|
|
155
|
+
- **New Properties**: Additional response fields available in `TranslationResult`
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## [1.1.0] - 2024-01-15
|
|
160
|
+
|
|
161
|
+
### 🎉 Major API Compatibility Update
|
|
162
|
+
|
|
163
|
+
This release brings full compatibility with the AudioPod v1 API specifications and includes significant improvements and new features.
|
|
164
|
+
|
|
165
|
+
### ✨ Added
|
|
166
|
+
|
|
167
|
+
- **New Stem Extraction Service**: Complete implementation of audio stem separation
|
|
168
|
+
- `StemExtractionService` with support for vocals, drums, bass, and instrument separation
|
|
169
|
+
- Support for both `htdemucs` and `htdemucs_6s` models
|
|
170
|
+
- Methods: `extract_stems()`, `get_stem_job()`, `list_stem_jobs()`, `delete_stem_job()`
|
|
171
|
+
|
|
172
|
+
- **Enhanced Music Generation**: New vocals generation capability
|
|
173
|
+
- `generate_vocals()` method for lyric-to-vocals generation
|
|
174
|
+
- Supports the `/api/v1/music/lyric2vocals` endpoint
|
|
175
|
+
|
|
176
|
+
- **Comprehensive Test Suite**: Production-ready testing framework
|
|
177
|
+
- End-to-end integration tests (`test_end_to_end_integration.py`)
|
|
178
|
+
- API compatibility validation tests (`test_sdk_api_compatibility.py`)
|
|
179
|
+
- Complete SDK structure validation (`validate_sdk_structure.py`)
|
|
180
|
+
- Comprehensive test runner (`test_sdk_comprehensive.py`)
|
|
181
|
+
|
|
182
|
+
### 🔧 Fixed
|
|
183
|
+
|
|
184
|
+
- **Music Service API Schema Alignment**: Critical fixes for API compatibility
|
|
185
|
+
- Fixed parameter names: `duration` → `audio_duration`
|
|
186
|
+
- Fixed parameter names: `num_inference_steps` → `infer_step`
|
|
187
|
+
- Fixed parameter names: `seed` → `manual_seeds` (now accepts list)
|
|
188
|
+
- Fixed response handling to properly extract `job` object from API responses
|
|
189
|
+
|
|
190
|
+
- **Enhanced Music Generation Methods**: Improved existing capabilities
|
|
191
|
+
- `generate_music()`: Now uses correct API schema parameters
|
|
192
|
+
- `generate_rap()`: Enhanced with proper prompt construction and LoRA support
|
|
193
|
+
- `generate_instrumental()`: Improved parameter mapping
|
|
194
|
+
- `list_music_jobs()`: Fixed pagination parameter (`offset` → `skip`)
|
|
195
|
+
|
|
196
|
+
- **Response Format Handling**: Proper API response parsing
|
|
197
|
+
- All music generation endpoints now correctly handle `{"job": {...}, "message": "..."}` response format
|
|
198
|
+
- Improved error handling and status checking
|
|
199
|
+
|
|
200
|
+
### 🏗️ Improved
|
|
201
|
+
|
|
202
|
+
- **Service Integration**: Better organization and accessibility
|
|
203
|
+
- All services properly integrated in both sync and async clients
|
|
204
|
+
- Enhanced error handling across all services
|
|
205
|
+
- Improved parameter validation
|
|
206
|
+
|
|
207
|
+
- **Code Quality**: Enhanced maintainability and reliability
|
|
208
|
+
- Better type hints and documentation
|
|
209
|
+
- Improved error messages
|
|
210
|
+
- Enhanced validation for all input parameters
|
|
211
|
+
|
|
212
|
+
### 📚 Documentation
|
|
213
|
+
|
|
214
|
+
- **Comprehensive Fix Documentation**: Detailed improvement summary
|
|
215
|
+
- Complete documentation of all changes in `SDK_FIXES_SUMMARY.md`
|
|
216
|
+
- Usage examples for all new features
|
|
217
|
+
- Migration guide (no breaking changes)
|
|
218
|
+
|
|
219
|
+
- **Testing Documentation**: Complete testing framework
|
|
220
|
+
- Instructions for running validation tests
|
|
221
|
+
- API compatibility verification procedures
|
|
222
|
+
- External developer onboarding documentation
|
|
223
|
+
|
|
224
|
+
### 🔒 Validation
|
|
225
|
+
|
|
226
|
+
- **100% Structure Validation Success**: All improvements verified
|
|
227
|
+
- 9/9 validation checks passed
|
|
228
|
+
- Complete API endpoint compatibility confirmed
|
|
229
|
+
- All services properly integrated and functional
|
|
230
|
+
|
|
231
|
+
### 🚀 Usage Examples
|
|
232
|
+
|
|
233
|
+
#### New Stem Extraction
|
|
234
|
+
```python
|
|
235
|
+
# Extract audio stems
|
|
236
|
+
job = client.stem_extraction.extract_stems(
|
|
237
|
+
audio_file="song.wav",
|
|
238
|
+
stem_types=["vocals", "drums", "bass", "other"],
|
|
239
|
+
model_name="htdemucs",
|
|
240
|
+
wait_for_completion=True
|
|
241
|
+
)
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
#### Enhanced Music Generation
|
|
245
|
+
```python
|
|
246
|
+
# Generate vocals from lyrics
|
|
247
|
+
vocals_job = client.music.generate_vocals(
|
|
248
|
+
lyrics="Your song lyrics here",
|
|
249
|
+
prompt="pop vocals, female voice",
|
|
250
|
+
duration=120.0
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
# Improved music generation with correct parameters
|
|
254
|
+
music_job = client.music.generate_music(
|
|
255
|
+
prompt="upbeat electronic dance music",
|
|
256
|
+
duration=120.0, # Now correctly maps to audio_duration
|
|
257
|
+
guidance_scale=7.5,
|
|
258
|
+
num_inference_steps=50, # Now correctly maps to infer_step
|
|
259
|
+
seed=12345 # Now correctly maps to manual_seeds=[12345]
|
|
260
|
+
)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### 🔄 Migration Notes
|
|
264
|
+
|
|
265
|
+
- **No Breaking Changes**: All existing code continues to work
|
|
266
|
+
- **Improved Reliability**: Better error handling and API compatibility
|
|
267
|
+
- **Enhanced Features**: New capabilities available immediately
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## [1.0.0] - 2024-01-01
|
|
272
|
+
|
|
273
|
+
### 🎉 Initial Release
|
|
274
|
+
|
|
275
|
+
- Initial implementation of AudioPod Python SDK
|
|
276
|
+
- Support for voice cloning, music generation, transcription, and translation
|
|
277
|
+
- Async and sync client implementations
|
|
278
|
+
- Basic API integration and authentication
|
|
279
|
+
- Core service implementations
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: audiopod
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Professional Audio Processing API Client for Python
|
|
5
5
|
Home-page: https://github.com/audiopod-ai/audiopod-python
|
|
6
6
|
Author: AudioPod AI
|
|
@@ -95,7 +95,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
|
|
|
95
95
|
|
|
96
96
|
### Basic Usage
|
|
97
97
|
|
|
98
|
-
#### Voice Cloning
|
|
98
|
+
#### Voice Generation (Unified TTS & Cloning)
|
|
99
99
|
|
|
100
100
|
```python
|
|
101
101
|
import audiopod
|
|
@@ -103,15 +103,39 @@ import audiopod
|
|
|
103
103
|
# Initialize client
|
|
104
104
|
client = audiopod.Client()
|
|
105
105
|
|
|
106
|
-
#
|
|
107
|
-
job = client.voice.
|
|
106
|
+
# Generate voice using file cloning (unified approach)
|
|
107
|
+
job = client.voice.generate_voice(
|
|
108
|
+
text="Hello! This is voice generation using a cloned voice.",
|
|
109
|
+
voice_file="path/to/voice_sample.wav", # For voice cloning
|
|
110
|
+
language="en",
|
|
111
|
+
audio_format="mp3",
|
|
112
|
+
generation_params={
|
|
113
|
+
"speed": 1.0
|
|
114
|
+
},
|
|
115
|
+
wait_for_completion=True
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
print(f"Generated audio URL: {job.output_url}")
|
|
119
|
+
|
|
120
|
+
# Generate speech with existing voice profile (unified approach)
|
|
121
|
+
speech = client.voice.generate_voice(
|
|
122
|
+
text="Hello from my voice profile!",
|
|
123
|
+
voice_id="voice-profile-id", # For existing voice profiles
|
|
124
|
+
language="en",
|
|
125
|
+
audio_format="mp3",
|
|
126
|
+
generation_params={
|
|
127
|
+
"speed": 1.0
|
|
128
|
+
},
|
|
129
|
+
wait_for_completion=True
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# Backward compatibility methods (deprecated - use generate_voice instead)
|
|
133
|
+
legacy_clone = client.voice.clone_voice(
|
|
108
134
|
voice_file="path/to/voice_sample.wav",
|
|
109
135
|
text="Hello! This is a cloned voice speaking.",
|
|
110
136
|
language="en",
|
|
111
137
|
wait_for_completion=True
|
|
112
138
|
)
|
|
113
|
-
|
|
114
|
-
print(f"Generated audio URL: {job['output_url']}")
|
|
115
139
|
```
|
|
116
140
|
|
|
117
141
|
#### Music Generation
|
|
@@ -142,17 +166,25 @@ print(f"Transcript: {transcript.transcript}")
|
|
|
142
166
|
print(f"Detected {len(transcript.segments)} speakers")
|
|
143
167
|
```
|
|
144
168
|
|
|
145
|
-
####
|
|
169
|
+
#### Speech-to-Speech Translation
|
|
146
170
|
|
|
147
171
|
```python
|
|
148
|
-
# Translate
|
|
149
|
-
translation = client.translation.
|
|
172
|
+
# Translate speech while preserving voice characteristics
|
|
173
|
+
translation = client.translation.translate_speech(
|
|
150
174
|
audio_file="path/to/english_audio.wav",
|
|
151
175
|
target_language="es", # Spanish
|
|
176
|
+
source_language="en", # English (optional - auto-detect)
|
|
152
177
|
wait_for_completion=True
|
|
153
178
|
)
|
|
154
179
|
|
|
155
|
-
print(f"Translated audio URL: {translation.
|
|
180
|
+
print(f"Translated audio URL: {translation.translated_audio_url}")
|
|
181
|
+
|
|
182
|
+
# Or translate from URL
|
|
183
|
+
url_translation = client.translation.translate_speech(
|
|
184
|
+
url="https://example.com/audio.mp3",
|
|
185
|
+
target_language="fr", # French
|
|
186
|
+
wait_for_completion=True
|
|
187
|
+
)
|
|
156
188
|
```
|
|
157
189
|
|
|
158
190
|
### Async Support
|
|
@@ -189,10 +221,22 @@ voice_profile = client.voice.create_voice_profile(
|
|
|
189
221
|
wait_for_completion=True
|
|
190
222
|
)
|
|
191
223
|
|
|
192
|
-
# Use the voice profile for speech generation
|
|
193
|
-
speech = client.voice.
|
|
224
|
+
# Use the voice profile for speech generation (unified approach - recommended)
|
|
225
|
+
speech = client.voice.generate_voice(
|
|
226
|
+
text="This uses my custom voice profile with the unified method!",
|
|
194
227
|
voice_id=voice_profile.id,
|
|
195
|
-
|
|
228
|
+
language="en",
|
|
229
|
+
audio_format="mp3",
|
|
230
|
+
generation_params={
|
|
231
|
+
"speed": 1.0
|
|
232
|
+
},
|
|
233
|
+
wait_for_completion=True
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
# Legacy method (still works - uses generate_voice internally)
|
|
237
|
+
legacy_speech = client.voice.generate_speech(
|
|
238
|
+
voice_id=voice_profile.id,
|
|
239
|
+
text="This uses the legacy method.",
|
|
196
240
|
wait_for_completion=True
|
|
197
241
|
)
|
|
198
242
|
```
|
|
@@ -335,7 +379,7 @@ client = audiopod.Client(
|
|
|
335
379
|
|
|
336
380
|
### Services
|
|
337
381
|
|
|
338
|
-
- `client.voice`: Voice
|
|
382
|
+
- `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
|
|
339
383
|
- `client.music`: Music generation and editing
|
|
340
384
|
- `client.transcription`: Speech-to-text transcription
|
|
341
385
|
- `client.translation`: Audio/video translation
|
|
@@ -344,6 +388,20 @@ client = audiopod.Client(
|
|
|
344
388
|
- `client.karaoke`: Karaoke video generation
|
|
345
389
|
- `client.credits`: Credit management and usage tracking
|
|
346
390
|
|
|
391
|
+
#### Voice Service Methods
|
|
392
|
+
|
|
393
|
+
**Recommended (Unified Approach):**
|
|
394
|
+
- `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
|
|
395
|
+
|
|
396
|
+
**Legacy Methods (Backward Compatibility):**
|
|
397
|
+
- `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
|
|
398
|
+
- `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
|
|
399
|
+
|
|
400
|
+
**Voice Management:**
|
|
401
|
+
- `client.voice.create_voice_profile()` - Create reusable voice profiles
|
|
402
|
+
- `client.voice.list_voice_profiles()` - List available voice profiles
|
|
403
|
+
- `client.voice.delete_voice_profile()` - Delete voice profiles
|
|
404
|
+
|
|
347
405
|
### Models
|
|
348
406
|
|
|
349
407
|
- `Job`: Base job information and status
|
|
@@ -43,7 +43,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
|
|
|
43
43
|
|
|
44
44
|
### Basic Usage
|
|
45
45
|
|
|
46
|
-
#### Voice Cloning
|
|
46
|
+
#### Voice Generation (Unified TTS & Cloning)
|
|
47
47
|
|
|
48
48
|
```python
|
|
49
49
|
import audiopod
|
|
@@ -51,15 +51,39 @@ import audiopod
|
|
|
51
51
|
# Initialize client
|
|
52
52
|
client = audiopod.Client()
|
|
53
53
|
|
|
54
|
-
#
|
|
55
|
-
job = client.voice.
|
|
54
|
+
# Generate voice using file cloning (unified approach)
|
|
55
|
+
job = client.voice.generate_voice(
|
|
56
|
+
text="Hello! This is voice generation using a cloned voice.",
|
|
57
|
+
voice_file="path/to/voice_sample.wav", # For voice cloning
|
|
58
|
+
language="en",
|
|
59
|
+
audio_format="mp3",
|
|
60
|
+
generation_params={
|
|
61
|
+
"speed": 1.0
|
|
62
|
+
},
|
|
63
|
+
wait_for_completion=True
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
print(f"Generated audio URL: {job.output_url}")
|
|
67
|
+
|
|
68
|
+
# Generate speech with existing voice profile (unified approach)
|
|
69
|
+
speech = client.voice.generate_voice(
|
|
70
|
+
text="Hello from my voice profile!",
|
|
71
|
+
voice_id="voice-profile-id", # For existing voice profiles
|
|
72
|
+
language="en",
|
|
73
|
+
audio_format="mp3",
|
|
74
|
+
generation_params={
|
|
75
|
+
"speed": 1.0
|
|
76
|
+
},
|
|
77
|
+
wait_for_completion=True
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# Backward compatibility methods (deprecated - use generate_voice instead)
|
|
81
|
+
legacy_clone = client.voice.clone_voice(
|
|
56
82
|
voice_file="path/to/voice_sample.wav",
|
|
57
83
|
text="Hello! This is a cloned voice speaking.",
|
|
58
84
|
language="en",
|
|
59
85
|
wait_for_completion=True
|
|
60
86
|
)
|
|
61
|
-
|
|
62
|
-
print(f"Generated audio URL: {job['output_url']}")
|
|
63
87
|
```
|
|
64
88
|
|
|
65
89
|
#### Music Generation
|
|
@@ -90,17 +114,25 @@ print(f"Transcript: {transcript.transcript}")
|
|
|
90
114
|
print(f"Detected {len(transcript.segments)} speakers")
|
|
91
115
|
```
|
|
92
116
|
|
|
93
|
-
####
|
|
117
|
+
#### Speech-to-Speech Translation
|
|
94
118
|
|
|
95
119
|
```python
|
|
96
|
-
# Translate
|
|
97
|
-
translation = client.translation.
|
|
120
|
+
# Translate speech while preserving voice characteristics
|
|
121
|
+
translation = client.translation.translate_speech(
|
|
98
122
|
audio_file="path/to/english_audio.wav",
|
|
99
123
|
target_language="es", # Spanish
|
|
124
|
+
source_language="en", # English (optional - auto-detect)
|
|
100
125
|
wait_for_completion=True
|
|
101
126
|
)
|
|
102
127
|
|
|
103
|
-
print(f"Translated audio URL: {translation.
|
|
128
|
+
print(f"Translated audio URL: {translation.translated_audio_url}")
|
|
129
|
+
|
|
130
|
+
# Or translate from URL
|
|
131
|
+
url_translation = client.translation.translate_speech(
|
|
132
|
+
url="https://example.com/audio.mp3",
|
|
133
|
+
target_language="fr", # French
|
|
134
|
+
wait_for_completion=True
|
|
135
|
+
)
|
|
104
136
|
```
|
|
105
137
|
|
|
106
138
|
### Async Support
|
|
@@ -137,10 +169,22 @@ voice_profile = client.voice.create_voice_profile(
|
|
|
137
169
|
wait_for_completion=True
|
|
138
170
|
)
|
|
139
171
|
|
|
140
|
-
# Use the voice profile for speech generation
|
|
141
|
-
speech = client.voice.
|
|
172
|
+
# Use the voice profile for speech generation (unified approach - recommended)
|
|
173
|
+
speech = client.voice.generate_voice(
|
|
174
|
+
text="This uses my custom voice profile with the unified method!",
|
|
142
175
|
voice_id=voice_profile.id,
|
|
143
|
-
|
|
176
|
+
language="en",
|
|
177
|
+
audio_format="mp3",
|
|
178
|
+
generation_params={
|
|
179
|
+
"speed": 1.0
|
|
180
|
+
},
|
|
181
|
+
wait_for_completion=True
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
# Legacy method (still works - uses generate_voice internally)
|
|
185
|
+
legacy_speech = client.voice.generate_speech(
|
|
186
|
+
voice_id=voice_profile.id,
|
|
187
|
+
text="This uses the legacy method.",
|
|
144
188
|
wait_for_completion=True
|
|
145
189
|
)
|
|
146
190
|
```
|
|
@@ -283,7 +327,7 @@ client = audiopod.Client(
|
|
|
283
327
|
|
|
284
328
|
### Services
|
|
285
329
|
|
|
286
|
-
- `client.voice`: Voice
|
|
330
|
+
- `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
|
|
287
331
|
- `client.music`: Music generation and editing
|
|
288
332
|
- `client.transcription`: Speech-to-text transcription
|
|
289
333
|
- `client.translation`: Audio/video translation
|
|
@@ -292,6 +336,20 @@ client = audiopod.Client(
|
|
|
292
336
|
- `client.karaoke`: Karaoke video generation
|
|
293
337
|
- `client.credits`: Credit management and usage tracking
|
|
294
338
|
|
|
339
|
+
#### Voice Service Methods
|
|
340
|
+
|
|
341
|
+
**Recommended (Unified Approach):**
|
|
342
|
+
- `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
|
|
343
|
+
|
|
344
|
+
**Legacy Methods (Backward Compatibility):**
|
|
345
|
+
- `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
|
|
346
|
+
- `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
|
|
347
|
+
|
|
348
|
+
**Voice Management:**
|
|
349
|
+
- `client.voice.create_voice_profile()` - Create reusable voice profiles
|
|
350
|
+
- `client.voice.list_voice_profiles()` - List available voice profiles
|
|
351
|
+
- `client.voice.delete_voice_profile()` - Delete voice profiles
|
|
352
|
+
|
|
295
353
|
### Models
|
|
296
354
|
|
|
297
355
|
- `Job`: Base job information and status
|
|
@@ -151,13 +151,18 @@ class MusicGenerationResult:
|
|
|
151
151
|
|
|
152
152
|
@dataclass
|
|
153
153
|
class TranslationResult:
|
|
154
|
-
"""
|
|
154
|
+
"""Speech translation job result"""
|
|
155
155
|
job: Job
|
|
156
156
|
source_language: Optional[str] = None
|
|
157
157
|
target_language: Optional[str] = None
|
|
158
|
-
|
|
159
|
-
|
|
158
|
+
display_name: Optional[str] = None
|
|
159
|
+
audio_output_path: Optional[str] = None
|
|
160
|
+
video_output_path: Optional[str] = None
|
|
160
161
|
transcript_path: Optional[str] = None
|
|
162
|
+
translated_audio_url: Optional[str] = None
|
|
163
|
+
video_output_url: Optional[str] = None
|
|
164
|
+
transcript_urls: Optional[Dict[str, str]] = None
|
|
165
|
+
is_video: bool = False
|
|
161
166
|
|
|
162
167
|
@classmethod
|
|
163
168
|
def from_dict(cls, data: Dict[str, Any]) -> 'TranslationResult':
|
|
@@ -166,10 +171,20 @@ class TranslationResult:
|
|
|
166
171
|
job=Job.from_dict(data),
|
|
167
172
|
source_language=data.get('source_language'),
|
|
168
173
|
target_language=data.get('target_language'),
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
174
|
+
display_name=data.get('display_name'),
|
|
175
|
+
audio_output_path=data.get('audio_output_path'),
|
|
176
|
+
video_output_path=data.get('video_output_path'),
|
|
177
|
+
transcript_path=data.get('transcript_path'),
|
|
178
|
+
translated_audio_url=data.get('translated_audio_url'),
|
|
179
|
+
video_output_url=data.get('video_output_url'),
|
|
180
|
+
transcript_urls=data.get('transcript_urls'),
|
|
181
|
+
is_video=data.get('is_video', False)
|
|
172
182
|
)
|
|
183
|
+
|
|
184
|
+
@property
|
|
185
|
+
def audio_output_url(self) -> Optional[str]:
|
|
186
|
+
"""Backward compatibility property - returns translated_audio_url"""
|
|
187
|
+
return self.translated_audio_url
|
|
173
188
|
|
|
174
189
|
|
|
175
190
|
@dataclass
|