audiopod 1.0.0.tar.gz → 1.1.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audiopod-1.1.1/CHANGELOG.md +207 -0
- audiopod-1.1.1/LICENSE +21 -0
- audiopod-1.1.1/MANIFEST.in +25 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/PKG-INFO +17 -8
- {audiopod-1.0.0 → audiopod-1.1.1}/README.md +14 -7
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/__init__.py +1 -1
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/client.py +4 -1
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/models.py +21 -6
- audiopod-1.1.1/audiopod/py.typed +2 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/__init__.py +3 -1
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/music.py +104 -16
- audiopod-1.1.1/audiopod/services/stem_extraction.py +180 -0
- audiopod-1.1.1/audiopod/services/translation.py +196 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod.egg-info/SOURCES.txt +11 -8
- audiopod-1.1.1/examples/README.md +81 -0
- audiopod-1.1.1/examples/basic_usage.py +471 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/pyproject.toml +1 -1
- audiopod-1.1.1/requirements.txt +7 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/setup.py +1 -1
- audiopod-1.1.1/tests/test_end_to_end_integration.py +617 -0
- audiopod-1.1.1/tests/test_sdk_api_compatibility.py +892 -0
- audiopod-1.0.0/audiopod/services/translation.py +0 -81
- audiopod-1.0.0/audiopod.egg-info/PKG-INFO +0 -395
- audiopod-1.0.0/audiopod.egg-info/dependency_links.txt +0 -1
- audiopod-1.0.0/audiopod.egg-info/entry_points.txt +0 -2
- audiopod-1.0.0/audiopod.egg-info/not-zip-safe +0 -1
- audiopod-1.0.0/audiopod.egg-info/requires.txt +0 -21
- audiopod-1.0.0/audiopod.egg-info/top_level.txt +0 -1
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/cli.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/config.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/exceptions.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/base.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/credits.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/denoiser.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/karaoke.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/speaker.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/transcription.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/voice.py +0 -0
- {audiopod-1.0.0 → audiopod-1.1.1}/setup.cfg +0 -0
audiopod-1.1.1/CHANGELOG.md
ADDED
@@ -0,0 +1,207 @@
# Changelog

All notable changes to the AudioPod Python SDK will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.1.1] - 2024-12-15

### 🔧 Translation Service Fixes

This release fixes the translation service to use the proper speech-to-speech translation endpoint and adds enhanced functionality.

### ✨ Added

- **Speech-to-Speech Translation**: Now uses the correct `/api/v1/translation/translate/speech` endpoint
  - Preserves original speaker voice characteristics during translation
  - Supports both audio and video file translation
  - Maintains speaker separation in multi-speaker content

- **URL-Based Translation**: Support for translating audio/video from URLs
  - Direct media URL support (YouTube, audio links, etc.)
  - No need to download files locally first

- **Enhanced Translation Job Management**:
  - `list_translation_jobs()` - List translation history with pagination
  - `retry_translation()` - Retry failed translation jobs
  - `delete_translation_job()` - Delete translation jobs
  - `translate_speech()` - Alias method for clearer API

### 🔧 Fixed

- **Translation Endpoint**: Changed from generic `/translate` to speech-specific `/translate/speech`
- **API Schema Alignment**: Request and response formats now match the actual API
- **Response Model**: Updated `TranslationResult` to include all API response fields:
  - `translated_audio_url` - Direct URL to translated audio
  - `video_output_url` - Translated video output (when applicable)
  - `transcript_urls` - Transcript files in multiple formats
  - `display_name` - Original file display name
  - `is_video` - Whether the input was a video file

### 🏗️ Improved

- **Better Error Handling**: Enhanced validation for file vs URL inputs
- **Backward Compatibility**: Maintained `audio_output_url` property for existing code
- **Enhanced Examples**: Updated documentation and examples to show new features
- **Type Safety**: Improved type hints and validation

### 📚 Documentation

- **Updated Examples**: `basic_usage.py` now demonstrates speech-to-speech translation
- **README Updates**: Corrected API usage examples with proper endpoint usage
- **Method Documentation**: Enhanced docstrings with accurate parameter descriptions

### 🚀 Usage Examples

#### Fixed Speech Translation
```python
# Speech-to-speech translation (preserves voice characteristics)
translation = client.translation.translate_speech(
    audio_file="english_speech.wav",
    target_language="es",    # Spanish
    source_language="en",    # Optional - auto-detect
    wait_for_completion=True
)

# URL-based translation
url_translation = client.translation.translate_speech(
    url="https://example.com/audio.mp3",
    target_language="fr",    # French
    wait_for_completion=True
)

# Job management
jobs = client.translation.list_translation_jobs(limit=10)
retry_job = client.translation.retry_translation(failed_job_id)
```

### 🔄 Migration Notes

- **No Breaking Changes**: Existing `translate_audio()` method continues to work
- **Enhanced Functionality**: Now uses the proper speech-to-speech endpoint automatically
- **New Properties**: Additional response fields available in `TranslationResult`

---

## [1.1.0] - 2024-01-15

### 🎉 Major API Compatibility Update

This release brings full compatibility with the AudioPod v1 API specifications and includes significant improvements and new features.

### ✨ Added

- **New Stem Extraction Service**: Complete implementation of audio stem separation
  - `StemExtractionService` with support for vocals, drums, bass, and instrument separation
  - Support for both `htdemucs` and `htdemucs_6s` models
  - Methods: `extract_stems()`, `get_stem_job()`, `list_stem_jobs()`, `delete_stem_job()`

- **Enhanced Music Generation**: New vocals generation capability
  - `generate_vocals()` method for lyric-to-vocals generation
  - Supports the `/api/v1/music/lyric2vocals` endpoint

- **Comprehensive Test Suite**: Production-ready testing framework
  - End-to-end integration tests (`test_end_to_end_integration.py`)
  - API compatibility validation tests (`test_sdk_api_compatibility.py`)
  - Complete SDK structure validation (`validate_sdk_structure.py`)
  - Comprehensive test runner (`test_sdk_comprehensive.py`)

### 🔧 Fixed

- **Music Service API Schema Alignment**: Critical fixes for API compatibility
  - Fixed parameter names: `duration` → `audio_duration`
  - Fixed parameter names: `num_inference_steps` → `infer_step`
  - Fixed parameter names: `seed` → `manual_seeds` (now accepts a list)
  - Fixed response handling to properly extract the `job` object from API responses

- **Enhanced Music Generation Methods**: Improved existing capabilities
  - `generate_music()`: Now uses correct API schema parameters
  - `generate_rap()`: Enhanced with proper prompt construction and LoRA support
  - `generate_instrumental()`: Improved parameter mapping
  - `list_music_jobs()`: Fixed pagination parameter (`offset` → `skip`)

- **Response Format Handling**: Proper API response parsing
  - All music generation endpoints now correctly handle the `{"job": {...}, "message": "..."}` response format
  - Improved error handling and status checking

### 🏗️ Improved

- **Service Integration**: Better organization and accessibility
  - All services properly integrated in both sync and async clients
  - Enhanced error handling across all services
  - Improved parameter validation

- **Code Quality**: Enhanced maintainability and reliability
  - Better type hints and documentation
  - Improved error messages
  - Enhanced validation for all input parameters

### 📚 Documentation

- **Comprehensive Fix Documentation**: Detailed improvement summary
  - Complete documentation of all changes in `SDK_FIXES_SUMMARY.md`
  - Usage examples for all new features
  - Migration guide (no breaking changes)

- **Testing Documentation**: Complete testing framework
  - Instructions for running validation tests
  - API compatibility verification procedures
  - External developer onboarding documentation

### 🔒 Validation

- **100% Structure Validation Success**: All improvements verified
  - 9/9 validation checks passed
  - Complete API endpoint compatibility confirmed
  - All services properly integrated and functional

### 🚀 Usage Examples

#### New Stem Extraction
```python
# Extract audio stems
job = client.stem_extraction.extract_stems(
    audio_file="song.wav",
    stem_types=["vocals", "drums", "bass", "other"],
    model_name="htdemucs",
    wait_for_completion=True
)
```

#### Enhanced Music Generation
```python
# Generate vocals from lyrics
vocals_job = client.music.generate_vocals(
    lyrics="Your song lyrics here",
    prompt="pop vocals, female voice",
    duration=120.0
)

# Improved music generation with correct parameters
music_job = client.music.generate_music(
    prompt="upbeat electronic dance music",
    duration=120.0,            # Now correctly maps to audio_duration
    guidance_scale=7.5,
    num_inference_steps=50,    # Now correctly maps to infer_step
    seed=12345                 # Now correctly maps to manual_seeds=[12345]
)
```

### 🔄 Migration Notes

- **No Breaking Changes**: All existing code continues to work
- **Improved Reliability**: Better error handling and API compatibility
- **Enhanced Features**: New capabilities available immediately

---

## [1.0.0] - 2024-01-01

### 🎉 Initial Release

- Initial implementation of AudioPod Python SDK
- Support for voice cloning, music generation, transcription, and translation
- Async and sync client implementations
- Basic API integration and authentication
- Core service implementations
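The job-management methods listed in the 1.1.1 notes above compose into a simple maintenance loop. The sketch below is illustrative only: it assumes `client` is an already-configured AudioPod client as in the changelog snippets, that `list_translation_jobs()` returns job objects with `id` and `status` attributes, and that `"failed"`/`"completed"` are valid status strings; none of those details are confirmed by this diff.

```python
# Hypothetical cleanup pass over recent translation jobs (status values assumed).
jobs = client.translation.list_translation_jobs(limit=50)

for job in jobs:
    if job.status == "failed":
        # Re-queue failed translations.
        client.translation.retry_translation(job.id)
    elif job.status == "completed":
        # Drop finished jobs we no longer need to keep around.
        client.translation.delete_translation_job(job.id)
```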
audiopod-1.1.1/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 AudioPod AI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
audiopod-1.1.1/MANIFEST.in
ADDED
@@ -0,0 +1,25 @@
# Include important files in source distribution
include README.md
include LICENSE
include CHANGELOG.md
include requirements.txt
include pyproject.toml

# Include package data
include audiopod/py.typed

# Include examples and tests
recursive-include examples *.py *.md
recursive-include tests *.py

# Exclude development and build artifacts
exclude BUILD_AND_PUBLISH.md
exclude INSTALLATION.md
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
recursive-exclude * *.so
recursive-exclude * .DS_Store
prune dev-tools
prune dist
prune build
prune *.egg-info
{audiopod-1.0.0 → audiopod-1.1.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: audiopod
-Version: 1.0.0
+Version: 1.1.1
 Summary: Professional Audio Processing API Client for Python
 Home-page: https://github.com/audiopod-ai/audiopod-python
 Author: AudioPod AI
@@ -25,6 +25,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio :: Conversion
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
+License-File: LICENSE
 Requires-Dist: requests>=2.28.0
 Requires-Dist: aiohttp>=3.8.0
 Requires-Dist: pydantic>=1.10.0
@@ -46,6 +47,7 @@ Requires-Dist: sphinx-rtd-theme>=1.2.0; extra == "docs"
 Requires-Dist: sphinx-autodoc-typehints>=1.19.0; extra == "docs"
 Dynamic: author
 Dynamic: home-page
+Dynamic: license-file
 Dynamic: requires-python

 # AudioPod Python SDK
@@ -140,17 +142,25 @@ print(f"Transcript: {transcript.transcript}")
 print(f"Detected {len(transcript.segments)} speakers")
 ```

-####
+#### Speech-to-Speech Translation

 ```python
-# Translate
-translation = client.translation.
+# Translate speech while preserving voice characteristics
+translation = client.translation.translate_speech(
     audio_file="path/to/english_audio.wav",
     target_language="es",  # Spanish
+    source_language="en",  # English (optional - auto-detect)
     wait_for_completion=True
 )

-print(f"Translated audio URL: {translation.
+print(f"Translated audio URL: {translation.translated_audio_url}")
+
+# Or translate from URL
+url_translation = client.translation.translate_speech(
+    url="https://example.com/audio.mp3",
+    target_language="fr",  # French
+    wait_for_completion=True
+)
 ```

 ### Async Support
@@ -380,11 +390,10 @@ audiopod transcription transcribe audio.mp3 --language en

 ## Support

-- 📖 [
-- 🎯 [API Reference](https://api.audiopod.ai/docs)
+- 📖 [API Reference](https://docs.audiopod.ai)
- 💬 [Discord Community](https://discord.gg/audiopod)
- 📧 [Email Support](mailto:support@audiopod.ai)
-- 🐛 [Bug Reports](https://github.com/
+- 🐛 [Bug Reports](https://github.com/AudiopodAI/audiopod)

 ## License

{audiopod-1.0.0 → audiopod-1.1.1}/README.md
@@ -90,17 +90,25 @@ print(f"Transcript: {transcript.transcript}")
 print(f"Detected {len(transcript.segments)} speakers")
 ```

-####
+#### Speech-to-Speech Translation

 ```python
-# Translate
-translation = client.translation.
+# Translate speech while preserving voice characteristics
+translation = client.translation.translate_speech(
     audio_file="path/to/english_audio.wav",
     target_language="es",  # Spanish
+    source_language="en",  # English (optional - auto-detect)
     wait_for_completion=True
 )

-print(f"Translated audio URL: {translation.
+print(f"Translated audio URL: {translation.translated_audio_url}")
+
+# Or translate from URL
+url_translation = client.translation.translate_speech(
+    url="https://example.com/audio.mp3",
+    target_language="fr",  # French
+    wait_for_completion=True
+)
 ```

 ### Async Support
@@ -330,11 +338,10 @@ audiopod transcription transcribe audio.mp3 --language en

 ## Support

-- 📖 [
-- 🎯 [API Reference](https://api.audiopod.ai/docs)
+- 📖 [API Reference](https://docs.audiopod.ai)
- 💬 [Discord Community](https://discord.gg/audiopod)
- 📧 [Email Support](mailto:support@audiopod.ai)
-- 🐛 [Bug Reports](https://github.com/
+- 🐛 [Bug Reports](https://github.com/AudiopodAI/audiopod)

 ## License

{audiopod-1.0.0 → audiopod-1.1.1}/audiopod/client.py
@@ -23,7 +23,8 @@ from .services import (
     SpeakerService,
     DenoiserService,
     KaraokeService,
-    CreditService
+    CreditService,
+    StemExtractionService
 )

 logger = logging.getLogger(__name__)
@@ -139,6 +140,7 @@ class Client(BaseClient):
         self.denoiser = DenoiserService(self)
         self.karaoke = KaraokeService(self)
         self.credits = CreditService(self)
+        self.stem_extraction = StemExtractionService(self)

     def request(
         self,
@@ -227,6 +229,7 @@ class AsyncClient(BaseClient):
         self.denoiser = DenoiserService(self, async_mode=True)
         self.karaoke = KaraokeService(self, async_mode=True)
         self.credits = CreditService(self, async_mode=True)
+        self.stem_extraction = StemExtractionService(self, async_mode=True)

     @property
     def session(self) -> aiohttp.ClientSession:
{audiopod-1.0.0 → audiopod-1.1.1}/audiopod/models.py
@@ -151,13 +151,18 @@ class MusicGenerationResult:

 @dataclass
 class TranslationResult:
-    """
+    """Speech translation job result"""
     job: Job
     source_language: Optional[str] = None
     target_language: Optional[str] = None
-
-
+    display_name: Optional[str] = None
+    audio_output_path: Optional[str] = None
+    video_output_path: Optional[str] = None
     transcript_path: Optional[str] = None
+    translated_audio_url: Optional[str] = None
+    video_output_url: Optional[str] = None
+    transcript_urls: Optional[Dict[str, str]] = None
+    is_video: bool = False

     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> 'TranslationResult':
@@ -166,10 +171,20 @@ class TranslationResult:
             job=Job.from_dict(data),
             source_language=data.get('source_language'),
             target_language=data.get('target_language'),
-
-
-
+            display_name=data.get('display_name'),
+            audio_output_path=data.get('audio_output_path'),
+            video_output_path=data.get('video_output_path'),
+            transcript_path=data.get('transcript_path'),
+            translated_audio_url=data.get('translated_audio_url'),
+            video_output_url=data.get('video_output_url'),
+            transcript_urls=data.get('transcript_urls'),
+            is_video=data.get('is_video', False)
         )
+
+    @property
+    def audio_output_url(self) -> Optional[str]:
+        """Backward compatibility property - returns translated_audio_url"""
+        return self.translated_audio_url


 @dataclass
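The `TranslationResult` changes above are easiest to read from the caller's side. A minimal sketch, assuming `result` is a `TranslationResult` returned by `translate_speech()`:

```python
# New field populated directly from the API response.
print(result.translated_audio_url)

# Legacy accessor kept for pre-1.1.1 code; per the diff it simply forwards
# to the new field, so the two values are always equal.
assert result.audio_output_url == result.translated_audio_url

# Video inputs additionally populate the video-specific fields.
if result.is_video:
    print(result.video_output_url)
```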
{audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/__init__.py
@@ -11,6 +11,7 @@ from .speaker import SpeakerService
 from .denoiser import DenoiserService
 from .karaoke import KaraokeService
 from .credits import CreditService
+from .stem_extraction import StemExtractionService

 __all__ = [
     "VoiceService",
@@ -20,5 +21,6 @@ __all__ = [
     "SpeakerService",
     "DenoiserService",
     "KaraokeService",
-    "CreditService"
+    "CreditService",
+    "StemExtractionService"
 ]
{audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/music.py
@@ -51,15 +51,15 @@ class MusicService(BaseService):
         if seed is not None and (seed < 0 or seed > 2**32 - 1):
             raise ValidationError("Seed must be between 0 and 2^32 - 1")

-        # Prepare request data
+        # Prepare request data - FIXED: Use correct parameter names matching API schema
         data = {
             "prompt": prompt,
-            "duration": duration,
+            "audio_duration": duration,  # FIXED: API expects "audio_duration" not "duration"
             "guidance_scale": guidance_scale,
-            "num_inference_steps": num_inference_steps
+            "infer_step": num_inference_steps  # FIXED: API expects "infer_step" not "num_inference_steps"
         }
         if seed is not None:
-            data["seed"] = seed
+            data["manual_seeds"] = [seed]  # FIXED: API expects "manual_seeds" list not "seed"
         if display_name:
             data["display_name"] = display_name.strip()

@@ -68,7 +68,9 @@ class MusicService(BaseService):
             return self._async_generate_music(data, wait_for_completion, timeout)
         else:
             response = self.client.request("POST", "/api/v1/music/text2music", data=data)
-            job = Job.from_dict(response)
+            # FIXED: Handle response format correctly - API returns {"job": {...}, "message": "..."}
+            job_data = response.get("job", response)
+            job = Job.from_dict(job_data)

             if wait_for_completion:
                 completed_job = self._wait_for_completion(job.id, timeout)
@@ -84,7 +86,9 @@ class MusicService(BaseService):
     ) -> Union[Job, MusicGenerationResult]:
         """Async version of generate_music"""
         response = await self.client.request("POST", "/api/v1/music/text2music", data=data)
-
+        # FIXED: Handle response format correctly
+        job_data = response.get("job", response)
+        job = Job.from_dict(job_data)

         if wait_for_completion:
             completed_job = await self._async_wait_for_completion(job.id, timeout)
@@ -122,11 +126,14 @@ class MusicService(BaseService):
         if style not in ["modern", "classic", "trap"]:
             raise ValidationError("Style must be 'modern', 'classic', or 'trap'")

-        # Prepare request data
+        # Prepare request data - FIXED: Match API schema for text2rap
         data = {
+            "prompt": f"rap music, {style} style",  # FIXED: API expects "prompt" field
             "lyrics": lyrics,
-            "
-            "
+            "audio_duration": 120.0,  # Default duration
+            "guidance_scale": 7.5,
+            "infer_step": 50,
+            "lora_name_or_path": "ACE-Step/ACE-Step-v1-chinese-rap-LoRA"  # Rap-specific LoRA
         }
         if display_name:
             data["display_name"] = display_name.strip()
@@ -136,7 +143,9 @@ class MusicService(BaseService):
             return self._async_generate_rap(data, wait_for_completion, timeout)
         else:
             response = self.client.request("POST", "/api/v1/music/text2rap", data=data)
-
+            # FIXED: Handle response format correctly
+            job_data = response.get("job", response)
+            job = Job.from_dict(job_data)

             if wait_for_completion:
                 completed_job = self._wait_for_completion(job.id, timeout)
@@ -152,7 +161,9 @@ class MusicService(BaseService):
     ) -> Union[Job, MusicGenerationResult]:
         """Async version of generate_rap"""
         response = await self.client.request("POST", "/api/v1/music/text2rap", data=data)
-
+        # FIXED: Handle response format correctly
+        job_data = response.get("job", response)
+        job = Job.from_dict(job_data)

         if wait_for_completion:
             completed_job = await self._async_wait_for_completion(job.id, timeout)
@@ -194,10 +205,12 @@ class MusicService(BaseService):
         if tempo is not None and not 60 <= tempo <= 200:
             raise ValidationError("Tempo must be between 60 and 200 BPM")

-        # Prepare request data
+        # Prepare request data - FIXED: Match API schema for prompt2instrumental
         data = {
             "prompt": prompt,
-            "duration": duration
+            "audio_duration": duration,  # FIXED: API expects "audio_duration"
+            "guidance_scale": 7.5,
+            "infer_step": 50
         }
         if instruments:
             data["instruments"] = instruments
@@ -213,7 +226,9 @@ class MusicService(BaseService):
             return self._async_generate_instrumental(data, wait_for_completion, timeout)
         else:
             response = self.client.request("POST", "/api/v1/music/prompt2instrumental", data=data)
-
+            # FIXED: Handle response format correctly
+            job_data = response.get("job", response)
+            job = Job.from_dict(job_data)

             if wait_for_completion:
                 completed_job = self._wait_for_completion(job.id, timeout)
@@ -229,7 +244,80 @@ class MusicService(BaseService):
     ) -> Union[Job, MusicGenerationResult]:
         """Async version of generate_instrumental"""
         response = await self.client.request("POST", "/api/v1/music/prompt2instrumental", data=data)
-
+        # FIXED: Handle response format correctly
+        job_data = response.get("job", response)
+        job = Job.from_dict(job_data)
+
+        if wait_for_completion:
+            completed_job = await self._async_wait_for_completion(job.id, timeout)
+            return MusicGenerationResult.from_dict(completed_job.result or completed_job.__dict__)
+
+        return job
+
+    def generate_vocals(
+        self,
+        lyrics: str,
+        prompt: str = "vocals",
+        duration: float = 120.0,
+        display_name: Optional[str] = None,
+        wait_for_completion: bool = False,
+        timeout: int = 600
+    ) -> Union[Job, MusicGenerationResult]:
+        """
+        Generate vocals from lyrics - NEW METHOD matching API lyric2vocals endpoint
+
+        Args:
+            lyrics: Song lyrics
+            prompt: Vocal style description
+            duration: Duration in seconds
+            display_name: Custom name for the track
+            wait_for_completion: Whether to wait for completion
+            timeout: Maximum time to wait
+
+        Returns:
+            Job object or generation result
+        """
+        # Validate inputs
+        lyrics = self._validate_text_input(lyrics, max_length=10000)
+        prompt = self._validate_text_input(prompt, max_length=2000)
+        if not 10.0 <= duration <= 600.0:
+            raise ValidationError("Duration must be between 10 and 600 seconds")
+
+        # Prepare request data - Match API schema for lyric2vocals
+        data = {
+            "prompt": prompt,
+            "lyrics": lyrics,
+            "audio_duration": duration,
+            "guidance_scale": 7.5,
+            "infer_step": 50
+        }
+        if display_name:
+            data["display_name"] = display_name.strip()
+
+        # Make request
+        if self.async_mode:
+            return self._async_generate_vocals(data, wait_for_completion, timeout)
+        else:
+            response = self.client.request("POST", "/api/v1/music/lyric2vocals", data=data)
+            job_data = response.get("job", response)
+            job = Job.from_dict(job_data)
+
+            if wait_for_completion:
+                completed_job = self._wait_for_completion(job.id, timeout)
+                return MusicGenerationResult.from_dict(completed_job.result or completed_job.__dict__)
+
+            return job
+
+    async def _async_generate_vocals(
+        self,
+        data: Dict[str, Any],
+        wait_for_completion: bool,
+        timeout: int
+    ) -> Union[Job, MusicGenerationResult]:
+        """Async version of generate_vocals"""
+        response = await self.client.request("POST", "/api/v1/music/lyric2vocals", data=data)
+        job_data = response.get("job", response)
+        job = Job.from_dict(job_data)

         if wait_for_completion:
             completed_job = await self._async_wait_for_completion(job.id, timeout)
@@ -322,7 +410,7 @@ class MusicService(BaseService):
         """
         params = {
             "limit": limit,
-            "skip": offset
+            "skip": offset  # FIXED: API uses "skip" parameter for offset
         }
         if status:
             params["status"] = status
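Taken together, the music-service hunks apply one consistent rename of the request payload. The helper below is a standalone sketch of that mapping for the text2music case; only the endpoint, field names, and defaults are taken from the diff above, while the function itself is illustrative and not part of the SDK.

```python
from typing import Any, Dict, Optional


def build_text2music_payload(
    prompt: str,
    duration: float,
    guidance_scale: float = 7.5,
    num_inference_steps: int = 50,
    seed: Optional[int] = None,
    display_name: Optional[str] = None,
) -> Dict[str, Any]:
    """Mirror the request body the fixed generate_music() now sends."""
    data: Dict[str, Any] = {
        "prompt": prompt,
        "audio_duration": duration,         # SDK "duration" -> API "audio_duration"
        "guidance_scale": guidance_scale,   # unchanged
        "infer_step": num_inference_steps,  # SDK "num_inference_steps" -> API "infer_step"
    }
    if seed is not None:
        data["manual_seeds"] = [seed]       # SDK "seed" -> API "manual_seeds" (a list)
    if display_name:
        data["display_name"] = display_name.strip()
    return data


# The payload is POSTed to /api/v1/music/text2music; the response arrives as
# {"job": {...}, "message": "..."}, so the job object is read with
# response.get("job", response), exactly as the fixed service code does.
```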