audiopod 1.0.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. audiopod-1.1.1/CHANGELOG.md +207 -0
  2. audiopod-1.1.1/LICENSE +21 -0
  3. audiopod-1.1.1/MANIFEST.in +25 -0
  4. {audiopod-1.0.0 → audiopod-1.1.1}/PKG-INFO +17 -8
  5. {audiopod-1.0.0 → audiopod-1.1.1}/README.md +14 -7
  6. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/__init__.py +1 -1
  7. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/client.py +4 -1
  8. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/models.py +21 -6
  9. audiopod-1.1.1/audiopod/py.typed +2 -0
  10. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/__init__.py +3 -1
  11. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/music.py +104 -16
  12. audiopod-1.1.1/audiopod/services/stem_extraction.py +180 -0
  13. audiopod-1.1.1/audiopod/services/translation.py +196 -0
  14. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod.egg-info/SOURCES.txt +11 -8
  15. audiopod-1.1.1/examples/README.md +81 -0
  16. audiopod-1.1.1/examples/basic_usage.py +471 -0
  17. {audiopod-1.0.0 → audiopod-1.1.1}/pyproject.toml +1 -1
  18. audiopod-1.1.1/requirements.txt +7 -0
  19. {audiopod-1.0.0 → audiopod-1.1.1}/setup.py +1 -1
  20. audiopod-1.1.1/tests/test_end_to_end_integration.py +617 -0
  21. audiopod-1.1.1/tests/test_sdk_api_compatibility.py +892 -0
  22. audiopod-1.0.0/audiopod/services/translation.py +0 -81
  23. audiopod-1.0.0/audiopod.egg-info/PKG-INFO +0 -395
  24. audiopod-1.0.0/audiopod.egg-info/dependency_links.txt +0 -1
  25. audiopod-1.0.0/audiopod.egg-info/entry_points.txt +0 -2
  26. audiopod-1.0.0/audiopod.egg-info/not-zip-safe +0 -1
  27. audiopod-1.0.0/audiopod.egg-info/requires.txt +0 -21
  28. audiopod-1.0.0/audiopod.egg-info/top_level.txt +0 -1
  29. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/cli.py +0 -0
  30. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/config.py +0 -0
  31. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/exceptions.py +0 -0
  32. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/base.py +0 -0
  33. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/credits.py +0 -0
  34. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/denoiser.py +0 -0
  35. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/karaoke.py +0 -0
  36. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/speaker.py +0 -0
  37. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/transcription.py +0 -0
  38. {audiopod-1.0.0 → audiopod-1.1.1}/audiopod/services/voice.py +0 -0
  39. {audiopod-1.0.0 → audiopod-1.1.1}/setup.cfg +0 -0
@@ -0,0 +1,207 @@
1
+ # Changelog
2
+
3
+ All notable changes to the AudioPod Python SDK will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [1.1.1] - 2024-12-15
9
+
10
+ ### 🔧 Translation Service Fixes
11
+
12
+ This release fixes the translation service to use the proper speech-to-speech translation endpoint and adds enhanced functionality.
13
+
14
+ ### ✨ Added
15
+
16
+ - **Speech-to-Speech Translation**: Now uses the correct `/api/v1/translation/translate/speech` endpoint
17
+ - Preserves original speaker voice characteristics during translation
18
+ - Supports both audio and video file translation
19
+ - Maintains speaker separation in multi-speaker content
20
+
21
+ - **URL-Based Translation**: Support for translating audio/video from URLs
22
+ - Direct media URL support (YouTube, audio links, etc.)
23
+ - No need to download files locally first
24
+
25
+ - **Enhanced Translation Job Management**:
26
+ - `list_translation_jobs()` - List translation history with pagination
27
+ - `retry_translation()` - Retry failed translation jobs
28
+ - `delete_translation_job()` - Delete translation jobs
29
+ - `translate_speech()` - Alias method for clearer API
30
+
31
+ ### 🔧 Fixed
32
+
33
+ - **Translation Endpoint**: Changed from generic `/translate` to speech-specific `/translate/speech`
34
+ - **API Schema Alignment**: Request and response formats now match the actual API
35
+ - **Response Model**: Updated `TranslationResult` to include all API response fields:
36
+ - `translated_audio_url` - Direct URL to translated audio
37
+ - `video_output_url` - Translated video output (when applicable)
38
+ - `transcript_urls` - Transcript files in multiple formats
39
+ - `display_name` - Original file display name
40
+ - `is_video` - Whether the input was a video file
41
+
42
+ ### 🏗️ Improved
43
+
44
+ - **Better Error Handling**: Enhanced validation for file vs URL inputs
45
+ - **Backward Compatibility**: Maintained `audio_output_url` property for existing code
46
+ - **Enhanced Examples**: Updated documentation and examples to show new features
47
+ - **Type Safety**: Improved type hints and validation
48
+
49
+ ### 📚 Documentation
50
+
51
+ - **Updated Examples**: `basic_usage.py` now demonstrates speech-to-speech translation
52
+ - **README Updates**: Corrected API usage examples with proper endpoint usage
53
+ - **Method Documentation**: Enhanced docstrings with accurate parameter descriptions
54
+
55
+ ### 🚀 Usage Examples
56
+
57
+ #### Fixed Speech Translation
58
+ ```python
59
+ # Speech-to-speech translation (preserves voice characteristics)
60
+ translation = client.translation.translate_speech(
61
+ audio_file="english_speech.wav",
62
+ target_language="es", # Spanish
63
+ source_language="en", # Optional - auto-detect
64
+ wait_for_completion=True
65
+ )
66
+
67
+ # URL-based translation
68
+ url_translation = client.translation.translate_speech(
69
+ url="https://example.com/audio.mp3",
70
+ target_language="fr", # French
71
+ wait_for_completion=True
72
+ )
73
+
74
+ # Job management
75
+ jobs = client.translation.list_translation_jobs(limit=10)
76
+ retry_job = client.translation.retry_translation(failed_job_id)
77
+ ```
78
+
79
+ ### 🔄 Migration Notes
80
+
81
+ - **No Breaking Changes**: Existing `translate_audio()` method continues to work
82
+ - **Enhanced Functionality**: Now uses proper speech-to-speech endpoint automatically
83
+ - **New Properties**: Additional response fields available in `TranslationResult`
84
+
85
+ ---
86
+
87
+ ## [1.1.0] - 2024-01-15
88
+
89
+ ### 🎉 Major API Compatibility Update
90
+
91
+ This release brings full compatibility with the AudioPod v1 API specifications and includes significant improvements and new features.
92
+
93
+ ### ✨ Added
94
+
95
+ - **New Stem Extraction Service**: Complete implementation of audio stem separation
96
+ - `StemExtractionService` with support for vocals, drums, bass, and instrument separation
97
+ - Support for both `htdemucs` and `htdemucs_6s` models
98
+ - Methods: `extract_stems()`, `get_stem_job()`, `list_stem_jobs()`, `delete_stem_job()`
99
+
100
+ - **Enhanced Music Generation**: New vocals generation capability
101
+ - `generate_vocals()` method for lyric-to-vocals generation
102
+ - Supports the `/api/v1/music/lyric2vocals` endpoint
103
+
104
+ - **Comprehensive Test Suite**: Production-ready testing framework
105
+ - End-to-end integration tests (`test_end_to_end_integration.py`)
106
+ - API compatibility validation tests (`test_sdk_api_compatibility.py`)
107
+ - Complete SDK structure validation (`validate_sdk_structure.py`)
108
+ - Comprehensive test runner (`test_sdk_comprehensive.py`)
109
+
110
+ ### 🔧 Fixed
111
+
112
+ - **Music Service API Schema Alignment**: Critical fixes for API compatibility
113
+ - Fixed parameter names: `duration` → `audio_duration`
114
+ - Fixed parameter names: `num_inference_steps` → `infer_step`
115
+ - Fixed parameter names: `seed` → `manual_seeds` (now accepts list)
116
+ - Fixed response handling to properly extract `job` object from API responses
117
+
118
+ - **Enhanced Music Generation Methods**: Improved existing capabilities
119
+ - `generate_music()`: Now uses correct API schema parameters
120
+ - `generate_rap()`: Enhanced with proper prompt construction and LoRA support
121
+ - `generate_instrumental()`: Improved parameter mapping
122
+ - `list_music_jobs()`: Fixed pagination parameter (`offset` → `skip`)
123
+
124
+ - **Response Format Handling**: Proper API response parsing
125
+ - All music generation endpoints now correctly handle `{"job": {...}, "message": "..."}` response format
126
+ - Improved error handling and status checking
127
+
128
+ ### 🏗️ Improved
129
+
130
+ - **Service Integration**: Better organization and accessibility
131
+ - All services properly integrated in both sync and async clients
132
+ - Enhanced error handling across all services
133
+ - Improved parameter validation
134
+
135
+ - **Code Quality**: Enhanced maintainability and reliability
136
+ - Better type hints and documentation
137
+ - Improved error messages
138
+ - Enhanced validation for all input parameters
139
+
140
+ ### 📚 Documentation
141
+
142
+ - **Comprehensive Fix Documentation**: Detailed improvement summary
143
+ - Complete documentation of all changes in `SDK_FIXES_SUMMARY.md`
144
+ - Usage examples for all new features
145
+ - Migration guide (no breaking changes)
146
+
147
+ - **Testing Documentation**: Complete testing framework
148
+ - Instructions for running validation tests
149
+ - API compatibility verification procedures
150
+ - External developer onboarding documentation
151
+
152
+ ### 🔒 Validation
153
+
154
+ - **100% Structure Validation Success**: All improvements verified
155
+ - 9/9 validation checks passed
156
+ - Complete API endpoint compatibility confirmed
157
+ - All services properly integrated and functional
158
+
159
+ ### 🚀 Usage Examples
160
+
161
+ #### New Stem Extraction
162
+ ```python
163
+ # Extract audio stems
164
+ job = client.stem_extraction.extract_stems(
165
+ audio_file="song.wav",
166
+ stem_types=["vocals", "drums", "bass", "other"],
167
+ model_name="htdemucs",
168
+ wait_for_completion=True
169
+ )
170
+ ```
171
+
172
+ #### Enhanced Music Generation
173
+ ```python
174
+ # Generate vocals from lyrics
175
+ vocals_job = client.music.generate_vocals(
176
+ lyrics="Your song lyrics here",
177
+ prompt="pop vocals, female voice",
178
+ duration=120.0
179
+ )
180
+
181
+ # Improved music generation with correct parameters
182
+ music_job = client.music.generate_music(
183
+ prompt="upbeat electronic dance music",
184
+ duration=120.0, # Now correctly maps to audio_duration
185
+ guidance_scale=7.5,
186
+ num_inference_steps=50, # Now correctly maps to infer_step
187
+ seed=12345 # Now correctly maps to manual_seeds=[12345]
188
+ )
189
+ ```
190
+
191
+ ### 🔄 Migration Notes
192
+
193
+ - **No Breaking Changes**: All existing code continues to work
194
+ - **Improved Reliability**: Better error handling and API compatibility
195
+ - **Enhanced Features**: New capabilities available immediately
196
+
197
+ ---
198
+
199
+ ## [1.0.0] - 2024-01-01
200
+
201
+ ### 🎉 Initial Release
202
+
203
+ - Initial implementation of AudioPod Python SDK
204
+ - Support for voice cloning, music generation, transcription, and translation
205
+ - Async and sync client implementations
206
+ - Basic API integration and authentication
207
+ - Core service implementations
audiopod-1.1.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 AudioPod AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,25 @@
1
+ # Include important files in source distribution
2
+ include README.md
3
+ include LICENSE
4
+ include CHANGELOG.md
5
+ include requirements.txt
6
+ include pyproject.toml
7
+
8
+ # Include package data
9
+ include audiopod/py.typed
10
+
11
+ # Include examples and tests
12
+ recursive-include examples *.py *.md
13
+ recursive-include tests *.py
14
+
15
+ # Exclude development and build artifacts
16
+ exclude BUILD_AND_PUBLISH.md
17
+ exclude INSTALLATION.md
18
+ recursive-exclude * __pycache__
19
+ recursive-exclude * *.py[co]
20
+ recursive-exclude * *.so
21
+ recursive-exclude * .DS_Store
22
+ prune dev-tools
23
+ prune dist
24
+ prune build
25
+ prune *.egg-info
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: audiopod
3
- Version: 1.0.0
3
+ Version: 1.1.1
4
4
  Summary: Professional Audio Processing API Client for Python
5
5
  Home-page: https://github.com/audiopod-ai/audiopod-python
6
6
  Author: AudioPod AI
@@ -25,6 +25,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio :: Conversion
25
25
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
26
  Requires-Python: >=3.8
27
27
  Description-Content-Type: text/markdown
28
+ License-File: LICENSE
28
29
  Requires-Dist: requests>=2.28.0
29
30
  Requires-Dist: aiohttp>=3.8.0
30
31
  Requires-Dist: pydantic>=1.10.0
@@ -46,6 +47,7 @@ Requires-Dist: sphinx-rtd-theme>=1.2.0; extra == "docs"
46
47
  Requires-Dist: sphinx-autodoc-typehints>=1.19.0; extra == "docs"
47
48
  Dynamic: author
48
49
  Dynamic: home-page
50
+ Dynamic: license-file
49
51
  Dynamic: requires-python
50
52
 
51
53
  # AudioPod Python SDK
@@ -140,17 +142,25 @@ print(f"Transcript: {transcript.transcript}")
140
142
  print(f"Detected {len(transcript.segments)} speakers")
141
143
  ```
142
144
 
143
- #### Audio Translation
145
+ #### Speech-to-Speech Translation
144
146
 
145
147
  ```python
146
- # Translate audio to another language
147
- translation = client.translation.translate_audio(
148
+ # Translate speech while preserving voice characteristics
149
+ translation = client.translation.translate_speech(
148
150
  audio_file="path/to/english_audio.wav",
149
151
  target_language="es", # Spanish
152
+ source_language="en", # English (optional - auto-detect)
150
153
  wait_for_completion=True
151
154
  )
152
155
 
153
- print(f"Translated audio URL: {translation.audio_output_url}")
156
+ print(f"Translated audio URL: {translation.translated_audio_url}")
157
+
158
+ # Or translate from URL
159
+ url_translation = client.translation.translate_speech(
160
+ url="https://example.com/audio.mp3",
161
+ target_language="fr", # French
162
+ wait_for_completion=True
163
+ )
154
164
  ```
155
165
 
156
166
  ### Async Support
@@ -380,11 +390,10 @@ audiopod transcription transcribe audio.mp3 --language en
380
390
 
381
391
  ## Support
382
392
 
383
- - 📖 [Documentation](https://docs.audiopod.ai)
384
- - 🎯 [API Reference](https://api.audiopod.ai/docs)
393
+ - 📖 [API Reference](https://docs.audiopod.ai)
385
394
  - 💬 [Discord Community](https://discord.gg/audiopod)
386
395
  - 📧 [Email Support](mailto:support@audiopod.ai)
387
- - 🐛 [Bug Reports](https://github.com/audiopod-ai/audiopod-python/issues)
396
+ - 🐛 [Bug Reports](https://github.com/AudiopodAI/audiopod)
388
397
 
389
398
  ## License
390
399
 
@@ -90,17 +90,25 @@ print(f"Transcript: {transcript.transcript}")
90
90
  print(f"Detected {len(transcript.segments)} speakers")
91
91
  ```
92
92
 
93
- #### Audio Translation
93
+ #### Speech-to-Speech Translation
94
94
 
95
95
  ```python
96
- # Translate audio to another language
97
- translation = client.translation.translate_audio(
96
+ # Translate speech while preserving voice characteristics
97
+ translation = client.translation.translate_speech(
98
98
  audio_file="path/to/english_audio.wav",
99
99
  target_language="es", # Spanish
100
+ source_language="en", # English (optional - auto-detect)
100
101
  wait_for_completion=True
101
102
  )
102
103
 
103
- print(f"Translated audio URL: {translation.audio_output_url}")
104
+ print(f"Translated audio URL: {translation.translated_audio_url}")
105
+
106
+ # Or translate from URL
107
+ url_translation = client.translation.translate_speech(
108
+ url="https://example.com/audio.mp3",
109
+ target_language="fr", # French
110
+ wait_for_completion=True
111
+ )
104
112
  ```
105
113
 
106
114
  ### Async Support
@@ -330,11 +338,10 @@ audiopod transcription transcribe audio.mp3 --language en
330
338
 
331
339
  ## Support
332
340
 
333
- - 📖 [Documentation](https://docs.audiopod.ai)
334
- - 🎯 [API Reference](https://api.audiopod.ai/docs)
341
+ - 📖 [API Reference](https://docs.audiopod.ai)
335
342
  - 💬 [Discord Community](https://discord.gg/audiopod)
336
343
  - 📧 [Email Support](mailto:support@audiopod.ai)
337
- - 🐛 [Bug Reports](https://github.com/audiopod-ai/audiopod-python/issues)
344
+ - 🐛 [Bug Reports](https://github.com/AudiopodAI/audiopod)
338
345
 
339
346
  ## License
340
347
 
@@ -47,7 +47,7 @@ from .models import (
47
47
  TranslationResult
48
48
  )
49
49
 
50
- __version__ = "1.0.0"
50
+ __version__ = "1.1.1"
51
51
  __author__ = "AudioPod AI"
52
52
  __email__ = "support@audiopod.ai"
53
53
  __license__ = "MIT"
@@ -23,7 +23,8 @@ from .services import (
23
23
  SpeakerService,
24
24
  DenoiserService,
25
25
  KaraokeService,
26
- CreditService
26
+ CreditService,
27
+ StemExtractionService
27
28
  )
28
29
 
29
30
  logger = logging.getLogger(__name__)
@@ -139,6 +140,7 @@ class Client(BaseClient):
139
140
  self.denoiser = DenoiserService(self)
140
141
  self.karaoke = KaraokeService(self)
141
142
  self.credits = CreditService(self)
143
+ self.stem_extraction = StemExtractionService(self)
142
144
 
143
145
  def request(
144
146
  self,
@@ -227,6 +229,7 @@ class AsyncClient(BaseClient):
227
229
  self.denoiser = DenoiserService(self, async_mode=True)
228
230
  self.karaoke = KaraokeService(self, async_mode=True)
229
231
  self.credits = CreditService(self, async_mode=True)
232
+ self.stem_extraction = StemExtractionService(self, async_mode=True)
230
233
 
231
234
  @property
232
235
  def session(self) -> aiohttp.ClientSession:
@@ -151,13 +151,18 @@ class MusicGenerationResult:
151
151
 
152
152
  @dataclass
153
153
  class TranslationResult:
154
- """Translation job result"""
154
+ """Speech translation job result"""
155
155
  job: Job
156
156
  source_language: Optional[str] = None
157
157
  target_language: Optional[str] = None
158
- audio_output_url: Optional[str] = None
159
- video_output_url: Optional[str] = None
158
+ display_name: Optional[str] = None
159
+ audio_output_path: Optional[str] = None
160
+ video_output_path: Optional[str] = None
160
161
  transcript_path: Optional[str] = None
162
+ translated_audio_url: Optional[str] = None
163
+ video_output_url: Optional[str] = None
164
+ transcript_urls: Optional[Dict[str, str]] = None
165
+ is_video: bool = False
161
166
 
162
167
  @classmethod
163
168
  def from_dict(cls, data: Dict[str, Any]) -> 'TranslationResult':
@@ -166,10 +171,20 @@ class TranslationResult:
166
171
  job=Job.from_dict(data),
167
172
  source_language=data.get('source_language'),
168
173
  target_language=data.get('target_language'),
169
- audio_output_url=data.get('audio_output_path'),
170
- video_output_url=data.get('video_output_path'),
171
- transcript_path=data.get('transcript_path')
174
+ display_name=data.get('display_name'),
175
+ audio_output_path=data.get('audio_output_path'),
176
+ video_output_path=data.get('video_output_path'),
177
+ transcript_path=data.get('transcript_path'),
178
+ translated_audio_url=data.get('translated_audio_url'),
179
+ video_output_url=data.get('video_output_url'),
180
+ transcript_urls=data.get('transcript_urls'),
181
+ is_video=data.get('is_video', False)
172
182
  )
183
+
184
+ @property
185
+ def audio_output_url(self) -> Optional[str]:
186
+ """Backward compatibility property - returns translated_audio_url"""
187
+ return self.translated_audio_url
173
188
 
174
189
 
175
190
  @dataclass
@@ -0,0 +1,2 @@
1
+ # Marker file for PEP 561
2
+ # This package supports type checking
@@ -11,6 +11,7 @@ from .speaker import SpeakerService
11
11
  from .denoiser import DenoiserService
12
12
  from .karaoke import KaraokeService
13
13
  from .credits import CreditService
14
+ from .stem_extraction import StemExtractionService
14
15
 
15
16
  __all__ = [
16
17
  "VoiceService",
@@ -20,5 +21,6 @@ __all__ = [
20
21
  "SpeakerService",
21
22
  "DenoiserService",
22
23
  "KaraokeService",
23
- "CreditService"
24
+ "CreditService",
25
+ "StemExtractionService"
24
26
  ]
@@ -51,15 +51,15 @@ class MusicService(BaseService):
51
51
  if seed is not None and (seed < 0 or seed > 2**32 - 1):
52
52
  raise ValidationError("Seed must be between 0 and 2^32 - 1")
53
53
 
54
- # Prepare request data
54
+ # Prepare request data - FIXED: Use correct parameter names matching API schema
55
55
  data = {
56
56
  "prompt": prompt,
57
- "duration": duration,
57
+ "audio_duration": duration, # FIXED: API expects "audio_duration" not "duration"
58
58
  "guidance_scale": guidance_scale,
59
- "num_inference_steps": num_inference_steps
59
+ "infer_step": num_inference_steps # FIXED: API expects "infer_step" not "num_inference_steps"
60
60
  }
61
61
  if seed is not None:
62
- data["seed"] = seed
62
+ data["manual_seeds"] = [seed] # FIXED: API expects "manual_seeds" list not "seed"
63
63
  if display_name:
64
64
  data["display_name"] = display_name.strip()
65
65
 
@@ -68,7 +68,9 @@ class MusicService(BaseService):
68
68
  return self._async_generate_music(data, wait_for_completion, timeout)
69
69
  else:
70
70
  response = self.client.request("POST", "/api/v1/music/text2music", data=data)
71
- job = Job.from_dict(response)
71
+ # FIXED: Handle response format correctly - API returns {"job": {...}, "message": "..."}
72
+ job_data = response.get("job", response)
73
+ job = Job.from_dict(job_data)
72
74
 
73
75
  if wait_for_completion:
74
76
  completed_job = self._wait_for_completion(job.id, timeout)
@@ -84,7 +86,9 @@ class MusicService(BaseService):
84
86
  ) -> Union[Job, MusicGenerationResult]:
85
87
  """Async version of generate_music"""
86
88
  response = await self.client.request("POST", "/api/v1/music/text2music", data=data)
87
- job = Job.from_dict(response)
89
+ # FIXED: Handle response format correctly
90
+ job_data = response.get("job", response)
91
+ job = Job.from_dict(job_data)
88
92
 
89
93
  if wait_for_completion:
90
94
  completed_job = await self._async_wait_for_completion(job.id, timeout)
@@ -122,11 +126,14 @@ class MusicService(BaseService):
122
126
  if style not in ["modern", "classic", "trap"]:
123
127
  raise ValidationError("Style must be 'modern', 'classic', or 'trap'")
124
128
 
125
- # Prepare request data
129
+ # Prepare request data - FIXED: Match API schema for text2rap
126
130
  data = {
131
+ "prompt": f"rap music, {style} style", # FIXED: API expects "prompt" field
127
132
  "lyrics": lyrics,
128
- "style": style,
129
- "tempo": tempo
133
+ "audio_duration": 120.0, # Default duration
134
+ "guidance_scale": 7.5,
135
+ "infer_step": 50,
136
+ "lora_name_or_path": "ACE-Step/ACE-Step-v1-chinese-rap-LoRA" # Rap-specific LoRA
130
137
  }
131
138
  if display_name:
132
139
  data["display_name"] = display_name.strip()
@@ -136,7 +143,9 @@ class MusicService(BaseService):
136
143
  return self._async_generate_rap(data, wait_for_completion, timeout)
137
144
  else:
138
145
  response = self.client.request("POST", "/api/v1/music/text2rap", data=data)
139
- job = Job.from_dict(response)
146
+ # FIXED: Handle response format correctly
147
+ job_data = response.get("job", response)
148
+ job = Job.from_dict(job_data)
140
149
 
141
150
  if wait_for_completion:
142
151
  completed_job = self._wait_for_completion(job.id, timeout)
@@ -152,7 +161,9 @@ class MusicService(BaseService):
152
161
  ) -> Union[Job, MusicGenerationResult]:
153
162
  """Async version of generate_rap"""
154
163
  response = await self.client.request("POST", "/api/v1/music/text2rap", data=data)
155
- job = Job.from_dict(response)
164
+ # FIXED: Handle response format correctly
165
+ job_data = response.get("job", response)
166
+ job = Job.from_dict(job_data)
156
167
 
157
168
  if wait_for_completion:
158
169
  completed_job = await self._async_wait_for_completion(job.id, timeout)
@@ -194,10 +205,12 @@ class MusicService(BaseService):
194
205
  if tempo is not None and not 60 <= tempo <= 200:
195
206
  raise ValidationError("Tempo must be between 60 and 200 BPM")
196
207
 
197
- # Prepare request data
208
+ # Prepare request data - FIXED: Match API schema for prompt2instrumental
198
209
  data = {
199
210
  "prompt": prompt,
200
- "duration": duration
211
+ "audio_duration": duration, # FIXED: API expects "audio_duration"
212
+ "guidance_scale": 7.5,
213
+ "infer_step": 50
201
214
  }
202
215
  if instruments:
203
216
  data["instruments"] = instruments
@@ -213,7 +226,9 @@ class MusicService(BaseService):
213
226
  return self._async_generate_instrumental(data, wait_for_completion, timeout)
214
227
  else:
215
228
  response = self.client.request("POST", "/api/v1/music/prompt2instrumental", data=data)
216
- job = Job.from_dict(response)
229
+ # FIXED: Handle response format correctly
230
+ job_data = response.get("job", response)
231
+ job = Job.from_dict(job_data)
217
232
 
218
233
  if wait_for_completion:
219
234
  completed_job = self._wait_for_completion(job.id, timeout)
@@ -229,7 +244,80 @@ class MusicService(BaseService):
229
244
  ) -> Union[Job, MusicGenerationResult]:
230
245
  """Async version of generate_instrumental"""
231
246
  response = await self.client.request("POST", "/api/v1/music/prompt2instrumental", data=data)
232
- job = Job.from_dict(response)
247
+ # FIXED: Handle response format correctly
248
+ job_data = response.get("job", response)
249
+ job = Job.from_dict(job_data)
250
+
251
+ if wait_for_completion:
252
+ completed_job = await self._async_wait_for_completion(job.id, timeout)
253
+ return MusicGenerationResult.from_dict(completed_job.result or completed_job.__dict__)
254
+
255
+ return job
256
+
257
+ def generate_vocals(
258
+ self,
259
+ lyrics: str,
260
+ prompt: str = "vocals",
261
+ duration: float = 120.0,
262
+ display_name: Optional[str] = None,
263
+ wait_for_completion: bool = False,
264
+ timeout: int = 600
265
+ ) -> Union[Job, MusicGenerationResult]:
266
+ """
267
+ Generate vocals from lyrics - NEW METHOD matching API lyric2vocals endpoint
268
+
269
+ Args:
270
+ lyrics: Song lyrics
271
+ prompt: Vocal style description
272
+ duration: Duration in seconds
273
+ display_name: Custom name for the track
274
+ wait_for_completion: Whether to wait for completion
275
+ timeout: Maximum time to wait
276
+
277
+ Returns:
278
+ Job object or generation result
279
+ """
280
+ # Validate inputs
281
+ lyrics = self._validate_text_input(lyrics, max_length=10000)
282
+ prompt = self._validate_text_input(prompt, max_length=2000)
283
+ if not 10.0 <= duration <= 600.0:
284
+ raise ValidationError("Duration must be between 10 and 600 seconds")
285
+
286
+ # Prepare request data - Match API schema for lyric2vocals
287
+ data = {
288
+ "prompt": prompt,
289
+ "lyrics": lyrics,
290
+ "audio_duration": duration,
291
+ "guidance_scale": 7.5,
292
+ "infer_step": 50
293
+ }
294
+ if display_name:
295
+ data["display_name"] = display_name.strip()
296
+
297
+ # Make request
298
+ if self.async_mode:
299
+ return self._async_generate_vocals(data, wait_for_completion, timeout)
300
+ else:
301
+ response = self.client.request("POST", "/api/v1/music/lyric2vocals", data=data)
302
+ job_data = response.get("job", response)
303
+ job = Job.from_dict(job_data)
304
+
305
+ if wait_for_completion:
306
+ completed_job = self._wait_for_completion(job.id, timeout)
307
+ return MusicGenerationResult.from_dict(completed_job.result or completed_job.__dict__)
308
+
309
+ return job
310
+
311
+ async def _async_generate_vocals(
312
+ self,
313
+ data: Dict[str, Any],
314
+ wait_for_completion: bool,
315
+ timeout: int
316
+ ) -> Union[Job, MusicGenerationResult]:
317
+ """Async version of generate_vocals"""
318
+ response = await self.client.request("POST", "/api/v1/music/lyric2vocals", data=data)
319
+ job_data = response.get("job", response)
320
+ job = Job.from_dict(job_data)
233
321
 
234
322
  if wait_for_completion:
235
323
  completed_job = await self._async_wait_for_completion(job.id, timeout)
@@ -322,7 +410,7 @@ class MusicService(BaseService):
322
410
  """
323
411
  params = {
324
412
  "limit": limit,
325
- "skip": offset
413
+ "skip": offset # FIXED: API uses "skip" parameter for offset
326
414
  }
327
415
  if status:
328
416
  params["status"] = status