audiopod 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audiopod/__init__.py +83 -0
- audiopod/cli.py +285 -0
- audiopod/client.py +332 -0
- audiopod/config.py +63 -0
- audiopod/exceptions.py +96 -0
- audiopod/models.py +235 -0
- audiopod/services/__init__.py +24 -0
- audiopod/services/base.py +213 -0
- audiopod/services/credits.py +46 -0
- audiopod/services/denoiser.py +51 -0
- audiopod/services/karaoke.py +61 -0
- audiopod/services/music.py +434 -0
- audiopod/services/speaker.py +53 -0
- audiopod/services/transcription.py +212 -0
- audiopod/services/translation.py +81 -0
- audiopod/services/voice.py +376 -0
- audiopod-1.0.0.dist-info/METADATA +395 -0
- audiopod-1.0.0.dist-info/RECORD +21 -0
- audiopod-1.0.0.dist-info/WHEEL +5 -0
- audiopod-1.0.0.dist-info/entry_points.txt +2 -0
- audiopod-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Transcription Service - Speech-to-text operations
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional, Dict, Any, Union
|
|
6
|
+
|
|
7
|
+
from .base import BaseService
|
|
8
|
+
from ..models import Job, TranscriptionResult
|
|
9
|
+
from ..exceptions import ValidationError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TranscriptionService(BaseService):
    """Service for audio transcription operations.

    Every public method dispatches on ``self.async_mode``: when it is truthy
    the method returns the coroutine created by its ``_async_*`` counterpart
    (the caller must ``await`` it); otherwise the request is performed
    synchronously through ``self.client`` and the value is returned directly.
    """

    # Values accepted client-side before any request is sent.
    _SUPPORTED_MODEL_TYPES = ("whisperx", "faster-whisper")
    _TRANSCRIPT_FORMATS = ("json", "txt", "srt", "vtt", "pdf", "docx", "html")

    _UPLOAD_ENDPOINT = "/api/v1/transcription/transcribe-upload"
    _URL_ENDPOINT = "/api/v1/transcription/transcribe"

    @staticmethod
    def _to_transcription_result(completed_job: Job) -> TranscriptionResult:
        """Build a TranscriptionResult from a finished job.

        Uses the job's ``result`` payload when it is truthy and falls back to
        the job's own attribute dict otherwise.
        """
        return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)

    def transcribe_audio(
        self,
        audio_file: str,
        language: Optional[str] = None,
        model_type: str = "whisperx",
        enable_speaker_diarization: bool = False,
        enable_word_timestamps: bool = True,
        wait_for_completion: bool = False,
        timeout: int = 600
    ) -> Union[Job, TranscriptionResult]:
        """
        Transcribe an uploaded audio file to text

        Args:
            audio_file: Path to audio file
            language: Language code; omitted from the request when falsy
            model_type: Model to use ('whisperx', 'faster-whisper')
            enable_speaker_diarization: Enable speaker identification
            enable_word_timestamps: Include word-level timestamps
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait (forwarded to the wait helper;
                presumably seconds - confirm against BaseService)

        Returns:
            The submitted Job, or a TranscriptionResult when
            wait_for_completion is true. In async mode a coroutine
            resolving to one of those is returned instead.

        Raises:
            ValidationError: If model_type is not a supported model.
        """
        # Validate inputs (language first, then model, as callers may rely on
        # which error surfaces first)
        if language:
            language = self._validate_language_code(language)
        if model_type not in self._SUPPORTED_MODEL_TYPES:
            raise ValidationError("Model type must be 'whisperx' or 'faster-whisper'")

        # Prepare file upload
        files = self._prepare_file_upload(audio_file, "files")

        # Prepare form data
        data = {
            "model_type": model_type,
            "enable_speaker_diarization": enable_speaker_diarization,
            "enable_word_timestamps": enable_word_timestamps,
        }
        if language:
            data["language"] = language

        if self.async_mode:
            return self._async_transcribe_audio(files, data, wait_for_completion, timeout)

        response = self.client.request(
            "POST",
            self._UPLOAD_ENDPOINT,
            data=data,
            files=files,
        )
        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        completed_job = self._wait_for_completion(job.id, timeout)
        return self._to_transcription_result(completed_job)

    async def _async_transcribe_audio(
        self,
        files: Dict[str, Any],
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, TranscriptionResult]:
        """Async version of transcribe_audio; inputs are already validated."""
        response = await self.client.request(
            "POST",
            self._UPLOAD_ENDPOINT,
            data=data,
            files=files,
        )
        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        completed_job = await self._async_wait_for_completion(job.id, timeout)
        return self._to_transcription_result(completed_job)

    def transcribe_url(
        self,
        url: str,
        language: Optional[str] = None,
        model_type: str = "whisperx",
        enable_speaker_diarization: bool = False,
        wait_for_completion: bool = False,
        timeout: int = 600
    ) -> Union[Job, TranscriptionResult]:
        """
        Transcribe audio from URL (YouTube, etc.)

        Args:
            url: URL to audio/video content; sent as a one-element
                ``source_urls`` list
            language: Language code; omitted from the request when falsy
            model_type: Model to use (not validated client-side here)
            enable_speaker_diarization: Enable speaker identification
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait (forwarded to the wait helper)

        Returns:
            The submitted Job, or a TranscriptionResult when
            wait_for_completion is true. In async mode a coroutine
            resolving to one of those is returned instead.
        """
        if language:
            language = self._validate_language_code(language)

        data = {
            "source_urls": [url],
            "model_type": model_type,
            "enable_speaker_diarization": enable_speaker_diarization,
        }
        if language:
            data["language"] = language

        if self.async_mode:
            return self._async_transcribe_url(data, wait_for_completion, timeout)

        response = self.client.request("POST", self._URL_ENDPOINT, data=data)
        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        completed_job = self._wait_for_completion(job.id, timeout)
        return self._to_transcription_result(completed_job)

    async def _async_transcribe_url(
        self,
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, TranscriptionResult]:
        """Async version of transcribe_url; ``data`` is the prepared payload."""
        response = await self.client.request("POST", self._URL_ENDPOINT, data=data)
        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        completed_job = await self._async_wait_for_completion(job.id, timeout)
        return self._to_transcription_result(completed_job)

    def get_transcription_job(self, job_id: int) -> TranscriptionResult:
        """Get transcription job details (a coroutine in async mode)"""
        if self.async_mode:
            return self._async_get_transcription_job(job_id)

        response = self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
        return TranscriptionResult.from_dict(response)

    async def _async_get_transcription_job(self, job_id: int) -> TranscriptionResult:
        """Async version of get_transcription_job"""
        response = await self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
        return TranscriptionResult.from_dict(response)

    def download_transcript(
        self,
        job_id: int,
        format: str = "json"
    ) -> str:
        """
        Download transcript in specified format

        Args:
            job_id: Transcription job ID
            format: Output format
                ('json', 'txt', 'srt', 'vtt', 'pdf', 'docx', 'html')

        Returns:
            Whatever ``self.client.request`` yields for the transcript
            endpoint (annotated as str); a coroutine in async mode.

        Raises:
            ValidationError: If format is not one of the supported formats.
        """
        if format not in self._TRANSCRIPT_FORMATS:
            raise ValidationError("Format must be one of: json, txt, srt, vtt, pdf, docx, html")

        params = {"format": format}

        if self.async_mode:
            return self._async_download_transcript(job_id, params)

        return self.client.request(
            "GET",
            f"/api/v1/transcription/jobs/{job_id}/transcript",
            params=params,
        )

    async def _async_download_transcript(self, job_id: int, params: Dict[str, str]) -> str:
        """Async version of download_transcript"""
        return await self.client.request(
            "GET",
            f"/api/v1/transcription/jobs/{job_id}/transcript",
            params=params,
        )
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Translation Service - Audio/video translation operations
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
from .base import BaseService
|
|
7
|
+
from ..models import Job, TranslationResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TranslationService(BaseService):
    """Service for audio and video translation operations.

    Public methods check ``self.async_mode``: when truthy they hand back the
    coroutine of the matching ``_async_*`` method, otherwise they run the
    request synchronously via ``self.client``.
    """

    def translate_audio(
        self,
        audio_file: str,
        target_language: str,
        source_language: Optional[str] = None,
        wait_for_completion: bool = False,
        timeout: int = 900
    ) -> Union[Job, TranslationResult]:
        """
        Submit an audio file for translation into another language

        Args:
            audio_file: Path to audio file
            target_language: Target language code (always validated)
            source_language: Source language; validated and sent only
                when truthy
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait (forwarded to the wait helper)

        Returns:
            The submitted Job, or a TranslationResult when
            wait_for_completion is true (a coroutine in async mode)
        """
        payload = {"target_language": self._validate_language_code(target_language)}
        if source_language:
            source_language = self._validate_language_code(source_language)

        upload = self._prepare_file_upload(audio_file, "file")

        # Re-check after validation: only a truthy validated code is sent.
        if source_language:
            payload["source_language"] = source_language

        if self.async_mode:
            return self._async_translate_audio(upload, payload, wait_for_completion, timeout)

        raw = self.client.request(
            "POST", "/api/v1/translation/translate", data=payload, files=upload
        )
        submitted = Job.from_dict(raw)
        if not wait_for_completion:
            return submitted

        finished = self._wait_for_completion(submitted.id, timeout)
        return TranslationResult.from_dict(finished.result or finished.__dict__)

    async def _async_translate_audio(
        self,
        files: Dict[str, Any],
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, TranslationResult]:
        """Async version of translate_audio; receives the prepared upload and payload."""
        raw = await self.client.request(
            "POST", "/api/v1/translation/translate", data=data, files=files
        )
        submitted = Job.from_dict(raw)
        if not wait_for_completion:
            return submitted

        finished = await self._async_wait_for_completion(submitted.id, timeout)
        return TranslationResult.from_dict(finished.result or finished.__dict__)

    def get_translation_job(self, job_id: int) -> TranslationResult:
        """Fetch translation job details (a coroutine in async mode)"""
        if self.async_mode:
            return self._async_get_translation_job(job_id)

        raw = self.client.request("GET", f"/api/v1/translation/translations/{job_id}")
        return TranslationResult.from_dict(raw)

    async def _async_get_translation_job(self, job_id: int) -> TranslationResult:
        """Async version of get_translation_job"""
        raw = await self.client.request("GET", f"/api/v1/translation/translations/{job_id}")
        return TranslationResult.from_dict(raw)
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Voice Service - Voice cloning and TTS operations
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional, Dict, Any, Union
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .base import BaseService
|
|
9
|
+
from ..models import Job, VoiceProfile, JobStatus
|
|
10
|
+
from ..exceptions import ValidationError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class VoiceService(BaseService):
    """Service for voice cloning and text-to-speech operations.

    Every public method dispatches on ``self.async_mode``: when it is truthy
    the method returns the coroutine created by its ``_async_*`` counterpart
    (the caller must ``await`` it); otherwise the request is performed
    synchronously through ``self.client``.
    """

    # Pause between voice-profile status polls while waiting for completion.
    _PROFILE_POLL_INTERVAL = 5

    @staticmethod
    def _validate_speed(speed: float) -> None:
        """Raise ValidationError unless 0.5 <= speed <= 2.0."""
        if not 0.5 <= speed <= 2.0:
            raise ValidationError("Speed must be between 0.5 and 2.0")

    @staticmethod
    def _profile_if_finished(voice_data: Dict[str, Any]) -> Optional[VoiceProfile]:
        """Interpret one poll response of a voice profile.

        Returns the VoiceProfile when the status is "completed", raises
        ValidationError when it is "failed", and returns None for any other
        status (the caller keeps polling).
        """
        status = voice_data["status"]
        if status == "completed":
            return VoiceProfile.from_dict(voice_data)
        if status == "failed":
            raise ValidationError(f"Voice profile creation failed: {voice_data.get('error_message')}")
        return None

    def clone_voice(
        self,
        voice_file: str,
        text: str,
        language: Optional[str] = None,
        speed: float = 1.0,
        wait_for_completion: bool = False,
        timeout: int = 300
    ) -> Union[Job, Dict[str, Any]]:
        """
        Clone a voice from an audio file

        Args:
            voice_file: Path to audio file containing voice to clone
            text: Text to generate with the cloned voice
            language: Target language code (e.g., 'en', 'es'); sent as
                ``target_language`` only when truthy
            speed: Speech speed (0.5 to 2.0)
            wait_for_completion: Whether to wait for job completion
            timeout: Maximum time to wait if wait_for_completion=True

        Returns:
            Job object if wait_for_completion=False; otherwise the finished
            job's result, or the finished job itself when its result is
            falsy. In async mode a coroutine resolving to that value.

        Raises:
            ValidationError: If speed is outside 0.5-2.0.
        """
        # Validate inputs
        text = self._validate_text_input(text)
        if language:
            language = self._validate_language_code(language)
        self._validate_speed(speed)

        # Prepare file upload
        files = self._prepare_file_upload(voice_file, "file")

        # Prepare form data
        data = {
            "input_text": text,
            "speed": speed,
        }
        if language:
            data["target_language"] = language

        if self.async_mode:
            return self._async_clone_voice(files, data, wait_for_completion, timeout)

        response = self.client.request(
            "POST",
            "/api/v1/voice/voice-clone",
            data=data,
            files=files,
        )
        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        job = self._wait_for_completion(job.id, timeout)
        return job.result if job.result else job

    async def _async_clone_voice(
        self,
        files: Dict[str, Any],
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, Dict[str, Any]]:
        """Async version of clone_voice; inputs are already validated."""
        response = await self.client.request(
            "POST",
            "/api/v1/voice/voice-clone",
            data=data,
            files=files,
        )
        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        job = await self._async_wait_for_completion(job.id, timeout)
        return job.result if job.result else job

    def create_voice_profile(
        self,
        name: str,
        voice_file: str,
        description: Optional[str] = None,
        is_public: bool = False,
        wait_for_completion: bool = False,
        timeout: int = 600
    ) -> Union[Job, VoiceProfile]:
        """
        Create a reusable voice profile

        Args:
            name: Name for the voice profile (sent stripped of surrounding
                whitespace)
            voice_file: Path to audio file containing voice sample
            description: Optional description (sent stripped when truthy)
            is_public: Whether to make the voice profile public
            wait_for_completion: Whether to wait for processing completion
            timeout: Maximum time in seconds to poll if
                wait_for_completion=True

        Returns:
            A VoiceProfile built from the creation response, or from the
            first poll reporting status "completed" when
            wait_for_completion=True. In async mode a coroutine resolving
            to that value.

        Raises:
            ValidationError: If the name is empty or longer than 100
                characters, if the profile reports status "failed", or if
                polling exceeds the timeout.
        """
        # Validate inputs
        if not name or not name.strip():
            raise ValidationError("Voice profile name cannot be empty")
        if len(name) > 100:
            raise ValidationError("Voice profile name too long (max 100 characters)")

        # Prepare file upload
        files = self._prepare_file_upload(voice_file, "file")

        # Prepare form data
        data = {
            "name": name.strip(),
            "is_public": is_public,
        }
        if description:
            data["description"] = description.strip()

        if self.async_mode:
            return self._async_create_voice_profile(files, data, wait_for_completion, timeout)

        response = self.client.request(
            "POST",
            "/api/v1/voice/voice-profiles",
            data=data,
            files=files,
        )
        if not wait_for_completion:
            return VoiceProfile.from_dict(response)

        return self._poll_voice_profile(response["id"], timeout)

    def _poll_voice_profile(self, voice_id: Union[int, str], timeout: int) -> VoiceProfile:
        """Poll a voice profile synchronously until it completes, fails or times out."""
        import time

        # A monotonic clock keeps the timeout immune to wall-clock adjustments.
        started = time.monotonic()
        while time.monotonic() - started < timeout:
            voice_data = self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
            profile = self._profile_if_finished(voice_data)
            if profile is not None:
                return profile
            time.sleep(self._PROFILE_POLL_INTERVAL)
        raise ValidationError("Voice profile creation timed out")

    async def _async_create_voice_profile(
        self,
        files: Dict[str, Any],
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, VoiceProfile]:
        """Async version of create_voice_profile; inputs are already validated."""
        response = await self.client.request(
            "POST",
            "/api/v1/voice/voice-profiles",
            data=data,
            files=files,
        )
        if not wait_for_completion:
            return VoiceProfile.from_dict(response)

        return await self._async_poll_voice_profile(response["id"], timeout)

    async def _async_poll_voice_profile(self, voice_id: Union[int, str], timeout: int) -> VoiceProfile:
        """Poll a voice profile without blocking the event loop."""
        # Both modules are imported here: the original async path referenced
        # ``time`` without importing it and failed with NameError.
        import asyncio
        import time

        started = time.monotonic()
        while time.monotonic() - started < timeout:
            voice_data = await self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
            profile = self._profile_if_finished(voice_data)
            if profile is not None:
                return profile
            await asyncio.sleep(self._PROFILE_POLL_INTERVAL)
        raise ValidationError("Voice profile creation timed out")

    def generate_speech(
        self,
        voice_id: Union[int, str],
        text: str,
        language: Optional[str] = None,
        speed: float = 1.0,
        audio_format: str = "mp3",
        wait_for_completion: bool = False,
        timeout: int = 300
    ) -> Union[Job, Dict[str, Any]]:
        """
        Generate speech using an existing voice profile

        Args:
            voice_id: ID or UUID of the voice profile
            text: Text to generate speech for
            language: Target language code; sent only when truthy
            speed: Speech speed (0.5 to 2.0)
            audio_format: Output audio format (mp3, wav)
            wait_for_completion: Whether to wait for completion
            timeout: Maximum time to wait

        Returns:
            The raw response when it carries no "job_id" key; otherwise a
            Job, or - when wait_for_completion is true - the finished job's
            result (the job itself if that result is falsy). In async mode
            a coroutine resolving to that value.

        Raises:
            ValidationError: If speed is outside 0.5-2.0 or audio_format is
                not 'mp3' or 'wav'.
        """
        # Validate inputs
        text = self._validate_text_input(text)
        if language:
            language = self._validate_language_code(language)
        self._validate_speed(speed)
        if audio_format not in ("mp3", "wav"):
            raise ValidationError("Audio format must be 'mp3' or 'wav'")

        # Prepare form data
        data = {
            "input_text": text,
            "speed": speed,
            "audio_format": audio_format,
        }
        if language:
            data["language"] = language

        endpoint = f"/api/v1/voice/voices/{voice_id}/generate"

        if self.async_mode:
            return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)

        response = self.client.request("POST", endpoint, data=data)
        if "job_id" not in response:
            # Direct response with audio URL
            return response

        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        job = self._wait_for_completion(job.id, timeout)
        return job.result if job.result else job

    async def _async_generate_speech(
        self,
        endpoint: str,
        data: Dict[str, Any],
        wait_for_completion: bool,
        timeout: int
    ) -> Union[Job, Dict[str, Any]]:
        """Async version of generate_speech; ``endpoint`` and ``data`` are prepared by the caller."""
        response = await self.client.request("POST", endpoint, data=data)
        if "job_id" not in response:
            return response

        job = Job.from_dict(response)
        if not wait_for_completion:
            return job

        job = await self._async_wait_for_completion(job.id, timeout)
        return job.result if job.result else job

    def list_voice_profiles(
        self,
        voice_type: Optional[str] = None,
        is_public: Optional[bool] = None,
        include_public: bool = True,
        limit: int = 50
    ) -> List[VoiceProfile]:
        """
        List available voice profiles

        Args:
            voice_type: Filter by voice type ('custom', 'standard'); sent
                only when truthy
            is_public: Filter by public status; sent only when not None
            include_public: Include public voices
            limit: Maximum number of results

        Returns:
            List of voice profiles, one per item of the response
            (a coroutine resolving to that list in async mode)
        """
        params = {
            "limit": limit,
            "include_public": include_public,
        }
        if voice_type:
            params["voice_type"] = voice_type
        if is_public is not None:
            params["is_public"] = is_public

        if self.async_mode:
            return self._async_list_voice_profiles(params)

        response = self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
        return [VoiceProfile.from_dict(voice_data) for voice_data in response]

    async def _async_list_voice_profiles(self, params: Dict[str, Any]) -> List[VoiceProfile]:
        """Async version of list_voice_profiles"""
        response = await self.client.request("GET", "/api/v1/voice/voice-profiles", params=params)
        return [VoiceProfile.from_dict(voice_data) for voice_data in response]

    def get_voice_profile(self, voice_id: Union[int, str]) -> VoiceProfile:
        """
        Get details of a specific voice profile

        Args:
            voice_id: ID or UUID of the voice profile

        Returns:
            Voice profile details (a coroutine in async mode)
        """
        if self.async_mode:
            return self._async_get_voice_profile(voice_id)

        response = self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
        return VoiceProfile.from_dict(response)

    async def _async_get_voice_profile(self, voice_id: Union[int, str]) -> VoiceProfile:
        """Async version of get_voice_profile"""
        response = await self.client.request("GET", f"/api/v1/voice/voice-profiles/{voice_id}")
        return VoiceProfile.from_dict(response)

    def delete_voice_profile(self, voice_id: Union[int, str]) -> Dict[str, str]:
        """
        Delete a voice profile

        Args:
            voice_id: ID or UUID of the voice profile

        Returns:
            Deletion confirmation as returned by the client
            (a coroutine in async mode)
        """
        # NOTE(review): delete and generate use the "/voices/" path while
        # get/list/create use "/voice-profiles/" - confirm against the API.
        if self.async_mode:
            return self._async_delete_voice_profile(voice_id)

        return self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")

    async def _async_delete_voice_profile(self, voice_id: Union[int, str]) -> Dict[str, str]:
        """Async version of delete_voice_profile"""
        return await self.client.request("DELETE", f"/api/v1/voice/voices/{voice_id}")

    def get_job_status(self, job_id: int) -> Job:
        """
        Get status of a voice processing job

        Args:
            job_id: ID of the job

        Returns:
            Job status and details (a coroutine in async mode)
        """
        if self.async_mode:
            return self._async_get_job_status(job_id)

        response = self.client.request("GET", f"/api/v1/voice/clone/{job_id}/status")
        return Job.from_dict(response)

    async def _async_get_job_status(self, job_id: int) -> Job:
        """Async version of get_job_status"""
        response = await self.client.request("GET", f"/api/v1/voice/clone/{job_id}/status")
        return Job.from_dict(response)
|