audiopod 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audiopod/__init__.py +1 -1
- audiopod/models.py +21 -6
- audiopod/services/translation.py +126 -11
- audiopod/services/voice.py +169 -75
- {audiopod-1.1.0.dist-info → audiopod-1.2.0.dist-info}/METADATA +72 -14
- {audiopod-1.1.0.dist-info → audiopod-1.2.0.dist-info}/RECORD +10 -10
- {audiopod-1.1.0.dist-info → audiopod-1.2.0.dist-info}/WHEEL +0 -0
- {audiopod-1.1.0.dist-info → audiopod-1.2.0.dist-info}/entry_points.txt +0 -0
- {audiopod-1.1.0.dist-info → audiopod-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {audiopod-1.1.0.dist-info → audiopod-1.2.0.dist-info}/top_level.txt +0 -0
audiopod/__init__.py
CHANGED
audiopod/models.py
CHANGED
|
@@ -151,13 +151,18 @@ class MusicGenerationResult:
|
|
|
151
151
|
|
|
152
152
|
@dataclass
|
|
153
153
|
class TranslationResult:
|
|
154
|
-
"""
|
|
154
|
+
"""Speech translation job result"""
|
|
155
155
|
job: Job
|
|
156
156
|
source_language: Optional[str] = None
|
|
157
157
|
target_language: Optional[str] = None
|
|
158
|
-
|
|
159
|
-
|
|
158
|
+
display_name: Optional[str] = None
|
|
159
|
+
audio_output_path: Optional[str] = None
|
|
160
|
+
video_output_path: Optional[str] = None
|
|
160
161
|
transcript_path: Optional[str] = None
|
|
162
|
+
translated_audio_url: Optional[str] = None
|
|
163
|
+
video_output_url: Optional[str] = None
|
|
164
|
+
transcript_urls: Optional[Dict[str, str]] = None
|
|
165
|
+
is_video: bool = False
|
|
161
166
|
|
|
162
167
|
@classmethod
|
|
163
168
|
def from_dict(cls, data: Dict[str, Any]) -> 'TranslationResult':
|
|
@@ -166,10 +171,20 @@ class TranslationResult:
|
|
|
166
171
|
job=Job.from_dict(data),
|
|
167
172
|
source_language=data.get('source_language'),
|
|
168
173
|
target_language=data.get('target_language'),
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
174
|
+
display_name=data.get('display_name'),
|
|
175
|
+
audio_output_path=data.get('audio_output_path'),
|
|
176
|
+
video_output_path=data.get('video_output_path'),
|
|
177
|
+
transcript_path=data.get('transcript_path'),
|
|
178
|
+
translated_audio_url=data.get('translated_audio_url'),
|
|
179
|
+
video_output_url=data.get('video_output_url'),
|
|
180
|
+
transcript_urls=data.get('transcript_urls'),
|
|
181
|
+
is_video=data.get('is_video', False)
|
|
172
182
|
)
|
|
183
|
+
|
|
184
|
+
@property
|
|
185
|
+
def audio_output_url(self) -> Optional[str]:
|
|
186
|
+
"""Backward compatibility property - returns translated_audio_url"""
|
|
187
|
+
return self.translated_audio_url
|
|
173
188
|
|
|
174
189
|
|
|
175
190
|
@dataclass
|
audiopod/services/translation.py
CHANGED
|
@@ -1,42 +1,59 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Translation Service -
|
|
2
|
+
Translation Service - Speech-to-speech translation operations
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from typing import Optional, Union
|
|
6
6
|
from .base import BaseService
|
|
7
7
|
from ..models import Job, TranslationResult
|
|
8
|
+
from ..exceptions import ValidationError
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
class TranslationService(BaseService):
|
|
11
|
-
"""Service for
|
|
12
|
+
"""Service for speech-to-speech translation operations"""
|
|
12
13
|
|
|
13
14
|
def translate_audio(
|
|
14
15
|
self,
|
|
15
|
-
audio_file: str,
|
|
16
|
-
|
|
16
|
+
audio_file: Optional[str] = None,
|
|
17
|
+
url: Optional[str] = None,
|
|
18
|
+
target_language: str = "en",
|
|
17
19
|
source_language: Optional[str] = None,
|
|
18
20
|
wait_for_completion: bool = False,
|
|
19
21
|
timeout: int = 900
|
|
20
22
|
) -> Union[Job, TranslationResult]:
|
|
21
23
|
"""
|
|
22
|
-
Translate audio to another language
|
|
24
|
+
Translate speech from audio/video file to another language while preserving voice characteristics
|
|
23
25
|
|
|
24
26
|
Args:
|
|
25
|
-
audio_file: Path to audio file
|
|
26
|
-
|
|
27
|
-
|
|
27
|
+
audio_file: Path to audio/video file (required if no URL)
|
|
28
|
+
url: Direct media URL (required if no file)
|
|
29
|
+
target_language: Target language code (ISO 639-1, e.g., 'es' for Spanish)
|
|
30
|
+
source_language: Source language code (auto-detect if None)
|
|
28
31
|
wait_for_completion: Whether to wait for completion
|
|
29
32
|
timeout: Maximum time to wait
|
|
30
33
|
|
|
31
34
|
Returns:
|
|
32
35
|
Job object or translation result
|
|
33
36
|
"""
|
|
37
|
+
if not audio_file and not url:
|
|
38
|
+
raise ValidationError("Either audio_file or url must be provided")
|
|
39
|
+
|
|
40
|
+
if audio_file and url:
|
|
41
|
+
raise ValidationError("Provide either audio_file or url, not both")
|
|
42
|
+
|
|
34
43
|
target_language = self._validate_language_code(target_language)
|
|
35
44
|
if source_language:
|
|
36
45
|
source_language = self._validate_language_code(source_language)
|
|
37
46
|
|
|
38
|
-
|
|
47
|
+
# Prepare request data
|
|
48
|
+
files = {}
|
|
39
49
|
data = {"target_language": target_language}
|
|
50
|
+
|
|
51
|
+
if audio_file:
|
|
52
|
+
files = self._prepare_file_upload(audio_file, "file")
|
|
53
|
+
|
|
54
|
+
if url:
|
|
55
|
+
data["url"] = url
|
|
56
|
+
|
|
40
57
|
if source_language:
|
|
41
58
|
data["source_language"] = source_language
|
|
42
59
|
|
|
@@ -44,7 +61,10 @@ class TranslationService(BaseService):
|
|
|
44
61
|
return self._async_translate_audio(files, data, wait_for_completion, timeout)
|
|
45
62
|
else:
|
|
46
63
|
response = self.client.request(
|
|
47
|
-
"POST",
|
|
64
|
+
"POST",
|
|
65
|
+
"/api/v1/translation/translate/speech", # FIXED: Use correct speech-to-speech endpoint
|
|
66
|
+
data=data,
|
|
67
|
+
files=files if files else None
|
|
48
68
|
)
|
|
49
69
|
job = Job.from_dict(response)
|
|
50
70
|
|
|
@@ -53,11 +73,35 @@ class TranslationService(BaseService):
|
|
|
53
73
|
return TranslationResult.from_dict(completed_job.result or completed_job.__dict__)
|
|
54
74
|
|
|
55
75
|
return job
|
|
76
|
+
|
|
77
|
+
def translate_speech(
|
|
78
|
+
self,
|
|
79
|
+
audio_file: Optional[str] = None,
|
|
80
|
+
url: Optional[str] = None,
|
|
81
|
+
target_language: str = "en",
|
|
82
|
+
source_language: Optional[str] = None,
|
|
83
|
+
wait_for_completion: bool = False,
|
|
84
|
+
timeout: int = 900
|
|
85
|
+
) -> Union[Job, TranslationResult]:
|
|
86
|
+
"""
|
|
87
|
+
Alias for translate_audio - more descriptive method name for speech translation
|
|
88
|
+
"""
|
|
89
|
+
return self.translate_audio(
|
|
90
|
+
audio_file=audio_file,
|
|
91
|
+
url=url,
|
|
92
|
+
target_language=target_language,
|
|
93
|
+
source_language=source_language,
|
|
94
|
+
wait_for_completion=wait_for_completion,
|
|
95
|
+
timeout=timeout
|
|
96
|
+
)
|
|
56
97
|
|
|
57
98
|
async def _async_translate_audio(self, files, data, wait_for_completion, timeout):
|
|
58
99
|
"""Async version of translate_audio"""
|
|
59
100
|
response = await self.client.request(
|
|
60
|
-
"POST",
|
|
101
|
+
"POST",
|
|
102
|
+
"/api/v1/translation/translate/speech", # FIXED: Use correct speech-to-speech endpoint
|
|
103
|
+
data=data,
|
|
104
|
+
files=files if files else None
|
|
61
105
|
)
|
|
62
106
|
job = Job.from_dict(response)
|
|
63
107
|
|
|
@@ -79,3 +123,74 @@ class TranslationService(BaseService):
|
|
|
79
123
|
"""Async version of get_translation_job"""
|
|
80
124
|
response = await self.client.request("GET", f"/api/v1/translation/translations/{job_id}")
|
|
81
125
|
return TranslationResult.from_dict(response)
|
|
126
|
+
|
|
127
|
+
def list_translation_jobs(
|
|
128
|
+
self,
|
|
129
|
+
skip: int = 0,
|
|
130
|
+
limit: int = 50
|
|
131
|
+
) -> list:
|
|
132
|
+
"""
|
|
133
|
+
List translation jobs
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
skip: Number of jobs to skip (pagination offset)
|
|
137
|
+
limit: Maximum number of jobs to return (values above 100 are clamped to the API maximum of 100)
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
List of translation jobs
|
|
141
|
+
"""
|
|
142
|
+
params = {
|
|
143
|
+
"skip": skip,
|
|
144
|
+
"limit": min(limit, 100) # API max is 100
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
if self.async_mode:
|
|
148
|
+
return self._async_list_translation_jobs(params)
|
|
149
|
+
else:
|
|
150
|
+
response = self.client.request("GET", "/api/v1/translation/translations", params=params)
|
|
151
|
+
return [TranslationResult.from_dict(job_data) for job_data in response]
|
|
152
|
+
|
|
153
|
+
async def _async_list_translation_jobs(self, params: dict) -> list:
|
|
154
|
+
"""Async version of list_translation_jobs"""
|
|
155
|
+
response = await self.client.request("GET", "/api/v1/translation/translations", params=params)
|
|
156
|
+
return [TranslationResult.from_dict(job_data) for job_data in response]
|
|
157
|
+
|
|
158
|
+
def retry_translation(self, job_id: int) -> Job:
|
|
159
|
+
"""
|
|
160
|
+
Retry a failed translation job
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
job_id: ID of the failed translation job to retry
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
New job object for the retry attempt
|
|
167
|
+
"""
|
|
168
|
+
if self.async_mode:
|
|
169
|
+
return self._async_retry_translation(job_id)
|
|
170
|
+
else:
|
|
171
|
+
response = self.client.request("POST", f"/api/v1/translation/translations/{job_id}/retry")
|
|
172
|
+
return Job.from_dict(response)
|
|
173
|
+
|
|
174
|
+
async def _async_retry_translation(self, job_id: int) -> Job:
|
|
175
|
+
"""Async version of retry_translation"""
|
|
176
|
+
response = await self.client.request("POST", f"/api/v1/translation/translations/{job_id}/retry")
|
|
177
|
+
return Job.from_dict(response)
|
|
178
|
+
|
|
179
|
+
def delete_translation_job(self, job_id: int) -> dict:
|
|
180
|
+
"""
|
|
181
|
+
Delete a translation job
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
job_id: ID of the translation job to delete
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
Deletion confirmation
|
|
188
|
+
"""
|
|
189
|
+
if self.async_mode:
|
|
190
|
+
return self._async_delete_translation_job(job_id)
|
|
191
|
+
else:
|
|
192
|
+
return self.client.request("DELETE", f"/api/v1/translation/translations/{job_id}")
|
|
193
|
+
|
|
194
|
+
async def _async_delete_translation_job(self, job_id: int) -> dict:
|
|
195
|
+
"""Async version of delete_translation_job"""
|
|
196
|
+
return await self.client.request("DELETE", f"/api/v1/translation/translations/{job_id}")
|
audiopod/services/voice.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Voice Service - Voice cloning and TTS operations
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import time
|
|
5
6
|
from typing import List, Optional, Dict, Any, Union
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
|
|
@@ -13,54 +14,103 @@ from ..exceptions import ValidationError
|
|
|
13
14
|
class VoiceService(BaseService):
|
|
14
15
|
"""Service for voice cloning and text-to-speech operations"""
|
|
15
16
|
|
|
16
|
-
def
|
|
17
|
+
def generate_voice(
|
|
17
18
|
self,
|
|
18
|
-
voice_file: str,
|
|
19
19
|
text: str,
|
|
20
|
+
voice_file: Optional[str] = None,
|
|
21
|
+
voice_id: Optional[Union[int, str]] = None,
|
|
20
22
|
language: Optional[str] = None,
|
|
21
23
|
speed: float = 1.0,
|
|
24
|
+
audio_format: str = "mp3",
|
|
25
|
+
generation_params: Optional[Dict[str, Any]] = None,
|
|
22
26
|
wait_for_completion: bool = False,
|
|
23
27
|
timeout: int = 300
|
|
24
28
|
) -> Union[Job, Dict[str, Any]]:
|
|
25
29
|
"""
|
|
26
|
-
|
|
30
|
+
Generate speech using either a voice file (for cloning) or existing voice profile
|
|
31
|
+
|
|
32
|
+
This unified method handles both voice cloning and text-to-speech generation:
|
|
33
|
+
- For voice cloning: Provide voice_file parameter
|
|
34
|
+
- For TTS with existing voice: Provide voice_id parameter
|
|
27
35
|
|
|
28
36
|
Args:
|
|
29
|
-
|
|
30
|
-
|
|
37
|
+
text: Text to generate speech for
|
|
38
|
+
voice_file: Path to audio file for voice cloning (mutually exclusive with voice_id)
|
|
39
|
+
voice_id: ID/UUID of existing voice profile (mutually exclusive with voice_file)
|
|
31
40
|
language: Target language code (e.g., 'en', 'es')
|
|
32
|
-
speed: Speech speed (0.
|
|
41
|
+
speed: Speech speed (0.25 to 4.0, provider dependent)
|
|
42
|
+
audio_format: Output audio format ('mp3', 'wav', 'ogg')
|
|
43
|
+
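generation_params: Provider-specific parameters (e.g., temperature); merged into the request form data after the explicit arguments, so same-named keys such as speed override them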
|
|
33
44
|
wait_for_completion: Whether to wait for job completion
|
|
34
45
|
timeout: Maximum time to wait if wait_for_completion=True
|
|
35
46
|
|
|
36
47
|
Returns:
|
|
37
48
|
Job object if wait_for_completion=False, otherwise job result
|
|
49
|
+
|
|
50
|
+
Raises:
|
|
51
|
+
ValidationError: If both or neither of voice_file and voice_id are provided, if speed is outside 0.25-4.0, or if audio_format is not 'mp3', 'wav', or 'ogg'
|
|
38
52
|
"""
|
|
39
53
|
# Validate inputs
|
|
54
|
+
if not voice_file and not voice_id:
|
|
55
|
+
raise ValidationError("Either voice_file (for cloning) or voice_id (for TTS) must be provided")
|
|
56
|
+
if voice_file and voice_id:
|
|
57
|
+
raise ValidationError("Provide either voice_file or voice_id, not both")
|
|
58
|
+
|
|
40
59
|
text = self._validate_text_input(text)
|
|
41
60
|
if language:
|
|
42
61
|
language = self._validate_language_code(language)
|
|
43
|
-
if not 0.
|
|
44
|
-
raise ValidationError("Speed must be between 0.
|
|
62
|
+
if not 0.25 <= speed <= 4.0:
|
|
63
|
+
raise ValidationError("Speed must be between 0.25 and 4.0")
|
|
64
|
+
if audio_format not in ["mp3", "wav", "ogg"]:
|
|
65
|
+
raise ValidationError("Audio format must be 'mp3', 'wav', or 'ogg'")
|
|
45
66
|
|
|
46
|
-
#
|
|
67
|
+
# For voice cloning, upload the sample file and generate through the unified endpoint's "clone" identifier
|
|
68
|
+
if voice_file:
|
|
69
|
+
return self._generate_with_voice_file(
|
|
70
|
+
voice_file, text, language, speed, audio_format,
|
|
71
|
+
generation_params, wait_for_completion, timeout
|
|
72
|
+
)
|
|
73
|
+
else:
|
|
74
|
+
# Use existing voice profile with unified endpoint
|
|
75
|
+
return self._generate_with_voice_id(
|
|
76
|
+
voice_id, text, language, speed, audio_format,
|
|
77
|
+
generation_params, wait_for_completion, timeout
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
def _generate_with_voice_file(
|
|
81
|
+
self,
|
|
82
|
+
voice_file: str,
|
|
83
|
+
text: str,
|
|
84
|
+
language: Optional[str],
|
|
85
|
+
speed: float,
|
|
86
|
+
audio_format: str,
|
|
87
|
+
generation_params: Optional[Dict[str, Any]],
|
|
88
|
+
wait_for_completion: bool,
|
|
89
|
+
timeout: int
|
|
90
|
+
) -> Union[Job, Dict[str, Any]]:
|
|
91
|
+
"""Generate speech with voice cloning using unified endpoint"""
|
|
92
|
+
# For voice cloning, we use the "clone" identifier with the unified endpoint
|
|
47
93
|
files = self._prepare_file_upload(voice_file, "file")
|
|
48
94
|
|
|
49
|
-
# Prepare form data
|
|
95
|
+
# Prepare form data for unified endpoint
|
|
50
96
|
data = {
|
|
51
97
|
"input_text": text,
|
|
52
|
-
"speed": speed
|
|
98
|
+
"speed": speed,
|
|
99
|
+
"audio_format": audio_format
|
|
53
100
|
}
|
|
54
101
|
if language:
|
|
55
|
-
data["
|
|
102
|
+
data["language"] = language
|
|
103
|
+
if generation_params:
|
|
104
|
+
# Add generation parameters
|
|
105
|
+
data.update(generation_params)
|
|
56
106
|
|
|
57
|
-
# Make request
|
|
58
107
|
if self.async_mode:
|
|
59
|
-
return self.
|
|
108
|
+
return self._async_generate_voice("clone", data, files, wait_for_completion, timeout)
|
|
60
109
|
else:
|
|
110
|
+
# Use unified voice generation endpoint with "clone" identifier
|
|
61
111
|
response = self.client.request(
|
|
62
112
|
"POST",
|
|
63
|
-
"/api/v1/voice/
|
|
113
|
+
"/api/v1/voice/voices/clone/generate",
|
|
64
114
|
data=data,
|
|
65
115
|
files=files
|
|
66
116
|
)
|
|
@@ -73,17 +123,60 @@ class VoiceService(BaseService):
|
|
|
73
123
|
|
|
74
124
|
return job
|
|
75
125
|
|
|
76
|
-
|
|
126
|
+
def _generate_with_voice_id(
|
|
77
127
|
self,
|
|
78
|
-
|
|
128
|
+
voice_id: Union[int, str],
|
|
129
|
+
text: str,
|
|
130
|
+
language: Optional[str],
|
|
131
|
+
speed: float,
|
|
132
|
+
audio_format: str,
|
|
133
|
+
generation_params: Optional[Dict[str, Any]],
|
|
134
|
+
wait_for_completion: bool,
|
|
135
|
+
timeout: int
|
|
136
|
+
) -> Union[Job, Dict[str, Any]]:
|
|
137
|
+
"""Generate speech with existing voice profile using unified endpoint"""
|
|
138
|
+
# Prepare form data for unified endpoint
|
|
139
|
+
data = {
|
|
140
|
+
"input_text": text,
|
|
141
|
+
"speed": speed,
|
|
142
|
+
"audio_format": audio_format
|
|
143
|
+
}
|
|
144
|
+
if language:
|
|
145
|
+
data["language"] = language
|
|
146
|
+
if generation_params:
|
|
147
|
+
# Add generation parameters
|
|
148
|
+
data.update(generation_params)
|
|
149
|
+
|
|
150
|
+
if self.async_mode:
|
|
151
|
+
return self._async_generate_voice(voice_id, data, None, wait_for_completion, timeout)
|
|
152
|
+
else:
|
|
153
|
+
# Use unified voice generation endpoint
|
|
154
|
+
response = self.client.request(
|
|
155
|
+
"POST",
|
|
156
|
+
f"/api/v1/voice/voices/{voice_id}/generate",
|
|
157
|
+
data=data
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
job = Job.from_dict(response)
|
|
161
|
+
|
|
162
|
+
if wait_for_completion:
|
|
163
|
+
job = self._wait_for_completion(job.id, timeout)
|
|
164
|
+
return job.result if job.result else job
|
|
165
|
+
|
|
166
|
+
return job
|
|
167
|
+
|
|
168
|
+
async def _async_generate_voice(
|
|
169
|
+
self,
|
|
170
|
+
voice_identifier: Union[int, str],
|
|
79
171
|
data: Dict[str, Any],
|
|
172
|
+
files: Optional[Dict[str, Any]],
|
|
80
173
|
wait_for_completion: bool,
|
|
81
174
|
timeout: int
|
|
82
175
|
) -> Union[Job, Dict[str, Any]]:
|
|
83
|
-
"""Async version of
|
|
176
|
+
"""Async version of unified voice generation"""
|
|
84
177
|
response = await self.client.request(
|
|
85
|
-
"POST",
|
|
86
|
-
"/api/v1/voice/
|
|
178
|
+
"POST",
|
|
179
|
+
f"/api/v1/voice/voices/{voice_identifier}/generate",
|
|
87
180
|
data=data,
|
|
88
181
|
files=files
|
|
89
182
|
)
|
|
@@ -93,7 +186,7 @@ class VoiceService(BaseService):
|
|
|
93
186
|
if wait_for_completion:
|
|
94
187
|
job = await self._async_wait_for_completion(job.id, timeout)
|
|
95
188
|
return job.result if job.result else job
|
|
96
|
-
|
|
189
|
+
|
|
97
190
|
return job
|
|
98
191
|
|
|
99
192
|
def create_voice_profile(
|
|
@@ -202,78 +295,79 @@ class VoiceService(BaseService):
|
|
|
202
295
|
language: Optional[str] = None,
|
|
203
296
|
speed: float = 1.0,
|
|
204
297
|
audio_format: str = "mp3",
|
|
298
|
+
generation_params: Optional[Dict[str, Any]] = None,
|
|
205
299
|
wait_for_completion: bool = False,
|
|
206
300
|
timeout: int = 300
|
|
207
301
|
) -> Union[Job, Dict[str, Any]]:
|
|
208
302
|
"""
|
|
209
|
-
Generate speech using an existing voice profile
|
|
303
|
+
Generate speech using an existing voice profile (unified endpoint)
|
|
304
|
+
|
|
305
|
+
This method now uses the unified voice generation endpoint for consistency.
|
|
210
306
|
|
|
211
307
|
Args:
|
|
212
308
|
voice_id: ID or UUID of the voice profile
|
|
213
309
|
text: Text to generate speech for
|
|
214
310
|
language: Target language code
|
|
215
|
-
speed: Speech speed (0.
|
|
216
|
-
audio_format: Output audio format (mp3, wav)
|
|
311
|
+
speed: Speech speed (0.25 to 4.0, provider dependent)
|
|
312
|
+
audio_format: Output audio format ('mp3', 'wav', 'ogg')
|
|
313
|
+
generation_params: Additional generation parameters
|
|
217
314
|
wait_for_completion: Whether to wait for completion
|
|
218
315
|
timeout: Maximum time to wait
|
|
219
316
|
|
|
220
317
|
Returns:
|
|
221
318
|
Job object or generation result
|
|
222
319
|
"""
|
|
223
|
-
#
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
"audio_format": audio_format
|
|
237
|
-
}
|
|
238
|
-
if language:
|
|
239
|
-
data["language"] = language
|
|
240
|
-
|
|
241
|
-
# Make request
|
|
242
|
-
endpoint = f"/api/v1/voice/voices/{voice_id}/generate"
|
|
243
|
-
|
|
244
|
-
if self.async_mode:
|
|
245
|
-
return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)
|
|
246
|
-
else:
|
|
247
|
-
response = self.client.request("POST", endpoint, data=data)
|
|
248
|
-
|
|
249
|
-
if "job_id" in response:
|
|
250
|
-
job = Job.from_dict(response)
|
|
251
|
-
if wait_for_completion:
|
|
252
|
-
job = self._wait_for_completion(job.id, timeout)
|
|
253
|
-
return job.result if job.result else job
|
|
254
|
-
return job
|
|
255
|
-
else:
|
|
256
|
-
# Direct response with audio URL
|
|
257
|
-
return response
|
|
258
|
-
|
|
259
|
-
async def _async_generate_speech(
|
|
320
|
+
# Use unified voice generation method
|
|
321
|
+
return self.generate_voice(
|
|
322
|
+
text=text,
|
|
323
|
+
voice_id=voice_id,
|
|
324
|
+
language=language,
|
|
325
|
+
speed=speed,
|
|
326
|
+
audio_format=audio_format,
|
|
327
|
+
generation_params=generation_params,
|
|
328
|
+
wait_for_completion=wait_for_completion,
|
|
329
|
+
timeout=timeout
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
def clone_voice(
|
|
260
333
|
self,
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
334
|
+
voice_file: str,
|
|
335
|
+
text: str,
|
|
336
|
+
language: Optional[str] = None,
|
|
337
|
+
speed: float = 1.0,
|
|
338
|
+
generation_params: Optional[Dict[str, Any]] = None,
|
|
339
|
+
wait_for_completion: bool = False,
|
|
340
|
+
timeout: int = 300
|
|
265
341
|
) -> Union[Job, Dict[str, Any]]:
|
|
266
|
-
"""
|
|
267
|
-
|
|
342
|
+
"""
|
|
343
|
+
Clone a voice from an audio file (backward compatibility)
|
|
268
344
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
345
|
+
This method is now a wrapper around the unified generate_voice method.
|
|
346
|
+
For new code, consider using generate_voice() directly with voice_file parameter.
|
|
347
|
+
|
|
348
|
+
Args:
|
|
349
|
+
voice_file: Path to audio file containing voice to clone
|
|
350
|
+
text: Text to generate with the cloned voice
|
|
351
|
+
language: Target language code (e.g., 'en', 'es')
|
|
352
|
+
speed: Speech speed (0.25 to 4.0, provider dependent)
|
|
353
|
+
generation_params: Additional generation parameters
|
|
354
|
+
wait_for_completion: Whether to wait for job completion
|
|
355
|
+
timeout: Maximum time to wait if wait_for_completion=True
|
|
356
|
+
|
|
357
|
+
Returns:
|
|
358
|
+
Job object if wait_for_completion=False, otherwise job result
|
|
359
|
+
"""
|
|
360
|
+
# Use unified voice generation method
|
|
361
|
+
return self.generate_voice(
|
|
362
|
+
text=text,
|
|
363
|
+
voice_file=voice_file,
|
|
364
|
+
language=language,
|
|
365
|
+
speed=speed,
|
|
366
|
+
audio_format="mp3", # Default format for backward compatibility
|
|
367
|
+
generation_params=generation_params,
|
|
368
|
+
wait_for_completion=wait_for_completion,
|
|
369
|
+
timeout=timeout
|
|
370
|
+
)
|
|
277
371
|
|
|
278
372
|
def list_voice_profiles(
|
|
279
373
|
self,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: audiopod
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Professional Audio Processing API Client for Python
|
|
5
5
|
Home-page: https://github.com/audiopod-ai/audiopod-python
|
|
6
6
|
Author: AudioPod AI
|
|
@@ -95,7 +95,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
|
|
|
95
95
|
|
|
96
96
|
### Basic Usage
|
|
97
97
|
|
|
98
|
-
#### Voice Cloning
|
|
98
|
+
#### Voice Generation (Unified TTS & Cloning)
|
|
99
99
|
|
|
100
100
|
```python
|
|
101
101
|
import audiopod
|
|
@@ -103,15 +103,39 @@ import audiopod
|
|
|
103
103
|
# Initialize client
|
|
104
104
|
client = audiopod.Client()
|
|
105
105
|
|
|
106
|
-
#
|
|
107
|
-
job = client.voice.
|
|
106
|
+
# Generate voice using file cloning (unified approach)
|
|
107
|
+
job = client.voice.generate_voice(
|
|
108
|
+
text="Hello! This is voice generation using a cloned voice.",
|
|
109
|
+
voice_file="path/to/voice_sample.wav", # For voice cloning
|
|
110
|
+
language="en",
|
|
111
|
+
audio_format="mp3",
|
|
112
|
+
generation_params={
|
|
113
|
+
"speed": 1.0
|
|
114
|
+
},
|
|
115
|
+
wait_for_completion=True
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
print(f"Generated audio URL: {job.output_url}")
|
|
119
|
+
|
|
120
|
+
# Generate speech with existing voice profile (unified approach)
|
|
121
|
+
speech = client.voice.generate_voice(
|
|
122
|
+
text="Hello from my voice profile!",
|
|
123
|
+
voice_id="voice-profile-id", # For existing voice profiles
|
|
124
|
+
language="en",
|
|
125
|
+
audio_format="mp3",
|
|
126
|
+
generation_params={
|
|
127
|
+
"speed": 1.0
|
|
128
|
+
},
|
|
129
|
+
wait_for_completion=True
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# Backward compatibility methods (deprecated - use generate_voice instead)
|
|
133
|
+
legacy_clone = client.voice.clone_voice(
|
|
108
134
|
voice_file="path/to/voice_sample.wav",
|
|
109
135
|
text="Hello! This is a cloned voice speaking.",
|
|
110
136
|
language="en",
|
|
111
137
|
wait_for_completion=True
|
|
112
138
|
)
|
|
113
|
-
|
|
114
|
-
print(f"Generated audio URL: {job['output_url']}")
|
|
115
139
|
```
|
|
116
140
|
|
|
117
141
|
#### Music Generation
|
|
@@ -142,17 +166,25 @@ print(f"Transcript: {transcript.transcript}")
|
|
|
142
166
|
print(f"Detected {len(transcript.segments)} speakers")
|
|
143
167
|
```
|
|
144
168
|
|
|
145
|
-
####
|
|
169
|
+
#### Speech-to-Speech Translation
|
|
146
170
|
|
|
147
171
|
```python
|
|
148
|
-
# Translate
|
|
149
|
-
translation = client.translation.
|
|
172
|
+
# Translate speech while preserving voice characteristics
|
|
173
|
+
translation = client.translation.translate_speech(
|
|
150
174
|
audio_file="path/to/english_audio.wav",
|
|
151
175
|
target_language="es", # Spanish
|
|
176
|
+
source_language="en", # English (optional - auto-detect)
|
|
152
177
|
wait_for_completion=True
|
|
153
178
|
)
|
|
154
179
|
|
|
155
|
-
print(f"Translated audio URL: {translation.
|
|
180
|
+
print(f"Translated audio URL: {translation.translated_audio_url}")
|
|
181
|
+
|
|
182
|
+
# Or translate from URL
|
|
183
|
+
url_translation = client.translation.translate_speech(
|
|
184
|
+
url="https://example.com/audio.mp3",
|
|
185
|
+
target_language="fr", # French
|
|
186
|
+
wait_for_completion=True
|
|
187
|
+
)
|
|
156
188
|
```
|
|
157
189
|
|
|
158
190
|
### Async Support
|
|
@@ -189,10 +221,22 @@ voice_profile = client.voice.create_voice_profile(
|
|
|
189
221
|
wait_for_completion=True
|
|
190
222
|
)
|
|
191
223
|
|
|
192
|
-
# Use the voice profile for speech generation
|
|
193
|
-
speech = client.voice.
|
|
224
|
+
# Use the voice profile for speech generation (unified approach - recommended)
|
|
225
|
+
speech = client.voice.generate_voice(
|
|
226
|
+
text="This uses my custom voice profile with the unified method!",
|
|
194
227
|
voice_id=voice_profile.id,
|
|
195
|
-
|
|
228
|
+
language="en",
|
|
229
|
+
audio_format="mp3",
|
|
230
|
+
generation_params={
|
|
231
|
+
"speed": 1.0
|
|
232
|
+
},
|
|
233
|
+
wait_for_completion=True
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
# Legacy method (still works - uses generate_voice internally)
|
|
237
|
+
legacy_speech = client.voice.generate_speech(
|
|
238
|
+
voice_id=voice_profile.id,
|
|
239
|
+
text="This uses the legacy method.",
|
|
196
240
|
wait_for_completion=True
|
|
197
241
|
)
|
|
198
242
|
```
|
|
@@ -335,7 +379,7 @@ client = audiopod.Client(
|
|
|
335
379
|
|
|
336
380
|
### Services
|
|
337
381
|
|
|
338
|
-
- `client.voice`: Voice
|
|
382
|
+
- `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
|
|
339
383
|
- `client.music`: Music generation and editing
|
|
340
384
|
- `client.transcription`: Speech-to-text transcription
|
|
341
385
|
- `client.translation`: Audio/video translation
|
|
@@ -344,6 +388,20 @@ client = audiopod.Client(
|
|
|
344
388
|
- `client.karaoke`: Karaoke video generation
|
|
345
389
|
- `client.credits`: Credit management and usage tracking
|
|
346
390
|
|
|
391
|
+
#### Voice Service Methods
|
|
392
|
+
|
|
393
|
+
**Recommended (Unified Approach):**
|
|
394
|
+
- `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
|
|
395
|
+
|
|
396
|
+
**Legacy Methods (Backward Compatibility):**
|
|
397
|
+
- `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
|
|
398
|
+
- `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
|
|
399
|
+
|
|
400
|
+
**Voice Management:**
|
|
401
|
+
- `client.voice.create_voice_profile()` - Create reusable voice profiles
|
|
402
|
+
- `client.voice.list_voice_profiles()` - List available voice profiles
|
|
403
|
+
- `client.voice.delete_voice_profile()` - Delete voice profiles
|
|
404
|
+
|
|
347
405
|
### Models
|
|
348
406
|
|
|
349
407
|
- `Job`: Base job information and status
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
audiopod/__init__.py,sha256=
|
|
1
|
+
audiopod/__init__.py,sha256=jQtsXMdmCZArJ1pOWG9huymdE_-WlyW8Rk8q0kcQn-c,1790
|
|
2
2
|
audiopod/cli.py,sha256=ZYzAQ3UpoYuOEWivMwMneJUf2z8DGGYTx1Nb6yRfdVY,9339
|
|
3
3
|
audiopod/client.py,sha256=67oPSInSNssJpTR00ZuYSdk9lbx5KiRnDQw8UYKNVsA,11742
|
|
4
4
|
audiopod/config.py,sha256=fuGtbuES4tXdHwqQqoZa5izCH6nVfFRP06D8eK1Cg10,1683
|
|
5
5
|
audiopod/exceptions.py,sha256=c3Ym2tWyRE1kemVkXDaXFcfP3h6AokhKcUcCBImwGes,2386
|
|
6
|
-
audiopod/models.py,sha256=
|
|
6
|
+
audiopod/models.py,sha256=R70iMqKDZfLtTB9FQ7KrFLBi-bFA5-FrS-5eMOtfK1o,8517
|
|
7
7
|
audiopod/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
|
|
8
8
|
audiopod/services/__init__.py,sha256=9Ycl9VVscwwY42joBCSL67v8DrITW2T2QyuaokbpehM,653
|
|
9
9
|
audiopod/services/base.py,sha256=mNbziYy2KsWWZrdHFlTl9pKLvoQUW-ZkkJuVWfCVP74,6731
|
|
@@ -14,11 +14,11 @@ audiopod/services/music.py,sha256=hDPjTSj-gAeEWBVB7PRPQrptMHJyQTz8e9p-p7yaRPI,20
|
|
|
14
14
|
audiopod/services/speaker.py,sha256=OPSOwArfrGXVzRgciS13n1QsCJSK1PB-Mz6VgwxuHAA,1866
|
|
15
15
|
audiopod/services/stem_extraction.py,sha256=3ibMFKFR25xKHpVs3WGMNriZ88sB5PriFNa_s2Bvon4,6026
|
|
16
16
|
audiopod/services/transcription.py,sha256=HyH6WpGWZsggYxIvt2dhB6_5UHaigk3XwXsVgarWzcE,7565
|
|
17
|
-
audiopod/services/translation.py,sha256=
|
|
18
|
-
audiopod/services/voice.py,sha256=
|
|
19
|
-
audiopod-1.
|
|
20
|
-
audiopod-1.
|
|
21
|
-
audiopod-1.
|
|
22
|
-
audiopod-1.
|
|
23
|
-
audiopod-1.
|
|
24
|
-
audiopod-1.
|
|
17
|
+
audiopod/services/translation.py,sha256=oUU82c61CeAt13lzlWx8S-9xEgYlskwX8bLMbQw2Ni8,7396
|
|
18
|
+
audiopod/services/voice.py,sha256=t0-4yjVrzWXJorfQCGbBSNRGE2wZfjoRQ76elJu1BvU,17748
|
|
19
|
+
audiopod-1.2.0.dist-info/licenses/LICENSE,sha256=hqEjnOaGNbnLSBxbtbC7WQVREU2vQI8FmwecCiZlMfA,1068
|
|
20
|
+
audiopod-1.2.0.dist-info/METADATA,sha256=mLpXkYWYxvd3k3LrDtH6Cr68C7Xk7JmpoBL4vwYPiOU,13048
|
|
21
|
+
audiopod-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
22
|
+
audiopod-1.2.0.dist-info/entry_points.txt,sha256=uLcNDzXuOXnJAz9j91TDGayVjjZ7-ZiHBGDydqNUErU,47
|
|
23
|
+
audiopod-1.2.0.dist-info/top_level.txt,sha256=M6yyOFFNpLdH4i1AMRqJZLRIgfpg1NvrQVmnPd8A6N8,9
|
|
24
|
+
audiopod-1.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|