audiopod 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audiopod/__init__.py +10 -64
- audiopod/client.py +143 -172
- audiopod/config.py +4 -50
- audiopod/exceptions.py +16 -71
- audiopod/services/__init__.py +8 -6
- audiopod/services/base.py +51 -195
- audiopod/services/credits.py +26 -30
- audiopod/services/denoiser.py +120 -40
- audiopod/services/music.py +180 -485
- audiopod/services/speaker.py +117 -36
- audiopod/services/stem_extraction.py +130 -142
- audiopod/services/transcription.py +159 -184
- audiopod/services/translation.py +109 -170
- audiopod/services/voice.py +141 -424
- audiopod/services/wallet.py +235 -0
- audiopod-1.4.0.dist-info/METADATA +206 -0
- audiopod-1.4.0.dist-info/RECORD +20 -0
- {audiopod-1.2.0.dist-info → audiopod-1.4.0.dist-info}/WHEEL +1 -1
- audiopod/cli.py +0 -285
- audiopod/models.py +0 -250
- audiopod/py.typed +0 -2
- audiopod/services/karaoke.py +0 -61
- audiopod-1.2.0.dist-info/METADATA +0 -454
- audiopod-1.2.0.dist-info/RECORD +0 -24
- audiopod-1.2.0.dist-info/entry_points.txt +0 -2
- {audiopod-1.2.0.dist-info → audiopod-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {audiopod-1.2.0.dist-info → audiopod-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,212 +1,187 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Transcription Service - Speech-to-text
|
|
3
|
-
"""
|
|
2
|
+
Transcription Service - Speech-to-text
|
|
4
3
|
|
|
5
|
-
|
|
4
|
+
API Routes:
|
|
5
|
+
- POST /api/v1/transcription/transcribe - Transcribe from URL
|
|
6
|
+
- POST /api/v1/transcription/transcribe-upload - Transcribe from file upload
|
|
7
|
+
- GET /api/v1/transcription/jobs/{id} - Get job details
|
|
8
|
+
- GET /api/v1/transcription/jobs - List jobs
|
|
9
|
+
- DELETE /api/v1/transcription/jobs/{id} - Delete job
|
|
10
|
+
"""
|
|
6
11
|
|
|
12
|
+
from typing import Optional, Dict, Any, List
|
|
7
13
|
from .base import BaseService
|
|
8
|
-
from ..models import Job, TranscriptionResult
|
|
9
|
-
from ..exceptions import ValidationError
|
|
10
14
|
|
|
11
15
|
|
|
12
16
|
class TranscriptionService(BaseService):
|
|
13
|
-
"""Service for
|
|
14
|
-
|
|
15
|
-
def
|
|
17
|
+
"""Service for speech-to-text transcription."""
|
|
18
|
+
|
|
19
|
+
def transcribe(
|
|
16
20
|
self,
|
|
17
|
-
audio_file: str,
|
|
21
|
+
audio_file: Optional[str] = None,
|
|
22
|
+
url: Optional[str] = None,
|
|
18
23
|
language: Optional[str] = None,
|
|
19
|
-
|
|
20
|
-
enable_speaker_diarization: bool = False,
|
|
21
|
-
enable_word_timestamps: bool = True,
|
|
24
|
+
speaker_diarization: bool = False,
|
|
22
25
|
wait_for_completion: bool = False,
|
|
23
|
-
timeout: int = 600
|
|
24
|
-
) ->
|
|
26
|
+
timeout: int = 600,
|
|
27
|
+
) -> Dict[str, Any]:
|
|
25
28
|
"""
|
|
26
|
-
Transcribe audio to text
|
|
27
|
-
|
|
29
|
+
Transcribe audio to text.
|
|
30
|
+
|
|
28
31
|
Args:
|
|
29
|
-
audio_file: Path to audio file
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
audio_file: Path to local audio file
|
|
33
|
+
url: URL of audio file (or list of URLs)
|
|
34
|
+
language: Language code (auto-detected if not provided)
|
|
35
|
+
speaker_diarization: Enable speaker separation
|
|
36
|
+
wait_for_completion: Wait for completion
|
|
37
|
+
timeout: Max wait time in seconds
|
|
38
|
+
|
|
37
39
|
Returns:
|
|
38
|
-
Job
|
|
40
|
+
Job dict with transcript when completed
|
|
39
41
|
"""
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
data["language"] = language
|
|
57
|
-
|
|
58
|
-
# Make request
|
|
59
|
-
if self.async_mode:
|
|
60
|
-
return self._async_transcribe_audio(files, data, wait_for_completion, timeout)
|
|
42
|
+
if audio_file:
|
|
43
|
+
# File upload endpoint
|
|
44
|
+
data = {
|
|
45
|
+
"enable_speaker_diarization": speaker_diarization,
|
|
46
|
+
}
|
|
47
|
+
if language:
|
|
48
|
+
data["language"] = language
|
|
49
|
+
|
|
50
|
+
files = self._prepare_file_upload(audio_file, "files")
|
|
51
|
+
|
|
52
|
+
if self.async_mode:
|
|
53
|
+
return self._async_transcribe_upload(data, files, wait_for_completion, timeout)
|
|
54
|
+
|
|
55
|
+
response = self.client.request(
|
|
56
|
+
"POST", "/api/v1/transcription/transcribe-upload", data=data, files=files
|
|
57
|
+
)
|
|
61
58
|
else:
|
|
59
|
+
# URL-based endpoint
|
|
60
|
+
data = {
|
|
61
|
+
"source_urls": [url] if isinstance(url, str) else url,
|
|
62
|
+
"enable_speaker_diarization": speaker_diarization,
|
|
63
|
+
}
|
|
64
|
+
if language:
|
|
65
|
+
data["language"] = language
|
|
66
|
+
|
|
67
|
+
if self.async_mode:
|
|
68
|
+
return self._async_transcribe(data, wait_for_completion, timeout)
|
|
69
|
+
|
|
62
70
|
response = self.client.request(
|
|
63
|
-
"POST",
|
|
64
|
-
"/api/v1/transcription/transcribe-upload",
|
|
65
|
-
data=data,
|
|
66
|
-
files=files
|
|
71
|
+
"POST", "/api/v1/transcription/transcribe", json_data=data
|
|
67
72
|
)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
async def _async_transcribe_audio(
|
|
78
|
-
self,
|
|
79
|
-
files: Dict[str, Any],
|
|
80
|
-
data: Dict[str, Any],
|
|
81
|
-
wait_for_completion: bool,
|
|
82
|
-
timeout: int
|
|
83
|
-
) -> Union[Job, TranscriptionResult]:
|
|
84
|
-
"""Async version of transcribe_audio"""
|
|
73
|
+
|
|
74
|
+
if wait_for_completion:
|
|
75
|
+
job_id = response.get("id") or response.get("job_id")
|
|
76
|
+
return self._wait_for_transcription(job_id, timeout)
|
|
77
|
+
return response
|
|
78
|
+
|
|
79
|
+
async def _async_transcribe(
|
|
80
|
+
self, data: Dict, wait_for_completion: bool, timeout: int
|
|
81
|
+
) -> Dict[str, Any]:
|
|
85
82
|
response = await self.client.request(
|
|
86
|
-
"POST",
|
|
87
|
-
"/api/v1/transcription/transcribe-upload",
|
|
88
|
-
data=data,
|
|
89
|
-
files=files
|
|
83
|
+
"POST", "/api/v1/transcription/transcribe", json_data=data
|
|
90
84
|
)
|
|
91
|
-
|
|
92
|
-
job = Job.from_dict(response)
|
|
93
|
-
|
|
94
85
|
if wait_for_completion:
|
|
95
|
-
|
|
96
|
-
return
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
enable_speaker_diarization: bool = False,
|
|
106
|
-
wait_for_completion: bool = False,
|
|
107
|
-
timeout: int = 600
|
|
108
|
-
) -> Union[Job, TranscriptionResult]:
|
|
109
|
-
"""
|
|
110
|
-
Transcribe audio from URL (YouTube, etc.)
|
|
111
|
-
|
|
112
|
-
Args:
|
|
113
|
-
url: URL to audio/video content
|
|
114
|
-
language: Language code
|
|
115
|
-
model_type: Model to use
|
|
116
|
-
enable_speaker_diarization: Enable speaker identification
|
|
117
|
-
wait_for_completion: Whether to wait for completion
|
|
118
|
-
timeout: Maximum time to wait
|
|
119
|
-
|
|
120
|
-
Returns:
|
|
121
|
-
Job object or transcription result
|
|
122
|
-
"""
|
|
123
|
-
if language:
|
|
124
|
-
language = self._validate_language_code(language)
|
|
125
|
-
|
|
126
|
-
data = {
|
|
127
|
-
"source_urls": [url],
|
|
128
|
-
"model_type": model_type,
|
|
129
|
-
"enable_speaker_diarization": enable_speaker_diarization
|
|
130
|
-
}
|
|
131
|
-
if language:
|
|
132
|
-
data["language"] = language
|
|
133
|
-
|
|
134
|
-
if self.async_mode:
|
|
135
|
-
return self._async_transcribe_url(data, wait_for_completion, timeout)
|
|
136
|
-
else:
|
|
137
|
-
response = self.client.request("POST", "/api/v1/transcription/transcribe", data=data)
|
|
138
|
-
job = Job.from_dict(response)
|
|
139
|
-
|
|
140
|
-
if wait_for_completion:
|
|
141
|
-
completed_job = self._wait_for_completion(job.id, timeout)
|
|
142
|
-
return TranscriptionResult.from_dict(completed_job.result or completed_job.__dict__)
|
|
143
|
-
|
|
144
|
-
return job
|
|
145
|
-
|
|
146
|
-
async def _async_transcribe_url(
|
|
147
|
-
self,
|
|
148
|
-
data: Dict[str, Any],
|
|
149
|
-
wait_for_completion: bool,
|
|
150
|
-
timeout: int
|
|
151
|
-
) -> Union[Job, TranscriptionResult]:
|
|
152
|
-
"""Async version of transcribe_url"""
|
|
153
|
-
response = await self.client.request("POST", "/api/v1/transcription/transcribe", data=data)
|
|
154
|
-
job = Job.from_dict(response)
|
|
155
|
-
|
|
86
|
+
job_id = response.get("id") or response.get("job_id")
|
|
87
|
+
return await self._async_wait_for_transcription(job_id, timeout)
|
|
88
|
+
return response
|
|
89
|
+
|
|
90
|
+
async def _async_transcribe_upload(
|
|
91
|
+
self, data: Dict, files: Dict, wait_for_completion: bool, timeout: int
|
|
92
|
+
) -> Dict[str, Any]:
|
|
93
|
+
response = await self.client.request(
|
|
94
|
+
"POST", "/api/v1/transcription/transcribe-upload", data=data, files=files
|
|
95
|
+
)
|
|
156
96
|
if wait_for_completion:
|
|
157
|
-
|
|
158
|
-
return
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
"""Get transcription job details"""
|
|
97
|
+
job_id = response.get("id") or response.get("job_id")
|
|
98
|
+
return await self._async_wait_for_transcription(job_id, timeout)
|
|
99
|
+
return response
|
|
100
|
+
|
|
101
|
+
def get_job(self, job_id: int) -> Dict[str, Any]:
|
|
102
|
+
"""Get transcription job details and status."""
|
|
164
103
|
if self.async_mode:
|
|
165
|
-
return self.
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
) -> str:
|
|
104
|
+
return self._async_get_job(job_id)
|
|
105
|
+
return self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
|
|
106
|
+
|
|
107
|
+
async def _async_get_job(self, job_id: int) -> Dict[str, Any]:
|
|
108
|
+
return await self.client.request("GET", f"/api/v1/transcription/jobs/{job_id}")
|
|
109
|
+
|
|
110
|
+
def list_jobs(self, skip: int = 0, limit: int = 50) -> List[Dict[str, Any]]:
|
|
111
|
+
"""List transcription jobs."""
|
|
112
|
+
if self.async_mode:
|
|
113
|
+
return self._async_list_jobs(skip, limit)
|
|
114
|
+
return self.client.request(
|
|
115
|
+
"GET", "/api/v1/transcription/jobs", params={"skip": skip, "limit": limit}
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
async def _async_list_jobs(self, skip: int, limit: int) -> List[Dict[str, Any]]:
|
|
119
|
+
return await self.client.request(
|
|
120
|
+
"GET", "/api/v1/transcription/jobs", params={"skip": skip, "limit": limit}
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
def delete_job(self, job_id: int) -> Dict[str, str]:
|
|
124
|
+
"""Delete a transcription job."""
|
|
125
|
+
if self.async_mode:
|
|
126
|
+
return self._async_delete_job(job_id)
|
|
127
|
+
return self.client.request("DELETE", f"/api/v1/transcription/jobs/{job_id}")
|
|
128
|
+
|
|
129
|
+
async def _async_delete_job(self, job_id: int) -> Dict[str, str]:
|
|
130
|
+
return await self.client.request("DELETE", f"/api/v1/transcription/jobs/{job_id}")
|
|
131
|
+
|
|
132
|
+
def get_transcript(self, job_id: int, format: str = "json") -> Any:
|
|
180
133
|
"""
|
|
181
|
-
|
|
134
|
+
Get transcript content.
|
|
182
135
|
|
|
183
136
|
Args:
|
|
184
|
-
job_id:
|
|
185
|
-
format: Output format
|
|
186
|
-
|
|
187
|
-
Returns:
|
|
188
|
-
Transcript content
|
|
137
|
+
job_id: Job ID
|
|
138
|
+
format: Output format - 'json', 'txt', 'srt', 'vtt'
|
|
189
139
|
"""
|
|
190
|
-
if format not in ["json", "txt", "srt", "vtt", "pdf", "docx", "html"]:
|
|
191
|
-
raise ValidationError("Format must be one of: json, txt, srt, vtt, pdf, docx, html")
|
|
192
|
-
|
|
193
|
-
params = {"format": format}
|
|
194
|
-
|
|
195
140
|
if self.async_mode:
|
|
196
|
-
return self.
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
"GET",
|
|
200
|
-
f"/api/v1/transcription/jobs/{job_id}/transcript",
|
|
201
|
-
params=params
|
|
202
|
-
)
|
|
203
|
-
return response
|
|
204
|
-
|
|
205
|
-
async def _async_download_transcript(self, job_id: int, params: Dict[str, str]) -> str:
|
|
206
|
-
"""Async version of download_transcript"""
|
|
207
|
-
response = await self.client.request(
|
|
208
|
-
"GET",
|
|
209
|
-
f"/api/v1/transcription/jobs/{job_id}/transcript",
|
|
210
|
-
params=params
|
|
141
|
+
return self._async_get_transcript(job_id, format)
|
|
142
|
+
return self.client.request(
|
|
143
|
+
"GET", f"/api/v1/transcription/jobs/{job_id}/transcript", params={"format": format}
|
|
211
144
|
)
|
|
212
|
-
|
|
145
|
+
|
|
146
|
+
async def _async_get_transcript(self, job_id: int, format: str) -> Any:
|
|
147
|
+
return await self.client.request(
|
|
148
|
+
"GET", f"/api/v1/transcription/jobs/{job_id}/transcript", params={"format": format}
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
def _wait_for_transcription(self, job_id: int, timeout: int) -> Dict[str, Any]:
|
|
152
|
+
"""Wait for transcription job completion."""
|
|
153
|
+
import time
|
|
154
|
+
start_time = time.time()
|
|
155
|
+
|
|
156
|
+
while time.time() - start_time < timeout:
|
|
157
|
+
job = self.get_job(job_id)
|
|
158
|
+
status = job.get("status", "").upper()
|
|
159
|
+
|
|
160
|
+
if status == "COMPLETED":
|
|
161
|
+
return job
|
|
162
|
+
elif status in ("FAILED", "ERROR", "CANCELLED"):
|
|
163
|
+
raise Exception(f"Transcription failed: {job.get('error_message', 'Unknown error')}")
|
|
164
|
+
|
|
165
|
+
time.sleep(3)
|
|
166
|
+
|
|
167
|
+
raise TimeoutError(f"Transcription {job_id} did not complete within {timeout} seconds")
|
|
168
|
+
|
|
169
|
+
async def _async_wait_for_transcription(self, job_id: int, timeout: int) -> Dict[str, Any]:
|
|
170
|
+
"""Async wait for transcription job completion."""
|
|
171
|
+
import asyncio
|
|
172
|
+
import time
|
|
173
|
+
start_time = time.time()
|
|
174
|
+
|
|
175
|
+
while time.time() - start_time < timeout:
|
|
176
|
+
job = await self.get_job(job_id)
|
|
177
|
+
status = job.get("status", "").upper()
|
|
178
|
+
|
|
179
|
+
if status == "COMPLETED":
|
|
180
|
+
return job
|
|
181
|
+
elif status in ("FAILED", "ERROR", "CANCELLED"):
|
|
182
|
+
raise Exception(f"Transcription failed: {job.get('error_message', 'Unknown error')}")
|
|
183
|
+
|
|
184
|
+
await asyncio.sleep(3)
|
|
185
|
+
|
|
186
|
+
raise TimeoutError(f"Transcription {job_id} did not complete within {timeout} seconds")
|
|
187
|
+
|