gl-speech-sdk 0.0.1b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gl_speech_sdk/__init__.py +69 -0
- gl_speech_sdk/client.py +86 -0
- gl_speech_sdk/models.py +409 -0
- gl_speech_sdk/py.typed +0 -0
- gl_speech_sdk/stt.py +456 -0
- gl_speech_sdk/tts.py +449 -0
- gl_speech_sdk/webhooks.py +551 -0
- gl_speech_sdk-0.0.1b1.dist-info/METADATA +417 -0
- gl_speech_sdk-0.0.1b1.dist-info/RECORD +11 -0
- gl_speech_sdk-0.0.1b1.dist-info/WHEEL +4 -0
- gl_speech_sdk-0.0.1b1.dist-info/licenses/LICENSE +21 -0
gl_speech_sdk/stt.py
ADDED
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
"""Speech-to-Text handling for the GL Speech Python client.
|
|
2
|
+
|
|
3
|
+
This module provides the SpeechToText class for handling STT operations
|
|
4
|
+
with the Prosa Speech API, including transcription, job management, and status tracking.
|
|
5
|
+
|
|
6
|
+
Authors:
|
|
7
|
+
GDP Labs
|
|
8
|
+
|
|
9
|
+
References:
|
|
10
|
+
https://docs2.prosa.ai/speech/stt/rest/api/
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any
|
|
15
|
+
from urllib.parse import urljoin
|
|
16
|
+
|
|
17
|
+
import httpx
|
|
18
|
+
|
|
19
|
+
from gl_speech_sdk.models import (
|
|
20
|
+
STTConfig,
|
|
21
|
+
STTJobRequest,
|
|
22
|
+
STTJobsListResponse,
|
|
23
|
+
STTRequest,
|
|
24
|
+
STTResponse,
|
|
25
|
+
STTStatusResponse,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SpeechToText:
|
|
32
|
+
"""Handles Speech-to-Text API operations for the Prosa Speech API."""
|
|
33
|
+
|
|
34
|
+
def __init__(self, client):
|
|
35
|
+
"""Initialize SpeechToText API.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
client: SpeechClient instance
|
|
39
|
+
"""
|
|
40
|
+
self._client = client
|
|
41
|
+
|
|
42
|
+
def _prepare_headers(
|
|
43
|
+
self, extra_headers: dict[str, str] | None = None
|
|
44
|
+
) -> dict[str, str]:
|
|
45
|
+
"""Prepare headers for the API request.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
extra_headers (dict[str, str] | None): Additional headers to merge with default headers
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
dict[str, str]: Dictionary containing the request headers
|
|
52
|
+
"""
|
|
53
|
+
headers = self._client.default_headers.copy()
|
|
54
|
+
|
|
55
|
+
if self._client.api_key:
|
|
56
|
+
headers["x-api-key"] = self._client.api_key
|
|
57
|
+
|
|
58
|
+
if extra_headers:
|
|
59
|
+
headers.update(extra_headers)
|
|
60
|
+
|
|
61
|
+
return headers
|
|
62
|
+
|
|
63
|
+
def _prepare_request_data(
|
|
64
|
+
self,
|
|
65
|
+
model: str,
|
|
66
|
+
wait: bool | None = None,
|
|
67
|
+
speaker_count: int | None = None,
|
|
68
|
+
include_filler: bool | None = None,
|
|
69
|
+
include_partial_results: bool | None = None,
|
|
70
|
+
auto_punctuation: bool | None = None,
|
|
71
|
+
enable_spoken_numerals: bool | None = None,
|
|
72
|
+
enable_speech_insights: bool | None = None,
|
|
73
|
+
enable_voice_insights: bool | None = None,
|
|
74
|
+
enable_conversation_analytics: bool | None = None,
|
|
75
|
+
conversation_analytics_prompt: str | None = None,
|
|
76
|
+
label: str | None = None,
|
|
77
|
+
data: str | None = None,
|
|
78
|
+
uri: str | None = None,
|
|
79
|
+
duration: float | None = None,
|
|
80
|
+
mime_type: str | None = None,
|
|
81
|
+
sample_rate: int | None = None,
|
|
82
|
+
channels: int | None = None,
|
|
83
|
+
) -> dict[str, Any]:
|
|
84
|
+
"""Prepare request data for the STT API call.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
model (str): ASR model name
|
|
88
|
+
wait (bool | None): Whether to wait for completion
|
|
89
|
+
speaker_count (int | None): Expected number of speakers
|
|
90
|
+
include_filler (bool | None): Include filler words
|
|
91
|
+
include_partial_results (bool | None): Include partial results
|
|
92
|
+
auto_punctuation (bool | None): Auto-add punctuation
|
|
93
|
+
enable_spoken_numerals (bool | None): Convert spoken numerals
|
|
94
|
+
enable_speech_insights (bool | None): Enable speech insights
|
|
95
|
+
enable_voice_insights (bool | None): Enable voice insights
|
|
96
|
+
enable_conversation_analytics (bool | None): Enable conversation analytics
|
|
97
|
+
conversation_analytics_prompt (str | None): Prompt type for conversation analytics
|
|
98
|
+
label (str | None): Job label
|
|
99
|
+
data (str | None): Base64-encoded audio data
|
|
100
|
+
uri (str | None): URI to audio file
|
|
101
|
+
duration (float | None): Audio duration in seconds
|
|
102
|
+
mime_type (str | None): Audio MIME type
|
|
103
|
+
sample_rate (int | None): Audio sample rate
|
|
104
|
+
channels (int | None): Number of audio channels
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
dict[str, Any]: Dictionary containing the prepared request data
|
|
108
|
+
"""
|
|
109
|
+
if not model:
|
|
110
|
+
raise ValueError("model is required")
|
|
111
|
+
|
|
112
|
+
config = STTConfig(
|
|
113
|
+
model=model,
|
|
114
|
+
wait=wait,
|
|
115
|
+
speaker_count=speaker_count,
|
|
116
|
+
include_filler=include_filler,
|
|
117
|
+
include_partial_results=include_partial_results,
|
|
118
|
+
auto_punctuation=auto_punctuation,
|
|
119
|
+
enable_spoken_numerals=enable_spoken_numerals,
|
|
120
|
+
enable_speech_insights=enable_speech_insights,
|
|
121
|
+
enable_voice_insights=enable_voice_insights,
|
|
122
|
+
enable_conversation_analytics=enable_conversation_analytics,
|
|
123
|
+
conversation_analytics_prompt=conversation_analytics_prompt,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
request = STTRequest(
|
|
127
|
+
label=label,
|
|
128
|
+
data=data,
|
|
129
|
+
uri=uri,
|
|
130
|
+
duration=duration,
|
|
131
|
+
mime_type=mime_type,
|
|
132
|
+
sample_rate=sample_rate,
|
|
133
|
+
channels=channels,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
job_request = STTJobRequest(config=config, request=request)
|
|
137
|
+
return job_request.model_dump(exclude_none=True, by_alias=True)
|
|
138
|
+
|
|
139
|
+
def _make_request(
    self,
    method: str,
    url: str,
    headers: dict[str, str],
    json_data: dict[str, Any] | None = None,
    params: dict[str, Any] | None = None,
) -> dict[str, Any] | list[dict[str, Any]]:
    """Make an HTTP request to the API and return its decoded JSON body.

    A fresh ``httpx.Client`` is opened for the call, using the timeout
    configured on the SpeechClient, and closed when the call returns.

    Args:
        method (str): HTTP method (GET, POST, DELETE, PUT)
        url (str): Request URL
        headers (dict[str, str]): Request headers
        json_data (dict[str, Any] | None): JSON body data
        params (dict[str, Any] | None): Query parameters

    Returns:
        dict[str, Any] | list[dict[str, Any]]: Response JSON data. An empty
            dict is returned for 204 responses, for empty bodies, and for
            bodies that cannot be decoded as JSON (a warning is logged in
            that last case).

    Raises:
        httpx.HTTPStatusError: If the response has an error status code
        TypeError: If the decoded JSON is neither an object nor an array
    """
    timeout = httpx.Timeout(self._client.timeout)

    if logger.isEnabledFor(logging.DEBUG):
        # Credentials must never reach the log output: mask the values of
        # secret-bearing headers (the API key is sent as "x-api-key").
        sensitive = {"x-api-key", "authorization", "proxy-authorization", "cookie"}
        loggable_headers = {
            name: "***" if str(name).lower() in sensitive else value
            for name, value in headers.items()
        }
        logger.debug("Request: %s %s", method, url)
        logger.debug("Headers: %s", loggable_headers)
        if json_data:
            logger.debug("Body: %s", json_data)

    with httpx.Client(timeout=timeout) as client:
        response = client.request(
            method=method,
            url=url,
            headers=headers,
            json=json_data,
            params=params,
        )
        response.raise_for_status()

        # Nothing to decode (e.g. a successful DELETE).
        if response.status_code == 204 or not response.content:
            return {}

        try:
            data = response.json()
        except ValueError:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            # Stay best-effort (empty result), but leave a trace instead of
            # hiding the problem, and let unrelated errors propagate.
            logger.warning(
                "Response to %s %s is not valid JSON; returning empty result",
                method,
                url,
            )
            return {}

    if not isinstance(data, (dict, list)):
        raise TypeError(f"Unexpected response type: {type(data)}")
    return data
|
|
190
|
+
|
|
191
|
+
def list_models(
    self,
    extra_headers: dict[str, str] | None = None,
) -> list[dict[str, Any]]:
    """Fetch the ASR models offered by the API.

    Issues a GET request to the ``stt/models`` endpoint.

    Args:
        extra_headers (dict[str, str] | None): Additional headers

    Returns:
        list[dict[str, Any]]: The models reported by the API, or an empty
            list when the response body is not a JSON array

    Raises:
        httpx.HTTPStatusError: If the API request fails
    """
    logger.debug("Listing available ASR models")

    endpoint = urljoin(self._client.base_url, "stt/models")
    request_headers = self._prepare_headers(extra_headers)

    payload = self._make_request("GET", endpoint, request_headers)
    # Anything other than a list (e.g. the {} used for empty bodies) is
    # reported as "no models".
    return payload if isinstance(payload, list) else []
|
|
215
|
+
|
|
216
|
+
def transcribe(
|
|
217
|
+
self,
|
|
218
|
+
model: str,
|
|
219
|
+
wait: bool | None = None,
|
|
220
|
+
speaker_count: int | None = None,
|
|
221
|
+
include_filler: bool | None = None,
|
|
222
|
+
include_partial_results: bool | None = None,
|
|
223
|
+
auto_punctuation: bool | None = None,
|
|
224
|
+
enable_spoken_numerals: bool | None = None,
|
|
225
|
+
enable_speech_insights: bool | None = None,
|
|
226
|
+
enable_voice_insights: bool | None = None,
|
|
227
|
+
enable_conversation_analytics: bool | None = None,
|
|
228
|
+
conversation_analytics_prompt: str | None = None,
|
|
229
|
+
label: str | None = None,
|
|
230
|
+
data: str | None = None,
|
|
231
|
+
uri: str | None = None,
|
|
232
|
+
duration: float | None = None,
|
|
233
|
+
mime_type: str | None = None,
|
|
234
|
+
sample_rate: int | None = None,
|
|
235
|
+
channels: int | None = None,
|
|
236
|
+
extra_headers: dict[str, str] | None = None,
|
|
237
|
+
) -> STTResponse:
|
|
238
|
+
"""Submit a speech-to-text transcription request.
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
model (str): ASR model name (e.g., "stt-general")
|
|
242
|
+
wait (bool | None): If True, blocks until execution finishes.
|
|
243
|
+
For short audio (<60s), typically set to True.
|
|
244
|
+
speaker_count (int | None): Expected number of speakers
|
|
245
|
+
include_filler (bool | None): Include filler words in result
|
|
246
|
+
include_partial_results (bool | None): Include partial results
|
|
247
|
+
auto_punctuation (bool | None): Automatically add punctuation
|
|
248
|
+
enable_spoken_numerals (bool | None): Convert spoken numerals to digits
|
|
249
|
+
enable_speech_insights (bool | None): Enable speech insight analytics
|
|
250
|
+
enable_voice_insights (bool | None): Enable voice insight analytics
|
|
251
|
+
enable_conversation_analytics (bool | None): Enable conversation analytics
|
|
252
|
+
conversation_analytics_prompt (str | None): Prompt type for conversation analytics
|
|
253
|
+
label (str | None): Optional label for the job
|
|
254
|
+
data (str | None): Base64-encoded audio data. Either data or uri required.
|
|
255
|
+
uri (str | None): URI to audio file. Either data or uri required.
|
|
256
|
+
Supported: https://, googledrive://
|
|
257
|
+
duration (float | None): Audio duration in seconds (for progress reporting)
|
|
258
|
+
mime_type (str | None): Audio MIME type
|
|
259
|
+
sample_rate (int | None): Audio sample rate
|
|
260
|
+
channels (int | None): Number of audio channels
|
|
261
|
+
extra_headers (dict[str, str] | None): Additional headers
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
STTResponse: Transcription response with job_id and possibly result
|
|
265
|
+
|
|
266
|
+
Raises:
|
|
267
|
+
ValueError: If neither data nor uri is provided
|
|
268
|
+
httpx.HTTPStatusError: If the API request fails
|
|
269
|
+
"""
|
|
270
|
+
if not data and not uri:
|
|
271
|
+
raise ValueError("Either 'data' or 'uri' must be provided")
|
|
272
|
+
|
|
273
|
+
logger.debug("Submitting STT transcription request")
|
|
274
|
+
|
|
275
|
+
url = urljoin(self._client.base_url, "stt")
|
|
276
|
+
headers = self._prepare_headers(extra_headers)
|
|
277
|
+
json_data = self._prepare_request_data(
|
|
278
|
+
model=model,
|
|
279
|
+
wait=wait,
|
|
280
|
+
speaker_count=speaker_count,
|
|
281
|
+
include_filler=include_filler,
|
|
282
|
+
include_partial_results=include_partial_results,
|
|
283
|
+
auto_punctuation=auto_punctuation,
|
|
284
|
+
enable_spoken_numerals=enable_spoken_numerals,
|
|
285
|
+
enable_speech_insights=enable_speech_insights,
|
|
286
|
+
enable_voice_insights=enable_voice_insights,
|
|
287
|
+
enable_conversation_analytics=enable_conversation_analytics,
|
|
288
|
+
conversation_analytics_prompt=conversation_analytics_prompt,
|
|
289
|
+
label=label,
|
|
290
|
+
data=data,
|
|
291
|
+
uri=uri,
|
|
292
|
+
duration=duration,
|
|
293
|
+
mime_type=mime_type,
|
|
294
|
+
sample_rate=sample_rate,
|
|
295
|
+
channels=channels,
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
response_data = self._make_request("POST", url, headers, json_data)
|
|
299
|
+
if not isinstance(response_data, dict):
|
|
300
|
+
raise TypeError("Expected dict response from API")
|
|
301
|
+
return STTResponse(**response_data)
|
|
302
|
+
|
|
303
|
+
def list_jobs(
    self,
    page: int | None = None,
    per_page: int | None = None,
    from_date: str | None = None,
    until_date: str | None = None,
    sort_by: str | None = None,
    sort_ascend: bool | None = None,
    query_text: str | None = None,
    extra_headers: dict[str, str] | None = None,
) -> STTJobsListResponse:
    """Retrieve a listing of STT jobs via GET on the ``stt`` endpoint.

    Only the filters that are actually supplied (not None) are sent as
    query parameters.

    Args:
        page (int | None): Page index for pagination
        per_page (int | None): Number of items per page
        from_date (str | None): Filter jobs from this date (YYYY-MM-DD)
        until_date (str | None): Filter jobs until this date (YYYY-MM-DD)
        sort_by (str | None): Field to sort by
        sort_ascend (bool | None): Sort in ascending order
        query_text (str | None): Search for jobs with matching result text
        extra_headers (dict[str, str] | None): Additional headers

    Returns:
        STTJobsListResponse: List of STT jobs with pagination

    Raises:
        TypeError: If the API response is not a JSON object
        httpx.HTTPStatusError: If the API request fails
    """
    logger.debug("Listing STT jobs")

    endpoint = urljoin(self._client.base_url, "stt")
    request_headers = self._prepare_headers(extra_headers)

    filters: dict[str, Any] = {
        "page": page,
        "per_page": per_page,
        "from_date": from_date,
        "until_date": until_date,
        "sort_by": sort_by,
        "sort_ascend": sort_ascend,
        "query_text": query_text,
    }
    # Keep falsy-but-meaningful values (0, False, ""); drop only None.
    query: dict[str, Any] = {
        key: value for key, value in filters.items() if value is not None
    }

    result = self._make_request("GET", endpoint, request_headers, params=query)
    if isinstance(result, dict):
        return STTJobsListResponse(**result)
    raise TypeError("Expected dict response from API")
|
|
357
|
+
|
|
358
|
+
def get_job(
|
|
359
|
+
self,
|
|
360
|
+
job_id: str,
|
|
361
|
+
extra_headers: dict[str, str] | None = None,
|
|
362
|
+
) -> STTResponse:
|
|
363
|
+
"""Retrieve a specific STT job by ID.
|
|
364
|
+
|
|
365
|
+
Args:
|
|
366
|
+
job_id (str): Unique identifier of the job
|
|
367
|
+
extra_headers (dict[str, str] | None): Additional headers
|
|
368
|
+
|
|
369
|
+
Returns:
|
|
370
|
+
STTResponse: Job details including result if complete
|
|
371
|
+
|
|
372
|
+
Raises:
|
|
373
|
+
ValueError: If job_id is empty
|
|
374
|
+
httpx.HTTPStatusError: If the API request fails
|
|
375
|
+
"""
|
|
376
|
+
if not job_id:
|
|
377
|
+
raise ValueError("job_id cannot be empty")
|
|
378
|
+
|
|
379
|
+
logger.debug("Retrieving STT job: %s", job_id)
|
|
380
|
+
|
|
381
|
+
url = urljoin(self._client.base_url, f"stt/{job_id}")
|
|
382
|
+
headers = self._prepare_headers(extra_headers)
|
|
383
|
+
|
|
384
|
+
response_data = self._make_request("GET", url, headers)
|
|
385
|
+
if not isinstance(response_data, dict):
|
|
386
|
+
raise TypeError("Expected dict response from API")
|
|
387
|
+
return STTResponse(**response_data)
|
|
388
|
+
|
|
389
|
+
def get_status(
|
|
390
|
+
self,
|
|
391
|
+
job_id: str,
|
|
392
|
+
extra_headers: dict[str, str] | None = None,
|
|
393
|
+
) -> STTStatusResponse:
|
|
394
|
+
"""Retrieve the status of a specific STT job.
|
|
395
|
+
|
|
396
|
+
Args:
|
|
397
|
+
job_id (str): Unique identifier of the job
|
|
398
|
+
extra_headers (dict[str, str] | None): Additional headers
|
|
399
|
+
|
|
400
|
+
Returns:
|
|
401
|
+
STTStatusResponse: Job status information
|
|
402
|
+
|
|
403
|
+
Raises:
|
|
404
|
+
ValueError: If job_id is empty
|
|
405
|
+
httpx.HTTPStatusError: If the API request fails
|
|
406
|
+
"""
|
|
407
|
+
if not job_id:
|
|
408
|
+
raise ValueError("job_id cannot be empty")
|
|
409
|
+
|
|
410
|
+
logger.debug("Retrieving STT job status: %s", job_id)
|
|
411
|
+
|
|
412
|
+
url = urljoin(self._client.base_url, f"stt/{job_id}/status")
|
|
413
|
+
headers = self._prepare_headers(extra_headers)
|
|
414
|
+
|
|
415
|
+
response_data = self._make_request("GET", url, headers)
|
|
416
|
+
if not isinstance(response_data, dict):
|
|
417
|
+
raise TypeError("Expected dict response from API")
|
|
418
|
+
return STTStatusResponse(**response_data)
|
|
419
|
+
|
|
420
|
+
def archive(
|
|
421
|
+
self,
|
|
422
|
+
job_id: str,
|
|
423
|
+
extra_headers: dict[str, str] | None = None,
|
|
424
|
+
) -> STTResponse:
|
|
425
|
+
"""Archive (soft-delete) an STT job.
|
|
426
|
+
|
|
427
|
+
Args:
|
|
428
|
+
job_id (str): Unique identifier of the job
|
|
429
|
+
extra_headers (dict[str, str] | None): Additional headers
|
|
430
|
+
|
|
431
|
+
Returns:
|
|
432
|
+
STTResponse: Archived job details
|
|
433
|
+
|
|
434
|
+
Raises:
|
|
435
|
+
ValueError: If job_id is empty
|
|
436
|
+
httpx.HTTPStatusError: If the API request fails
|
|
437
|
+
"""
|
|
438
|
+
if not job_id:
|
|
439
|
+
raise ValueError("job_id cannot be empty")
|
|
440
|
+
|
|
441
|
+
logger.debug("Archiving STT job: %s", job_id)
|
|
442
|
+
|
|
443
|
+
url = urljoin(self._client.base_url, f"stt/{job_id}")
|
|
444
|
+
headers = self._prepare_headers(extra_headers)
|
|
445
|
+
|
|
446
|
+
response_data = self._make_request("DELETE", url, headers)
|
|
447
|
+
if not response_data:
|
|
448
|
+
return STTResponse(
|
|
449
|
+
job_id=job_id,
|
|
450
|
+
status="archived",
|
|
451
|
+
created_at="",
|
|
452
|
+
modified_at="",
|
|
453
|
+
)
|
|
454
|
+
if not isinstance(response_data, dict):
|
|
455
|
+
raise TypeError("Expected dict response from API")
|
|
456
|
+
return STTResponse(**response_data)
|