gl-speech-sdk 0.0.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gl_speech_sdk/stt.py ADDED
@@ -0,0 +1,456 @@
1
+ """Speech-to-Text handling for the GL Speech Python client.
2
+
3
+ This module provides the SpeechToText class for handling STT operations
4
+ with the Prosa Speech API, including transcription, job management, and status tracking.
5
+
6
+ Authors:
7
+ GDP Labs
8
+
9
+ References:
10
+ https://docs2.prosa.ai/speech/stt/rest/api/
11
+ """
12
+
13
import logging
from typing import Any
from urllib.parse import quote, urljoin

import httpx

from gl_speech_sdk.models import (
    STTConfig,
    STTJobRequest,
    STTJobsListResponse,
    STTRequest,
    STTResponse,
    STTStatusResponse,
)
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class SpeechToText:
    """Handles Speech-to-Text API operations for the Prosa Speech API.

    Every public method builds a URL relative to ``client.base_url``, merges
    the client's default headers with the API key and any per-call headers,
    and performs a single synchronous HTTP request through ``httpx``.
    """

    # Header names (lower-case) whose values must never be written to logs.
    _SENSITIVE_HEADERS = frozenset(
        {"x-api-key", "authorization", "proxy-authorization", "cookie"}
    )

    def __init__(self, client):
        """Initialize SpeechToText API.

        Args:
            client: SpeechClient instance. This class reads its ``base_url``,
                ``api_key``, ``default_headers`` and ``timeout`` attributes.
        """
        self._client = client

    def _prepare_headers(
        self, extra_headers: dict[str, str] | None = None
    ) -> dict[str, str]:
        """Prepare headers for the API request.

        Args:
            extra_headers (dict[str, str] | None): Additional headers to merge with default headers

        Returns:
            dict[str, str]: Dictionary containing the request headers
        """
        # Copy so the client's shared defaults are never mutated.
        headers = self._client.default_headers.copy()

        if self._client.api_key:
            headers["x-api-key"] = self._client.api_key

        # Per-call headers are applied last, so they win over the defaults.
        if extra_headers:
            headers.update(extra_headers)

        return headers

    @classmethod
    def _redact_headers(cls, headers: dict[str, str]) -> dict[str, str]:
        """Return a copy of ``headers`` that is safe to log.

        Args:
            headers (dict[str, str]): Request headers

        Returns:
            dict[str, str]: Same keys, with the values of credential-bearing
                headers replaced by ``"***"``
        """
        return {
            name: "***" if name.lower() in cls._SENSITIVE_HEADERS else value
            for name, value in headers.items()
        }

    def _build_url(self, *segments: str) -> str:
        """Join path segments onto the client's base URL.

        Args:
            *segments (str): Path segments relative to the base URL

        Returns:
            str: Absolute request URL
        """
        base_url = self._client.base_url
        # urljoin() replaces the last path segment of a base that does not
        # end in "/", which would silently drop part of the API prefix.
        if not base_url.endswith("/"):
            base_url += "/"
        # Percent-encode each segment so caller-supplied values such as a
        # job id cannot inject "/", "?", "#" or ".." into the path.
        path = "/".join(quote(segment, safe="") for segment in segments)
        return urljoin(base_url, path)

    @staticmethod
    def _require_job_id(job_id: str) -> None:
        """Reject empty job identifiers.

        Args:
            job_id (str): Unique identifier of the job

        Raises:
            ValueError: If job_id is empty
        """
        if not job_id:
            raise ValueError("job_id cannot be empty")

    @staticmethod
    def _expect_dict(
        response_data: dict[str, Any] | list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Ensure a decoded response body is a JSON object.

        Args:
            response_data (dict[str, Any] | list[dict[str, Any]]): Decoded response body

        Returns:
            dict[str, Any]: The same object, narrowed to a dict

        Raises:
            TypeError: If the body is not a dict
        """
        if not isinstance(response_data, dict):
            raise TypeError("Expected dict response from API")
        return response_data

    def _prepare_request_data(
        self,
        model: str,
        wait: bool | None = None,
        speaker_count: int | None = None,
        include_filler: bool | None = None,
        include_partial_results: bool | None = None,
        auto_punctuation: bool | None = None,
        enable_spoken_numerals: bool | None = None,
        enable_speech_insights: bool | None = None,
        enable_voice_insights: bool | None = None,
        enable_conversation_analytics: bool | None = None,
        conversation_analytics_prompt: str | None = None,
        label: str | None = None,
        data: str | None = None,
        uri: str | None = None,
        duration: float | None = None,
        mime_type: str | None = None,
        sample_rate: int | None = None,
        channels: int | None = None,
    ) -> dict[str, Any]:
        """Prepare request data for the STT API call.

        Args:
            model (str): ASR model name
            wait (bool | None): Whether to wait for completion
            speaker_count (int | None): Expected number of speakers
            include_filler (bool | None): Include filler words
            include_partial_results (bool | None): Include partial results
            auto_punctuation (bool | None): Auto-add punctuation
            enable_spoken_numerals (bool | None): Convert spoken numerals
            enable_speech_insights (bool | None): Enable speech insights
            enable_voice_insights (bool | None): Enable voice insights
            enable_conversation_analytics (bool | None): Enable conversation analytics
            conversation_analytics_prompt (str | None): Prompt type for conversation analytics
            label (str | None): Job label
            data (str | None): Base64-encoded audio data
            uri (str | None): URI to audio file
            duration (float | None): Audio duration in seconds
            mime_type (str | None): Audio MIME type
            sample_rate (int | None): Audio sample rate
            channels (int | None): Number of audio channels

        Returns:
            dict[str, Any]: Dictionary containing the prepared request data

        Raises:
            ValueError: If model is empty
        """
        if not model:
            raise ValueError("model is required")

        config = STTConfig(
            model=model,
            wait=wait,
            speaker_count=speaker_count,
            include_filler=include_filler,
            include_partial_results=include_partial_results,
            auto_punctuation=auto_punctuation,
            enable_spoken_numerals=enable_spoken_numerals,
            enable_speech_insights=enable_speech_insights,
            enable_voice_insights=enable_voice_insights,
            enable_conversation_analytics=enable_conversation_analytics,
            conversation_analytics_prompt=conversation_analytics_prompt,
        )

        request = STTRequest(
            label=label,
            data=data,
            uri=uri,
            duration=duration,
            mime_type=mime_type,
            sample_rate=sample_rate,
            channels=channels,
        )

        job_request = STTJobRequest(config=config, request=request)
        # Unset (None) options are omitted from the serialized payload.
        return job_request.model_dump(exclude_none=True, by_alias=True)

    def _make_request(
        self,
        method: str,
        url: str,
        headers: dict[str, str],
        json_data: dict[str, Any] | None = None,
        params: dict[str, Any] | None = None,
    ) -> dict[str, Any] | list[dict[str, Any]]:
        """Make an HTTP request to the API.

        Args:
            method (str): HTTP method (GET, POST, DELETE, PUT)
            url (str): Request URL
            headers (dict[str, str]): Request headers
            json_data (dict[str, Any] | None): JSON body data
            params (dict[str, Any] | None): Query parameters

        Returns:
            dict[str, Any] | list[dict[str, Any]]: Response JSON data. An empty
                dict is returned for a 204 status, an empty body, or a body
                that cannot be decoded as JSON.

        Raises:
            httpx.HTTPStatusError: If the request fails
            TypeError: If the decoded JSON is neither an object nor an array
        """
        timeout = httpx.Timeout(self._client.timeout)

        logger.debug("Request: %s %s", method, url)
        # Only build the redacted copy when it will actually be emitted, and
        # never log credentials such as the x-api-key value.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Headers: %s", self._redact_headers(headers))
        if json_data:
            logger.debug("Body: %s", json_data)

        with httpx.Client(timeout=timeout) as client:
            response = client.request(
                method=method,
                url=url,
                headers=headers,
                json=json_data,
                params=params,
            )
            response.raise_for_status()

            if response.status_code == 204 or not response.content:
                return {}

            try:
                data = response.json()
            except ValueError:
                # json.JSONDecodeError and UnicodeDecodeError are both
                # ValueError subclasses. Stay best-effort, but leave a trace
                # instead of hiding the malformed body completely.
                logger.warning(
                    "Response to %s %s is not valid JSON; treating it as empty",
                    method,
                    url,
                )
                return {}

        if not isinstance(data, (dict, list)):
            raise TypeError(f"Unexpected response type: {type(data)}")
        return data

    def list_models(
        self,
        extra_headers: dict[str, str] | None = None,
    ) -> list[dict[str, Any]]:
        """List all available ASR models.

        Args:
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            list[dict[str, Any]]: List of available ASR models. An empty list
                is returned when the response body is not a JSON array.

        Raises:
            httpx.HTTPStatusError: If the API request fails
        """
        logger.debug("Listing available ASR models")

        url = self._build_url("stt", "models")
        headers = self._prepare_headers(extra_headers)

        response_data = self._make_request("GET", url, headers)
        return response_data if isinstance(response_data, list) else []

    def transcribe(
        self,
        model: str,
        wait: bool | None = None,
        speaker_count: int | None = None,
        include_filler: bool | None = None,
        include_partial_results: bool | None = None,
        auto_punctuation: bool | None = None,
        enable_spoken_numerals: bool | None = None,
        enable_speech_insights: bool | None = None,
        enable_voice_insights: bool | None = None,
        enable_conversation_analytics: bool | None = None,
        conversation_analytics_prompt: str | None = None,
        label: str | None = None,
        data: str | None = None,
        uri: str | None = None,
        duration: float | None = None,
        mime_type: str | None = None,
        sample_rate: int | None = None,
        channels: int | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> STTResponse:
        """Submit a speech-to-text transcription request.

        Args:
            model (str): ASR model name (e.g., "stt-general")
            wait (bool | None): If True, blocks until execution finishes.
                For short audio (<60s), typically set to True.
            speaker_count (int | None): Expected number of speakers
            include_filler (bool | None): Include filler words in result
            include_partial_results (bool | None): Include partial results
            auto_punctuation (bool | None): Automatically add punctuation
            enable_spoken_numerals (bool | None): Convert spoken numerals to digits
            enable_speech_insights (bool | None): Enable speech insight analytics
            enable_voice_insights (bool | None): Enable voice insight analytics
            enable_conversation_analytics (bool | None): Enable conversation analytics
            conversation_analytics_prompt (str | None): Prompt type for conversation analytics
            label (str | None): Optional label for the job
            data (str | None): Base64-encoded audio data. Either data or uri required.
            uri (str | None): URI to audio file. Either data or uri required.
                Supported: https://, googledrive://
            duration (float | None): Audio duration in seconds (for progress reporting)
            mime_type (str | None): Audio MIME type
            sample_rate (int | None): Audio sample rate
            channels (int | None): Number of audio channels
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            STTResponse: Transcription response with job_id and possibly result

        Raises:
            ValueError: If neither data nor uri is provided, or model is empty
            TypeError: If the API does not return a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        if not data and not uri:
            raise ValueError("Either 'data' or 'uri' must be provided")

        logger.debug("Submitting STT transcription request")

        url = self._build_url("stt")
        headers = self._prepare_headers(extra_headers)
        json_data = self._prepare_request_data(
            model=model,
            wait=wait,
            speaker_count=speaker_count,
            include_filler=include_filler,
            include_partial_results=include_partial_results,
            auto_punctuation=auto_punctuation,
            enable_spoken_numerals=enable_spoken_numerals,
            enable_speech_insights=enable_speech_insights,
            enable_voice_insights=enable_voice_insights,
            enable_conversation_analytics=enable_conversation_analytics,
            conversation_analytics_prompt=conversation_analytics_prompt,
            label=label,
            data=data,
            uri=uri,
            duration=duration,
            mime_type=mime_type,
            sample_rate=sample_rate,
            channels=channels,
        )

        response_data = self._make_request("POST", url, headers, json_data)
        return STTResponse(**self._expect_dict(response_data))

    def list_jobs(
        self,
        page: int | None = None,
        per_page: int | None = None,
        from_date: str | None = None,
        until_date: str | None = None,
        sort_by: str | None = None,
        sort_ascend: bool | None = None,
        query_text: str | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> STTJobsListResponse:
        """List STT jobs.

        Args:
            page (int | None): Page index for pagination
            per_page (int | None): Number of items per page
            from_date (str | None): Filter jobs from this date (YYYY-MM-DD)
            until_date (str | None): Filter jobs until this date (YYYY-MM-DD)
            sort_by (str | None): Field to sort by
            sort_ascend (bool | None): Sort in ascending order
            query_text (str | None): Search for jobs with matching result text
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            STTJobsListResponse: List of STT jobs with pagination

        Raises:
            TypeError: If the API does not return a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        logger.debug("Listing STT jobs")

        url = self._build_url("stt")
        headers = self._prepare_headers(extra_headers)

        filters = {
            "page": page,
            "per_page": per_page,
            "from_date": from_date,
            "until_date": until_date,
            "sort_by": sort_by,
            "sort_ascend": sort_ascend,
            "query_text": query_text,
        }
        # Only forward filters the caller actually set; falsy-but-valid
        # values such as page=0 or sort_ascend=False are kept.
        params: dict[str, Any] = {
            key: value for key, value in filters.items() if value is not None
        }

        response_data = self._make_request("GET", url, headers, params=params)
        return STTJobsListResponse(**self._expect_dict(response_data))

    def get_job(
        self,
        job_id: str,
        extra_headers: dict[str, str] | None = None,
    ) -> STTResponse:
        """Retrieve a specific STT job by ID.

        Args:
            job_id (str): Unique identifier of the job
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            STTResponse: Job details including result if complete

        Raises:
            ValueError: If job_id is empty
            TypeError: If the API does not return a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        self._require_job_id(job_id)

        logger.debug("Retrieving STT job: %s", job_id)

        url = self._build_url("stt", job_id)
        headers = self._prepare_headers(extra_headers)

        response_data = self._make_request("GET", url, headers)
        return STTResponse(**self._expect_dict(response_data))

    def get_status(
        self,
        job_id: str,
        extra_headers: dict[str, str] | None = None,
    ) -> STTStatusResponse:
        """Retrieve the status of a specific STT job.

        Args:
            job_id (str): Unique identifier of the job
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            STTStatusResponse: Job status information

        Raises:
            ValueError: If job_id is empty
            TypeError: If the API does not return a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        self._require_job_id(job_id)

        logger.debug("Retrieving STT job status: %s", job_id)

        url = self._build_url("stt", job_id, "status")
        headers = self._prepare_headers(extra_headers)

        response_data = self._make_request("GET", url, headers)
        return STTStatusResponse(**self._expect_dict(response_data))

    def archive(
        self,
        job_id: str,
        extra_headers: dict[str, str] | None = None,
    ) -> STTResponse:
        """Archive (soft-delete) an STT job.

        Args:
            job_id (str): Unique identifier of the job
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            STTResponse: Archived job details. When the API replies without a
                usable body, a placeholder with status "archived" and empty
                timestamps is returned instead.

        Raises:
            ValueError: If job_id is empty
            TypeError: If the API returns a non-empty body that is not a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        self._require_job_id(job_id)

        logger.debug("Archiving STT job: %s", job_id)

        url = self._build_url("stt", job_id)
        headers = self._prepare_headers(extra_headers)

        response_data = self._make_request("DELETE", url, headers)
        if not response_data:
            # No body came back (e.g. 204): synthesize a minimal response so
            # callers still receive an STTResponse.
            return STTResponse(
                job_id=job_id,
                status="archived",
                created_at="",
                modified_at="",
            )
        return STTResponse(**self._expect_dict(response_data))