gl-speech-sdk 0.0.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gl_speech_sdk/tts.py ADDED
@@ -0,0 +1,449 @@
1
+ """Text-to-Speech handling for the GL Speech Python client.
2
+
3
+ This module provides the TextToSpeech class for handling TTS operations
4
+ with the Prosa Speech API, including speech synthesis, job management, and status tracking.
5
+
6
+ Authors:
7
+ GDP Labs
8
+
9
+ References:
10
+ https://docs2.prosa.ai/speech/tts/rest/api/
11
+ """
12
+
13
+ import logging
14
+ from typing import Any
15
+ from urllib.parse import urljoin
16
+
17
+ import httpx
18
+
19
+ from gl_speech_sdk.models import (
20
+ TTSConfig,
21
+ TTSJobRequest,
22
+ TTSJobsListResponse,
23
+ TTSRequest,
24
+ TTSResponse,
25
+ TTSStatusResponse,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)  # Module-scoped logger (named after this module) used by TextToSpeech for debug tracing.
29
+
30
+
31
class TextToSpeech:
    """Handles Text-to-Speech API operations for the Prosa Speech API.

    Every public method builds a URL under the client's ``base_url``, merges
    the client's default headers with the API key and any caller-supplied
    headers, and performs one HTTP request through :meth:`_make_request`.
    """

    # Header names (lower-case) whose values must never be written to logs.
    _SENSITIVE_HEADERS = frozenset({"x-api-key", "authorization"})

    def __init__(self, client):
        """Initialize TextToSpeech API.

        Args:
            client: SpeechClient instance. This class reads its ``base_url``,
                ``api_key``, ``default_headers`` and ``timeout`` attributes.
        """
        self._client = client

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _build_url(self, path: str) -> str:
        """Join ``path`` onto the client's base URL.

        ``urljoin`` replaces the last path segment of the base when the base
        does not end with ``/`` (``.../v2/speech`` + ``tts`` would become
        ``.../v2/tts``), so the base is normalised to end with a slash first.
        A leading slash on ``path`` is stripped for the same reason: it would
        otherwise discard the whole base path.

        Args:
            path (str): Endpoint path relative to the base URL

        Returns:
            str: Absolute request URL
        """
        base_url = self._client.base_url
        if not base_url.endswith("/"):
            base_url += "/"
        return urljoin(base_url, path.lstrip("/"))

    @staticmethod
    def _job_path(job_id: str, suffix: str = "") -> str:
        """Build the relative path of a job endpoint.

        The job id is percent-encoded so that characters such as ``/``, ``?``
        or ``#`` cannot change which endpoint (or query string) is requested.

        Args:
            job_id (str): Unique identifier of the job
            suffix (str): Optional trailing path, e.g. ``"/status"``

        Returns:
            str: Relative path such as ``"tts/<job_id>/status"``
        """
        # Local import: the module only imports ``urljoin`` at file level.
        from urllib.parse import quote

        return f"tts/{quote(str(job_id), safe='')}{suffix}"

    @staticmethod
    def _drop_none(**params: Any) -> dict[str, Any]:
        """Return the given keyword arguments without the ``None`` entries.

        Falsy but meaningful values (``0``, ``False``, ``""``) are kept, and
        the keyword order is preserved.
        """
        return {key: value for key, value in params.items() if value is not None}

    @staticmethod
    def _expect_dict(data: Any) -> dict[str, Any]:
        """Return ``data`` unchanged if it is a dict.

        Raises:
            TypeError: If ``data`` is not a dict
        """
        if not isinstance(data, dict):
            raise TypeError("Expected dict response from API")
        return data

    @classmethod
    def _redact_headers(cls, headers: dict[str, str]) -> dict[str, str]:
        """Return a copy of ``headers`` that is safe to log.

        Values of credential-bearing headers are replaced with ``"***"``.
        The input mapping is not modified.
        """
        return {
            name: "***" if name.lower() in cls._SENSITIVE_HEADERS else value
            for name, value in headers.items()
        }

    def _prepare_headers(
        self, extra_headers: dict[str, str] | None = None
    ) -> dict[str, str]:
        """Prepare headers for the API request.

        Precedence, lowest to highest: client default headers, the
        ``x-api-key`` header (only when the client has an API key), then
        ``extra_headers``.

        Args:
            extra_headers (dict[str, str] | None): Additional headers to merge with default headers

        Returns:
            dict[str, str]: Dictionary containing the request headers
        """
        # Copy so the client's shared defaults are never mutated.
        headers = self._client.default_headers.copy()

        if self._client.api_key:
            headers["x-api-key"] = self._client.api_key

        if extra_headers:
            headers.update(extra_headers)

        return headers

    def _prepare_request_data(
        self,
        model: str,
        text: str | None = None,
        ssml: str | None = None,
        wait: bool | None = None,
        pitch: float | None = None,
        tempo: float | None = None,
        audio_format: str | None = None,
        sample_rate: int | None = None,
        label: str | None = None,
    ) -> dict[str, Any]:
        """Prepare request data for the TTS API call.

        Args:
            model (str): TTS model name
            text (str | None): Text to synthesize
            ssml (str | None): SSML to synthesize
            wait (bool | None): Whether the API should block until the job finishes
            pitch (float | None): Pitch offset
            tempo (float | None): Tempo adjustment
            audio_format (str | None): Output audio format
            sample_rate (int | None): Desired sample rate
            label (str | None): Job label

        Returns:
            dict[str, Any]: Dictionary containing the prepared request data,
                with all ``None`` fields omitted

        Raises:
            ValueError: If model is empty
        """
        if not model:
            raise ValueError("model is required")

        config = TTSConfig(
            model=model,
            wait=wait,
            pitch=pitch,
            tempo=tempo,
            audio_format=audio_format,
            sample_rate=sample_rate,
        )

        request = TTSRequest(
            label=label,
            text=text,
            ssml=ssml,
        )

        job_request = TTSJobRequest(config=config, request=request)
        return job_request.model_dump(exclude_none=True)

    def _make_request(
        self,
        method: str,
        url: str,
        headers: dict[str, str],
        json_data: dict[str, Any] | None = None,
        params: dict[str, Any] | None = None,
    ) -> dict[str, Any] | list[dict[str, Any]] | int:
        """Make an HTTP request to the API.

        Args:
            method (str): HTTP method (GET, POST, DELETE, PUT)
            url (str): Request URL
            headers (dict[str, str]): Request headers
            json_data (dict[str, Any] | None): JSON body data
            params (dict[str, Any] | None): Query parameters

        Returns:
            dict[str, Any] | list[dict[str, Any]] | int: Response data. An
                empty dict is returned for 204 responses, empty bodies and
                bodies that are not valid JSON.

        Raises:
            httpx.HTTPStatusError: If the request fails
            TypeError: If the decoded JSON is not a dict, list or int
        """
        timeout = httpx.Timeout(self._client.timeout)

        # Only build the redacted copy when debug logging is actually on.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Request: %s %s", method, url)
            # Never log credentials: the API key travels in the headers.
            logger.debug("Headers: %s", self._redact_headers(headers))
            if json_data:
                logger.debug("Body: %s", json_data)

        with httpx.Client(timeout=timeout) as client:
            response = client.request(
                method=method,
                url=url,
                headers=headers,
                json=json_data,
                params=params,
            )
            response.raise_for_status()

            if response.status_code == 204 or not response.content:
                return {}

            try:
                data = response.json()
            except ValueError:
                # JSON and Unicode decode errors both derive from ValueError.
                # Keep the best-effort empty result, but leave a trace so a
                # non-JSON success response is not completely invisible.
                logger.warning(
                    "Ignoring non-JSON response body from %s %s (status %s)",
                    method,
                    url,
                    response.status_code,
                )
                return {}

            if not isinstance(data, (dict, list, int)):
                raise TypeError(f"Unexpected response type: {type(data)}")
            return data

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def list_models(
        self,
        extra_headers: dict[str, str] | None = None,
    ) -> list[dict[str, Any]]:
        """List all available TTS models.

        Args:
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            list[dict[str, Any]]: List of available TTS models. An empty list
                is returned when the API response is not a JSON array.

        Raises:
            httpx.HTTPStatusError: If the API request fails
        """
        logger.debug("Listing available TTS models")

        url = self._build_url("tts/models")
        headers = self._prepare_headers(extra_headers)

        response_data = self._make_request("GET", url, headers)
        if not isinstance(response_data, list):
            return []
        return response_data

    def synthesize(
        self,
        model: str,
        text: str | None = None,
        ssml: str | None = None,
        wait: bool | None = None,
        pitch: float | None = None,
        tempo: float | None = None,
        audio_format: str | None = None,
        sample_rate: int | None = None,
        label: str | None = None,
        as_signed_url: bool | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> TTSResponse:
        """Submit a text-to-speech synthesis request.

        Args:
            model (str): TTS model name (e.g., "tts-dimas-formal")
            text (str | None): Text to synthesize
            ssml (str | None): SSML to synthesize
            wait (bool | None): If True, blocks until execution finishes.
            pitch (float | None): Pitch offset of generated speech
            tempo (float | None): Tempo of generated speech
            audio_format (str | None): Output format: "opus", "mp3", or "wav"
            sample_rate (int | None): Desired sample rate
            label (str | None): Optional label for the job
            as_signed_url (bool | None): Return audio as signed URL instead of base64
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            TTSResponse: Synthesis response with job_id and possibly result

        Raises:
            ValueError: If neither text nor ssml is provided, or model is empty
            TypeError: If the API response is not a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        if not text and not ssml:
            raise ValueError("Either 'text' or 'ssml' must be provided")

        logger.debug("Submitting TTS synthesis request")

        url = self._build_url("tts")
        headers = self._prepare_headers(extra_headers)
        json_data = self._prepare_request_data(
            text=text,
            ssml=ssml,
            model=model,
            wait=wait,
            pitch=pitch,
            tempo=tempo,
            audio_format=audio_format,
            sample_rate=sample_rate,
            label=label,
        )
        params = self._drop_none(as_signed_url=as_signed_url)

        response_data = self._make_request("POST", url, headers, json_data, params)
        return TTSResponse(**self._expect_dict(response_data))

    def list_jobs(
        self,
        page: int | None = None,
        per_page: int | None = None,
        from_date: str | None = None,
        until_date: str | None = None,
        sort_by: str | None = None,
        sort_ascend: bool | None = None,
        query_text: str | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> TTSJobsListResponse:
        """List TTS jobs.

        Only the filters that are not ``None`` are sent as query parameters.

        Args:
            page (int | None): Page index for pagination
            per_page (int | None): Number of items per page
            from_date (str | None): Filter jobs from this date (YYYY-MM-DD)
            until_date (str | None): Filter jobs until this date (YYYY-MM-DD)
            sort_by (str | None): Field to sort by
            sort_ascend (bool | None): Sort in ascending order
            query_text (str | None): Search for jobs with matching request text
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            TTSJobsListResponse: List of TTS jobs with pagination

        Raises:
            TypeError: If the API response is not a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        logger.debug("Listing TTS jobs")

        url = self._build_url("tts")
        headers = self._prepare_headers(extra_headers)
        params = self._drop_none(
            page=page,
            per_page=per_page,
            from_date=from_date,
            until_date=until_date,
            sort_by=sort_by,
            sort_ascend=sort_ascend,
            query_text=query_text,
        )

        response_data = self._make_request("GET", url, headers, params=params)
        return TTSJobsListResponse(**self._expect_dict(response_data))

    def get_job(
        self,
        job_id: str,
        as_signed_url: bool | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> TTSResponse:
        """Retrieve a specific TTS job by ID.

        Args:
            job_id (str): Unique identifier of the job
            as_signed_url (bool | None): Return audio as signed URL instead of base64
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            TTSResponse: Job details including result if complete

        Raises:
            ValueError: If job_id is empty
            TypeError: If the API response is not a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        if not job_id:
            raise ValueError("job_id cannot be empty")

        logger.debug("Retrieving TTS job: %s", job_id)

        url = self._build_url(self._job_path(job_id))
        headers = self._prepare_headers(extra_headers)
        params = self._drop_none(as_signed_url=as_signed_url)

        response_data = self._make_request("GET", url, headers, params=params)
        return TTSResponse(**self._expect_dict(response_data))

    def get_status(
        self,
        job_id: str,
        extra_headers: dict[str, str] | None = None,
    ) -> TTSStatusResponse:
        """Retrieve the status of a specific TTS job.

        Args:
            job_id (str): Unique identifier of the job
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            TTSStatusResponse: Job status information

        Raises:
            ValueError: If job_id is empty
            TypeError: If the API response is not a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        if not job_id:
            raise ValueError("job_id cannot be empty")

        logger.debug("Retrieving TTS job status: %s", job_id)

        url = self._build_url(self._job_path(job_id, "/status"))
        headers = self._prepare_headers(extra_headers)

        response_data = self._make_request("GET", url, headers)
        return TTSStatusResponse(**self._expect_dict(response_data))

    def archive(
        self,
        job_id: str,
        extra_headers: dict[str, str] | None = None,
    ) -> TTSResponse:
        """Archive (soft-delete) a TTS job.

        Args:
            job_id (str): Unique identifier of the job
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            TTSResponse: Archived job details. When the API replies with an
                empty body, a placeholder response is built locally with
                status ``"archived"`` and empty timestamps.

        Raises:
            ValueError: If job_id is empty
            TypeError: If the API response is non-empty but not a JSON object
            httpx.HTTPStatusError: If the API request fails
        """
        if not job_id:
            raise ValueError("job_id cannot be empty")

        logger.debug("Archiving TTS job: %s", job_id)

        url = self._build_url(self._job_path(job_id))
        headers = self._prepare_headers(extra_headers)

        response_data = self._make_request("DELETE", url, headers)
        if not response_data:
            # No body to parse: synthesise a minimal response for the caller.
            return TTSResponse(
                job_id=job_id,
                status="archived",
                created_at="",
                modified_at="",
            )
        return TTSResponse(**self._expect_dict(response_data))

    def count_jobs(
        self,
        from_date: str | None = None,
        until_date: str | None = None,
        query_text: str | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> int:
        """Count TTS jobs matching the query.

        Args:
            from_date (str | None): Filter jobs from this date (YYYY-MM-DD)
            until_date (str | None): Filter jobs until this date (YYYY-MM-DD)
            query_text (str | None): Search for jobs with matching request text
            extra_headers (dict[str, str] | None): Additional headers

        Returns:
            int: Count of jobs matching the criteria

        Raises:
            TypeError: If the API response is not a JSON integer
            httpx.HTTPStatusError: If the API request fails
        """
        logger.debug("Counting TTS jobs")

        url = self._build_url("tts/count")
        headers = self._prepare_headers(extra_headers)
        params = self._drop_none(
            from_date=from_date,
            until_date=until_date,
            query_text=query_text,
        )

        result = self._make_request("GET", url, headers, params=params)
        # bool is a subclass of int; a JSON true/false is not a valid count.
        if isinstance(result, bool) or not isinstance(result, int):
            raise TypeError("Expected int response from API")
        return result