gl-speech-sdk 0.0.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ """GL Speech Python client library for interacting with the Prosa Speech API."""
2
+
3
+ from gl_speech_sdk.client import SpeechClient
4
+ from gl_speech_sdk.models import (
5
+ AudioFormat,
6
+ DeliveryTicket,
7
+ JobStatus,
8
+ STTConfig,
9
+ STTJobRequest,
10
+ STTJobsListResponse,
11
+ STTRequest,
12
+ STTResponse,
13
+ STTStatusResponse,
14
+ TTSConfig,
15
+ TTSJobRequest,
16
+ TTSJobsListResponse,
17
+ TTSRequest,
18
+ TTSResponse,
19
+ TTSStatusResponse,
20
+ VoiceGender,
21
+ WebhookDelivery,
22
+ WebhookEndpoint,
23
+ WebhookEndpointCreate,
24
+ WebhookEndpointListing,
25
+ WebhookEndpointUpdate,
26
+ WebhookEvent,
27
+ WebhookEventListing,
28
+ WebhookEventType,
29
+ )
30
+ from gl_speech_sdk.stt import SpeechToText
31
+ from gl_speech_sdk.tts import TextToSpeech
32
+ from gl_speech_sdk.webhooks import Webhooks
33
+
34
+ __all__ = [
35
+ # Client
36
+ "SpeechClient",
37
+ # API Classes
38
+ "SpeechToText",
39
+ "TextToSpeech",
40
+ "Webhooks",
41
+ # Enums
42
+ "JobStatus",
43
+ "AudioFormat",
44
+ "VoiceGender",
45
+ "WebhookEventType",
46
+ # STT Models
47
+ "STTConfig",
48
+ "STTRequest",
49
+ "STTJobRequest",
50
+ "STTResponse",
51
+ "STTStatusResponse",
52
+ "STTJobsListResponse",
53
+ # TTS Models
54
+ "TTSConfig",
55
+ "TTSRequest",
56
+ "TTSJobRequest",
57
+ "TTSResponse",
58
+ "TTSStatusResponse",
59
+ "TTSJobsListResponse",
60
+ # Webhook Models
61
+ "WebhookEndpoint",
62
+ "WebhookEndpointCreate",
63
+ "WebhookEndpointUpdate",
64
+ "WebhookEndpointListing",
65
+ "WebhookEvent",
66
+ "WebhookEventListing",
67
+ "WebhookDelivery",
68
+ "DeliveryTicket",
69
+ ]
@@ -0,0 +1,86 @@
1
+ """GL Speech Python client library for interacting with the Prosa Speech API.
2
+
3
+ This library provides a simple interface to interact with the Prosa Speech API,
4
+ supporting Speech-to-Text (STT), Text-to-Speech (TTS), and Webhook operations.
5
+
6
+ Example:
7
+ >>> client = SpeechClient(api_key="your-api-key")
8
+ >>> # Transcribe audio
9
+ >>> result = client.stt.transcribe(
10
+ ... data="<base64-encoded-audio>",
11
+ ... model="stt-general",
12
+ ... wait=True
13
+ ... )
14
+ >>> print(result.result)
15
+ >>>
16
+ >>> # Synthesize speech
17
+ >>> result = client.tts.synthesize(
18
+ ... text="Hello, world!",
19
+ ... model="tts-dimas-formal",
20
+ ... wait=True
21
+ ... )
22
+ >>> print(result.result)
23
+
24
+ Authors:
25
+ GDP Labs
26
+
27
+ References:
28
+ https://docs2.prosa.ai/
29
+ """
30
+
31
+ import os
32
+
33
+ from gl_speech_sdk.stt import SpeechToText
34
+ from gl_speech_sdk.tts import TextToSpeech
35
+ from gl_speech_sdk.webhooks import Webhooks
36
+
37
+ # Ensure the URL ends with a slash; without the trailing slash, the base path will be incorrect.
38
+ DEFAULT_BASE_URL = "https://api.prosa.ai/v2/speech/"
39
+
40
+
41
+ class SpeechClient:
42
+ """Prosa Speech API Client.
43
+
44
+ Attributes:
45
+ api_key (str): API key for authentication
46
+ base_url (str): Base URL for the Prosa Speech API
47
+ timeout (float): Request timeout in seconds
48
+ default_headers (dict[str, str]): Default headers to include in all requests
49
+ stt (SpeechToText): SpeechToText instance for speech-to-text operations
50
+ tts (TextToSpeech): TextToSpeech instance for text-to-speech operations
51
+ webhooks (Webhooks): Webhooks instance for webhook management operations
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ api_key: str | None = None,
57
+ base_url: str | None = None,
58
+ timeout: float = 60.0,
59
+ default_headers: dict[str, str] | None = None,
60
+ ):
61
+ """
62
+ Initialize SpeechClient.
63
+
64
+ Args:
65
+ api_key (str | None): API key for authentication. If not provided,
66
+ will try to get from GLSPEECH_API_KEY environment variable
67
+ base_url (str | None): Base URL for the Prosa Speech API. If not provided,
68
+ will try to get from GLSPEECH_BASE_URL environment variable,
69
+ otherwise uses default
70
+ timeout (float): Request timeout in seconds
71
+ default_headers (dict[str, str] | None): Default headers to include in all requests.
72
+ These will be merged with any extra_headers provided to individual methods.
73
+ """
74
+ self.api_key = api_key or os.getenv("GLSPEECH_API_KEY")
75
+ if not self.api_key:
76
+ raise ValueError(
77
+ "API key is required. Provide it via 'api_key' parameter or "
78
+ "'GLSPEECH_API_KEY' environment variable."
79
+ )
80
+
81
+ self.base_url = base_url or os.getenv("GLSPEECH_BASE_URL") or DEFAULT_BASE_URL
82
+ self.timeout = timeout
83
+ self.default_headers = default_headers or {}
84
+ self.stt = SpeechToText(self)
85
+ self.tts = TextToSpeech(self)
86
+ self.webhooks = Webhooks(self)
@@ -0,0 +1,409 @@
1
+ """Data models for the GL Speech Python client.
2
+
3
+ This module contains Pydantic models for request and response data structures
4
+ used in the GL Speech Python client library.
5
+
6
+ Example:
7
+ >>> request = STTJobRequest(
8
+ ... model="stt-general",
9
+ ... wait=True,
10
+ ... data="<base64-encoded-audio>"
11
+ ... )
12
+ >>> data = request.model_dump(exclude_none=True)
13
+
14
+ Authors:
15
+ GDP Labs
16
+
17
+ References:
18
+ https://docs2.prosa.ai/
19
+ """
20
+
21
+ from enum import Enum
22
+ from typing import Any
23
+
24
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
25
+
26
+ # =============================================================================
27
+ # Common Enums
28
+ # =============================================================================
29
+
30
+
31
class JobStatus(str, Enum):
    """Status of a job's overall progress."""

    # The str mixin makes each member compare equal to its plain string value.
    # Declaration order is kept as-is because it is the enum's iteration order.
    COMPLETE = "complete"
    CREATED = "created"
    QUEUED = "queued"
    IN_PROGRESS = "in_progress"
    FAILED = "failed"
    CANCELLED = "cancelled"
40
+
41
+
42
class AudioFormat(str, Enum):
    """Supported audio formats for TTS."""

    # Members are str instances, so they compare equal to the raw format name.
    OPUS = "opus"
    MP3 = "mp3"
    WAV = "wav"
48
+
49
+
50
class VoiceGender(str, Enum):
    """Voice gender for TTS models."""

    # Members are str instances, so they compare equal to the raw value.
    MALE = "male"
    FEMALE = "female"
55
+
56
+
57
class WebhookEventType(str, Enum):
    """Supported webhook event types.

    These are the known event types emitted by the Prosa Speech API.
    Use these constants when filtering webhook events.

    Example:
        >>> endpoint = client.webhooks.create_endpoint(
        ...     url="https://example.com/webhook",
        ...     event_filters=[WebhookEventType.TTS_JOB_COMPLETED.value]
        ... )
    """

    # Speech-to-text job events.
    STT_JOB_COMPLETED = "stt.job.completed"
    STT_JOB_FAILED = "stt.job.failed"
    # Text-to-speech job events.
    TTS_JOB_COMPLETED = "tts.job.completed"
    TTS_JOB_FAILED = "tts.job.failed"
74
+
75
+
76
+ # =============================================================================
77
+ # STT (Speech-to-Text) Models
78
+ # =============================================================================
79
+
80
+
81
class STTConfig(BaseModel):
    """Configuration for STT job execution."""

    # protected_namespaces=() disables Pydantic's reserved "model_" prefix
    # check; populate_by_name=True allows population by field name.
    model_config = ConfigDict(populate_by_name=True, protected_namespaces=())

    # API accepts both "model" and "engine", but some endpoints require "engine"
    # Required field; emitted under the key "engine" when serialized by alias.
    model: str = Field(serialization_alias="engine")

    # Every option below is optional and carries the default shown.
    wait: bool | None = False
    speaker_count: int | None = 1
    include_filler: bool | None = False
    include_partial_results: bool | None = False
    auto_punctuation: bool | None = False
    enable_spoken_numerals: bool | None = False
    enable_speech_insights: bool | None = False
    enable_voice_insights: bool | None = False
    enable_conversation_analytics: bool | None = False
    conversation_analytics_prompt: str | None = "basic"
98
+
99
+
100
class STTRequest(BaseModel):
    """Request payload for an STT job."""

    # All fields are optional individually; the validator below requires
    # at least one of `data` / `uri` to be non-empty.
    label: str | None = None
    data: str | None = None
    uri: str | None = None
    duration: float | None = None
    mime_type: str | None = None
    sample_rate: int | None = None
    channels: int | None = None

    @model_validator(mode="after")
    def check_data_or_uri(self) -> "STTRequest":
        # Runs after field validation. Empty strings count as missing.
        # Supplying both `data` and `uri` is accepted.
        if not (self.data or self.uri):
            raise ValueError("One of 'data' or 'uri' is required")
        return self
116
+
117
+
118
class STTJobRequest(BaseModel):
    """Complete STT job request including config and request payload."""

    # Disables Pydantic's reserved "model_" prefix check for this model.
    model_config = ConfigDict(protected_namespaces=())

    # Both parts are required.
    config: STTConfig
    request: STTRequest
125
+
126
+
127
class TranscriptionResult(BaseModel):
    """Speech segment transcribed from the audio."""

    # Required fields.
    transcript: str
    final: bool
    time_start: float  # unit not stated here; presumably seconds - TODO confirm
    time_end: float

    # Optional fields with defaults.
    channel: int | None = 0
    speaker_tag: int | None = 1
136
+
137
+
138
class STTResult(BaseModel):
    """Result of an STT job."""

    # Every field is optional and defaults to None.
    data: list[TranscriptionResult] | None = None
    path: str | None = None
    latency: float | None = None
    speaker_count: int | None = None

    # Free-form mappings; their inner structure is not modelled here.
    speech_insights: dict[str, Any] | None = None
    voice_insights: dict[str, Any] | None = None
    conversation_analytics: dict[str, Any] | None = None

    error: str | None = None
    message: str | None = None
150
+
151
+
152
class STTProgress(BaseModel):
    """Progress of the STT job."""

    # Required overall figure; scale (0-1 vs 0-100) is not stated here.
    total: float
    # Optional name -> value breakdown.
    details: dict[str, float] | None = None
157
+
158
+
159
class ASRModel(BaseModel):
    """ASR model information."""

    # All fields are required except `label`.
    name: str
    label: str | None = None
    language: str
    domain: str
    acoustic: str
    samplerate: int
168
+
169
+
170
class STTResponse(BaseModel):
    """Response from STT API."""

    # Required fields.
    job_id: str
    status: str  # plain string; presumably a JobStatus value - TODO confirm
    created_at: str  # kept as a string, not parsed into a datetime
    modified_at: str

    # Optional nested sections; each defaults to None.
    # NOTE(review): `request` is validated as an STTRequest, so a present
    # `request` object must contain `data` or `uri`.
    request: STTRequest | None = None
    result: STTResult | None = None
    job_config: STTConfig | None = None
    progress: STTProgress | None = None
    model: ASRModel | None = None
182
+
183
+
184
class STTStatusResponse(BaseModel):
    """Status response from STT API."""

    # All four fields are required strings; timestamps are not parsed.
    job_id: str
    status: str
    created_at: str
    modified_at: str
191
+
192
+
193
class STTJobsListResponse(BaseModel):
    """List response from STT jobs API."""

    # Optional free-form pagination mapping; its keys are not modelled here.
    pagination: dict[str, Any] | None = None
    # Required.
    length: int
    data: list[STTResponse]
199
+
200
+
201
+ # =============================================================================
202
+ # TTS (Text-to-Speech) Models
203
+ # =============================================================================
204
+
205
+
206
class TextWithTimestamp(BaseModel):
    """Text segment with start and end timestamps."""

    # All fields are required.
    text: str
    start: float  # unit not stated here; presumably seconds - TODO confirm
    end: float
212
+
213
+
214
class TTSConfig(BaseModel):
    """Configuration for TTS job execution."""

    # Disables Pydantic's reserved "model_" prefix check for this model.
    model_config = ConfigDict(protected_namespaces=())

    # Required.
    model: str

    # Optional settings with the defaults shown.
    wait: bool | None = False
    pitch: float | None = 0.0
    tempo: float | None = 1.0
    # Plain string, not tied to an enum; the default is "opus".
    audio_format: str | None = "opus"
    sample_rate: int | None = None
225
+
226
+
227
class TTSRequest(BaseModel):
    """Request payload for a TTS job."""

    # All fields are optional individually; the validator below requires
    # exactly one of `text` / `ssml` to be non-empty.
    label: str | None = None
    text: str | None = None
    ssml: str | None = None

    @model_validator(mode="after")
    def check_text_or_ssml(self) -> "TTSRequest":
        # Runs after field validation. Empty strings count as missing.
        has_text = bool(self.text)
        has_ssml = bool(self.ssml)
        if not (has_text or has_ssml):
            raise ValueError("One of 'text' or 'ssml' is required")
        if has_text and has_ssml:
            raise ValueError("Only one of 'text' or 'ssml' can be provided")
        return self
241
+
242
+
243
class TTSJobRequest(BaseModel):
    """Complete TTS job request including config and request payload."""

    # Disables Pydantic's reserved "model_" prefix check for this model.
    model_config = ConfigDict(protected_namespaces=())

    # Both parts are required.
    config: TTSConfig
    request: TTSRequest
250
+
251
+
252
class TTSResult(BaseModel):
    """Result of a TTS job."""

    # Every field is optional and defaults to None.
    data: str | None = None  # encoding not stated here; presumably base64 audio - TODO confirm
    path: str | None = None
    format: str | None = None
    duration: float | None = None
    samplerate: int | None = None
    channels: int | None = None
    texts: list[TextWithTimestamp] | None = None
    latency: float | None = None
    error: str | None = None
264
+
265
+
266
class TTSModel(BaseModel):
    """TTS model information."""

    # All fields are required except `label`.
    name: str
    label: str | None = None
    language: str
    domain: str
    voice: str
    gender: str  # plain string; presumably a VoiceGender value - TODO confirm
    channels: int
    samplerate: int
277
+
278
+
279
class TTSResponse(BaseModel):
    """Response from TTS API."""

    # Required fields.
    job_id: str
    status: str  # plain string; presumably a JobStatus value - TODO confirm
    created_at: str  # kept as a string, not parsed into a datetime
    modified_at: str

    # Optional nested sections; each defaults to None.
    # NOTE(review): `request` is validated as a TTSRequest, so a present
    # `request` object must contain exactly one of `text` / `ssml`.
    request: TTSRequest | None = None
    result: TTSResult | None = None
    job_config: TTSConfig | None = None
    model: TTSModel | None = None
290
+
291
+
292
class TTSStatusResponse(BaseModel):
    """Status response from TTS API."""

    # All four fields are required strings; timestamps are not parsed.
    job_id: str
    status: str
    created_at: str
    modified_at: str
299
+
300
+
301
class TTSJobsListResponse(BaseModel):
    """List response from TTS jobs API."""

    # Optional free-form pagination mapping; its keys are not modelled here.
    pagination: dict[str, Any] | None = None
    # Required.
    length: int
    data: list[TTSResponse]
307
+
308
+
309
+ # =============================================================================
310
+ # Webhook Models
311
+ # =============================================================================
312
+
313
+
314
class WebhookEndpointCreate(BaseModel):
    """Request payload for creating a webhook endpoint."""

    # Required.
    url: str
    # Optional; plain strings (the WebhookEventType values are the known ones).
    event_filters: list[str] | None = None
    # Optional; defaults to True.
    ssl_verification: bool | None = True
    # Optional; defaults to None.
    secret_key: str | None = None
321
+
322
+
323
class WebhookEndpointUpdate(BaseModel):
    """Request payload for updating a webhook endpoint."""

    # Every field is optional.
    url: str | None = None
    event_filters: list[str] | None = None
    # NOTE(review): unlike the other fields this defaults to True, not None,
    # so it survives model_dump(exclude_none=True) even when the caller did
    # not set it. Confirm the update endpoint is not meant to be partial.
    ssl_verification: bool | None = True
329
+
330
+
331
class WebhookSecret(BaseModel):
    """Webhook secret information."""

    # Required.
    id: str
    key: str
    # Optional; kept as a string, not parsed into a datetime.
    expired_at: str | None = None
337
+
338
+
339
class WebhookEndpoint(BaseModel):
    """Webhook endpoint information."""

    # Required.
    id: str
    url: str
    # Optional; default to None.
    secrets: list[WebhookSecret] | None = None
    event_filters: list[str] | None = None
    # Required here (no default), unlike in the create/update payloads.
    ssl_verification: bool
347
+
348
+
349
class WebhookEndpointListing(BaseModel):
    """Webhook endpoint listing information (without secrets)."""

    # Required.
    id: str
    url: str
    # Optional; defaults to None.
    event_filters: list[str] | None = None
    # Required.
    ssl_verification: bool
356
+
357
+
358
class WebhookRotationPeriod(BaseModel):
    """Webhook secret rotation period."""

    # Both parts are optional; the defaults are 3 days and 0 hours.
    days: int | None = 3
    hours: int | None = 0
363
+
364
+
365
class WebhookRotation(BaseModel):
    """Request payload for rotating webhook secrets."""

    # Required; no default period object is supplied at this level.
    rotation_period: WebhookRotationPeriod
369
+
370
+
371
class WebhookEvent(BaseModel):
    """Webhook event information."""

    # All fields are required.
    id: str
    event_type: str  # plain string; presumably a WebhookEventType value - TODO confirm
    data: dict[str, Any]  # free-form payload; structure not modelled here
    created_at: str  # kept as a string, not parsed into a datetime
378
+
379
+
380
class WebhookEventListing(BaseModel):
    """Webhook event listing information (without data)."""

    # All fields are required strings.
    id: str
    event_type: str
    created_at: str
386
+
387
+
388
class WebhookDelivery(BaseModel):
    """Webhook delivery information."""

    # Every field is required.

    # Identifiers.
    delivery_id: str
    event_id: str
    endpoint_id: str
    delivery: str

    # Request fields.
    request_method: str
    request_headers: dict[str, Any]
    request_body: dict[str, Any]

    # Response fields.
    response_status: int
    response_headers: dict[str, Any]
    response_body: str

    elapsed_time: float  # unit not stated here - TODO confirm
402
+
403
+
404
class DeliveryTicket(BaseModel):
    """Delivery ticket response."""

    # All fields are required strings.
    delivery_tag: str
    endpoint_id: str
    url: str
gl_speech_sdk/py.typed ADDED
File without changes