cartesia 2.0.4__py3-none-any.whl → 2.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. cartesia/__init__.py +60 -1
  2. cartesia/auth/client.py +8 -8
  3. cartesia/auth/requests/token_grant.py +7 -1
  4. cartesia/auth/requests/token_request.py +3 -3
  5. cartesia/auth/types/token_grant.py +7 -2
  6. cartesia/auth/types/token_request.py +3 -3
  7. cartesia/base_client.py +2 -0
  8. cartesia/client.py +5 -0
  9. cartesia/core/client_wrapper.py +1 -1
  10. cartesia/stt/__init__.py +57 -0
  11. cartesia/stt/_async_websocket.py +293 -0
  12. cartesia/stt/_websocket.py +294 -0
  13. cartesia/stt/client.py +456 -0
  14. cartesia/stt/requests/__init__.py +29 -0
  15. cartesia/stt/requests/done_message.py +14 -0
  16. cartesia/stt/requests/error_message.py +16 -0
  17. cartesia/stt/requests/flush_done_message.py +14 -0
  18. cartesia/stt/requests/streaming_transcription_response.py +41 -0
  19. cartesia/stt/requests/transcript_message.py +40 -0
  20. cartesia/stt/requests/transcription_response.py +28 -0
  21. cartesia/stt/requests/transcription_word.py +20 -0
  22. cartesia/stt/socket_client.py +138 -0
  23. cartesia/stt/types/__init__.py +33 -0
  24. cartesia/stt/types/done_message.py +26 -0
  25. cartesia/stt/types/error_message.py +27 -0
  26. cartesia/stt/types/flush_done_message.py +26 -0
  27. cartesia/stt/types/streaming_transcription_response.py +94 -0
  28. cartesia/stt/types/stt_encoding.py +7 -0
  29. cartesia/stt/types/timestamp_granularity.py +5 -0
  30. cartesia/stt/types/transcript_message.py +50 -0
  31. cartesia/stt/types/transcription_response.py +38 -0
  32. cartesia/stt/types/transcription_word.py +32 -0
  33. cartesia/tts/__init__.py +8 -0
  34. cartesia/tts/client.py +50 -8
  35. cartesia/tts/requests/__init__.py +4 -0
  36. cartesia/tts/requests/generation_request.py +4 -4
  37. cartesia/tts/requests/sse_output_format.py +11 -0
  38. cartesia/tts/requests/ttssse_request.py +47 -0
  39. cartesia/tts/requests/web_socket_chunk_response.py +0 -3
  40. cartesia/tts/requests/web_socket_response.py +1 -2
  41. cartesia/tts/requests/web_socket_tts_request.py +9 -1
  42. cartesia/tts/types/__init__.py +4 -0
  43. cartesia/tts/types/generation_request.py +4 -4
  44. cartesia/tts/types/sse_output_format.py +22 -0
  45. cartesia/tts/types/ttssse_request.py +58 -0
  46. cartesia/tts/types/web_socket_chunk_response.py +1 -3
  47. cartesia/tts/types/web_socket_response.py +1 -2
  48. cartesia/tts/types/web_socket_tts_request.py +11 -3
  49. cartesia/voice_changer/requests/streaming_response.py +0 -2
  50. cartesia/voice_changer/types/streaming_response.py +0 -2
  51. {cartesia-2.0.4.dist-info → cartesia-2.0.6.dist-info}/METADATA +256 -2
  52. {cartesia-2.0.4.dist-info → cartesia-2.0.6.dist-info}/RECORD +53 -26
  53. {cartesia-2.0.4.dist-info → cartesia-2.0.6.dist-info}/WHEEL +0 -0
cartesia/stt/client.py ADDED
@@ -0,0 +1,456 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from ..core.client_wrapper import SyncClientWrapper
5
+ from .. import core
6
+ from .types.stt_encoding import SttEncoding
7
+ from .types.timestamp_granularity import TimestampGranularity
8
+ from ..core.request_options import RequestOptions
9
+ from .types.transcription_response import TranscriptionResponse
10
+ from ..core.pydantic_utilities import parse_obj_as
11
+ from json.decoder import JSONDecodeError
12
+ from ..core.api_error import ApiError
13
+ from ..core.client_wrapper import AsyncClientWrapper
14
+
15
+ # this is used as the default value for optional parameters
16
+ OMIT = typing.cast(typing.Any, ...)
17
+
18
+
19
class SttClient:
    """
    Synchronous client for batch speech-to-text requests (``POST stt``).
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._client_wrapper = client_wrapper

    def transcribe(
        self,
        *,
        file: core.File,
        model: str,
        encoding: typing.Optional[SttEncoding] = None,
        sample_rate: typing.Optional[int] = None,
        language: typing.Optional[str] = OMIT,
        timestamp_granularities: typing.Optional[typing.List[TimestampGranularity]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> TranscriptionResponse:
        """
        Transcribes audio files into text using Cartesia's Speech-to-Text API.

        Upload an audio file and receive a complete transcription response. Supports arbitrarily long audio files with automatic intelligent chunking for longer audio.

        **Supported audio formats:** flac, m4a, mp3, mp4, mpeg, mpga, oga, ogg, wav, webm

        **Response format:** Returns JSON with transcribed text, duration, and language. Include `timestamp_granularities: ["word"]` to get word-level timestamps.

        **Pricing:** Batch transcription is priced at **1 credit per 2 seconds** of audio processed.

        <Note>
        For migrating from the OpenAI SDK, see our [OpenAI Whisper to Cartesia Ink Migration Guide](/api-reference/stt/migrate-from-open-ai).
        </Note>

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : str
            ID of the model to use for transcription. Use `ink-whisper` for the latest Cartesia Whisper model.


        encoding : typing.Optional[SttEncoding]
            The encoding format to process the audio as. If not specified, the audio file will be decoded automatically.

            **Supported formats:**
            - `pcm_s16le` - 16-bit signed integer PCM, little-endian (recommended for best performance)
            - `pcm_s32le` - 32-bit signed integer PCM, little-endian
            - `pcm_f16le` - 16-bit floating point PCM, little-endian
            - `pcm_f32le` - 32-bit floating point PCM, little-endian
            - `pcm_mulaw` - 8-bit μ-law encoded PCM
            - `pcm_alaw` - 8-bit A-law encoded PCM

        sample_rate : typing.Optional[int]
            The sample rate of the audio in Hz.

        language : typing.Optional[str]
            The language of the input audio in ISO-639-1 format. Defaults to `en`.

            <Accordion title="Supported languages">
              - `en` (English)
              - `zh` (Chinese)
              - `de` (German)
              - `es` (Spanish)
              - `ru` (Russian)
              - `ko` (Korean)
              - `fr` (French)
              - `ja` (Japanese)
              - `pt` (Portuguese)
              - `tr` (Turkish)
              - `pl` (Polish)
              - `ca` (Catalan)
              - `nl` (Dutch)
              - `ar` (Arabic)
              - `sv` (Swedish)
              - `it` (Italian)
              - `id` (Indonesian)
              - `hi` (Hindi)
              - `fi` (Finnish)
              - `vi` (Vietnamese)
              - `he` (Hebrew)
              - `uk` (Ukrainian)
              - `el` (Greek)
              - `ms` (Malay)
              - `cs` (Czech)
              - `ro` (Romanian)
              - `da` (Danish)
              - `hu` (Hungarian)
              - `ta` (Tamil)
              - `no` (Norwegian)
              - `th` (Thai)
              - `ur` (Urdu)
              - `hr` (Croatian)
              - `bg` (Bulgarian)
              - `lt` (Lithuanian)
              - `la` (Latin)
              - `mi` (Maori)
              - `ml` (Malayalam)
              - `cy` (Welsh)
              - `sk` (Slovak)
              - `te` (Telugu)
              - `fa` (Persian)
              - `lv` (Latvian)
              - `bn` (Bengali)
              - `sr` (Serbian)
              - `az` (Azerbaijani)
              - `sl` (Slovenian)
              - `kn` (Kannada)
              - `et` (Estonian)
              - `mk` (Macedonian)
              - `br` (Breton)
              - `eu` (Basque)
              - `is` (Icelandic)
              - `hy` (Armenian)
              - `ne` (Nepali)
              - `mn` (Mongolian)
              - `bs` (Bosnian)
              - `kk` (Kazakh)
              - `sq` (Albanian)
              - `sw` (Swahili)
              - `gl` (Galician)
              - `mr` (Marathi)
              - `pa` (Punjabi)
              - `si` (Sinhala)
              - `km` (Khmer)
              - `sn` (Shona)
              - `yo` (Yoruba)
              - `so` (Somali)
              - `af` (Afrikaans)
              - `oc` (Occitan)
              - `ka` (Georgian)
              - `be` (Belarusian)
              - `tg` (Tajik)
              - `sd` (Sindhi)
              - `gu` (Gujarati)
              - `am` (Amharic)
              - `yi` (Yiddish)
              - `lo` (Lao)
              - `uz` (Uzbek)
              - `fo` (Faroese)
              - `ht` (Haitian Creole)
              - `ps` (Pashto)
              - `tk` (Turkmen)
              - `nn` (Nynorsk)
              - `mt` (Maltese)
              - `sa` (Sanskrit)
              - `lb` (Luxembourgish)
              - `my` (Myanmar)
              - `bo` (Tibetan)
              - `tl` (Tagalog)
              - `mg` (Malagasy)
              - `as` (Assamese)
              - `tt` (Tatar)
              - `haw` (Hawaiian)
              - `ln` (Lingala)
              - `ha` (Hausa)
              - `ba` (Bashkir)
              - `jw` (Javanese)
              - `su` (Sundanese)
              - `yue` (Cantonese)
            </Accordion>


        timestamp_granularities : typing.Optional[typing.List[TimestampGranularity]]
            The timestamp granularities to populate for this transcription. Currently only `word` level timestamps are supported.


        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TranscriptionResponse

        Raises
        ------
        ApiError
            If the response status is outside the 2xx range (body is the decoded
            JSON payload), or if the response body cannot be decoded as JSON
            (body is the raw response text).

        Examples
        --------
        from cartesia import Cartesia

        client = Cartesia(
            api_key="YOUR_API_KEY",
        )
        with open("path/to/audio.wav", "rb") as audio_file:
            client.stt.transcribe(
                file=audio_file,
                model="ink-whisper",
                language="en",
            )
        """
        _response = self._client_wrapper.httpx_client.request(
            "stt",
            method="POST",
            # `encoding` and `sample_rate` are sent as query parameters.
            params={
                "encoding": encoding,
                "sample_rate": sample_rate,
            },
            # Remaining fields go in the form body next to the uploaded file.
            data={
                "model": model,
                "language": language,
                "timestamp_granularities[]": timestamp_granularities,
            },
            files={
                "file": file,
            },
            request_options=request_options,
            # NOTE(review): presumably the HTTP client drops values left at the
            # OMIT sentinel from the request -- confirm in core.http_client.
            omit=OMIT,
        )
        try:
            if 200 <= _response.status_code < 300:
                return typing.cast(
                    TranscriptionResponse,
                    parse_obj_as(
                        type_=TranscriptionResponse,  # type: ignore
                        object_=_response.json(),
                    ),
                )
            # Non-2xx: decode the error payload so it can be attached to ApiError.
            _response_json = _response.json()
        except JSONDecodeError:
            # The body was not valid JSON; surface the raw text instead.
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)
233
+
234
+
235
class AsyncSttClient:
    """
    Asynchronous client for batch speech-to-text requests (``POST stt``).
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._client_wrapper = client_wrapper

    async def transcribe(
        self,
        *,
        file: core.File,
        model: str,
        encoding: typing.Optional[SttEncoding] = None,
        sample_rate: typing.Optional[int] = None,
        language: typing.Optional[str] = OMIT,
        timestamp_granularities: typing.Optional[typing.List[TimestampGranularity]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> TranscriptionResponse:
        """
        Transcribes audio files into text using Cartesia's Speech-to-Text API.

        Upload an audio file and receive a complete transcription response. Supports arbitrarily long audio files with automatic intelligent chunking for longer audio.

        **Supported audio formats:** flac, m4a, mp3, mp4, mpeg, mpga, oga, ogg, wav, webm

        **Response format:** Returns JSON with transcribed text, duration, and language. Include `timestamp_granularities: ["word"]` to get word-level timestamps.

        **Pricing:** Batch transcription is priced at **1 credit per 2 seconds** of audio processed.

        <Note>
        For migrating from the OpenAI SDK, see our [OpenAI Whisper to Cartesia Ink Migration Guide](/api-reference/stt/migrate-from-open-ai).
        </Note>

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : str
            ID of the model to use for transcription. Use `ink-whisper` for the latest Cartesia Whisper model.


        encoding : typing.Optional[SttEncoding]
            The encoding format to process the audio as. If not specified, the audio file will be decoded automatically.

            **Supported formats:**
            - `pcm_s16le` - 16-bit signed integer PCM, little-endian (recommended for best performance)
            - `pcm_s32le` - 32-bit signed integer PCM, little-endian
            - `pcm_f16le` - 16-bit floating point PCM, little-endian
            - `pcm_f32le` - 32-bit floating point PCM, little-endian
            - `pcm_mulaw` - 8-bit μ-law encoded PCM
            - `pcm_alaw` - 8-bit A-law encoded PCM

        sample_rate : typing.Optional[int]
            The sample rate of the audio in Hz.

        language : typing.Optional[str]
            The language of the input audio in ISO-639-1 format. Defaults to `en`.

            <Accordion title="Supported languages">
              - `en` (English)
              - `zh` (Chinese)
              - `de` (German)
              - `es` (Spanish)
              - `ru` (Russian)
              - `ko` (Korean)
              - `fr` (French)
              - `ja` (Japanese)
              - `pt` (Portuguese)
              - `tr` (Turkish)
              - `pl` (Polish)
              - `ca` (Catalan)
              - `nl` (Dutch)
              - `ar` (Arabic)
              - `sv` (Swedish)
              - `it` (Italian)
              - `id` (Indonesian)
              - `hi` (Hindi)
              - `fi` (Finnish)
              - `vi` (Vietnamese)
              - `he` (Hebrew)
              - `uk` (Ukrainian)
              - `el` (Greek)
              - `ms` (Malay)
              - `cs` (Czech)
              - `ro` (Romanian)
              - `da` (Danish)
              - `hu` (Hungarian)
              - `ta` (Tamil)
              - `no` (Norwegian)
              - `th` (Thai)
              - `ur` (Urdu)
              - `hr` (Croatian)
              - `bg` (Bulgarian)
              - `lt` (Lithuanian)
              - `la` (Latin)
              - `mi` (Maori)
              - `ml` (Malayalam)
              - `cy` (Welsh)
              - `sk` (Slovak)
              - `te` (Telugu)
              - `fa` (Persian)
              - `lv` (Latvian)
              - `bn` (Bengali)
              - `sr` (Serbian)
              - `az` (Azerbaijani)
              - `sl` (Slovenian)
              - `kn` (Kannada)
              - `et` (Estonian)
              - `mk` (Macedonian)
              - `br` (Breton)
              - `eu` (Basque)
              - `is` (Icelandic)
              - `hy` (Armenian)
              - `ne` (Nepali)
              - `mn` (Mongolian)
              - `bs` (Bosnian)
              - `kk` (Kazakh)
              - `sq` (Albanian)
              - `sw` (Swahili)
              - `gl` (Galician)
              - `mr` (Marathi)
              - `pa` (Punjabi)
              - `si` (Sinhala)
              - `km` (Khmer)
              - `sn` (Shona)
              - `yo` (Yoruba)
              - `so` (Somali)
              - `af` (Afrikaans)
              - `oc` (Occitan)
              - `ka` (Georgian)
              - `be` (Belarusian)
              - `tg` (Tajik)
              - `sd` (Sindhi)
              - `gu` (Gujarati)
              - `am` (Amharic)
              - `yi` (Yiddish)
              - `lo` (Lao)
              - `uz` (Uzbek)
              - `fo` (Faroese)
              - `ht` (Haitian Creole)
              - `ps` (Pashto)
              - `tk` (Turkmen)
              - `nn` (Nynorsk)
              - `mt` (Maltese)
              - `sa` (Sanskrit)
              - `lb` (Luxembourgish)
              - `my` (Myanmar)
              - `bo` (Tibetan)
              - `tl` (Tagalog)
              - `mg` (Malagasy)
              - `as` (Assamese)
              - `tt` (Tatar)
              - `haw` (Hawaiian)
              - `ln` (Lingala)
              - `ha` (Hausa)
              - `ba` (Bashkir)
              - `jw` (Javanese)
              - `su` (Sundanese)
              - `yue` (Cantonese)
            </Accordion>


        timestamp_granularities : typing.Optional[typing.List[TimestampGranularity]]
            The timestamp granularities to populate for this transcription. Currently only `word` level timestamps are supported.


        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TranscriptionResponse

        Raises
        ------
        ApiError
            If the response status is outside the 2xx range (body is the decoded
            JSON payload), or if the response body cannot be decoded as JSON
            (body is the raw response text).

        Examples
        --------
        import asyncio

        from cartesia import AsyncCartesia

        client = AsyncCartesia(
            api_key="YOUR_API_KEY",
        )


        async def main() -> None:
            with open("path/to/audio.wav", "rb") as audio_file:
                await client.stt.transcribe(
                    file=audio_file,
                    model="ink-whisper",
                    language="en",
                )


        asyncio.run(main())
        """
        _response = await self._client_wrapper.httpx_client.request(
            "stt",
            method="POST",
            # `encoding` and `sample_rate` are sent as query parameters.
            params={
                "encoding": encoding,
                "sample_rate": sample_rate,
            },
            # Remaining fields go in the form body next to the uploaded file.
            data={
                "model": model,
                "language": language,
                "timestamp_granularities[]": timestamp_granularities,
            },
            files={
                "file": file,
            },
            request_options=request_options,
            # NOTE(review): presumably the HTTP client drops values left at the
            # OMIT sentinel from the request -- confirm in core.http_client.
            omit=OMIT,
        )
        try:
            if 200 <= _response.status_code < 300:
                return typing.cast(
                    TranscriptionResponse,
                    parse_obj_as(
                        type_=TranscriptionResponse,  # type: ignore
                        object_=_response.json(),
                    ),
                )
            # Non-2xx: decode the error payload so it can be attached to ApiError.
            _response_json = _response.json()
        except JSONDecodeError:
            # The body was not valid JSON; surface the raw text instead.
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)
@@ -0,0 +1,29 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from .done_message import DoneMessageParams
4
+ from .error_message import ErrorMessageParams
5
+ from .flush_done_message import FlushDoneMessageParams
6
+ from .streaming_transcription_response import (
7
+ StreamingTranscriptionResponseParams,
8
+ StreamingTranscriptionResponse_DoneParams,
9
+ StreamingTranscriptionResponse_ErrorParams,
10
+ StreamingTranscriptionResponse_FlushDoneParams,
11
+ StreamingTranscriptionResponse_TranscriptParams,
12
+ )
13
+ from .transcript_message import TranscriptMessageParams
14
+ from .transcription_response import TranscriptionResponseParams
15
+ from .transcription_word import TranscriptionWordParams
16
+
17
+ __all__ = [
18
+ "DoneMessageParams",
19
+ "ErrorMessageParams",
20
+ "FlushDoneMessageParams",
21
+ "StreamingTranscriptionResponseParams",
22
+ "StreamingTranscriptionResponse_DoneParams",
23
+ "StreamingTranscriptionResponse_ErrorParams",
24
+ "StreamingTranscriptionResponse_FlushDoneParams",
25
+ "StreamingTranscriptionResponse_TranscriptParams",
26
+ "TranscriptMessageParams",
27
+ "TranscriptionResponseParams",
28
+ "TranscriptionWordParams",
29
+ ]
@@ -0,0 +1,14 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
class DoneMessageParams(typing_extensions.TypedDict):
    """
    Acknowledgment sent in reply to a `done` command. It signals that the session is complete and that the WebSocket will close.
    """

    request_id: str
    """
    Unique identifier of the transcription session being acknowledged.
    """
@@ -0,0 +1,16 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ import typing_extensions
5
+
6
+
7
class ErrorMessageParams(typing_extensions.TypedDict):
    """
    Error payload: a required human-readable message plus an optional request ID.
    """

    request_id: typing_extensions.NotRequired[str]
    """
    ID of the request the error relates to, when there is one.
    """

    message: str
    """
    Human-readable description of what went wrong.
    """
@@ -0,0 +1,14 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
class FlushDoneMessageParams(typing_extensions.TypedDict):
    """
    Acknowledgment sent in reply to a `finalize` command. It signals that all buffered audio has been flushed and processed.
    """

    request_id: str
    """
    Unique identifier of the transcription session being acknowledged.
    """
@@ -0,0 +1,41 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from __future__ import annotations
4
+ import typing_extensions
5
+ import typing
6
+ import typing_extensions
7
+ from .transcription_word import TranscriptionWordParams
8
+
9
+
10
class StreamingTranscriptionResponse_TranscriptParams(typing_extensions.TypedDict):
    """
    Streaming response variant tagged ``type == "transcript"``.
    """

    # Literal tag identifying this variant.
    type: typing.Literal["transcript"]
    request_id: str
    text: str
    is_final: bool
    # The remaining keys are optional.
    duration: typing_extensions.NotRequired[float]
    language: typing_extensions.NotRequired[str]
    words: typing_extensions.NotRequired[typing.Sequence[TranscriptionWordParams]]
18
+
19
+
20
class StreamingTranscriptionResponse_FlushDoneParams(typing_extensions.TypedDict):
    """
    Streaming response variant tagged ``type == "flush_done"``.
    """

    # Literal tag identifying this variant.
    type: typing.Literal["flush_done"]
    request_id: str
23
+
24
+
25
class StreamingTranscriptionResponse_DoneParams(typing_extensions.TypedDict):
    """
    Streaming response variant tagged ``type == "done"``.
    """

    # Literal tag identifying this variant.
    type: typing.Literal["done"]
    request_id: str
28
+
29
+
30
class StreamingTranscriptionResponse_ErrorParams(typing_extensions.TypedDict):
    """
    Streaming response variant tagged ``type == "error"``.
    """

    # Literal tag identifying this variant.
    type: typing.Literal["error"]
    # Optional: an error may arrive without a request ID.
    request_id: typing_extensions.NotRequired[str]
    message: str
34
+
35
+
36
# Union of every streaming response variant; each member carries its own
# literal `type` tag ("transcript", "flush_done", "done" or "error").
StreamingTranscriptionResponseParams = typing.Union[
    StreamingTranscriptionResponse_TranscriptParams,
    StreamingTranscriptionResponse_FlushDoneParams,
    StreamingTranscriptionResponse_DoneParams,
    StreamingTranscriptionResponse_ErrorParams,
]
@@ -0,0 +1,40 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ import typing_extensions
5
+ import typing
6
+ from .transcription_word import TranscriptionWordParams
7
+
8
+
9
class TranscriptMessageParams(typing_extensions.TypedDict):
    """
    Transcript message for a transcription session, carrying either an interim or a final result.
    """

    request_id: str
    """
    Unique identifier for this transcription session.
    """

    text: str
    """
    The transcribed text. Whether it is partial or final is indicated by is_final.

    **Note**: Text may be empty in initial responses while the system accumulates sufficient audio for transcription. This is normal behavior - wait for responses with non-empty text or monitor is_final for completion status.
    """

    is_final: bool
    """
    True for a final transcription result, False for an interim one.
    """

    duration: typing_extensions.NotRequired[float]
    """
    Duration, in seconds, of the audio transcribed so far.
    """

    language: typing_extensions.NotRequired[str]
    """
    The specified language of the input audio.
    """

    words: typing_extensions.NotRequired[typing.Sequence[TranscriptionWordParams]]
    """
    Word-level timestamps giving the start and end time of each word in seconds. Always included in streaming responses.
    """
@@ -0,0 +1,28 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ import typing_extensions
5
+ import typing
6
+ from .transcription_word import TranscriptionWordParams
7
+
8
+
9
class TranscriptionResponseParams(typing_extensions.TypedDict):
    """
    Transcription result: the transcribed text plus optional language, duration and word timestamps.
    """

    text: str
    """
    The transcribed text.
    """

    language: typing_extensions.NotRequired[str]
    """
    The specified language of the input audio.
    """

    duration: typing_extensions.NotRequired[float]
    """
    Duration of the input audio, in seconds.
    """

    words: typing_extensions.NotRequired[typing.Sequence[TranscriptionWordParams]]
    """
    Word-level timestamps giving the start and end time of each word. Only included when `[word]` is passed into `timestamp_granularities[]`.
    """
@@ -0,0 +1,20 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
class TranscriptionWordParams(typing_extensions.TypedDict):
    """
    A single transcribed word together with its start and end times in seconds.
    """

    word: str
    """
    The transcribed word.
    """

    start: float
    """
    Time at which the word starts, in seconds.
    """

    end: float
    """
    Time at which the word ends, in seconds.
    """
+ """