cartesia 1.4.0__py3-none-any.whl → 2.0.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. cartesia/__init__.py +288 -3
  2. cartesia/api_status/__init__.py +6 -0
  3. cartesia/api_status/client.py +104 -0
  4. cartesia/api_status/requests/__init__.py +5 -0
  5. cartesia/api_status/requests/api_info.py +8 -0
  6. cartesia/api_status/types/__init__.py +5 -0
  7. cartesia/api_status/types/api_info.py +20 -0
  8. cartesia/base_client.py +160 -0
  9. cartesia/client.py +163 -40
  10. cartesia/core/__init__.py +47 -0
  11. cartesia/core/api_error.py +15 -0
  12. cartesia/core/client_wrapper.py +55 -0
  13. cartesia/core/datetime_utils.py +28 -0
  14. cartesia/core/file.py +67 -0
  15. cartesia/core/http_client.py +499 -0
  16. cartesia/core/jsonable_encoder.py +101 -0
  17. cartesia/core/pydantic_utilities.py +296 -0
  18. cartesia/core/query_encoder.py +58 -0
  19. cartesia/core/remove_none_from_dict.py +11 -0
  20. cartesia/core/request_options.py +35 -0
  21. cartesia/core/serialization.py +272 -0
  22. cartesia/datasets/__init__.py +24 -0
  23. cartesia/datasets/client.py +422 -0
  24. cartesia/datasets/requests/__init__.py +15 -0
  25. cartesia/datasets/requests/create_dataset_request.py +7 -0
  26. cartesia/datasets/requests/dataset.py +9 -0
  27. cartesia/datasets/requests/dataset_file.py +9 -0
  28. cartesia/datasets/requests/paginated_dataset_files.py +10 -0
  29. cartesia/datasets/requests/paginated_datasets.py +10 -0
  30. cartesia/datasets/types/__init__.py +17 -0
  31. cartesia/datasets/types/create_dataset_request.py +19 -0
  32. cartesia/datasets/types/dataset.py +21 -0
  33. cartesia/datasets/types/dataset_file.py +21 -0
  34. cartesia/datasets/types/file_purpose.py +5 -0
  35. cartesia/datasets/types/paginated_dataset_files.py +21 -0
  36. cartesia/datasets/types/paginated_datasets.py +21 -0
  37. cartesia/embedding/__init__.py +5 -0
  38. cartesia/embedding/types/__init__.py +5 -0
  39. cartesia/embedding/types/embedding.py +201 -0
  40. cartesia/environment.py +7 -0
  41. cartesia/infill/__init__.py +2 -0
  42. cartesia/infill/client.py +294 -0
  43. cartesia/tts/__init__.py +167 -0
  44. cartesia/{_async_websocket.py → tts/_async_websocket.py} +159 -84
  45. cartesia/tts/_websocket.py +430 -0
  46. cartesia/tts/client.py +407 -0
  47. cartesia/tts/requests/__init__.py +76 -0
  48. cartesia/tts/requests/cancel_context_request.py +17 -0
  49. cartesia/tts/requests/controls.py +11 -0
  50. cartesia/tts/requests/generation_request.py +53 -0
  51. cartesia/tts/requests/mp_3_output_format.py +11 -0
  52. cartesia/tts/requests/output_format.py +30 -0
  53. cartesia/tts/requests/phoneme_timestamps.py +10 -0
  54. cartesia/tts/requests/raw_output_format.py +11 -0
  55. cartesia/tts/requests/speed.py +7 -0
  56. cartesia/tts/requests/tts_request.py +24 -0
  57. cartesia/tts/requests/tts_request_embedding_specifier.py +16 -0
  58. cartesia/tts/requests/tts_request_id_specifier.py +16 -0
  59. cartesia/tts/requests/tts_request_voice_specifier.py +7 -0
  60. cartesia/tts/requests/wav_output_format.py +7 -0
  61. cartesia/tts/requests/web_socket_base_response.py +11 -0
  62. cartesia/tts/requests/web_socket_chunk_response.py +8 -0
  63. cartesia/tts/requests/web_socket_done_response.py +7 -0
  64. cartesia/tts/requests/web_socket_error_response.py +7 -0
  65. cartesia/tts/requests/web_socket_flush_done_response.py +9 -0
  66. cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +9 -0
  67. cartesia/tts/requests/web_socket_raw_output_format.py +11 -0
  68. cartesia/tts/requests/web_socket_request.py +7 -0
  69. cartesia/tts/requests/web_socket_response.py +69 -0
  70. cartesia/tts/requests/web_socket_stream_options.py +8 -0
  71. cartesia/tts/requests/web_socket_timestamps_response.py +9 -0
  72. cartesia/tts/requests/web_socket_tts_output.py +18 -0
  73. cartesia/tts/requests/web_socket_tts_request.py +24 -0
  74. cartesia/tts/requests/word_timestamps.py +10 -0
  75. cartesia/tts/socket_client.py +302 -0
  76. cartesia/tts/types/__init__.py +90 -0
  77. cartesia/tts/types/cancel_context_request.py +28 -0
  78. cartesia/tts/types/context_id.py +3 -0
  79. cartesia/tts/types/controls.py +22 -0
  80. cartesia/tts/types/emotion.py +29 -0
  81. cartesia/tts/types/flush_id.py +3 -0
  82. cartesia/tts/types/generation_request.py +66 -0
  83. cartesia/tts/types/mp_3_output_format.py +23 -0
  84. cartesia/tts/types/natural_specifier.py +5 -0
  85. cartesia/tts/types/numerical_specifier.py +3 -0
  86. cartesia/tts/types/output_format.py +58 -0
  87. cartesia/tts/types/phoneme_timestamps.py +21 -0
  88. cartesia/tts/types/raw_encoding.py +5 -0
  89. cartesia/tts/types/raw_output_format.py +22 -0
  90. cartesia/tts/types/speed.py +7 -0
  91. cartesia/tts/types/supported_language.py +7 -0
  92. cartesia/tts/types/tts_request.py +35 -0
  93. cartesia/tts/types/tts_request_embedding_specifier.py +27 -0
  94. cartesia/tts/types/tts_request_id_specifier.py +27 -0
  95. cartesia/tts/types/tts_request_voice_specifier.py +7 -0
  96. cartesia/tts/types/wav_output_format.py +17 -0
  97. cartesia/tts/types/web_socket_base_response.py +22 -0
  98. cartesia/tts/types/web_socket_chunk_response.py +20 -0
  99. cartesia/tts/types/web_socket_done_response.py +17 -0
  100. cartesia/tts/types/web_socket_error_response.py +19 -0
  101. cartesia/tts/types/web_socket_flush_done_response.py +21 -0
  102. cartesia/tts/types/web_socket_phoneme_timestamps_response.py +20 -0
  103. cartesia/tts/types/web_socket_raw_output_format.py +22 -0
  104. cartesia/tts/types/web_socket_request.py +7 -0
  105. cartesia/tts/types/web_socket_response.py +124 -0
  106. cartesia/tts/types/web_socket_stream_options.py +19 -0
  107. cartesia/tts/types/web_socket_timestamps_response.py +20 -0
  108. cartesia/tts/types/web_socket_tts_output.py +27 -0
  109. cartesia/tts/types/web_socket_tts_request.py +36 -0
  110. cartesia/tts/types/word_timestamps.py +21 -0
  111. cartesia/tts/utils/tts.py +64 -0
  112. cartesia/tts/utils/types.py +70 -0
  113. cartesia/version.py +3 -1
  114. cartesia/voice_changer/__init__.py +27 -0
  115. cartesia/voice_changer/client.py +395 -0
  116. cartesia/voice_changer/requests/__init__.py +15 -0
  117. cartesia/voice_changer/requests/streaming_response.py +36 -0
  118. cartesia/voice_changer/types/__init__.py +17 -0
  119. cartesia/voice_changer/types/output_format_container.py +5 -0
  120. cartesia/voice_changer/types/streaming_response.py +62 -0
  121. cartesia/voices/__init__.py +67 -0
  122. cartesia/voices/client.py +1812 -0
  123. cartesia/voices/requests/__init__.py +27 -0
  124. cartesia/voices/requests/create_voice_request.py +21 -0
  125. cartesia/voices/requests/embedding_response.py +8 -0
  126. cartesia/voices/requests/embedding_specifier.py +10 -0
  127. cartesia/voices/requests/id_specifier.py +10 -0
  128. cartesia/voices/requests/localize_dialect.py +6 -0
  129. cartesia/voices/requests/localize_voice_request.py +15 -0
  130. cartesia/voices/requests/mix_voice_specifier.py +7 -0
  131. cartesia/voices/requests/mix_voices_request.py +9 -0
  132. cartesia/voices/requests/update_voice_request.py +15 -0
  133. cartesia/voices/requests/voice.py +39 -0
  134. cartesia/voices/requests/voice_metadata.py +36 -0
  135. cartesia/voices/types/__init__.py +41 -0
  136. cartesia/voices/types/base_voice_id.py +5 -0
  137. cartesia/voices/types/clone_mode.py +5 -0
  138. cartesia/voices/types/create_voice_request.py +32 -0
  139. cartesia/voices/types/embedding_response.py +20 -0
  140. cartesia/voices/types/embedding_specifier.py +22 -0
  141. cartesia/voices/types/gender.py +5 -0
  142. cartesia/voices/types/id_specifier.py +22 -0
  143. cartesia/voices/types/localize_dialect.py +6 -0
  144. cartesia/voices/types/localize_english_dialect.py +5 -0
  145. cartesia/voices/types/localize_target_language.py +7 -0
  146. cartesia/voices/types/localize_voice_request.py +26 -0
  147. cartesia/voices/types/mix_voice_specifier.py +7 -0
  148. cartesia/voices/types/mix_voices_request.py +20 -0
  149. cartesia/voices/types/update_voice_request.py +27 -0
  150. cartesia/voices/types/voice.py +50 -0
  151. cartesia/voices/types/voice_id.py +3 -0
  152. cartesia/voices/types/voice_metadata.py +48 -0
  153. cartesia/voices/types/weight.py +3 -0
  154. cartesia-2.0.0a0.dist-info/METADATA +306 -0
  155. cartesia-2.0.0a0.dist-info/RECORD +158 -0
  156. {cartesia-1.4.0.dist-info → cartesia-2.0.0a0.dist-info}/WHEEL +1 -1
  157. cartesia/_async_sse.py +0 -95
  158. cartesia/_logger.py +0 -3
  159. cartesia/_sse.py +0 -143
  160. cartesia/_types.py +0 -70
  161. cartesia/_websocket.py +0 -358
  162. cartesia/async_client.py +0 -82
  163. cartesia/async_tts.py +0 -176
  164. cartesia/resource.py +0 -44
  165. cartesia/tts.py +0 -292
  166. cartesia/utils/deprecated.py +0 -55
  167. cartesia/utils/retry.py +0 -87
  168. cartesia/utils/tts.py +0 -78
  169. cartesia/voices.py +0 -204
  170. cartesia-1.4.0.dist-info/METADATA +0 -663
  171. cartesia-1.4.0.dist-info/RECORD +0 -23
  172. cartesia-1.4.0.dist-info/licenses/LICENSE.md +0 -21
  173. /cartesia/{utils/__init__.py → py.typed} +0 -0
  174. /cartesia/{_constants.py → tts/utils/constants.py} +0 -0
cartesia/tts/client.py ADDED
@@ -0,0 +1,407 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from ..core.client_wrapper import SyncClientWrapper
5
+ from .requests.tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
6
+ from .requests.output_format import OutputFormatParams
7
+ from .types.supported_language import SupportedLanguage
8
+ from ..core.request_options import RequestOptions
9
+ from ..core.serialization import convert_and_respect_annotation_metadata
10
+ from json.decoder import JSONDecodeError
11
+ from ..core.api_error import ApiError
12
+ from .types.web_socket_response import WebSocketResponse
13
+ import httpx_sse
14
+ from ..core.pydantic_utilities import parse_obj_as
15
+ import json
16
+ from ..core.client_wrapper import AsyncClientWrapper
17
+
18
# Sentinel default for optional parameters: it is the Ellipsis singleton,
# cast to Any so it can stand in for a default of any annotated type.
OMIT = typing.cast(typing.Any, Ellipsis)
20
+
21
+
22
class TtsClient:
    """Synchronous client for the `tts/bytes` and `tts/sse` endpoints.

    Both public methods are generator functions: their bodies, including the
    call to `httpx_client.stream`, only run once the returned iterator is
    first advanced. The response is used inside a `with` block, so it is
    released when the generator finishes or is closed.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._client_wrapper = client_wrapper

    # NOTE: keep this helper free of `bytes` annotations; once `def bytes` below
    # has executed, the name `bytes` in the class body refers to that method.
    @staticmethod
    def _request_body(
        *,
        model_id: str,
        transcript: str,
        voice: TtsRequestVoiceSpecifierParams,
        output_format: OutputFormatParams,
        language: typing.Optional[SupportedLanguage],
        duration: typing.Optional[float],
    ) -> typing.Dict[str, typing.Any]:
        """Build the JSON body shared by `bytes` and `sse`."""
        return {
            "model_id": model_id,
            "transcript": transcript,
            "voice": convert_and_respect_annotation_metadata(
                object_=voice, annotation=TtsRequestVoiceSpecifierParams, direction="write"
            ),
            "language": language,
            "output_format": convert_and_respect_annotation_metadata(
                object_=output_format, annotation=OutputFormatParams, direction="write"
            ),
            "duration": duration,
        }

    def bytes(
        self,
        *,
        model_id: str,
        transcript: str,
        voice: TtsRequestVoiceSpecifierParams,
        output_format: OutputFormatParams,
        language: typing.Optional[SupportedLanguage] = OMIT,
        duration: typing.Optional[float] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[bytes]:
        """
        POST to `tts/bytes` and yield the response body as byte chunks.

        Parameters
        ----------
        model_id : str
            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.

        transcript : str
            Sent unchanged as the `transcript` field of the JSON body.

        voice : TtsRequestVoiceSpecifierParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        output_format : OutputFormatParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        language : typing.Optional[SupportedLanguage]
            Sent as the `language` field. Defaults to `OMIT`
            (presumably dropped from the body via `omit=OMIT` - confirm in the HTTP client).

        duration : typing.Optional[float]
            The maximum duration of the audio in seconds. You do not usually need to specify this.
            If the duration is not appropriate for the length of the transcript, the output audio may be truncated.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.

        Yields
        ------
        bytes
            Chunks produced by `iter_bytes`, using `chunk_size` from `request_options` when provided.

        Raises
        ------
        ApiError
            If the status code is outside 200-299. The error body is the decoded
            JSON payload, or the raw response text when it is not valid JSON.

        Examples
        --------
        from cartesia import Cartesia

        client = Cartesia(
            api_key="YOUR_API_KEY",
        )
        with open("hello.mp3", "wb") as f:
            for chunk in client.tts.bytes(
                model_id="sonic-english",
                transcript="Hello, world!",
                voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
                language="en",
                output_format={
                    "sample_rate": 44100,
                    "bit_rate": 128000,
                    "container": "mp3",
                },
            ):
                f.write(chunk)
        """
        with self._client_wrapper.httpx_client.stream(
            "tts/bytes",
            method="POST",
            json=self._request_body(
                model_id=model_id,
                transcript=transcript,
                voice=voice,
                output_format=output_format,
                language=language,
                duration=duration,
            ),
            request_options=request_options,
            omit=OMIT,
        ) as _response:
            if 200 <= _response.status_code < 300:
                _chunk_size = request_options.get("chunk_size", None) if request_options is not None else None
                for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
                    yield _chunk
                return
            # Error path: the body must be read before it can be decoded.
            _response.read()
            try:
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
            raise ApiError(status_code=_response.status_code, body=_response_json)

    def sse(
        self,
        *,
        model_id: str,
        transcript: str,
        voice: TtsRequestVoiceSpecifierParams,
        output_format: OutputFormatParams,
        language: typing.Optional[SupportedLanguage] = OMIT,
        duration: typing.Optional[float] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[WebSocketResponse]:
        """
        POST to `tts/sse` and yield each server-sent event parsed as a `WebSocketResponse`.

        Events whose data cannot be JSON-decoded or parsed into
        `WebSocketResponse` are skipped rather than raised.

        Parameters
        ----------
        model_id : str
            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.

        transcript : str
            Sent unchanged as the `transcript` field of the JSON body.

        voice : TtsRequestVoiceSpecifierParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        output_format : OutputFormatParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        language : typing.Optional[SupportedLanguage]
            Sent as the `language` field. Defaults to `OMIT`
            (presumably dropped from the body via `omit=OMIT` - confirm in the HTTP client).

        duration : typing.Optional[float]
            The maximum duration of the audio in seconds. You do not usually need to specify this.
            If the duration is not appropriate for the length of the transcript, the output audio may be truncated.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        WebSocketResponse
            One parsed object per successfully decoded event.

        Raises
        ------
        ApiError
            If the status code is outside 200-299. The error body is the decoded
            JSON payload, or the raw response text when it is not valid JSON.

        Examples
        --------
        from cartesia import Cartesia

        client = Cartesia(
            api_key="YOUR_API_KEY",
        )
        response = client.tts.sse(
            model_id="sonic-english",
            transcript="Hello, world!",
            voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
            language="en",
            output_format={
                "sample_rate": 44100,
                "encoding": "pcm_f32le",
                "container": "raw",
            },
        )
        for chunk in response:
            print(chunk)
        """
        with self._client_wrapper.httpx_client.stream(
            "tts/sse",
            method="POST",
            json=self._request_body(
                model_id=model_id,
                transcript=transcript,
                voice=voice,
                output_format=output_format,
                language=language,
                duration=duration,
            ),
            request_options=request_options,
            omit=OMIT,
        ) as _response:
            if 200 <= _response.status_code < 300:
                _event_source = httpx_sse.EventSource(_response)
                for _sse in _event_source.iter_sse():
                    try:
                        _event = typing.cast(
                            WebSocketResponse,
                            parse_obj_as(
                                type_=WebSocketResponse,  # type: ignore
                                object_=json.loads(_sse.data),
                            ),
                        )
                    except Exception:
                        # Best-effort: drop events that fail to decode or validate.
                        continue
                    # The yield stays outside the try block so GeneratorExit
                    # (raised here on close()) is never swallowed.
                    yield _event
                return
            # Error path: the body must be read before it can be decoded.
            _response.read()
            try:
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
            raise ApiError(status_code=_response.status_code, body=_response_json)
206
+
207
+
208
class AsyncTtsClient:
    """Asynchronous client for the `tts/bytes` and `tts/sse` endpoints.

    Both public methods are async generator functions: call them without
    `await` and consume the result with `async for`. Their bodies, including
    the call to `httpx_client.stream`, only run once iteration starts. The
    response is used inside an `async with` block, so it is released when the
    generator finishes or is closed.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._client_wrapper = client_wrapper

    # NOTE: keep this helper free of `bytes` annotations; once `def bytes` below
    # has executed, the name `bytes` in the class body refers to that method.
    @staticmethod
    def _request_body(
        *,
        model_id: str,
        transcript: str,
        voice: TtsRequestVoiceSpecifierParams,
        output_format: OutputFormatParams,
        language: typing.Optional[SupportedLanguage],
        duration: typing.Optional[float],
    ) -> typing.Dict[str, typing.Any]:
        """Build the JSON body shared by `bytes` and `sse`."""
        return {
            "model_id": model_id,
            "transcript": transcript,
            "voice": convert_and_respect_annotation_metadata(
                object_=voice, annotation=TtsRequestVoiceSpecifierParams, direction="write"
            ),
            "language": language,
            "output_format": convert_and_respect_annotation_metadata(
                object_=output_format, annotation=OutputFormatParams, direction="write"
            ),
            "duration": duration,
        }

    async def bytes(
        self,
        *,
        model_id: str,
        transcript: str,
        voice: TtsRequestVoiceSpecifierParams,
        output_format: OutputFormatParams,
        language: typing.Optional[SupportedLanguage] = OMIT,
        duration: typing.Optional[float] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[bytes]:
        """
        POST to `tts/bytes` and yield the response body as byte chunks.

        Parameters
        ----------
        model_id : str
            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.

        transcript : str
            Sent unchanged as the `transcript` field of the JSON body.

        voice : TtsRequestVoiceSpecifierParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        output_format : OutputFormatParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        language : typing.Optional[SupportedLanguage]
            Sent as the `language` field. Defaults to `OMIT`
            (presumably dropped from the body via `omit=OMIT` - confirm in the HTTP client).

        duration : typing.Optional[float]
            The maximum duration of the audio in seconds. You do not usually need to specify this.
            If the duration is not appropriate for the length of the transcript, the output audio may be truncated.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.

        Yields
        ------
        bytes
            Chunks produced by `aiter_bytes`, using `chunk_size` from `request_options` when provided.

        Raises
        ------
        ApiError
            If the status code is outside 200-299. The error body is the decoded
            JSON payload, or the raw response text when it is not valid JSON.

        Examples
        --------
        import asyncio

        from cartesia import AsyncCartesia

        client = AsyncCartesia(
            api_key="YOUR_API_KEY",
        )


        async def main() -> None:
            with open("hello.mp3", "wb") as f:
                async for chunk in client.tts.bytes(
                    model_id="sonic-english",
                    transcript="Hello, world!",
                    voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
                    language="en",
                    output_format={
                        "sample_rate": 44100,
                        "bit_rate": 128000,
                        "container": "mp3",
                    },
                ):
                    f.write(chunk)


        asyncio.run(main())
        """
        async with self._client_wrapper.httpx_client.stream(
            "tts/bytes",
            method="POST",
            json=self._request_body(
                model_id=model_id,
                transcript=transcript,
                voice=voice,
                output_format=output_format,
                language=language,
                duration=duration,
            ),
            request_options=request_options,
            omit=OMIT,
        ) as _response:
            if 200 <= _response.status_code < 300:
                _chunk_size = request_options.get("chunk_size", None) if request_options is not None else None
                async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
                    yield _chunk
                return
            # Error path: the body must be read before it can be decoded.
            await _response.aread()
            try:
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
            raise ApiError(status_code=_response.status_code, body=_response_json)

    async def sse(
        self,
        *,
        model_id: str,
        transcript: str,
        voice: TtsRequestVoiceSpecifierParams,
        output_format: OutputFormatParams,
        language: typing.Optional[SupportedLanguage] = OMIT,
        duration: typing.Optional[float] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[WebSocketResponse]:
        """
        POST to `tts/sse` and yield each server-sent event parsed as a `WebSocketResponse`.

        Events whose data cannot be JSON-decoded or parsed into
        `WebSocketResponse` are skipped rather than raised.

        Parameters
        ----------
        model_id : str
            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.

        transcript : str
            Sent unchanged as the `transcript` field of the JSON body.

        voice : TtsRequestVoiceSpecifierParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        output_format : OutputFormatParams
            Converted with `convert_and_respect_annotation_metadata` before sending.

        language : typing.Optional[SupportedLanguage]
            Sent as the `language` field. Defaults to `OMIT`
            (presumably dropped from the body via `omit=OMIT` - confirm in the HTTP client).

        duration : typing.Optional[float]
            The maximum duration of the audio in seconds. You do not usually need to specify this.
            If the duration is not appropriate for the length of the transcript, the output audio may be truncated.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        WebSocketResponse
            One parsed object per successfully decoded event.

        Raises
        ------
        ApiError
            If the status code is outside 200-299. The error body is the decoded
            JSON payload, or the raw response text when it is not valid JSON.

        Examples
        --------
        import asyncio

        from cartesia import AsyncCartesia

        client = AsyncCartesia(
            api_key="YOUR_API_KEY",
        )


        async def main() -> None:
            response = client.tts.sse(
                model_id="sonic-english",
                transcript="Hello, world!",
                voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
                language="en",
                output_format={
                    "sample_rate": 44100,
                    "encoding": "pcm_f32le",
                    "container": "raw",
                },
            )
            async for chunk in response:
                print(chunk)


        asyncio.run(main())
        """
        async with self._client_wrapper.httpx_client.stream(
            "tts/sse",
            method="POST",
            json=self._request_body(
                model_id=model_id,
                transcript=transcript,
                voice=voice,
                output_format=output_format,
                language=language,
                duration=duration,
            ),
            request_options=request_options,
            omit=OMIT,
        ) as _response:
            if 200 <= _response.status_code < 300:
                _event_source = httpx_sse.EventSource(_response)
                async for _sse in _event_source.aiter_sse():
                    try:
                        _event = typing.cast(
                            WebSocketResponse,
                            parse_obj_as(
                                type_=WebSocketResponse,  # type: ignore
                                object_=json.loads(_sse.data),
                            ),
                        )
                    except Exception:
                        # Best-effort: drop events that fail to decode or validate.
                        continue
                    # The yield stays outside the try block so GeneratorExit
                    # (raised here on aclose()) is never swallowed.
                    yield _event
                return
            # Error path: the body must be read before it can be decoded.
            await _response.aread()
            try:
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
            raise ApiError(status_code=_response.status_code, body=_response_json)
@@ -0,0 +1,76 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from .cancel_context_request import CancelContextRequestParams
4
+ from .controls import ControlsParams
5
+ from .generation_request import GenerationRequestParams
6
+ from .mp_3_output_format import Mp3OutputFormatParams
7
+ from .output_format import OutputFormatParams, OutputFormat_Mp3Params, OutputFormat_RawParams, OutputFormat_WavParams
8
+ from .phoneme_timestamps import PhonemeTimestampsParams
9
+ from .raw_output_format import RawOutputFormatParams
10
+ from .speed import SpeedParams
11
+ from .tts_request import TtsRequestParams
12
+ from .tts_request_embedding_specifier import TtsRequestEmbeddingSpecifierParams
13
+ from .tts_request_id_specifier import TtsRequestIdSpecifierParams
14
+ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
15
+ from .wav_output_format import WavOutputFormatParams
16
+ from .web_socket_base_response import WebSocketBaseResponseParams
17
+ from .web_socket_chunk_response import WebSocketChunkResponseParams
18
+ from .web_socket_done_response import WebSocketDoneResponseParams
19
+ from .web_socket_error_response import WebSocketErrorResponseParams
20
+ from .web_socket_flush_done_response import WebSocketFlushDoneResponseParams
21
+ from .web_socket_phoneme_timestamps_response import WebSocketPhonemeTimestampsResponseParams
22
+ from .web_socket_raw_output_format import WebSocketRawOutputFormatParams
23
+ from .web_socket_request import WebSocketRequestParams
24
+ from .web_socket_response import (
25
+ WebSocketResponseParams,
26
+ WebSocketResponse_ChunkParams,
27
+ WebSocketResponse_DoneParams,
28
+ WebSocketResponse_ErrorParams,
29
+ WebSocketResponse_FlushDoneParams,
30
+ WebSocketResponse_PhonemeTimestampsParams,
31
+ WebSocketResponse_TimestampsParams,
32
+ )
33
+ from .web_socket_stream_options import WebSocketStreamOptionsParams
34
+ from .web_socket_timestamps_response import WebSocketTimestampsResponseParams
35
+ from .web_socket_tts_output import WebSocketTtsOutputParams
36
+ from .web_socket_tts_request import WebSocketTtsRequestParams
37
+ from .word_timestamps import WordTimestampsParams
38
+
39
# Public names re-exported by this package, kept in the same order as before.
__all__ = [
    # Request / control shapes
    "CancelContextRequestParams",
    "ControlsParams",
    "GenerationRequestParams",
    # Output formats
    "Mp3OutputFormatParams",
    "OutputFormatParams",
    "OutputFormat_Mp3Params",
    "OutputFormat_RawParams",
    "OutputFormat_WavParams",
    "PhonemeTimestampsParams",
    "RawOutputFormatParams",
    "SpeedParams",
    # TTS request and voice specifiers
    "TtsRequestEmbeddingSpecifierParams",
    "TtsRequestIdSpecifierParams",
    "TtsRequestParams",
    "TtsRequestVoiceSpecifierParams",
    "WavOutputFormatParams",
    # WebSocket messages
    "WebSocketBaseResponseParams",
    "WebSocketChunkResponseParams",
    "WebSocketDoneResponseParams",
    "WebSocketErrorResponseParams",
    "WebSocketFlushDoneResponseParams",
    "WebSocketPhonemeTimestampsResponseParams",
    "WebSocketRawOutputFormatParams",
    "WebSocketRequestParams",
    "WebSocketResponseParams",
    "WebSocketResponse_ChunkParams",
    "WebSocketResponse_DoneParams",
    "WebSocketResponse_ErrorParams",
    "WebSocketResponse_FlushDoneParams",
    "WebSocketResponse_PhonemeTimestampsParams",
    "WebSocketResponse_TimestampsParams",
    "WebSocketStreamOptionsParams",
    "WebSocketTimestampsResponseParams",
    "WebSocketTtsOutputParams",
    "WebSocketTtsRequestParams",
    "WordTimestampsParams",
]
@@ -0,0 +1,17 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from ..types.context_id import ContextId
5
+ import typing
6
+
7
+
8
class CancelContextRequestParams(typing_extensions.TypedDict):
    """Typed-dict shape of a request that cancels a context."""

    # The ID of the context to cancel.
    context_id: ContextId

    # Whether to cancel the context, so that no more messages are generated
    # for that context. The annotation only admits the literal `True`.
    cancel: typing.Literal[True]
@@ -0,0 +1,11 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from .speed import SpeedParams
5
+ import typing
6
+ from ..types.emotion import Emotion
7
+
8
+
9
class ControlsParams(typing_extensions.TypedDict):
    """Typed-dict shape holding a speed setting and a sequence of emotions."""

    # Either variant of the SpeedParams union.
    speed: SpeedParams

    # Zero or more Emotion values.
    emotion: typing.Sequence[Emotion]
@@ -0,0 +1,53 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ import typing
5
+ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
6
+ import typing_extensions
7
+ from ..types.supported_language import SupportedLanguage
8
+ from .web_socket_raw_output_format import WebSocketRawOutputFormatParams
9
+ from ..types.context_id import ContextId
10
+ from ...core.serialization import FieldMetadata
11
+
12
+
13
class GenerationRequestParams(typing_extensions.TypedDict):
    """Typed-dict shape of a generation request.

    Keys wrapped in `NotRequired` may be left out of the dict.
    """

    # The ID of the model to use for the generation.
    # See [Models](/build-with-sonic/models) for available models.
    model_id: str

    # The transcript to generate speech for. This can be a string or an
    # iterator over strings.
    transcript: typing.Optional[typing.Any]

    voice: TtsRequestVoiceSpecifierParams
    language: typing_extensions.NotRequired[SupportedLanguage]
    output_format: WebSocketRawOutputFormatParams

    # The maximum duration of the audio in seconds. You do not usually need
    # to specify this. If the duration is not appropriate for the length of
    # the transcript, the output audio may be truncated.
    duration: typing_extensions.NotRequired[float]

    context_id: typing_extensions.NotRequired[ContextId]

    # Whether this input may be followed by more inputs.
    # If not specified, this defaults to `false`.
    # The Python key is `continue_`; the field metadata aliases it to "continue".
    continue_: typing_extensions.NotRequired[typing_extensions.Annotated[bool, FieldMetadata(alias="continue")]]

    # Whether to flush the context.
    flush: typing_extensions.NotRequired[bool]

    # Whether to return word-level timestamps.
    add_timestamps: typing_extensions.NotRequired[bool]

    # Whether to return phoneme-level timestamps.
    add_phoneme_timestamps: typing_extensions.NotRequired[bool]
@@ -0,0 +1,11 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
class Mp3OutputFormatParams(typing_extensions.TypedDict):
    """Typed-dict shape of MP3 output settings."""

    sample_rate: int

    # The bit rate of the audio in bits per second.
    # Supported bit rates are 32000, 64000, 96000, 128000, 192000.
    bit_rate: int
@@ -0,0 +1,30 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from __future__ import annotations
4
+ import typing_extensions
5
+ import typing
6
+ from ..types.raw_encoding import RawEncoding
7
+ import typing_extensions
8
+
9
+
10
class OutputFormat_RawParams(typing_extensions.TypedDict):
    """Output-format variant whose `container` key is the literal "raw"."""

    # Discriminator: always "raw" for this variant.
    container: typing.Literal["raw"]
    encoding: RawEncoding
    sample_rate: int
    # Optional key.
    bit_rate: typing_extensions.NotRequired[int]
15
+
16
+
17
class OutputFormat_WavParams(typing_extensions.TypedDict):
    """Output-format variant whose `container` key is the literal "wav"."""

    # Discriminator: always "wav" for this variant.
    container: typing.Literal["wav"]
    encoding: RawEncoding
    sample_rate: int
    # Optional key.
    bit_rate: typing_extensions.NotRequired[int]
22
+
23
+
24
class OutputFormat_Mp3Params(typing_extensions.TypedDict):
    """Output-format variant whose `container` key is the literal "mp3"."""

    # Discriminator: always "mp3" for this variant.
    container: typing.Literal["mp3"]
    sample_rate: int
    # Required for this variant, unlike the raw and wav variants.
    bit_rate: int
28
+
29
+
30
# Any one of the three container-specific output-format shapes; each variant's
# `container` literal ("raw", "wav", "mp3") tells them apart.
OutputFormatParams = typing.Union[OutputFormat_RawParams, OutputFormat_WavParams, OutputFormat_Mp3Params]
@@ -0,0 +1,10 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ import typing
5
+
6
+
7
class PhonemeTimestampsParams(typing_extensions.TypedDict):
    """Typed-dict shape holding phonemes with start and end values.

    NOTE(review): the three sequences are presumably index-aligned - confirm
    against the API definition.
    """

    phonemes: typing.Sequence[str]
    start: typing.Sequence[float]
    end: typing.Sequence[float]
@@ -0,0 +1,11 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from ..types.raw_encoding import RawEncoding
5
+ import typing_extensions
6
+
7
+
8
class RawOutputFormatParams(typing_extensions.TypedDict):
    """Typed-dict shape of raw output settings (no `container` key)."""

    encoding: RawEncoding
    sample_rate: int
    # Optional key.
    bit_rate: typing_extensions.NotRequired[int]
@@ -0,0 +1,7 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from ..types.numerical_specifier import NumericalSpecifier
5
+ from ..types.natural_specifier import NaturalSpecifier
6
+
7
# A speed value is either a NumericalSpecifier or a NaturalSpecifier.
SpeedParams = typing.Union[NumericalSpecifier, NaturalSpecifier]
@@ -0,0 +1,24 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
5
+ import typing_extensions
6
+ from ..types.supported_language import SupportedLanguage
7
+ from .output_format import OutputFormatParams
8
+
9
+
10
class TtsRequestParams(typing_extensions.TypedDict):
    """Typed-dict shape of a TTS request.

    Keys wrapped in `NotRequired` may be left out of the dict.
    """

    # The ID of the model to use for the generation.
    # See [Models](/build-with-sonic/models) for available models.
    model_id: str

    transcript: str
    voice: TtsRequestVoiceSpecifierParams
    language: typing_extensions.NotRequired[SupportedLanguage]
    output_format: OutputFormatParams

    # The maximum duration of the audio in seconds. You do not usually need
    # to specify this. If the duration is not appropriate for the length of
    # the transcript, the output audio may be truncated.
    duration: typing_extensions.NotRequired[float]
@@ -0,0 +1,16 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ import typing
5
+ from ...embedding.types.embedding import Embedding
6
+ import typing_extensions
7
+ from .controls import ControlsParams
8
+ from ...core.serialization import FieldMetadata
9
+
10
+
11
class TtsRequestEmbeddingSpecifierParams(typing_extensions.TypedDict):
    """Voice specifier whose `mode` key is the literal "embedding"."""

    # Discriminator: always "embedding" for this specifier.
    mode: typing.Literal["embedding"]
    embedding: Embedding
    # Optional key; the field metadata aliases it to "__experimental_controls".
    experimental_controls: typing_extensions.NotRequired[
        typing_extensions.Annotated[ControlsParams, FieldMetadata(alias="__experimental_controls")]
    ]