cartesia 2.0.0b1__py3-none-any.whl → 2.0.0b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. cartesia/__init__.py +8 -4
  2. cartesia/base_client.py +0 -4
  3. cartesia/core/__init__.py +3 -0
  4. cartesia/core/client_wrapper.py +2 -2
  5. cartesia/core/pagination.py +88 -0
  6. cartesia/infill/client.py +4 -4
  7. cartesia/tts/_async_websocket.py +48 -1
  8. cartesia/tts/_websocket.py +44 -3
  9. cartesia/tts/client.py +4 -4
  10. cartesia/tts/requests/generation_request.py +5 -0
  11. cartesia/tts/requests/web_socket_chunk_response.py +3 -0
  12. cartesia/tts/requests/web_socket_response.py +2 -1
  13. cartesia/tts/requests/web_socket_tts_request.py +1 -0
  14. cartesia/tts/types/emotion.py +5 -0
  15. cartesia/tts/types/generation_request.py +5 -0
  16. cartesia/tts/types/web_socket_chunk_response.py +3 -1
  17. cartesia/tts/types/web_socket_response.py +2 -1
  18. cartesia/tts/types/web_socket_tts_output.py +2 -0
  19. cartesia/tts/types/web_socket_tts_request.py +1 -0
  20. cartesia/tts/utils/constants.py +2 -2
  21. cartesia/voice_changer/requests/streaming_response.py +2 -0
  22. cartesia/voice_changer/types/streaming_response.py +2 -0
  23. cartesia/voices/__init__.py +8 -4
  24. cartesia/voices/client.py +285 -169
  25. cartesia/voices/requests/__init__.py +2 -0
  26. cartesia/voices/requests/create_voice_request.py +0 -2
  27. cartesia/voices/requests/get_voices_response.py +24 -0
  28. cartesia/voices/requests/localize_dialect.py +1 -3
  29. cartesia/voices/requests/voice.py +13 -9
  30. cartesia/voices/types/__init__.py +6 -4
  31. cartesia/voices/types/create_voice_request.py +0 -2
  32. cartesia/voices/types/gender_presentation.py +5 -0
  33. cartesia/voices/types/get_voices_response.py +34 -0
  34. cartesia/voices/types/localize_dialect.py +1 -3
  35. cartesia/voices/types/voice.py +13 -9
  36. cartesia/voices/types/voice_expand_options.py +5 -0
  37. {cartesia-2.0.0b1.dist-info → cartesia-2.0.0b7.dist-info}/METADATA +151 -49
  38. {cartesia-2.0.0b1.dist-info → cartesia-2.0.0b7.dist-info}/RECORD +39 -37
  39. cartesia/datasets/client.py +0 -392
  40. cartesia/voices/types/localize_portuguese_dialect.py +0 -5
  41. cartesia/voices/types/localize_spanish_dialect.py +0 -5
  42. {cartesia-2.0.0b1.dist-info → cartesia-2.0.0b7.dist-info}/WHEEL +0 -0
@@ -3,6 +3,7 @@
 from .create_voice_request import CreateVoiceRequestParams
 from .embedding_response import EmbeddingResponseParams
 from .embedding_specifier import EmbeddingSpecifierParams
+from .get_voices_response import GetVoicesResponseParams
 from .id_specifier import IdSpecifierParams
 from .localize_dialect import LocalizeDialectParams
 from .localize_voice_request import LocalizeVoiceRequestParams
@@ -16,6 +17,7 @@ __all__ = [
     "CreateVoiceRequestParams",
     "EmbeddingResponseParams",
     "EmbeddingSpecifierParams",
+    "GetVoicesResponseParams",
     "IdSpecifierParams",
     "LocalizeDialectParams",
     "LocalizeVoiceRequestParams",
@@ -4,7 +4,6 @@ import typing_extensions
 from ...embedding.types.embedding import Embedding
 import typing_extensions
 from ...tts.types.supported_language import SupportedLanguage
-from ..types.base_voice_id import BaseVoiceId
 
 
 class CreateVoiceRequestParams(typing_extensions.TypedDict):
@@ -20,4 +19,3 @@ class CreateVoiceRequestParams(typing_extensions.TypedDict):
 
     embedding: Embedding
     language: typing_extensions.NotRequired[SupportedLanguage]
-    base_voice_id: typing_extensions.NotRequired[BaseVoiceId]
@@ -0,0 +1,24 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing_extensions
+import typing
+from .voice import VoiceParams
+import typing_extensions
+from ..types.voice_id import VoiceId
+
+
+class GetVoicesResponseParams(typing_extensions.TypedDict):
+    data: typing.Sequence[VoiceParams]
+    """
+    The paginated list of Voices.
+    """
+
+    has_more: bool
+    """
+    Whether there are more Voices to fetch (using `starting_after=id`, where id is the ID of the last Voice in the current response).
+    """
+
+    next_page: typing_extensions.NotRequired[VoiceId]
+    """
+    (Deprecated - use the id of the last Voice in the current response instead.) An ID that can be passed as `starting_after` to get the next page of Voices.
+    """
@@ -2,7 +2,5 @@
 
 import typing
 from ..types.localize_english_dialect import LocalizeEnglishDialect
-from ..types.localize_spanish_dialect import LocalizeSpanishDialect
-from ..types.localize_portuguese_dialect import LocalizePortugueseDialect
 
-LocalizeDialectParams = typing.Union[LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect]
+LocalizeDialectParams = typing.Union[LocalizeEnglishDialect]
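
With the Spanish and Portuguese dialect types removed, the `dialect` field of a localize request now only accepts English dialect codes. A hedged sketch of what a call might look like; the parameter names are inferred from the generated `LocalizeVoiceRequest` fields rather than a documented signature, and the valid dialect literals live in `localize_english_dialect.py`:

```python
import os

from cartesia import Cartesia

client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))

# Assumed parameter names; verify against voices/client.py.
localized = client.voices.localize(
    voice_id="a0e99841-438c-4a64-b679-ae501e7d6091",
    name="Localized Voice",
    description="An English-localized variant",
    language="en",
    original_speaker_gender="female",
    dialect="uk",  # must now be a LocalizeEnglishDialect value
)
```
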
@@ -2,22 +2,17 @@
 
 import typing_extensions
 from ..types.voice_id import VoiceId
-import typing_extensions
 import datetime as dt
+import typing_extensions
 from ...embedding.types.embedding import Embedding
 from ...tts.types.supported_language import SupportedLanguage
 
 
 class VoiceParams(typing_extensions.TypedDict):
     id: VoiceId
-    user_id: typing_extensions.NotRequired[str]
-    """
-    The ID of the user who owns the voice.
+    is_owner: bool
     """
-
-    is_public: bool
-    """
-    Whether the voice is publicly accessible.
+    Whether the current user is the owner of the voice.
     """
 
     name: str
@@ -35,5 +30,14 @@ class VoiceParams(typing_extensions.TypedDict):
     The date and time the voice was created.
     """
 
-    embedding: Embedding
+    embedding: typing_extensions.NotRequired[Embedding]
+    """
+    The vector embedding of the voice. Only included when `expand` includes `embedding`.
+    """
+
+    is_starred: typing_extensions.NotRequired[bool]
+    """
+    Whether the current user has starred the voice. Only included when `expand` includes `is_starred`.
+    """
+
     language: SupportedLanguage
@@ -6,17 +6,18 @@ from .create_voice_request import CreateVoiceRequest
 from .embedding_response import EmbeddingResponse
 from .embedding_specifier import EmbeddingSpecifier
 from .gender import Gender
+from .gender_presentation import GenderPresentation
+from .get_voices_response import GetVoicesResponse
 from .id_specifier import IdSpecifier
 from .localize_dialect import LocalizeDialect
 from .localize_english_dialect import LocalizeEnglishDialect
-from .localize_portuguese_dialect import LocalizePortugueseDialect
-from .localize_spanish_dialect import LocalizeSpanishDialect
 from .localize_target_language import LocalizeTargetLanguage
 from .localize_voice_request import LocalizeVoiceRequest
 from .mix_voice_specifier import MixVoiceSpecifier
 from .mix_voices_request import MixVoicesRequest
 from .update_voice_request import UpdateVoiceRequest
 from .voice import Voice
+from .voice_expand_options import VoiceExpandOptions
 from .voice_id import VoiceId
 from .voice_metadata import VoiceMetadata
 from .weight import Weight
@@ -28,17 +29,18 @@ __all__ = [
     "EmbeddingResponse",
     "EmbeddingSpecifier",
     "Gender",
+    "GenderPresentation",
+    "GetVoicesResponse",
     "IdSpecifier",
    "LocalizeDialect",
     "LocalizeEnglishDialect",
-    "LocalizePortugueseDialect",
-    "LocalizeSpanishDialect",
     "LocalizeTargetLanguage",
     "LocalizeVoiceRequest",
     "MixVoiceSpecifier",
     "MixVoicesRequest",
     "UpdateVoiceRequest",
     "Voice",
+    "VoiceExpandOptions",
     "VoiceId",
     "VoiceMetadata",
     "Weight",
@@ -5,7 +5,6 @@ import pydantic
 from ...embedding.types.embedding import Embedding
 import typing
 from ...tts.types.supported_language import SupportedLanguage
-from .base_voice_id import BaseVoiceId
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
 
 
@@ -22,7 +21,6 @@ class CreateVoiceRequest(UniversalBaseModel):
 
     embedding: Embedding
     language: typing.Optional[SupportedLanguage] = None
-    base_voice_id: typing.Optional[BaseVoiceId] = None
 
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+GenderPresentation = typing.Union[typing.Literal["masculine", "feminine", "gender_neutral"], typing.Any]
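
`GenderPresentation` follows the open-union pattern Fern uses for enums: known literals plus a `typing.Any` arm, so values added server-side later do not break deserialization. A small self-contained illustration (the last value is hypothetical):

```python
import typing

GenderPresentation = typing.Union[typing.Literal["masculine", "feminine", "gender_neutral"], typing.Any]

g: GenderPresentation = "feminine"  # checked against the known literals
g = "some_future_value"  # hypothetical value; still accepted via the Any arm
```
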
@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from ...core.pydantic_utilities import UniversalBaseModel
+import typing
+from .voice import Voice
+import pydantic
+from .voice_id import VoiceId
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+
+
+class GetVoicesResponse(UniversalBaseModel):
+    data: typing.List[Voice] = pydantic.Field()
+    """
+    The paginated list of Voices.
+    """
+
+    has_more: bool = pydantic.Field()
+    """
+    Whether there are more Voices to fetch (using `starting_after=id`, where id is the ID of the last Voice in the current response).
+    """
+
+    next_page: typing.Optional[VoiceId] = pydantic.Field(default=None)
+    """
+    (Deprecated - use the id of the last Voice in the current response instead.) An ID that can be passed as `starting_after` to get the next page of Voices.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
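
A quick sketch of validating a raw API payload into this model, assuming the Pydantic v1-style `parse_obj` entry point (under Pydantic v2 the equivalent is `model_validate`):

```python
from cartesia.voices.types import GetVoicesResponse

# An empty page keeps the sketch self-contained; a real payload would
# carry Voice objects in "data".
payload = {"data": [], "has_more": False, "next_page": None}

page = GetVoicesResponse.parse_obj(payload)
assert page.data == [] and page.has_more is False
```
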
@@ -2,7 +2,5 @@
 
 import typing
 from .localize_english_dialect import LocalizeEnglishDialect
-from .localize_spanish_dialect import LocalizeSpanishDialect
-from .localize_portuguese_dialect import LocalizePortugueseDialect
 
-LocalizeDialect = typing.Union[LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect]
+LocalizeDialect = typing.Union[LocalizeEnglishDialect]
@@ -2,9 +2,9 @@
 
 from ...core.pydantic_utilities import UniversalBaseModel
 from .voice_id import VoiceId
-import typing
 import pydantic
 import datetime as dt
+import typing
 from ...embedding.types.embedding import Embedding
 from ...tts.types.supported_language import SupportedLanguage
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
@@ -12,14 +12,9 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2
 
 class Voice(UniversalBaseModel):
     id: VoiceId
-    user_id: typing.Optional[str] = pydantic.Field(default=None)
-    """
-    The ID of the user who owns the voice.
+    is_owner: bool = pydantic.Field()
     """
-
-    is_public: bool = pydantic.Field()
-    """
-    Whether the voice is publicly accessible.
+    Whether the current user is the owner of the voice.
     """
 
     name: str = pydantic.Field()
@@ -37,7 +32,16 @@ class VoiceParams(typing_extensions.TypedDict):
     The date and time the voice was created.
     """
 
-    embedding: Embedding
+    embedding: typing.Optional[Embedding] = pydantic.Field(default=None)
+    """
+    The vector embedding of the voice. Only included when `expand` includes `embedding`.
+    """
+
+    is_starred: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    Whether the current user has starred the voice. Only included when `expand` includes `is_starred`.
+    """
+
     language: SupportedLanguage
 
     if IS_PYDANTIC_V2:
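
Because `embedding` and `is_starred` are now optional on `Voice`, code that previously read `voice.embedding` unconditionally should guard against `None`. A short sketch:

```python
import os

from cartesia import Cartesia

client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))

voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
print(voice.is_owner)  # required field in this release

# Optional fields are only populated when the request expands them.
if voice.embedding is not None:
    print(f"Embedding has {len(voice.embedding)} dimensions")
```
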
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+VoiceExpandOptions = typing.Union[typing.Literal["embedding", "is_starred"], typing.Any]
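
`VoiceExpandOptions` pairs with the optional `Voice` fields above. A hedged usage sketch: the `expand` keyword is inferred from this type rather than confirmed, and iterating the result assumes `list` returns a pager built on the new `cartesia/core/pagination.py`:

```python
import os

from cartesia import Cartesia

client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))

# `expand` is an assumed parameter name; each entry should be a
# VoiceExpandOptions value ("embedding" or "is_starred").
for voice in client.voices.list(expand=["embedding", "is_starred"]):
    print(voice.id, voice.is_starred)
```
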
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 2.0.0b1
+Version: 2.0.0b7
 Summary:
 Requires-Python: >=3.8,<4.0
 Classifier: Intended Audience :: Developers
@@ -57,7 +57,7 @@ A full reference for this library is available [here](./reference.md).
 from cartesia import Cartesia
 import os
 
-client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
 
 # Get all available voices
 voices = client.voices.list()
@@ -65,21 +65,32 @@ print(voices)
 
 # Get a specific voice
 voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
-print("The embedding for", voice["name"], "is", voice["embedding"])
+print("The embedding for", voice.name, "is", voice.embedding)
 
-# Clone a voice using filepath
-cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
+# Clone a voice using file data
+cloned_voice = client.voices.clone(
+    clip=open("path/to/voice.wav", "rb"),
+    name="Test cloned voice",
+    language="en",
+    mode="similarity", # or "stability"
+    enhance=False, # use enhance=True to clean and denoise the cloning audio
+    description="Test voice description"
+)
 
 # Mix voices together
-mixed_voice_embedding = client.voices.mix(
-    [{ "id": "voice_id_1", "weight": 0.5 }, { "id": "voice_id_2", "weight": 0.25 }, { "id": "voice_id_3", "weight": 0.25 }]
+mixed_voice = client.voices.mix(
+    voices=[
+        {"id": "voice_id_1", "weight": 0.25},
+        {"id": "voice_id_2", "weight": 0.75}
+    ]
 )
 
-# Create a new voice
+# Create a new voice from embedding
 new_voice = client.voices.create(
-    name="New Voice",
-    description="A clone of my own voice",
-    embedding=cloned_voice_embedding,
+    name="Test Voice",
+    description="Test voice description",
+    embedding=[...], # List[float] with 192 dimensions
+    language="en"
 )
 ```
 
@@ -90,15 +101,22 @@ Instantiate and use the client with the following:
 ```python
 from cartesia import Cartesia
 from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
+import os
 
 client = Cartesia(
-    api_key="YOUR_API_KEY",
+    api_key=os.getenv("CARTESIA_API_KEY"),
 )
 client.tts.bytes(
-    model_id="sonic-english",
+    model_id="sonic-2",
     transcript="Hello, world!",
-    voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
-),
+    voice={
+        "mode": "id",
+        "id": "694f9389-aac1-45b6-b726-9d9369183238",
+        "experimental_controls": {
+            "speed": 0.5, # range between [-1.0, 1.0], or "slow", "fastest", etc.
+            "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
+        }
+    },
     language="en",
     output_format={
         "container": "raw",
@@ -114,18 +132,18 @@ The SDK also exports an `async` client so that you can make non-blocking calls to our API.
 
 ```python
 import asyncio
+import os
 
 from cartesia import AsyncCartesia
 from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
 
 client = AsyncCartesia(
-    api_key="YOUR_API_KEY",
+    api_key=os.getenv("CARTESIA_API_KEY"),
 )
 
-
 async def main() -> None:
-    await client.tts.bytes(
-        model_id="sonic-english",
+    async for output in client.tts.bytes(
+        model_id="sonic-2",
         transcript="Hello, world!",
         voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
         language="en",
@@ -134,7 +152,8 @@ async def main() -> None:
         "sample_rate": 44100,
         "encoding": "pcm_f32le",
     },
-    )
+    ):
+        print(f"Received chunk of size: {len(output)}")
 
 
 asyncio.run(main())
@@ -162,26 +181,38 @@ The SDK supports streaming responses, as well, the response will be a generator
 ```python
 from cartesia import Cartesia
 from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
+import os
 
-client = Cartesia(
-    api_key="YOUR_API_KEY",
-)
-response = client.tts.sse(
-    model_id="string",
-    transcript="string",
-    voice={
-        "id": "string",
-        "experimental_controls": {
-            speed=1.1,
-            emotion="anger:lowest",
+def get_tts_chunks():
+    client = Cartesia(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+    )
+    response = client.tts.sse(
+        model_id="sonic-2",
+        transcript="Hello world!",
+        voice={
+            "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
+            "experimental_controls": {
+                "speed": "normal",
+                "emotion": [],
+            },
         },
-    },
-    language="en",
-    output_format={},
-    duration=1.1,
-)
-for chunk in response:
-    yield chunk
+        language="en",
+        output_format={
+            "container": "raw",
+            "encoding": "pcm_f32le",
+            "sample_rate": 44100,
+        },
+    )
+
+    audio_chunks = []
+    for chunk in response:
+        audio_chunks.append(chunk)
+    return audio_chunks
+
+chunks = get_tts_chunks()
+for chunk in chunks:
+    print(f"Received chunk of size: {len(chunk.data)}")
 ```
 
 ## WebSocket
@@ -190,16 +221,16 @@ for chunk in response:
 from cartesia import Cartesia
 from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawParams
 import pyaudio
+import os
 
 client = Cartesia(
-    api_key="YOUR_API_KEY",
+    api_key=os.getenv("CARTESIA_API_KEY"),
 )
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
-voice = client.voices.get(id=voice_id)
 transcript = "Hello! Welcome to Cartesia"
 
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
-model_id = "sonic-english"
+model_id = "sonic-2"
 
 p = pyaudio.PyAudio()
 rate = 22050
@@ -213,11 +244,11 @@ ws = client.tts.websocket()
 for output in ws.send(
     model_id=model_id,
     transcript=transcript,
-    voice={"embedding": voice.embedding},
+    voice={"id": voice_id},
     stream=True,
     output_format={
         "container": "raw",
-        "encoding": "pcm_f32le", 
+        "encoding": "pcm_f32le",
         "sample_rate": 22050
     },
 ):
@@ -236,6 +267,55 @@ p.terminate()
 ws.close() # Close the websocket connection
 ```
 
+## Requesting Timestamps
+
+```python
+import asyncio
+from cartesia import AsyncCartesia
+import os
+
+async def main():
+    client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
+
+    # Connect to the websocket
+    ws = await client.tts.websocket()
+
+    # Generate speech with timestamps
+    output_generate = await ws.send(
+        model_id="sonic-2",
+        transcript="Hello! Welcome to Cartesia's text-to-speech.",
+        voice={"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94"},
+        output_format={
+            "container": "raw",
+            "encoding": "pcm_f32le",
+            "sample_rate": 44100
+        },
+        add_timestamps=True, # Enable word-level timestamps
+        stream=True
+    )
+
+    # Process the streaming response with timestamps
+    all_words = []
+    all_starts = []
+    all_ends = []
+    audio_chunks = []
+
+    async for out in output_generate:
+        # Collect audio data
+        if out.audio is not None:
+            audio_chunks.append(out.audio)
+
+        # Process timestamp data
+        if out.word_timestamps is not None:
+            all_words.extend(out.word_timestamps.words) # List of words
+            all_starts.extend(out.word_timestamps.start) # Start time for each word (seconds)
+            all_ends.extend(out.word_timestamps.end) # End time for each word (seconds)
+
+    await ws.close()
+
+asyncio.run(main())
+```
+
 
 ## Advanced
 ### Retries
@@ -297,11 +377,33 @@ client = Cartesia(
 
 ## Contributing
 
-While we value open-source contributions to this SDK, this library is generated programmatically.
-Additions made directly to this library would have to be moved over to our generation code,
-otherwise they would be overwritten upon the next generated release. Feel free to open a PR as
-a proof of concept, but know that we will not be able to merge it as-is. We suggest opening
-an issue first to discuss with us!
+Note that most of this library is generated programmatically from
+<https://github.com/cartesia-ai/docs> before making edits to a file, verify it's not autogenerated
+by checking for this comment at the top of the file:
+
+```
+# This file was auto-generated by Fern from our API Definition.
+```
+
+### Running tests
+
+```sh
+uv pip install -r requirements.txt
+uv run pytest -rP -vv tests/custom/test_client.py::test_get_voices
+```
+### Manually generating SDK code from docs
+
+Assuming all your repos are cloned into your home directory:
+
+```sh
+$ cd ~/docs
+$ fern generate --group python-sdk --log-level debug --api version-2024-11-13 --preview
+$ cd ~/cartesia-python
+$ git pull ~/docs/fern/apis/version-2024-11-13/.preview/fern-python-sdk
+$ git commit --amend -m "manually regenerate from docs" # optional
+```
+
+### Automatically generating new SDK releases
 
-On the other hand, contributions to the README are always very welcome!
+From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)
 