cartesia 0.0.5rc1__tar.gz → 0.1.0__tar.gz

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 0.0.5rc1
3
+ Version: 0.1.0
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -10,31 +10,8 @@ Classifier: Programming Language :: Python :: 3
10
10
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
11
  Requires-Python: >=3.8.0
12
12
  Description-Content-Type: text/markdown
13
- Requires-Dist: aiohttp
14
- Requires-Dist: httpx
15
- Requires-Dist: pytest-asyncio
16
- Requires-Dist: requests
17
- Requires-Dist: websockets
18
13
  Provides-Extra: dev
19
- Requires-Dist: pre-commit; extra == "dev"
20
- Requires-Dist: docformatter; extra == "dev"
21
- Requires-Dist: black==24.1.1; extra == "dev"
22
- Requires-Dist: isort==5.13.2; extra == "dev"
23
- Requires-Dist: flake8==7.0.0; extra == "dev"
24
- Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
25
- Requires-Dist: pytest>=8.0.2; extra == "dev"
26
- Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
27
- Requires-Dist: twine; extra == "dev"
28
14
  Provides-Extra: all
29
- Requires-Dist: pre-commit; extra == "all"
30
- Requires-Dist: docformatter; extra == "all"
31
- Requires-Dist: black==24.1.1; extra == "all"
32
- Requires-Dist: isort==5.13.2; extra == "all"
33
- Requires-Dist: flake8==7.0.0; extra == "all"
34
- Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
35
- Requires-Dist: pytest>=8.0.2; extra == "all"
36
- Requires-Dist: pytest-cov>=4.1.0; extra == "all"
37
- Requires-Dist: twine; extra == "all"
38
15
 
39
16
 
40
17
  # Cartesia Python API Library
@@ -60,13 +37,14 @@ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
60
37
  voices = client.get_voices()
61
38
  voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
62
39
  transcript = "Hello! Welcome to Cartesia"
40
+ model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
63
41
 
64
42
  p = pyaudio.PyAudio()
65
43
 
66
44
  stream = None
67
45
 
68
46
  # Generate and stream audio
69
- for output in client.generate(transcript=transcript, voice=voice, stream=True):
47
+ for output in client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
70
48
  buffer = output["audio"]
71
49
  rate = output["sampling_rate"]
72
50
 
@@ -84,26 +62,68 @@ stream.close()
84
62
  p.terminate()
85
63
  ```
86
64
 
87
- If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
65
+ You can also use the async client if you want to make asynchronous API calls:
66
+ ```python
67
+ from cartesia.tts import AsyncCartesiaTTS
68
+ import asyncio
69
+ import pyaudio
70
+ import os
71
+
72
+ async def write_stream():
73
+ client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
74
+ voices = client.get_voices()
75
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
76
+ transcript = "Hello! Welcome to Cartesia"
77
+ model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
78
+
79
+ p = pyaudio.PyAudio()
80
+
81
+ stream = None
82
+
83
+ # Generate and stream audio
84
+ async for output in await client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
85
+ buffer = output["audio"]
86
+ rate = output["sampling_rate"]
87
+
88
+ if not stream:
89
+ stream = p.open(format=pyaudio.paFloat32,
90
+ channels=1,
91
+ rate=rate,
92
+ output=True)
93
+
94
+ # Write the audio data to the stream
95
+ stream.write(buffer)
96
+
97
+ stream.stop_stream()
98
+ stream.close()
99
+ p.terminate()
100
+
101
+ asyncio.run(write_stream())
102
+ ```
103
+
104
+ If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook.
105
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
88
106
 
89
107
  ```python
90
- from cartesia.tts import CartesiaTTS
91
108
  from IPython.display import Audio
92
109
  import io
93
110
  import os
111
+ import numpy as np
94
112
 
95
- client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
96
- voices = client.get_voices()
97
- voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
98
- transcript = "Hello! Welcome to Cartesia"
113
+ from cartesia.tts import CartesiaTTS
99
114
 
100
- # Create a BytesIO object to store the audio data
101
- audio_data = io.BytesIO()
115
+ with CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
116
+ voices = client.get_voices()
117
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
118
+ transcript = "Hello! Welcome to Cartesia"
102
119
 
103
- # Generate and stream audio
104
- for output in client.generate(transcript=transcript, voice=voice, stream=True):
105
- buffer = output["audio"]
106
- audio_data.write(buffer)
120
+ # Create a BytesIO object to store the audio data
121
+ audio_data = io.BytesIO()
122
+
123
+ # Generate and stream audio
124
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
125
+ buffer = output["audio"]
126
+ audio_data.write(buffer)
107
127
 
108
128
  # Set the cursor position to the beginning of the BytesIO object
109
129
  audio_data.seek(0)
@@ -115,25 +135,27 @@ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["s
115
135
  display(audio)
116
136
  ```
117
137
 
118
- You can also use the async client if you want to make asynchronous API calls. The usage is very similar:
138
+ Below is the same example using the async client:
119
139
  ```python
120
- from cartesia.tts import AsyncCartesiaTTS
121
140
  from IPython.display import Audio
122
141
  import io
123
142
  import os
143
+ import numpy as np
124
144
 
125
- client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
126
- voices = client.get_voices()
127
- voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
128
- transcript = "Hello! Welcome to Cartesia"
145
+ from cartesia.tts import AsyncCartesiaTTS
129
146
 
130
- # Create a BytesIO object to store the audio data
131
- audio_data = io.BytesIO()
147
+ async with AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
148
+ voices = client.get_voices()
149
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
150
+ transcript = "Hello! Welcome to Cartesia"
132
151
 
133
- # Generate and stream audio
134
- async for output in client.generate(transcript=transcript, voice=voice, stream=True):
135
- buffer = output["audio"]
136
- audio_data.write(buffer)
152
+ # Create a BytesIO object to store the audio data
153
+ audio_data = io.BytesIO()
154
+
155
+ # Generate and stream audio
156
+ async for output in await client.generate(transcript=transcript, voice=voice, stream=True):
157
+ buffer = output["audio"]
158
+ audio_data.write(buffer)
137
159
 
138
160
  # Set the cursor position to the beginning of the BytesIO object
139
161
  audio_data.seek(0)
@@ -21,13 +21,14 @@ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
21
21
  voices = client.get_voices()
22
22
  voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
23
23
  transcript = "Hello! Welcome to Cartesia"
24
+ model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
24
25
 
25
26
  p = pyaudio.PyAudio()
26
27
 
27
28
  stream = None
28
29
 
29
30
  # Generate and stream audio
30
- for output in client.generate(transcript=transcript, voice=voice, stream=True):
31
+ for output in client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
31
32
  buffer = output["audio"]
32
33
  rate = output["sampling_rate"]
33
34
 
@@ -45,26 +46,68 @@ stream.close()
45
46
  p.terminate()
46
47
  ```
47
48
 
48
- If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
49
+ You can also use the async client if you want to make asynchronous API calls:
50
+ ```python
51
+ from cartesia.tts import AsyncCartesiaTTS
52
+ import asyncio
53
+ import pyaudio
54
+ import os
55
+
56
+ async def write_stream():
57
+ client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
58
+ voices = client.get_voices()
59
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
60
+ transcript = "Hello! Welcome to Cartesia"
61
+ model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
62
+
63
+ p = pyaudio.PyAudio()
64
+
65
+ stream = None
66
+
67
+ # Generate and stream audio
68
+ async for output in await client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
69
+ buffer = output["audio"]
70
+ rate = output["sampling_rate"]
71
+
72
+ if not stream:
73
+ stream = p.open(format=pyaudio.paFloat32,
74
+ channels=1,
75
+ rate=rate,
76
+ output=True)
77
+
78
+ # Write the audio data to the stream
79
+ stream.write(buffer)
80
+
81
+ stream.stop_stream()
82
+ stream.close()
83
+ p.terminate()
84
+
85
+ asyncio.run(write_stream())
86
+ ```
87
+
88
+ If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook.
89
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
49
90
 
50
91
  ```python
51
- from cartesia.tts import CartesiaTTS
52
92
  from IPython.display import Audio
53
93
  import io
54
94
  import os
95
+ import numpy as np
55
96
 
56
- client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
57
- voices = client.get_voices()
58
- voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
59
- transcript = "Hello! Welcome to Cartesia"
97
+ from cartesia.tts import CartesiaTTS
60
98
 
61
- # Create a BytesIO object to store the audio data
62
- audio_data = io.BytesIO()
99
+ with CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
100
+ voices = client.get_voices()
101
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
102
+ transcript = "Hello! Welcome to Cartesia"
63
103
 
64
- # Generate and stream audio
65
- for output in client.generate(transcript=transcript, voice=voice, stream=True):
66
- buffer = output["audio"]
67
- audio_data.write(buffer)
104
+ # Create a BytesIO object to store the audio data
105
+ audio_data = io.BytesIO()
106
+
107
+ # Generate and stream audio
108
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
109
+ buffer = output["audio"]
110
+ audio_data.write(buffer)
68
111
 
69
112
  # Set the cursor position to the beginning of the BytesIO object
70
113
  audio_data.seek(0)
@@ -76,25 +119,27 @@ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["s
76
119
  display(audio)
77
120
  ```
78
121
 
79
- You can also use the async client if you want to make asynchronous API calls. The usage is very similar:
122
+ Below is the same example using the async client:
80
123
  ```python
81
- from cartesia.tts import AsyncCartesiaTTS
82
124
  from IPython.display import Audio
83
125
  import io
84
126
  import os
127
+ import numpy as np
85
128
 
86
- client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
87
- voices = client.get_voices()
88
- voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
89
- transcript = "Hello! Welcome to Cartesia"
129
+ from cartesia.tts import AsyncCartesiaTTS
90
130
 
91
- # Create a BytesIO object to store the audio data
92
- audio_data = io.BytesIO()
131
+ async with AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
132
+ voices = client.get_voices()
133
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
134
+ transcript = "Hello! Welcome to Cartesia"
93
135
 
94
- # Generate and stream audio
95
- async for output in client.generate(transcript=transcript, voice=voice, stream=True):
96
- buffer = output["audio"]
97
- audio_data.write(buffer)
136
+ # Create a BytesIO object to store the audio data
137
+ audio_data = io.BytesIO()
138
+
139
+ # Generate and stream audio
140
+ async for output in await client.generate(transcript=transcript, voice=voice, stream=True):
141
+ buffer = output["audio"]
142
+ audio_data.write(buffer)
98
143
 
99
144
  # Set the cursor position to the beginning of the BytesIO object
100
145
  audio_data.seek(0)
@@ -0,0 +1,3 @@
1
+ from cartesia.tts import AsyncCartesiaTTS, CartesiaTTS
2
+
3
+ __all__ = ["CartesiaTTS", "AsyncCartesiaTTS"]
@@ -0,0 +1,42 @@
1
+ from enum import Enum
2
+ from typing import List, Optional, TypedDict, Union
3
+
4
+ try:
5
+ import numpy as np
6
+
7
+ _NUMPY_AVAILABLE = True
8
+ except ImportError:
9
+ _NUMPY_AVAILABLE = False
10
+
11
+
12
+ class AudioDataReturnType(Enum):
13
+ BYTES = "bytes"
14
+ ARRAY = "array"
15
+
16
+
17
+ class AudioOutputFormat(Enum):
18
+ """Supported output formats for the audio."""
19
+
20
+ FP32 = "fp32" # float32
21
+ PCM = "pcm" # 16-bit signed integer PCM
22
+ FP32_16000 = "fp32_16000" # float32, 16 kHz
23
+ FP32_22050 = "fp32_22050" # float32, 22.05 kHz
24
+ FP32_44100 = "fp32_44100" # float32, 44.1 kHz
25
+ PCM_16000 = "pcm_16000" # 16-bit signed integer PCM, 16 kHz
26
+ PCM_22050 = "pcm_22050" # 16-bit signed integer PCM, 22.05 kHz
27
+ PCM_44100 = "pcm_44100" # 16-bit signed integer PCM, 44.1 kHz
28
+
29
+
30
+ class AudioOutput(TypedDict):
31
+ audio: Union[bytes, "np.ndarray"]
32
+ sampling_rate: int
33
+
34
+
35
+ Embedding = List[float]
36
+
37
+
38
+ class VoiceMetadata(TypedDict):
39
+ id: str
40
+ name: str
41
+ description: str
42
+ embedding: Optional[Embedding]