livekit-plugins-cartesia 1.0.0.dev5__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/cartesia/models.py +1 -1
- livekit/plugins/cartesia/tts.py +53 -35
- livekit/plugins/cartesia/version.py +1 -1
- {livekit_plugins_cartesia-1.0.0.dev5.dist-info → livekit_plugins_cartesia-1.0.0rc2.dist-info}/METADATA +2 -2
- livekit_plugins_cartesia-1.0.0rc2.dist-info/RECORD +9 -0
- livekit_plugins_cartesia-1.0.0.dev5.dist-info/RECORD +0 -9
- {livekit_plugins_cartesia-1.0.0.dev5.dist-info → livekit_plugins_cartesia-1.0.0rc2.dist-info}/WHEEL +0 -0
@@ -8,7 +8,7 @@ TTSEncoding = Literal[
|
|
8
8
|
# "pcm_alaw",
|
9
9
|
]
|
10
10
|
|
11
|
-
TTSModels = Literal["sonic-
|
11
|
+
TTSModels = Literal["sonic", "sonic-2", "sonic-lite", "sonic-preview", "sonic-turbo"]
|
12
12
|
TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
|
13
13
|
TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238"
|
14
14
|
TTSVoiceSpeed = Literal["fastest", "fast", "normal", "slow", "slowest"]
|
livekit/plugins/cartesia/tts.py
CHANGED
@@ -33,9 +33,21 @@ from livekit.agents import (
|
|
33
33
|
tts,
|
34
34
|
utils,
|
35
35
|
)
|
36
|
+
from livekit.agents.types import (
|
37
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
38
|
+
NOT_GIVEN,
|
39
|
+
NotGivenOr,
|
40
|
+
)
|
41
|
+
from livekit.agents.utils import is_given
|
36
42
|
|
37
43
|
from .log import logger
|
38
|
-
from .models import
|
44
|
+
from .models import (
|
45
|
+
TTSDefaultVoiceId,
|
46
|
+
TTSEncoding,
|
47
|
+
TTSModels,
|
48
|
+
TTSVoiceEmotion,
|
49
|
+
TTSVoiceSpeed,
|
50
|
+
)
|
39
51
|
|
40
52
|
API_AUTH_HEADER = "X-API-Key"
|
41
53
|
API_VERSION_HEADER = "Cartesia-Version"
|
@@ -51,8 +63,8 @@ class _TTSOptions:
|
|
51
63
|
encoding: TTSEncoding
|
52
64
|
sample_rate: int
|
53
65
|
voice: str | list[float]
|
54
|
-
speed: TTSVoiceSpeed | float
|
55
|
-
emotion: list[TTSVoiceEmotion | str]
|
66
|
+
speed: NotGivenOr[TTSVoiceSpeed | float]
|
67
|
+
emotion: NotGivenOr[list[TTSVoiceEmotion | str]]
|
56
68
|
api_key: str
|
57
69
|
language: str
|
58
70
|
base_url: str
|
@@ -68,14 +80,14 @@ class TTS(tts.TTS):
|
|
68
80
|
def __init__(
|
69
81
|
self,
|
70
82
|
*,
|
71
|
-
model: TTSModels | str = "sonic",
|
83
|
+
model: TTSModels | str = "sonic-2",
|
72
84
|
language: str = "en",
|
73
85
|
encoding: TTSEncoding = "pcm_s16le",
|
74
86
|
voice: str | list[float] = TTSDefaultVoiceId,
|
75
|
-
speed: TTSVoiceSpeed | float
|
76
|
-
emotion: list[TTSVoiceEmotion | str]
|
87
|
+
speed: NotGivenOr[TTSVoiceSpeed | float] = NOT_GIVEN,
|
88
|
+
emotion: NotGivenOr[list[TTSVoiceEmotion | str]] = NOT_GIVEN,
|
77
89
|
sample_rate: int = 24000,
|
78
|
-
api_key: str
|
90
|
+
api_key: NotGivenOr[str] = NOT_GIVEN,
|
79
91
|
http_session: aiohttp.ClientSession | None = None,
|
80
92
|
base_url: str = "https://api.cartesia.ai",
|
81
93
|
) -> None:
|
@@ -85,7 +97,7 @@ class TTS(tts.TTS):
|
|
85
97
|
See https://docs.cartesia.ai/reference/web-socket/stream-speech/stream-speech for more details on the Cartesia API.
|
86
98
|
|
87
99
|
Args:
|
88
|
-
model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-
|
100
|
+
model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-2".
|
89
101
|
language (str, optional): The language code for synthesis. Defaults to "en".
|
90
102
|
encoding (TTSEncoding, optional): The audio encoding format. Defaults to "pcm_s16le".
|
91
103
|
voice (str | list[float], optional): The voice ID or embedding array.
|
@@ -95,16 +107,15 @@ class TTS(tts.TTS):
|
|
95
107
|
api_key (str, optional): The Cartesia API key. If not provided, it will be read from the CARTESIA_API_KEY environment variable.
|
96
108
|
http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
|
97
109
|
base_url (str, optional): The base URL for the Cartesia API. Defaults to "https://api.cartesia.ai".
|
98
|
-
"""
|
110
|
+
""" # noqa: E501
|
99
111
|
|
100
112
|
super().__init__(
|
101
113
|
capabilities=tts.TTSCapabilities(streaming=True),
|
102
114
|
sample_rate=sample_rate,
|
103
115
|
num_channels=NUM_CHANNELS,
|
104
116
|
)
|
105
|
-
|
106
|
-
|
107
|
-
if not api_key:
|
117
|
+
cartesia_api_key = api_key if is_given(api_key) else os.environ.get("CARTESIA_API_KEY")
|
118
|
+
if not cartesia_api_key:
|
108
119
|
raise ValueError("CARTESIA_API_KEY must be set")
|
109
120
|
|
110
121
|
self._opts = _TTSOptions(
|
@@ -115,7 +126,7 @@ class TTS(tts.TTS):
|
|
115
126
|
voice=voice,
|
116
127
|
speed=speed,
|
117
128
|
emotion=emotion,
|
118
|
-
api_key=
|
129
|
+
api_key=cartesia_api_key,
|
119
130
|
base_url=base_url,
|
120
131
|
)
|
121
132
|
self._session = http_session
|
@@ -149,11 +160,11 @@ class TTS(tts.TTS):
|
|
149
160
|
def update_options(
|
150
161
|
self,
|
151
162
|
*,
|
152
|
-
model: TTSModels |
|
153
|
-
language: str
|
154
|
-
voice: str | list[float]
|
155
|
-
speed: TTSVoiceSpeed | float
|
156
|
-
emotion: list[TTSVoiceEmotion | str]
|
163
|
+
model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
|
164
|
+
language: NotGivenOr[str] = NOT_GIVEN,
|
165
|
+
voice: NotGivenOr[str | list[float]] = NOT_GIVEN,
|
166
|
+
speed: NotGivenOr[TTSVoiceSpeed | float] = NOT_GIVEN,
|
167
|
+
emotion: NotGivenOr[list[TTSVoiceEmotion | str]] = NOT_GIVEN,
|
157
168
|
) -> None:
|
158
169
|
"""
|
159
170
|
Update the Text-to-Speech (TTS) configuration options.
|
@@ -162,24 +173,28 @@ class TTS(tts.TTS):
|
|
162
173
|
and emotion. If any parameter is not provided, the existing value will be retained.
|
163
174
|
|
164
175
|
Args:
|
165
|
-
model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-
|
176
|
+
model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-2".
|
166
177
|
language (str, optional): The language code for synthesis. Defaults to "en".
|
167
178
|
voice (str | list[float], optional): The voice ID or embedding array.
|
168
179
|
speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
|
169
180
|
emotion (list[TTSVoiceEmotion], optional): Voice Control - Emotion (https://docs.cartesia.ai/user-guides/voice-control)
|
170
181
|
"""
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
if
|
182
|
+
if is_given(model):
|
183
|
+
self._opts.model = model
|
184
|
+
if is_given(language):
|
185
|
+
self._opts.language = language
|
186
|
+
if is_given(voice):
|
187
|
+
self._opts.voice = voice
|
188
|
+
if is_given(speed):
|
189
|
+
self._opts.speed = speed
|
190
|
+
if is_given(emotion):
|
176
191
|
self._opts.emotion = emotion
|
177
192
|
|
178
193
|
def synthesize(
|
179
194
|
self,
|
180
195
|
text: str,
|
181
196
|
*,
|
182
|
-
conn_options: APIConnectOptions
|
197
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
183
198
|
) -> ChunkedStream:
|
184
199
|
return ChunkedStream(
|
185
200
|
tts=self,
|
@@ -189,7 +204,9 @@ class TTS(tts.TTS):
|
|
189
204
|
session=self._ensure_session(),
|
190
205
|
)
|
191
206
|
|
192
|
-
def stream(
|
207
|
+
def stream(
|
208
|
+
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
209
|
+
) -> SynthesizeStream:
|
193
210
|
return SynthesizeStream(
|
194
211
|
tts=self,
|
195
212
|
pool=self._pool,
|
@@ -214,7 +231,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
214
231
|
input_text: str,
|
215
232
|
opts: _TTSOptions,
|
216
233
|
session: aiohttp.ClientSession,
|
217
|
-
conn_options: APIConnectOptions
|
234
|
+
conn_options: APIConnectOptions,
|
218
235
|
) -> None:
|
219
236
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
220
237
|
self._opts, self._session = opts, session
|
@@ -368,17 +385,18 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
368
385
|
|
369
386
|
def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
|
370
387
|
voice: dict[str, Any] = {}
|
371
|
-
if
|
372
|
-
voice
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
388
|
+
if is_given(opts.voice):
|
389
|
+
if isinstance(opts.voice, str):
|
390
|
+
voice["mode"] = "id"
|
391
|
+
voice["id"] = opts.voice
|
392
|
+
else:
|
393
|
+
voice["mode"] = "embedding"
|
394
|
+
voice["embedding"] = opts.voice
|
377
395
|
|
378
396
|
voice_controls: dict = {}
|
379
|
-
if opts.speed
|
397
|
+
if is_given(opts.speed):
|
380
398
|
voice_controls["speed"] = opts.speed
|
381
|
-
if opts.emotion
|
399
|
+
if is_given(opts.emotion):
|
382
400
|
voice_controls["emotion"] = opts.emotion
|
383
401
|
|
384
402
|
if voice_controls:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-cartesia
|
3
|
-
Version: 1.0.0.dev5
|
3
|
+
Version: 1.0.0rc2
|
4
4
|
Summary: LiveKit Agents Plugin for Cartesia
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
18
|
Classifier: Topic :: Multimedia :: Video
|
19
19
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
20
|
Requires-Python: >=3.9.0
|
21
|
-
Requires-Dist: livekit-agents>=1.0.0.
|
21
|
+
Requires-Dist: livekit-agents>=1.0.0.rc2
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
24
24
|
# LiveKit Plugins Cartesia
|
@@ -0,0 +1,9 @@
|
|
1
|
+
livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
|
2
|
+
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
+
livekit/plugins/cartesia/models.py,sha256=KGY-r2luJuUNY6a3nnB0Rx-5Td12hikk-GtYLnqvysE,977
|
4
|
+
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/cartesia/tts.py,sha256=rHDiypiCdn0sKykNvYcl5Cf6SZE2XmPRlnLGQA4m7Ks,14443
|
6
|
+
livekit/plugins/cartesia/version.py,sha256=AHsNOknrNG9prN-fv_7X-KI5-O8ZMrUnTDyl9ObQIzY,604
|
7
|
+
livekit_plugins_cartesia-1.0.0rc2.dist-info/METADATA,sha256=oy_PfwJoIM-pnM94BvVFtb-C7idxqZzXbXHjApy86WA,1262
|
8
|
+
livekit_plugins_cartesia-1.0.0rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
+
livekit_plugins_cartesia-1.0.0rc2.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
|
2
|
-
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
-
livekit/plugins/cartesia/models.py,sha256=56CJgo7my-w-vpedir_ImV_aqKASeLihE5DbcCCgGJI,950
|
4
|
-
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/cartesia/tts.py,sha256=Zub4MXXVXQgV0t6al_uidDWH3BTVaYftyVbAFbkTU-U,13999
|
6
|
-
livekit/plugins/cartesia/version.py,sha256=pXgCpV03nQI-5Kk-74NFyAdw1htj2cx6unwQHipEcfE,605
|
7
|
-
livekit_plugins_cartesia-1.0.0.dev5.dist-info/METADATA,sha256=1Pjqf3rcfp0L7L93TlgNocQq96krnWo2WFP4rTkCV90,1265
|
8
|
-
livekit_plugins_cartesia-1.0.0.dev5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
-
livekit_plugins_cartesia-1.0.0.dev5.dist-info/RECORD,,
|
{livekit_plugins_cartesia-1.0.0.dev5.dist-info → livekit_plugins_cartesia-1.0.0rc2.dist-info}/WHEEL
RENAMED
File without changes
|