livekit-plugins-cartesia 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/cartesia/__init__.py +9 -0
- livekit/plugins/cartesia/tts.py +109 -50
- livekit/plugins/cartesia/version.py +1 -1
- {livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.3.dist-info}/METADATA +2 -2
- livekit_plugins_cartesia-0.4.3.dist-info/RECORD +10 -0
- {livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.3.dist-info}/WHEEL +1 -1
- livekit_plugins_cartesia-0.4.2.dist-info/RECORD +0 -10
- {livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.3.dist-info}/top_level.txt +0 -0
@@ -28,3 +28,12 @@ class CartesiaPlugin(Plugin):
|
|
28
28
|
|
29
29
|
|
30
30
|
Plugin.register_plugin(CartesiaPlugin())
|
31
|
+
|
32
|
+
# Cleanup docs of unexported modules
|
33
|
+
_module = dir()
|
34
|
+
NOT_IN_ALL = [m for m in _module if m not in __all__]
|
35
|
+
|
36
|
+
__pdoc__ = {}
|
37
|
+
|
38
|
+
for n in NOT_IN_ALL:
|
39
|
+
__pdoc__[n] = False
|
livekit/plugins/cartesia/tts.py
CHANGED
@@ -22,7 +22,15 @@ from dataclasses import dataclass
|
|
22
22
|
from typing import Any
|
23
23
|
|
24
24
|
import aiohttp
|
25
|
-
from livekit
|
25
|
+
from livekit import rtc
|
26
|
+
from livekit.agents import (
|
27
|
+
APIConnectionError,
|
28
|
+
APIStatusError,
|
29
|
+
APITimeoutError,
|
30
|
+
tokenize,
|
31
|
+
tts,
|
32
|
+
utils,
|
33
|
+
)
|
26
34
|
|
27
35
|
from .log import logger
|
28
36
|
from .models import (
|
@@ -43,7 +51,7 @@ BUFFERED_WORDS_COUNT = 8
|
|
43
51
|
|
44
52
|
@dataclass
|
45
53
|
class _TTSOptions:
|
46
|
-
model: TTSModels
|
54
|
+
model: TTSModels | str
|
47
55
|
encoding: TTSEncoding
|
48
56
|
sample_rate: int
|
49
57
|
voice: str | list[float]
|
@@ -57,7 +65,7 @@ class TTS(tts.TTS):
|
|
57
65
|
def __init__(
|
58
66
|
self,
|
59
67
|
*,
|
60
|
-
model: TTSModels = "sonic-english",
|
68
|
+
model: TTSModels | str = "sonic-english",
|
61
69
|
language: str = "en",
|
62
70
|
encoding: TTSEncoding = "pcm_s16le",
|
63
71
|
voice: str | list[float] = TTSDefaultVoiceId,
|
@@ -112,63 +120,106 @@ class TTS(tts.TTS):
|
|
112
120
|
|
113
121
|
return self._session
|
114
122
|
|
123
|
+
def update_options(
|
124
|
+
self,
|
125
|
+
*,
|
126
|
+
model: TTSModels | None = None,
|
127
|
+
language: str | None = None,
|
128
|
+
voice: str | list[float] | None = None,
|
129
|
+
speed: TTSVoiceSpeed | float | None = None,
|
130
|
+
emotion: list[TTSVoiceEmotion | str] | None = None,
|
131
|
+
) -> None:
|
132
|
+
"""
|
133
|
+
Update the Text-to-Speech (TTS) configuration options.
|
134
|
+
|
135
|
+
This method allows updating the TTS settings, including model type, language, voice, speed,
|
136
|
+
and emotion. If any parameter is not provided, the existing value will be retained.
|
137
|
+
|
138
|
+
Args:
|
139
|
+
model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
|
140
|
+
language (str, optional): The language code for synthesis. Defaults to "en".
|
141
|
+
voice (str | list[float], optional): The voice ID or embedding array.
|
142
|
+
speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
|
143
|
+
emotion (list[TTSVoiceEmotion], optional): Voice Control - Emotion (https://docs.cartesia.ai/user-guides/voice-control)
|
144
|
+
"""
|
145
|
+
self._opts.model = model or self._opts.model
|
146
|
+
self._opts.language = language or self._opts.language
|
147
|
+
self._opts.voice = voice or self._opts.voice
|
148
|
+
self._opts.speed = speed or self._opts.speed
|
149
|
+
if emotion is not None:
|
150
|
+
self._opts.emotion = emotion
|
151
|
+
|
115
152
|
def synthesize(self, text: str) -> "ChunkedStream":
|
116
|
-
return ChunkedStream(text, self._opts, self._ensure_session())
|
153
|
+
return ChunkedStream(self, text, self._opts, self._ensure_session())
|
117
154
|
|
118
155
|
def stream(self) -> "SynthesizeStream":
|
119
|
-
return SynthesizeStream(self._opts, self._ensure_session())
|
156
|
+
return SynthesizeStream(self, self._opts, self._ensure_session())
|
120
157
|
|
121
158
|
|
122
159
|
class ChunkedStream(tts.ChunkedStream):
|
123
160
|
"""Synthesize chunked text using the bytes endpoint"""
|
124
161
|
|
125
162
|
def __init__(
|
126
|
-
self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
|
163
|
+
self, tts: TTS, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
|
127
164
|
) -> None:
|
128
|
-
super().__init__()
|
129
|
-
self.
|
165
|
+
super().__init__(tts, text)
|
166
|
+
self._opts, self._session = opts, session
|
130
167
|
|
131
|
-
|
132
|
-
|
168
|
+
async def _main_task(self) -> None:
|
169
|
+
request_id = utils.shortuuid()
|
133
170
|
bstream = utils.audio.AudioByteStream(
|
134
171
|
sample_rate=self._opts.sample_rate, num_channels=NUM_CHANNELS
|
135
172
|
)
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
173
|
+
|
174
|
+
json = _to_cartesia_options(self._opts)
|
175
|
+
json["transcript"] = self._input_text
|
176
|
+
|
177
|
+
headers = {
|
178
|
+
API_AUTH_HEADER: self._opts.api_key,
|
179
|
+
API_VERSION_HEADER: API_VERSION,
|
180
|
+
}
|
181
|
+
|
182
|
+
try:
|
183
|
+
async with self._session.post(
|
184
|
+
"https://api.cartesia.ai/tts/bytes",
|
185
|
+
headers=headers,
|
186
|
+
json=json,
|
187
|
+
) as resp:
|
188
|
+
resp.raise_for_status()
|
189
|
+
async for data, _ in resp.content.iter_chunks():
|
190
|
+
for frame in bstream.write(data):
|
191
|
+
self._event_ch.send_nowait(
|
192
|
+
tts.SynthesizedAudio(
|
193
|
+
request_id=request_id,
|
194
|
+
frame=frame,
|
195
|
+
)
|
154
196
|
)
|
155
|
-
)
|
156
197
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
request_id=request_id, segment_id=segment_id, frame=frame
|
198
|
+
for frame in bstream.flush():
|
199
|
+
self._event_ch.send_nowait(
|
200
|
+
tts.SynthesizedAudio(request_id=request_id, frame=frame)
|
161
201
|
)
|
162
|
-
|
202
|
+
except asyncio.TimeoutError as e:
|
203
|
+
raise APITimeoutError() from e
|
204
|
+
except aiohttp.ClientResponseError as e:
|
205
|
+
raise APIStatusError(
|
206
|
+
message=e.message,
|
207
|
+
status_code=e.status,
|
208
|
+
request_id=None,
|
209
|
+
body=None,
|
210
|
+
) from e
|
211
|
+
except Exception as e:
|
212
|
+
raise APIConnectionError() from e
|
163
213
|
|
164
214
|
|
165
215
|
class SynthesizeStream(tts.SynthesizeStream):
|
166
216
|
def __init__(
|
167
217
|
self,
|
218
|
+
tts: TTS,
|
168
219
|
opts: _TTSOptions,
|
169
220
|
session: aiohttp.ClientSession,
|
170
221
|
):
|
171
|
-
super().__init__()
|
222
|
+
super().__init__(tts)
|
172
223
|
self._opts, self._session = opts, session
|
173
224
|
self._sent_tokenizer_stream = tokenize.basic.SentenceTokenizer(
|
174
225
|
min_sentence_len=BUFFERED_WORDS_COUNT
|
@@ -233,6 +284,22 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
233
284
|
num_channels=NUM_CHANNELS,
|
234
285
|
)
|
235
286
|
|
287
|
+
last_frame: rtc.AudioFrame | None = None
|
288
|
+
|
289
|
+
def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
|
290
|
+
nonlocal last_frame
|
291
|
+
if last_frame is not None:
|
292
|
+
self._event_ch.send_nowait(
|
293
|
+
tts.SynthesizedAudio(
|
294
|
+
request_id=request_id,
|
295
|
+
segment_id=segment_id,
|
296
|
+
frame=last_frame,
|
297
|
+
is_final=is_final,
|
298
|
+
)
|
299
|
+
)
|
300
|
+
|
301
|
+
last_frame = None
|
302
|
+
|
236
303
|
while True:
|
237
304
|
msg = await ws.receive()
|
238
305
|
if msg.type in (
|
@@ -248,26 +315,18 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
248
315
|
|
249
316
|
data = json.loads(msg.data)
|
250
317
|
segment_id = data.get("context_id")
|
251
|
-
|
318
|
+
|
252
319
|
if data.get("data"):
|
253
320
|
b64data = base64.b64decode(data["data"])
|
254
321
|
for frame in audio_bstream.write(b64data):
|
255
|
-
|
256
|
-
|
257
|
-
request_id=request_id,
|
258
|
-
segment_id=segment_id,
|
259
|
-
frame=frame,
|
260
|
-
)
|
261
|
-
)
|
322
|
+
_send_last_frame(segment_id=segment_id, is_final=False)
|
323
|
+
last_frame = frame
|
262
324
|
elif data.get("done"):
|
263
325
|
for frame in audio_bstream.flush():
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
frame=frame,
|
269
|
-
)
|
270
|
-
)
|
326
|
+
_send_last_frame(segment_id=segment_id, is_final=False)
|
327
|
+
last_frame = frame
|
328
|
+
|
329
|
+
_send_last_frame(segment_id=segment_id, is_final=True)
|
271
330
|
|
272
331
|
if segment_id == request_id:
|
273
332
|
# we're not going to receive more frames, close the connection
|
{livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.3.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: livekit-plugins-cartesia
|
3
|
-
Version: 0.4.2
|
3
|
+
Version: 0.4.3
|
4
4
|
Summary: LiveKit Agents Plugin for Cartesia
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: >=3.9.0
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents >=0.
|
22
|
+
Requires-Dist: livekit-agents >=0.11
|
23
23
|
|
24
24
|
# LiveKit Plugins Cartesia
|
25
25
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
|
2
|
+
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
+
livekit/plugins/cartesia/models.py,sha256=fOO276Vzw3OkDUWUVcw7PH95ctFy38rj3q9I6_mYQ7M,950
|
4
|
+
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/cartesia/tts.py,sha256=2xwWOIjwLDOF4TbHlDibrZpUju9If8WrNpHQ2JMuBC0,13533
|
6
|
+
livekit/plugins/cartesia/version.py,sha256=u7PSD5TBbPRIhE8vJkBVJzq_eGqYfg6RP5c3VKNlKGk,600
|
7
|
+
livekit_plugins_cartesia-0.4.3.dist-info/METADATA,sha256=w5q0oz6rdHDL5cxAyT5hWbHqhZnOPnZYGl3aUKsr3z4,1246
|
8
|
+
livekit_plugins_cartesia-0.4.3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
9
|
+
livekit_plugins_cartesia-0.4.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
+
livekit_plugins_cartesia-0.4.3.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
|
2
|
-
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
-
livekit/plugins/cartesia/models.py,sha256=fOO276Vzw3OkDUWUVcw7PH95ctFy38rj3q9I6_mYQ7M,950
|
4
|
-
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/cartesia/tts.py,sha256=kUGIhsmHqIK2m_FV44_nwjHp0c7Zb2H7UG9VayNIae8,11341
|
6
|
-
livekit/plugins/cartesia/version.py,sha256=jabhjXzHcov1Cy2z9FGgyHFpSQ3hFKqu3vly20WQeTs,600
|
7
|
-
livekit_plugins_cartesia-0.4.2.dist-info/METADATA,sha256=w9ZGYOicE_fUFVTnhgvewGgWgwmaInoG9w6BGTiOu-8,1252
|
8
|
-
livekit_plugins_cartesia-0.4.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
9
|
-
livekit_plugins_cartesia-0.4.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_cartesia-0.4.2.dist-info/RECORD,,
|
{livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.3.dist-info}/top_level.txt
RENAMED
File without changes
|