livekit-plugins-cartesia 0.4.2__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/cartesia/__init__.py +9 -0
- livekit/plugins/cartesia/tts.py +166 -90
- livekit/plugins/cartesia/version.py +1 -1
- {livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.5.dist-info}/METADATA +2 -2
- livekit_plugins_cartesia-0.4.5.dist-info/RECORD +10 -0
- {livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.5.dist-info}/WHEEL +1 -1
- livekit_plugins_cartesia-0.4.2.dist-info/RECORD +0 -10
- {livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.5.dist-info}/top_level.txt +0 -0
@@ -28,3 +28,12 @@ class CartesiaPlugin(Plugin):
|
|
28
28
|
|
29
29
|
|
30
30
|
Plugin.register_plugin(CartesiaPlugin())
|
31
|
+
|
32
|
+
# Cleanup docs of unexported modules
|
33
|
+
_module = dir()
|
34
|
+
NOT_IN_ALL = [m for m in _module if m not in __all__]
|
35
|
+
|
36
|
+
__pdoc__ = {}
|
37
|
+
|
38
|
+
for n in NOT_IN_ALL:
|
39
|
+
__pdoc__[n] = False
|
livekit/plugins/cartesia/tts.py
CHANGED
@@ -22,7 +22,17 @@ from dataclasses import dataclass
|
|
22
22
|
from typing import Any
|
23
23
|
|
24
24
|
import aiohttp
|
25
|
-
from livekit
|
25
|
+
from livekit import rtc
|
26
|
+
from livekit.agents import (
|
27
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
28
|
+
APIConnectionError,
|
29
|
+
APIConnectOptions,
|
30
|
+
APIStatusError,
|
31
|
+
APITimeoutError,
|
32
|
+
tokenize,
|
33
|
+
tts,
|
34
|
+
utils,
|
35
|
+
)
|
26
36
|
|
27
37
|
from .log import logger
|
28
38
|
from .models import (
|
@@ -43,7 +53,7 @@ BUFFERED_WORDS_COUNT = 8
|
|
43
53
|
|
44
54
|
@dataclass
|
45
55
|
class _TTSOptions:
|
46
|
-
model: TTSModels
|
56
|
+
model: TTSModels | str
|
47
57
|
encoding: TTSEncoding
|
48
58
|
sample_rate: int
|
49
59
|
voice: str | list[float]
|
@@ -57,7 +67,7 @@ class TTS(tts.TTS):
|
|
57
67
|
def __init__(
|
58
68
|
self,
|
59
69
|
*,
|
60
|
-
model: TTSModels = "sonic-english",
|
70
|
+
model: TTSModels | str = "sonic-english",
|
61
71
|
language: str = "en",
|
62
72
|
encoding: TTSEncoding = "pcm_s16le",
|
63
73
|
voice: str | list[float] = TTSDefaultVoiceId,
|
@@ -112,99 +122,145 @@ class TTS(tts.TTS):
|
|
112
122
|
|
113
123
|
return self._session
|
114
124
|
|
115
|
-
def
|
116
|
-
|
125
|
+
def update_options(
|
126
|
+
self,
|
127
|
+
*,
|
128
|
+
model: TTSModels | None = None,
|
129
|
+
language: str | None = None,
|
130
|
+
voice: str | list[float] | None = None,
|
131
|
+
speed: TTSVoiceSpeed | float | None = None,
|
132
|
+
emotion: list[TTSVoiceEmotion | str] | None = None,
|
133
|
+
) -> None:
|
134
|
+
"""
|
135
|
+
Update the Text-to-Speech (TTS) configuration options.
|
136
|
+
|
137
|
+
This method allows updating the TTS settings, including model type, language, voice, speed,
|
138
|
+
and emotion. If any parameter is not provided, the existing value will be retained.
|
139
|
+
|
140
|
+
Args:
|
141
|
+
model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
|
142
|
+
language (str, optional): The language code for synthesis. Defaults to "en".
|
143
|
+
voice (str | list[float], optional): The voice ID or embedding array.
|
144
|
+
speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
|
145
|
+
emotion (list[TTSVoiceEmotion], optional): Voice Control - Emotion (https://docs.cartesia.ai/user-guides/voice-control)
|
146
|
+
"""
|
147
|
+
self._opts.model = model or self._opts.model
|
148
|
+
self._opts.language = language or self._opts.language
|
149
|
+
self._opts.voice = voice or self._opts.voice
|
150
|
+
self._opts.speed = speed or self._opts.speed
|
151
|
+
if emotion is not None:
|
152
|
+
self._opts.emotion = emotion
|
153
|
+
|
154
|
+
def synthesize(
|
155
|
+
self,
|
156
|
+
text: str,
|
157
|
+
*,
|
158
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
159
|
+
) -> ChunkedStream:
|
160
|
+
return ChunkedStream(
|
161
|
+
tts=self,
|
162
|
+
input_text=text,
|
163
|
+
conn_options=conn_options,
|
164
|
+
opts=self._opts,
|
165
|
+
session=self._ensure_session(),
|
166
|
+
)
|
117
167
|
|
118
|
-
def stream(
|
119
|
-
|
168
|
+
def stream(
|
169
|
+
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
170
|
+
) -> "SynthesizeStream":
|
171
|
+
return SynthesizeStream(
|
172
|
+
tts=self,
|
173
|
+
conn_options=conn_options,
|
174
|
+
opts=self._opts,
|
175
|
+
session=self._ensure_session(),
|
176
|
+
)
|
120
177
|
|
121
178
|
|
122
179
|
class ChunkedStream(tts.ChunkedStream):
|
123
180
|
"""Synthesize chunked text using the bytes endpoint"""
|
124
181
|
|
125
182
|
def __init__(
|
126
|
-
self,
|
183
|
+
self,
|
184
|
+
*,
|
185
|
+
tts: TTS,
|
186
|
+
input_text: str,
|
187
|
+
conn_options: APIConnectOptions,
|
188
|
+
opts: _TTSOptions,
|
189
|
+
session: aiohttp.ClientSession,
|
127
190
|
) -> None:
|
128
|
-
super().__init__()
|
129
|
-
self.
|
191
|
+
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
192
|
+
self._opts, self._session = opts, session
|
130
193
|
|
131
|
-
|
132
|
-
|
194
|
+
async def _run(self) -> None:
|
195
|
+
request_id = utils.shortuuid()
|
133
196
|
bstream = utils.audio.AudioByteStream(
|
134
197
|
sample_rate=self._opts.sample_rate, num_channels=NUM_CHANNELS
|
135
198
|
)
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
199
|
+
|
200
|
+
json = _to_cartesia_options(self._opts)
|
201
|
+
json["transcript"] = self._input_text
|
202
|
+
|
203
|
+
headers = {
|
204
|
+
API_AUTH_HEADER: self._opts.api_key,
|
205
|
+
API_VERSION_HEADER: API_VERSION,
|
206
|
+
}
|
207
|
+
|
208
|
+
try:
|
209
|
+
async with self._session.post(
|
210
|
+
"https://api.cartesia.ai/tts/bytes",
|
211
|
+
headers=headers,
|
212
|
+
json=json,
|
213
|
+
timeout=aiohttp.ClientTimeout(
|
214
|
+
total=30,
|
215
|
+
sock_connect=self._conn_options.timeout,
|
216
|
+
),
|
217
|
+
) as resp:
|
218
|
+
resp.raise_for_status()
|
219
|
+
async for data, _ in resp.content.iter_chunks():
|
220
|
+
for frame in bstream.write(data):
|
221
|
+
self._event_ch.send_nowait(
|
222
|
+
tts.SynthesizedAudio(
|
223
|
+
request_id=request_id,
|
224
|
+
frame=frame,
|
225
|
+
)
|
154
226
|
)
|
155
|
-
)
|
156
227
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
request_id=request_id, segment_id=segment_id, frame=frame
|
228
|
+
for frame in bstream.flush():
|
229
|
+
self._event_ch.send_nowait(
|
230
|
+
tts.SynthesizedAudio(request_id=request_id, frame=frame)
|
161
231
|
)
|
162
|
-
|
232
|
+
except asyncio.TimeoutError as e:
|
233
|
+
raise APITimeoutError() from e
|
234
|
+
except aiohttp.ClientResponseError as e:
|
235
|
+
raise APIStatusError(
|
236
|
+
message=e.message,
|
237
|
+
status_code=e.status,
|
238
|
+
request_id=None,
|
239
|
+
body=None,
|
240
|
+
) from e
|
241
|
+
except Exception as e:
|
242
|
+
raise APIConnectionError() from e
|
163
243
|
|
164
244
|
|
165
245
|
class SynthesizeStream(tts.SynthesizeStream):
|
166
246
|
def __init__(
|
167
247
|
self,
|
248
|
+
*,
|
249
|
+
tts: TTS,
|
250
|
+
conn_options: APIConnectOptions,
|
168
251
|
opts: _TTSOptions,
|
169
252
|
session: aiohttp.ClientSession,
|
170
253
|
):
|
171
|
-
super().__init__()
|
254
|
+
super().__init__(tts=tts, conn_options=conn_options)
|
172
255
|
self._opts, self._session = opts, session
|
173
256
|
self._sent_tokenizer_stream = tokenize.basic.SentenceTokenizer(
|
174
257
|
min_sentence_len=BUFFERED_WORDS_COUNT
|
175
258
|
).stream()
|
176
259
|
|
177
|
-
|
178
|
-
async def _main_task(self) -> None:
|
179
|
-
retry_count = 0
|
180
|
-
max_retry = 3
|
181
|
-
while self._input_ch.qsize() or not self._input_ch.closed:
|
182
|
-
try:
|
183
|
-
url = f"wss://api.cartesia.ai/tts/websocket?api_key={self._opts.api_key}&cartesia_version={API_VERSION}"
|
184
|
-
ws = await self._session.ws_connect(url)
|
185
|
-
retry_count = 0 # connected successfully, reset the retry_count
|
186
|
-
|
187
|
-
await self._run_ws(ws)
|
188
|
-
except Exception as e:
|
189
|
-
if retry_count >= max_retry:
|
190
|
-
logger.exception(
|
191
|
-
f"failed to connect to Cartesia after {max_retry} tries"
|
192
|
-
)
|
193
|
-
break
|
194
|
-
|
195
|
-
retry_delay = min(retry_count * 2, 10) # max 10s
|
196
|
-
retry_count += 1
|
197
|
-
|
198
|
-
logger.warning(
|
199
|
-
f"Cartesia connection failed, retrying in {retry_delay}s",
|
200
|
-
exc_info=e,
|
201
|
-
)
|
202
|
-
await asyncio.sleep(retry_delay)
|
203
|
-
|
204
|
-
async def _run_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
|
260
|
+
async def _run(self) -> None:
|
205
261
|
request_id = utils.shortuuid()
|
206
262
|
|
207
|
-
async def
|
263
|
+
async def _sentence_stream_task(ws: aiohttp.ClientWebSocketResponse):
|
208
264
|
base_pkt = _to_cartesia_options(self._opts)
|
209
265
|
async for ev in self._sent_tokenizer_stream:
|
210
266
|
token_pkt = base_pkt.copy()
|
@@ -219,7 +275,7 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
219
275
|
end_pkt["continue"] = False
|
220
276
|
await ws.send_str(json.dumps(end_pkt))
|
221
277
|
|
222
|
-
async def
|
278
|
+
async def _input_task():
|
223
279
|
async for data in self._input_ch:
|
224
280
|
if isinstance(data, self._FlushSentinel):
|
225
281
|
self._sent_tokenizer_stream.flush()
|
@@ -227,12 +283,28 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
227
283
|
self._sent_tokenizer_stream.push_text(data)
|
228
284
|
self._sent_tokenizer_stream.end_input()
|
229
285
|
|
230
|
-
async def
|
286
|
+
async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
|
231
287
|
audio_bstream = utils.audio.AudioByteStream(
|
232
288
|
sample_rate=self._opts.sample_rate,
|
233
289
|
num_channels=NUM_CHANNELS,
|
234
290
|
)
|
235
291
|
|
292
|
+
last_frame: rtc.AudioFrame | None = None
|
293
|
+
|
294
|
+
def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
|
295
|
+
nonlocal last_frame
|
296
|
+
if last_frame is not None:
|
297
|
+
self._event_ch.send_nowait(
|
298
|
+
tts.SynthesizedAudio(
|
299
|
+
request_id=request_id,
|
300
|
+
segment_id=segment_id,
|
301
|
+
frame=last_frame,
|
302
|
+
is_final=is_final,
|
303
|
+
)
|
304
|
+
)
|
305
|
+
|
306
|
+
last_frame = None
|
307
|
+
|
236
308
|
while True:
|
237
309
|
msg = await ws.receive()
|
238
310
|
if msg.type in (
|
@@ -248,26 +320,18 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
248
320
|
|
249
321
|
data = json.loads(msg.data)
|
250
322
|
segment_id = data.get("context_id")
|
251
|
-
|
323
|
+
|
252
324
|
if data.get("data"):
|
253
325
|
b64data = base64.b64decode(data["data"])
|
254
326
|
for frame in audio_bstream.write(b64data):
|
255
|
-
|
256
|
-
|
257
|
-
request_id=request_id,
|
258
|
-
segment_id=segment_id,
|
259
|
-
frame=frame,
|
260
|
-
)
|
261
|
-
)
|
327
|
+
_send_last_frame(segment_id=segment_id, is_final=False)
|
328
|
+
last_frame = frame
|
262
329
|
elif data.get("done"):
|
263
330
|
for frame in audio_bstream.flush():
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
frame=frame,
|
269
|
-
)
|
270
|
-
)
|
331
|
+
_send_last_frame(segment_id=segment_id, is_final=False)
|
332
|
+
last_frame = frame
|
333
|
+
|
334
|
+
_send_last_frame(segment_id=segment_id, is_final=True)
|
271
335
|
|
272
336
|
if segment_id == request_id:
|
273
337
|
# we're not going to receive more frames, close the connection
|
@@ -276,16 +340,28 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
276
340
|
else:
|
277
341
|
logger.error("unexpected Cartesia message %s", data)
|
278
342
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
asyncio.create_task(recv_task()),
|
283
|
-
]
|
343
|
+
url = f"wss://api.cartesia.ai/tts/websocket?api_key={self._opts.api_key}&cartesia_version={API_VERSION}"
|
344
|
+
|
345
|
+
ws: aiohttp.ClientWebSocketResponse | None = None
|
284
346
|
|
285
347
|
try:
|
286
|
-
await asyncio.
|
348
|
+
ws = await asyncio.wait_for(
|
349
|
+
self._session.ws_connect(url), self._conn_options.timeout
|
350
|
+
)
|
351
|
+
|
352
|
+
tasks = [
|
353
|
+
asyncio.create_task(_input_task()),
|
354
|
+
asyncio.create_task(_sentence_stream_task(ws)),
|
355
|
+
asyncio.create_task(_recv_task(ws)),
|
356
|
+
]
|
357
|
+
|
358
|
+
try:
|
359
|
+
await asyncio.gather(*tasks)
|
360
|
+
finally:
|
361
|
+
await utils.aio.gracefully_cancel(*tasks)
|
287
362
|
finally:
|
288
|
-
|
363
|
+
if ws is not None:
|
364
|
+
await ws.close()
|
289
365
|
|
290
366
|
|
291
367
|
def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
|
{livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.5.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: livekit-plugins-cartesia
|
3
|
-
Version: 0.4.2
|
3
|
+
Version: 0.4.5
|
4
4
|
Summary: LiveKit Agents Plugin for Cartesia
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: >=3.9.0
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents
|
22
|
+
Requires-Dist: livekit-agents>=0.11
|
23
23
|
|
24
24
|
# LiveKit Plugins Cartesia
|
25
25
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
|
2
|
+
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
+
livekit/plugins/cartesia/models.py,sha256=fOO276Vzw3OkDUWUVcw7PH95ctFy38rj3q9I6_mYQ7M,950
|
4
|
+
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/cartesia/tts.py,sha256=SZH1tYHxKDgZ5PbBHkC86vATPkxu81UGnU44FCEzasI,13778
|
6
|
+
livekit/plugins/cartesia/version.py,sha256=NVa5L7bU73cSrgbGChyGQDqP6rLxpFdXF6hoIrBpXM8,600
|
7
|
+
livekit_plugins_cartesia-0.4.5.dist-info/METADATA,sha256=HRCHZl35yVnXjG5yvSYCilcJeefHsve7-xKt1bOkGsE,1245
|
8
|
+
livekit_plugins_cartesia-0.4.5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
9
|
+
livekit_plugins_cartesia-0.4.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
+
livekit_plugins_cartesia-0.4.5.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
|
2
|
-
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
-
livekit/plugins/cartesia/models.py,sha256=fOO276Vzw3OkDUWUVcw7PH95ctFy38rj3q9I6_mYQ7M,950
|
4
|
-
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/cartesia/tts.py,sha256=kUGIhsmHqIK2m_FV44_nwjHp0c7Zb2H7UG9VayNIae8,11341
|
6
|
-
livekit/plugins/cartesia/version.py,sha256=jabhjXzHcov1Cy2z9FGgyHFpSQ3hFKqu3vly20WQeTs,600
|
7
|
-
livekit_plugins_cartesia-0.4.2.dist-info/METADATA,sha256=w9ZGYOicE_fUFVTnhgvewGgWgwmaInoG9w6BGTiOu-8,1252
|
8
|
-
livekit_plugins_cartesia-0.4.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
9
|
-
livekit_plugins_cartesia-0.4.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_cartesia-0.4.2.dist-info/RECORD,,
|
{livekit_plugins_cartesia-0.4.2.dist-info → livekit_plugins_cartesia-0.4.5.dist-info}/top_level.txt
RENAMED
File without changes
|