livekit-plugins-cartesia 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- livekit/plugins/cartesia/models.py +1 -1
- livekit/plugins/cartesia/tts.py +29 -52
- livekit/plugins/cartesia/version.py +1 -1
- {livekit_plugins_cartesia-0.3.0.dist-info → livekit_plugins_cartesia-0.4.0.dist-info}/METADATA +1 -1
- livekit_plugins_cartesia-0.4.0.dist-info/RECORD +10 -0
- {livekit_plugins_cartesia-0.3.0.dist-info → livekit_plugins_cartesia-0.4.0.dist-info}/WHEEL +1 -1
- livekit_plugins_cartesia-0.3.0.dist-info/RECORD +0 -10
- {livekit_plugins_cartesia-0.3.0.dist-info → livekit_plugins_cartesia-0.4.0.dist-info}/top_level.txt +0 -0
livekit/plugins/cartesia/models.py
CHANGED
@@ -11,4 +11,4 @@ TTSEncoding = Literal[
|
|
11
11
|
|
12
12
|
TTSModels = Literal["sonic-english", "sonic-multilingual"]
|
13
13
|
TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
|
14
|
-
TTSDefaultVoiceId = "
|
14
|
+
TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0"
|
livekit/plugins/cartesia/tts.py
CHANGED
@@ -19,6 +19,7 @@ import base64
|
|
19
19
|
import json
|
20
20
|
import os
|
21
21
|
from dataclasses import dataclass
|
22
|
+
from typing import Any
|
22
23
|
|
23
24
|
import aiohttp
|
24
25
|
from livekit.agents import tokenize, tts, utils
|
@@ -42,7 +43,6 @@ class _TTSOptions:
|
|
42
43
|
voice: str | list[float]
|
43
44
|
api_key: str
|
44
45
|
language: str
|
45
|
-
word_tokenizer: tokenize.WordTokenizer
|
46
46
|
|
47
47
|
|
48
48
|
class TTS(tts.TTS):
|
@@ -56,9 +56,6 @@ class TTS(tts.TTS):
|
|
56
56
|
sample_rate: int = 24000,
|
57
57
|
api_key: str | None = None,
|
58
58
|
http_session: aiohttp.ClientSession | None = None,
|
59
|
-
word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
|
60
|
-
ignore_punctuation=False
|
61
|
-
),
|
62
59
|
) -> None:
|
63
60
|
super().__init__(
|
64
61
|
capabilities=tts.TTSCapabilities(streaming=True),
|
@@ -77,7 +74,6 @@ class TTS(tts.TTS):
|
|
77
74
|
sample_rate=sample_rate,
|
78
75
|
voice=voice,
|
79
76
|
api_key=api_key,
|
80
|
-
word_tokenizer=word_tokenizer,
|
81
77
|
)
|
82
78
|
self._session = http_session
|
83
79
|
|
@@ -145,7 +141,9 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
145
141
|
):
|
146
142
|
super().__init__()
|
147
143
|
self._opts, self._session = opts, session
|
148
|
-
self.
|
144
|
+
self._sent_tokenizer_stream = tokenize.basic.SentenceTokenizer(
|
145
|
+
min_sentence_len=BUFFERED_WORDS_COUNT
|
146
|
+
).stream()
|
149
147
|
|
150
148
|
@utils.log_exceptions(logger=logger)
|
151
149
|
async def _main_task(self) -> None:
|
@@ -176,51 +174,29 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
176
174
|
|
177
175
|
async def _run_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
|
178
176
|
request_id = utils.shortuuid()
|
179
|
-
pending_segments = []
|
180
177
|
|
181
|
-
async def
|
178
|
+
async def sentence_stream_task():
|
182
179
|
base_pkt = _to_cartesia_options(self._opts)
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
180
|
+
async for ev in self._sent_tokenizer_stream:
|
181
|
+
token_pkt = base_pkt.copy()
|
182
|
+
token_pkt["context_id"] = request_id
|
183
|
+
token_pkt["transcript"] = ev.token + " "
|
184
|
+
token_pkt["continue"] = True
|
185
|
+
await ws.send_str(json.dumps(token_pkt))
|
186
|
+
|
187
|
+
end_pkt = base_pkt.copy()
|
188
|
+
end_pkt["context_id"] = request_id
|
189
|
+
end_pkt["transcript"] = " "
|
190
|
+
end_pkt["continue"] = False
|
191
|
+
await ws.send_str(json.dumps(end_pkt))
|
192
|
+
|
193
|
+
async def input_task():
|
191
194
|
async for data in self._input_ch:
|
192
195
|
if isinstance(data, self._FlushSentinel):
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
end_pkt["context_id"] = current_segment_id
|
198
|
-
end_pkt["transcript"] = self._buf + " "
|
199
|
-
end_pkt["continue"] = False
|
200
|
-
await ws.send_str(json.dumps(end_pkt))
|
201
|
-
|
202
|
-
current_segment_id = None
|
203
|
-
self._buf = ""
|
204
|
-
elif data:
|
205
|
-
if current_segment_id is None:
|
206
|
-
current_segment_id = _new_segment()
|
207
|
-
|
208
|
-
self._buf += data
|
209
|
-
words = self._opts.word_tokenizer.tokenize(text=self._buf)
|
210
|
-
if len(words) < BUFFERED_WORDS_COUNT + 1:
|
211
|
-
continue
|
212
|
-
|
213
|
-
data = self._opts.word_tokenizer.format_words(words[:-1]) + " "
|
214
|
-
self._buf = words[-1]
|
215
|
-
|
216
|
-
token_pkt = base_pkt.copy()
|
217
|
-
token_pkt["context_id"] = current_segment_id
|
218
|
-
token_pkt["transcript"] = data
|
219
|
-
token_pkt["continue"] = True
|
220
|
-
await ws.send_str(json.dumps(token_pkt))
|
221
|
-
|
222
|
-
if len(pending_segments) == 0:
|
223
|
-
await ws.close()
|
196
|
+
self._sent_tokenizer_stream.flush()
|
197
|
+
continue
|
198
|
+
self._sent_tokenizer_stream.push_text(data)
|
199
|
+
self._sent_tokenizer_stream.end_input()
|
224
200
|
|
225
201
|
async def recv_task():
|
226
202
|
audio_bstream = utils.audio.AudioByteStream(
|
@@ -243,6 +219,7 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
243
219
|
|
244
220
|
data = json.loads(msg.data)
|
245
221
|
segment_id = data.get("context_id")
|
222
|
+
# Once we receive audio for a segment, we can start a new segment
|
246
223
|
if data.get("data"):
|
247
224
|
b64data = base64.b64decode(data["data"])
|
248
225
|
for frame in audio_bstream.write(b64data):
|
@@ -263,8 +240,7 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
263
240
|
)
|
264
241
|
)
|
265
242
|
|
266
|
-
|
267
|
-
if len(pending_segments) == 0 and self._input_ch.closed:
|
243
|
+
if segment_id == request_id:
|
268
244
|
# we're not going to receive more frames, close the connection
|
269
245
|
await ws.close()
|
270
246
|
break
|
@@ -272,7 +248,8 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
272
248
|
logger.error("unexpected Cartesia message %s", data)
|
273
249
|
|
274
250
|
tasks = [
|
275
|
-
asyncio.create_task(
|
251
|
+
asyncio.create_task(input_task()),
|
252
|
+
asyncio.create_task(sentence_stream_task()),
|
276
253
|
asyncio.create_task(recv_task()),
|
277
254
|
]
|
278
255
|
|
@@ -282,8 +259,8 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
282
259
|
await utils.aio.gracefully_cancel(*tasks)
|
283
260
|
|
284
261
|
|
285
|
-
def _to_cartesia_options(opts: _TTSOptions) -> dict:
|
286
|
-
voice: dict = {}
|
262
|
+
def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
|
263
|
+
voice: dict[str, Any] = {}
|
287
264
|
if isinstance(opts.voice, str):
|
288
265
|
voice["mode"] = "id"
|
289
266
|
voice["id"] = opts.voice
|
livekit_plugins_cartesia-0.4.0.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
|
2
|
+
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
+
livekit/plugins/cartesia/models.py,sha256=ZoSyV2ap_LqAIgvBvkmukkPxQR9DfKb3Z3oHtWxMiVg,335
|
4
|
+
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/cartesia/tts.py,sha256=sdiiWinOZR5EBkQFwa3GZAGrkgzXY1-aSRiDZ34K8ww,9527
|
6
|
+
livekit/plugins/cartesia/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
|
7
|
+
livekit_plugins_cartesia-0.4.0.dist-info/METADATA,sha256=BGgicrqKsylOpTbUcRG0B4DZF2qnaERI9q7qwIRLN7s,1252
|
8
|
+
livekit_plugins_cartesia-0.4.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
9
|
+
livekit_plugins_cartesia-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
+
livekit_plugins_cartesia-0.4.0.dist-info/RECORD,,
|
livekit_plugins_cartesia-0.3.0.dist-info/RECORD
REMOVED
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
|
2
|
-
livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
|
3
|
-
livekit/plugins/cartesia/models.py,sha256=T1iPQ18h4-o5rgSW236PDc73qp5zR9k4r_qNCl3XPWc,335
|
4
|
-
livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/cartesia/tts.py,sha256=uklD9fIYL8QWUSiyypFDgflkie9VhTu1C-x4YwJcDCU,10283
|
6
|
-
livekit/plugins/cartesia/version.py,sha256=G5iYozum4q7UpHwW43F7QfhzUfwcncPxBZ0gmUGsd5I,600
|
7
|
-
livekit_plugins_cartesia-0.3.0.dist-info/METADATA,sha256=iJcOyrkQ-0yPK_lYtR-eEbIDav84xlN7DUvwncx7OpQ,1252
|
8
|
-
livekit_plugins_cartesia-0.3.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
9
|
-
livekit_plugins_cartesia-0.3.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_cartesia-0.3.0.dist-info/RECORD,,
|
{livekit_plugins_cartesia-0.3.0.dist-info → livekit_plugins_cartesia-0.4.0.dist-info}/top_level.txt
RENAMED
File without changes
|