livekit-plugins-cartesia 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -11,4 +11,4 @@ TTSEncoding = Literal[
11
11
 
12
12
  TTSModels = Literal["sonic-english", "sonic-multilingual"]
13
13
  TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
14
- TTSDefaultVoiceId = "b7d50908-b17c-442d-ad8d-810c63997ed9"
14
+ TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0"
@@ -19,6 +19,7 @@ import base64
19
19
  import json
20
20
  import os
21
21
  from dataclasses import dataclass
22
+ from typing import Any
22
23
 
23
24
  import aiohttp
24
25
  from livekit.agents import tokenize, tts, utils
@@ -42,7 +43,6 @@ class _TTSOptions:
42
43
  voice: str | list[float]
43
44
  api_key: str
44
45
  language: str
45
- word_tokenizer: tokenize.WordTokenizer
46
46
 
47
47
 
48
48
  class TTS(tts.TTS):
@@ -56,9 +56,6 @@ class TTS(tts.TTS):
56
56
  sample_rate: int = 24000,
57
57
  api_key: str | None = None,
58
58
  http_session: aiohttp.ClientSession | None = None,
59
- word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
60
- ignore_punctuation=False
61
- ),
62
59
  ) -> None:
63
60
  super().__init__(
64
61
  capabilities=tts.TTSCapabilities(streaming=True),
@@ -77,7 +74,6 @@ class TTS(tts.TTS):
77
74
  sample_rate=sample_rate,
78
75
  voice=voice,
79
76
  api_key=api_key,
80
- word_tokenizer=word_tokenizer,
81
77
  )
82
78
  self._session = http_session
83
79
 
@@ -145,7 +141,9 @@ class SynthesizeStream(tts.SynthesizeStream):
145
141
  ):
146
142
  super().__init__()
147
143
  self._opts, self._session = opts, session
148
- self._buf = ""
144
+ self._sent_tokenizer_stream = tokenize.basic.SentenceTokenizer(
145
+ min_sentence_len=BUFFERED_WORDS_COUNT
146
+ ).stream()
149
147
 
150
148
  @utils.log_exceptions(logger=logger)
151
149
  async def _main_task(self) -> None:
@@ -176,51 +174,29 @@ class SynthesizeStream(tts.SynthesizeStream):
176
174
 
177
175
  async def _run_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
178
176
  request_id = utils.shortuuid()
179
- pending_segments = []
180
177
 
181
- async def send_task():
178
+ async def sentence_stream_task():
182
179
  base_pkt = _to_cartesia_options(self._opts)
183
-
184
- def _new_segment():
185
- segment_id = utils.shortuuid()
186
- pending_segments.append(segment_id)
187
- return segment_id
188
-
189
- current_segment_id: str | None = _new_segment()
190
-
180
+ async for ev in self._sent_tokenizer_stream:
181
+ token_pkt = base_pkt.copy()
182
+ token_pkt["context_id"] = request_id
183
+ token_pkt["transcript"] = ev.token + " "
184
+ token_pkt["continue"] = True
185
+ await ws.send_str(json.dumps(token_pkt))
186
+
187
+ end_pkt = base_pkt.copy()
188
+ end_pkt["context_id"] = request_id
189
+ end_pkt["transcript"] = " "
190
+ end_pkt["continue"] = False
191
+ await ws.send_str(json.dumps(end_pkt))
192
+
193
+ async def input_task():
191
194
  async for data in self._input_ch:
192
195
  if isinstance(data, self._FlushSentinel):
193
- if current_segment_id is None:
194
- continue
195
-
196
- end_pkt = base_pkt.copy()
197
- end_pkt["context_id"] = current_segment_id
198
- end_pkt["transcript"] = self._buf + " "
199
- end_pkt["continue"] = False
200
- await ws.send_str(json.dumps(end_pkt))
201
-
202
- current_segment_id = None
203
- self._buf = ""
204
- elif data:
205
- if current_segment_id is None:
206
- current_segment_id = _new_segment()
207
-
208
- self._buf += data
209
- words = self._opts.word_tokenizer.tokenize(text=self._buf)
210
- if len(words) < BUFFERED_WORDS_COUNT + 1:
211
- continue
212
-
213
- data = self._opts.word_tokenizer.format_words(words[:-1]) + " "
214
- self._buf = words[-1]
215
-
216
- token_pkt = base_pkt.copy()
217
- token_pkt["context_id"] = current_segment_id
218
- token_pkt["transcript"] = data
219
- token_pkt["continue"] = True
220
- await ws.send_str(json.dumps(token_pkt))
221
-
222
- if len(pending_segments) == 0:
223
- await ws.close()
196
+ self._sent_tokenizer_stream.flush()
197
+ continue
198
+ self._sent_tokenizer_stream.push_text(data)
199
+ self._sent_tokenizer_stream.end_input()
224
200
 
225
201
  async def recv_task():
226
202
  audio_bstream = utils.audio.AudioByteStream(
@@ -243,6 +219,7 @@ class SynthesizeStream(tts.SynthesizeStream):
243
219
 
244
220
  data = json.loads(msg.data)
245
221
  segment_id = data.get("context_id")
222
+ # Once we receive audio for a segment, we can start a new segment
246
223
  if data.get("data"):
247
224
  b64data = base64.b64decode(data["data"])
248
225
  for frame in audio_bstream.write(b64data):
@@ -263,8 +240,7 @@ class SynthesizeStream(tts.SynthesizeStream):
263
240
  )
264
241
  )
265
242
 
266
- pending_segments.remove(segment_id)
267
- if len(pending_segments) == 0 and self._input_ch.closed:
243
+ if segment_id == request_id:
268
244
  # we're not going to receive more frames, close the connection
269
245
  await ws.close()
270
246
  break
@@ -272,7 +248,8 @@ class SynthesizeStream(tts.SynthesizeStream):
272
248
  logger.error("unexpected Cartesia message %s", data)
273
249
 
274
250
  tasks = [
275
- asyncio.create_task(send_task()),
251
+ asyncio.create_task(input_task()),
252
+ asyncio.create_task(sentence_stream_task()),
276
253
  asyncio.create_task(recv_task()),
277
254
  ]
278
255
 
@@ -282,8 +259,8 @@ class SynthesizeStream(tts.SynthesizeStream):
282
259
  await utils.aio.gracefully_cancel(*tasks)
283
260
 
284
261
 
285
- def _to_cartesia_options(opts: _TTSOptions) -> dict:
286
- voice: dict = {}
262
+ def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
263
+ voice: dict[str, Any] = {}
287
264
  if isinstance(opts.voice, str):
288
265
  voice["mode"] = "id"
289
266
  voice["id"] = opts.voice
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.3.0"
15
+ __version__ = "0.4.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-cartesia
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: LiveKit Agents Plugin for Cartesia
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -0,0 +1,10 @@
1
+ livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
2
+ livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
+ livekit/plugins/cartesia/models.py,sha256=ZoSyV2ap_LqAIgvBvkmukkPxQR9DfKb3Z3oHtWxMiVg,335
4
+ livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/cartesia/tts.py,sha256=sdiiWinOZR5EBkQFwa3GZAGrkgzXY1-aSRiDZ34K8ww,9527
6
+ livekit/plugins/cartesia/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
7
+ livekit_plugins_cartesia-0.4.0.dist-info/METADATA,sha256=BGgicrqKsylOpTbUcRG0B4DZF2qnaERI9q7qwIRLN7s,1252
8
+ livekit_plugins_cartesia-0.4.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
9
+ livekit_plugins_cartesia-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_cartesia-0.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.1.0)
2
+ Generator: setuptools (72.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
2
- livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
- livekit/plugins/cartesia/models.py,sha256=T1iPQ18h4-o5rgSW236PDc73qp5zR9k4r_qNCl3XPWc,335
4
- livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/cartesia/tts.py,sha256=uklD9fIYL8QWUSiyypFDgflkie9VhTu1C-x4YwJcDCU,10283
6
- livekit/plugins/cartesia/version.py,sha256=G5iYozum4q7UpHwW43F7QfhzUfwcncPxBZ0gmUGsd5I,600
7
- livekit_plugins_cartesia-0.3.0.dist-info/METADATA,sha256=iJcOyrkQ-0yPK_lYtR-eEbIDav84xlN7DUvwncx7OpQ,1252
8
- livekit_plugins_cartesia-0.3.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
9
- livekit_plugins_cartesia-0.3.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_cartesia-0.3.0.dist-info/RECORD,,