PyPI - livekit-plugins-cartesia - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

livekit-plugins-cartesia 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

livekit/plugins/cartesia/models.py CHANGED Viewed

@@ -11,4 +11,4 @@ TTSEncoding = Literal[
 TTSModels = Literal["sonic-english", "sonic-multilingual"]
 TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
-TTSDefaultVoiceId = "b7d50908-b17c-442d-ad8d-810c63997ed9"
+TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0"

livekit/plugins/cartesia/tts.py CHANGED Viewed

@@ -19,6 +19,7 @@ import base64
 import json
 import os
 from dataclasses import dataclass
+from typing import Any
 import aiohttp
 from livekit.agents import tokenize, tts, utils
@@ -42,7 +43,6 @@ class _TTSOptions:
     voice: str | list[float]
     api_key: str
     language: str
-    word_tokenizer: tokenize.WordTokenizer
 class TTS(tts.TTS):
@@ -56,9 +56,6 @@ class TTS(tts.TTS):
         sample_rate: int = 24000,
         api_key: str | None = None,
         http_session: aiohttp.ClientSession | None = None,
-        word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
-            ignore_punctuation=False
-        ),
     ) -> None:
         super().__init__(
             capabilities=tts.TTSCapabilities(streaming=True),
@@ -77,7 +74,6 @@ class TTS(tts.TTS):
             sample_rate=sample_rate,
             voice=voice,
             api_key=api_key,
-            word_tokenizer=word_tokenizer,
         )
         self._session = http_session
@@ -145,7 +141,9 @@ class SynthesizeStream(tts.SynthesizeStream):
     ):
         super().__init__()
         self._opts, self._session = opts, session
-        self._buf = ""
+        self._sent_tokenizer_stream = tokenize.basic.SentenceTokenizer(
+            min_sentence_len=BUFFERED_WORDS_COUNT
+        ).stream()
     @utils.log_exceptions(logger=logger)
     async def _main_task(self) -> None:
@@ -176,51 +174,29 @@ class SynthesizeStream(tts.SynthesizeStream):
     async def _run_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
         request_id = utils.shortuuid()
-        pending_segments = []
-        async def send_task():
+        async def sentence_stream_task():
             base_pkt = _to_cartesia_options(self._opts)
-            def _new_segment():
-                segment_id = utils.shortuuid()
-                pending_segments.append(segment_id)
-                return segment_id
-            current_segment_id: str | None = _new_segment()
+            async for ev in self._sent_tokenizer_stream:
+                token_pkt = base_pkt.copy()
+                token_pkt["context_id"] = request_id
+                token_pkt["transcript"] = ev.token + " "
+                token_pkt["continue"] = True
+                await ws.send_str(json.dumps(token_pkt))
+            end_pkt = base_pkt.copy()
+            end_pkt["context_id"] = request_id
+            end_pkt["transcript"] = " "
+            end_pkt["continue"] = False
+            await ws.send_str(json.dumps(end_pkt))
+        async def input_task():
             async for data in self._input_ch:
                 if isinstance(data, self._FlushSentinel):
-                    if current_segment_id is None:
-                        continue
-                    end_pkt = base_pkt.copy()
-                    end_pkt["context_id"] = current_segment_id
-                    end_pkt["transcript"] = self._buf + " "
-                    end_pkt["continue"] = False
-                    await ws.send_str(json.dumps(end_pkt))
-                    current_segment_id = None
-                    self._buf = ""
-                elif data:
-                    if current_segment_id is None:
-                        current_segment_id = _new_segment()
-                    self._buf += data
-                    words = self._opts.word_tokenizer.tokenize(text=self._buf)
-                    if len(words) < BUFFERED_WORDS_COUNT + 1:
-                        continue
-                    data = self._opts.word_tokenizer.format_words(words[:-1]) + " "
-                    self._buf = words[-1]
-                    token_pkt = base_pkt.copy()
-                    token_pkt["context_id"] = current_segment_id
-                    token_pkt["transcript"] = data
-                    token_pkt["continue"] = True
-                    await ws.send_str(json.dumps(token_pkt))
-            if len(pending_segments) == 0:
-                await ws.close()
+                    self._sent_tokenizer_stream.flush()
+                    continue
+                self._sent_tokenizer_stream.push_text(data)
+            self._sent_tokenizer_stream.end_input()
         async def recv_task():
             audio_bstream = utils.audio.AudioByteStream(
@@ -243,6 +219,7 @@ class SynthesizeStream(tts.SynthesizeStream):
                 data = json.loads(msg.data)
                 segment_id = data.get("context_id")
+                # Once we receive audio for a segment, we can start a new segment
                 if data.get("data"):
                     b64data = base64.b64decode(data["data"])
                     for frame in audio_bstream.write(b64data):
@@ -263,8 +240,7 @@ class SynthesizeStream(tts.SynthesizeStream):
                             )
                         )
-                    pending_segments.remove(segment_id)
-                    if len(pending_segments) == 0 and self._input_ch.closed:
+                    if segment_id == request_id:
                         # we're not going to receive more frames, close the connection
                         await ws.close()
                         break
@@ -272,7 +248,8 @@ class SynthesizeStream(tts.SynthesizeStream):
                     logger.error("unexpected Cartesia message %s", data)
         tasks = [
-            asyncio.create_task(send_task()),
+            asyncio.create_task(input_task()),
+            asyncio.create_task(sentence_stream_task()),
             asyncio.create_task(recv_task()),
         ]
@@ -282,8 +259,8 @@ class SynthesizeStream(tts.SynthesizeStream):
             await utils.aio.gracefully_cancel(*tasks)
-def _to_cartesia_options(opts: _TTSOptions) -> dict:
-    voice: dict = {}
+def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
+    voice: dict[str, Any] = {}
     if isinstance(opts.voice, str):
         voice["mode"] = "id"
         voice["id"] = opts.voice

livekit/plugins/cartesia/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.3.0"
+__version__ = "0.4.0"

{livekit_plugins_cartesia-0.3.0.dist-info → livekit_plugins_cartesia-0.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-cartesia
-Version: 0.3.0
+Version: 0.4.0
 Summary: LiveKit Agents Plugin for Cartesia
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0

livekit_plugins_cartesia-0.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
+livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
+livekit/plugins/cartesia/models.py,sha256=ZoSyV2ap_LqAIgvBvkmukkPxQR9DfKb3Z3oHtWxMiVg,335
+livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/cartesia/tts.py,sha256=sdiiWinOZR5EBkQFwa3GZAGrkgzXY1-aSRiDZ34K8ww,9527
+livekit/plugins/cartesia/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
+livekit_plugins_cartesia-0.4.0.dist-info/METADATA,sha256=BGgicrqKsylOpTbUcRG0B4DZF2qnaERI9q7qwIRLN7s,1252
+livekit_plugins_cartesia-0.4.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+livekit_plugins_cartesia-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_cartesia-0.4.0.dist-info/RECORD,,

{livekit_plugins_cartesia-0.3.0.dist-info → livekit_plugins_cartesia-0.4.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (72.2.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

livekit_plugins_cartesia-0.3.0.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
-livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
-livekit/plugins/cartesia/models.py,sha256=T1iPQ18h4-o5rgSW236PDc73qp5zR9k4r_qNCl3XPWc,335
-livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/cartesia/tts.py,sha256=uklD9fIYL8QWUSiyypFDgflkie9VhTu1C-x4YwJcDCU,10283
-livekit/plugins/cartesia/version.py,sha256=G5iYozum4q7UpHwW43F7QfhzUfwcncPxBZ0gmUGsd5I,600
-livekit_plugins_cartesia-0.3.0.dist-info/METADATA,sha256=iJcOyrkQ-0yPK_lYtR-eEbIDav84xlN7DUvwncx7OpQ,1252
-livekit_plugins_cartesia-0.3.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-livekit_plugins_cartesia-0.3.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_cartesia-0.3.0.dist-info/RECORD,,

{livekit_plugins_cartesia-0.3.0.dist-info → livekit_plugins_cartesia-0.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-cartesia 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

livekit-plugins-cartesia 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl