PyPI - livekit-plugins-cartesia - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.0.dev1__py3-none-any.whl - Mend

livekit-plugins-cartesia 0.1.1__py3-none-any.whl → 0.2.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

livekit/plugins/cartesia/models.py CHANGED Viewed

@@ -9,39 +9,6 @@ TTSEncoding = Literal[
 ]
-TTSModels = Literal["upbeat-moon"]
-# fmt: off
-# Barbershop Man in upbeat-moon
-TTSDefaultVoiceEmbedding: list[float] = [
-    -0.033633083, 0.072083704, -0.01807767, -0.083488315, -0.04407617, 0.0022592682, 0.070505895,
-    0.023946615, -0.04788024, -0.06388413, -0.0716355, -0.0022612812, -0.0053448505, -0.07848381,
-    0.0348162, -0.053745482, -0.092399485, -0.02950225, 0.028591828, -0.10556894, 0.023313355,
-    0.06224387, 0.0362463, 0.029258432, 0.10769641, 0.043595582, -0.058543224, -0.080402784,
-    -0.0953816, -0.008988032, -0.0028981369, -0.004752721, -0.20742874, 0.058907595, 0.08813939,
-    -0.06192675, 0.099082634, -0.09661578, -0.0077761724, -0.013982456, -0.025798267, 0.04467142,
-    0.026222011, 0.023023574, 0.011227064, -0.17462021, -0.09880612, -0.1521035, -0.060464993,
-    -0.04735665, -0.09725187, -0.006127679, 0.15818526, -0.039493002, -0.067719474, 0.0066190436,
-    -0.10636633, 0.17073768, -0.051717706, 0.03186961, -0.020547207, -0.02244247, 0.013196935,
-    -0.06431055, -0.115360335, 0.016918058, -0.033195216, 0.11255181, 0.020366343, -0.041032124,
-    0.08780918, -0.040567942, 0.057276532, 0.05848221, -0.077479474, -0.073524915, -0.01913317,
-    -0.029291833, 0.11210393, -0.09859328, 0.2152541, -0.022976823, 0.028627992, -0.039598297,
-    0.041829932, -0.05593181, -0.06444655, -0.018057477, -0.008098263, 0.05994528, 0.10430693,
-    -0.13121894, -0.06512868, -0.026126215, 0.046727825, -0.17180993, -0.10577226, -0.08610466,
-    0.008862588, 0.09547498, -0.010965332, -0.061217085, -0.038954042, 0.019930292, -0.017192135,
-    0.007296275, 0.03273872, 0.04389937, -0.056483064, 0.003420891, -0.10319067, -0.015706042,
-    0.1308774, -0.0018035866, -0.03582506, 0.077131025, 0.013398928, 0.003188886, 0.12039741,
-    -0.033974767, 0.06899378, -0.059775922, -0.026934423, 0.028482193, 0.100996524, 0.004498743,
-    -0.02291186, 0.078752205, -0.0063796206, 0.04206536, 0.05721349, 0.06290694, 0.06130212,
-    0.096969016, -0.057664312, -0.16727506, -0.035220966, 0.090760484, 0.010039947, 0.06513242,
-    0.011055657, -0.004258431, -0.08316792, -0.15650468, -0.076931365, 0.11385587, -0.038372636,
-    0.015648656, -0.12029895, -0.06604956, 0.009441591, -0.11912808, 0.013378132, 0.029525978,
-    -0.0056742397, -0.0075976513, 0.019999338, -0.05521377, -0.07650746, -0.017710293, -0.033986397,
-    -0.047768556, 0.13857274, 0.099290825, 0.11736938, 0.017834296, -0.07140237, -0.052047748,
-    -0.06398965, -0.037033975, -0.061061256, -0.03330076, -0.024472248, -0.059656, 0.05359946,
-    -0.043915518, -0.086325996, 0.14189173, 0.021086395, 0.02945159, 0.1029604, 0.018490415,
-    -0.028736332, -0.025272416, -0.06082937, -0.031339463, -0.0007249595, 0.025595888, 0.007144545,
-    -0.16938712, -0.1160664, -0.0654145,
-]
-# fmt: on
+TTSModels = Literal["sonic-english", "sonic-multilingual"]
+TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
+TTSDefaultVoiceId = "248be419-c632-4f23-adf1-5324ed7dbf1d"

livekit/plugins/cartesia/tts.py CHANGED Viewed

@@ -14,18 +14,14 @@
 from __future__ import annotations
-import asyncio
-import contextlib
 import os
 from dataclasses import dataclass
-from typing import Optional
 import aiohttp
-from livekit import rtc
 from livekit.agents import tts, utils
 from .log import logger
-from .models import TTSDefaultVoiceEmbedding, TTSEncoding, TTSModels
+from .models import TTSDefaultVoiceId, TTSEncoding, TTSModels
 API_AUTH_HEADER = "X-API-Key"
 API_VERSION_HEADER = "Cartesia-Version"
@@ -39,21 +35,23 @@ class _TTSOptions:
     sample_rate: int
     voice: str | list[float]
     api_key: str
+    language: str
 class TTS(tts.TTS):
     def __init__(
         self,
         *,
-        model: TTSModels = "upbeat-moon",
+        model: TTSModels = "sonic-english",
+        language: str = "en",
         encoding: TTSEncoding = "pcm_s16le",
-        voice: str | list[float] = TTSDefaultVoiceEmbedding,
+        voice: str | list[float] = TTSDefaultVoiceId,
         sample_rate: int = 24000,
         api_key: str | None = None,
         http_session: aiohttp.ClientSession | None = None,
     ) -> None:
         super().__init__(
-            streaming_supported=False,
+            capabilities=tts.TTSCapabilities(streaming=False),
             sample_rate=sample_rate,
             num_channels=1,
         )
@@ -64,6 +62,7 @@ class TTS(tts.TTS):
         self._opts = _TTSOptions(
             model=model,
+            language=language,
             encoding=encoding,
             sample_rate=sample_rate,
             voice=voice,
@@ -73,14 +72,11 @@ class TTS(tts.TTS):
     def _ensure_session(self) -> aiohttp.ClientSession:
         if not self._session:
-            self._session = utils.http_session()
+            self._session = utils.http_context.http_session()
         return self._session
-    def synthesize(
-        self,
-        text: str,
-    ) -> "ChunkedStream":
+    def synthesize(self, text: str) -> "ChunkedStream":
         return ChunkedStream(text, self._opts, self._ensure_session())
@@ -88,14 +84,17 @@ class ChunkedStream(tts.ChunkedStream):
     def __init__(
         self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
     ) -> None:
-        self._opts = opts
-        self._text = text
-        self._session = session
-        self._main_task: asyncio.Task | None = None
-        self._queue = asyncio.Queue[Optional[tts.SynthesizedAudio]]()
+        super().__init__()
+        self._text, self._opts, self._session = text, opts, session
     @utils.log_exceptions(logger=logger)
-    async def _run(self):
+    async def _main_task(self):
+        bstream = utils.audio.AudioByteStream(
+            sample_rate=self._opts.sample_rate, num_channels=1
+        )
+        request_id = utils.shortuuid()
+        segment_id = utils.shortuuid()
         voice = {}
         if isinstance(self._opts.voice, str):
             voice["mode"] = "id"
@@ -104,77 +103,37 @@ class ChunkedStream(tts.ChunkedStream):
             voice["mode"] = "embedding"
             voice["embedding"] = self._opts.voice
-        try:
-            async with self._session.post(
-                "https://api.cartesia.ai/tts/bytes",
-                headers={
-                    API_AUTH_HEADER: f"{self._opts.api_key}",
-                    API_VERSION_HEADER: API_VERSION,
-                },
-                json={
-                    "model_id": self._opts.model,
-                    "transcript": self._text,
-                    "voice": voice,
-                    "output_format": {
-                        "container": "raw",
-                        "encoding": self._opts.encoding,
-                        "sample_rate": self._opts.sample_rate,
-                    },
-                },
-            ) as resp:
-                bytes_per_frame = (self._opts.sample_rate // 100) * 2
-                buf = bytearray()
-                async for data, _ in resp.content.iter_chunks():
-                    buf.extend(data)
-                    while len(buf) >= bytes_per_frame:
-                        frame_data = buf[:bytes_per_frame]
-                        buf = buf[bytes_per_frame:]
-                        self._queue.put_nowait(
-                            tts.SynthesizedAudio(
-                                text=self._text,
-                                data=rtc.AudioFrame(
-                                    data=frame_data,
-                                    sample_rate=self._opts.sample_rate,
-                                    num_channels=1,
-                                    samples_per_channel=len(frame_data) // 2,
-                                ),
-                            )
-                        )
-                # send any remaining data
-                if len(buf) > 0:
-                    self._queue.put_nowait(
+        data = {
+            "model_id": self._opts.model,
+            "transcript": self._text,
+            "voice": voice,
+            "output_format": {
+                "container": "raw",
+                "encoding": self._opts.encoding,
+                "sample_rate": self._opts.sample_rate,
+            },
+            "language": self._opts.language,
+        }
+        async with self._session.post(
+            "https://api.cartesia.ai/tts/bytes",
+            headers={
+                API_AUTH_HEADER: f"{self._opts.api_key}",
+                API_VERSION_HEADER: API_VERSION,
+            },
+            json=data,
+        ) as resp:
+            async for data, _ in resp.content.iter_chunks():
+                for frame in bstream.write(data):
+                    self._event_ch.send_nowait(
                         tts.SynthesizedAudio(
-                            text=self._text,
-                            data=rtc.AudioFrame(
-                                data=buf,
-                                sample_rate=self._opts.sample_rate,
-                                num_channels=1,
-                                samples_per_channel=len(buf) // 2,
-                            ),
+                            request_id=request_id, segment_id=segment_id, frame=frame
                         )
                     )
-        finally:
-            self._queue.put_nowait(None)
-    async def __anext__(self) -> tts.SynthesizedAudio:
-        if not self._main_task:
-            self._main_task = asyncio.create_task(self._run())
-        frame = await self._queue.get()
-        if frame is None:
-            raise StopAsyncIteration
-        return frame
-    async def aclose(self) -> None:
-        if not self._main_task:
-            return
-        self._main_task.cancel()
-        with contextlib.suppress(asyncio.CancelledError):
-            await self._main_task
+            for frame in bstream.flush():
+                self._event_ch.send_nowait(
+                    tts.SynthesizedAudio(
+                        request_id=request_id, segment_id=segment_id, frame=frame
+                    )
+                )

livekit/plugins/cartesia/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.1.1"
+__version__ = "0.2.0-dev.1"

{livekit_plugins_cartesia-0.1.1.dist-info → livekit_plugins_cartesia-0.2.0.dev1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-cartesia
-Version: 0.1.1
+Version: 0.2.0.dev1
 Summary: LiveKit Agents Plugin for Cartesia
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0

livekit_plugins_cartesia-0.2.0.dev1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+livekit/plugins/cartesia/__init__.py,sha256=_a8u7qqya1pjZTV19gNOpMKTO7ccAVZAeCukiDKAG-U,937
+livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
+livekit/plugins/cartesia/models.py,sha256=06S-Z-M90kB-kEOQsQk70xfQUD-TztU4ZIU_AfAyUMc,335
+livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/cartesia/tts.py,sha256=S5BMSVtsbNI_c2PpgyFK6wvleudmJZLTUt3ZmGNKlRI,4319
+livekit/plugins/cartesia/version.py,sha256=ypu6ttoYyC198vzZ_HCF0aB8kPNeygXXxDGxbrCf9s4,606
+livekit_plugins_cartesia-0.2.0.dev1.dist-info/METADATA,sha256=fCqrA_MFJSMweSuHmtD29iZoSVYzFD1TROXmay8AWcE,1250
+livekit_plugins_cartesia-0.2.0.dev1.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
+livekit_plugins_cartesia-0.2.0.dev1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_cartesia-0.2.0.dev1.dist-info/RECORD,,

{livekit_plugins_cartesia-0.1.1.dist-info → livekit_plugins_cartesia-0.2.0.dev1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.43.0)
+Generator: setuptools (71.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

livekit_plugins_cartesia-0.1.1.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-livekit/plugins/cartesia/__init__.py,sha256=_a8u7qqya1pjZTV19gNOpMKTO7ccAVZAeCukiDKAG-U,937
-livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
-livekit/plugins/cartesia/models.py,sha256=Qhl51ZScuB61bEzN1tBlHMuHO_kCXSzuVOicYa16EL8,2922
-livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/cartesia/tts.py,sha256=16BneZFQQsS-lB9Ug1HYj4QW7-VnNdpTJ0CW5A1b9EU,5725
-livekit/plugins/cartesia/version.py,sha256=3-nEcobvIJfZdV4yNIRuYpAGQ3svREnYIv2ivxoIZcQ,600
-livekit_plugins_cartesia-0.1.1.dist-info/METADATA,sha256=MfqyeBD4BF8NE4A8O9hIboC0WMmQ5EKo8RPzkGc8-a8,1245
-livekit_plugins_cartesia-0.1.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-livekit_plugins_cartesia-0.1.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_cartesia-0.1.1.dist-info/RECORD,,

{livekit_plugins_cartesia-0.1.1.dist-info → livekit_plugins_cartesia-0.2.0.dev1.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-cartesia 0.1.1__py3-none-any.whl → 0.2.0.dev1__py3-none-any.whl

livekit-plugins-cartesia 0.1.1__py3-none-any.whl → 0.2.0.dev1__py3-none-any.whl