PyPI - livekit-plugins-cartesia - Versions diffs - 1.0.0.dev5__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl - Mend

livekit-plugins-cartesia 1.0.0.dev5__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

livekit/plugins/cartesia/models.py CHANGED Viewed

@@ -8,7 +8,7 @@ TTSEncoding = Literal[
     # "pcm_alaw",
 ]
-TTSModels = Literal["sonic-english", "sonic-multilingual"]
+TTSModels = Literal["sonic", "sonic-2", "sonic-lite", "sonic-preview", "sonic-turbo"]
 TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
 TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238"
 TTSVoiceSpeed = Literal["fastest", "fast", "normal", "slow", "slowest"]

livekit/plugins/cartesia/tts.py CHANGED Viewed

@@ -33,9 +33,21 @@ from livekit.agents import (
     tts,
     utils,
 )
+from livekit.agents.types import (
+    DEFAULT_API_CONNECT_OPTIONS,
+    NOT_GIVEN,
+    NotGivenOr,
+)
+from livekit.agents.utils import is_given
 from .log import logger
-from .models import TTSDefaultVoiceId, TTSEncoding, TTSModels, TTSVoiceEmotion, TTSVoiceSpeed
+from .models import (
+    TTSDefaultVoiceId,
+    TTSEncoding,
+    TTSModels,
+    TTSVoiceEmotion,
+    TTSVoiceSpeed,
+)
 API_AUTH_HEADER = "X-API-Key"
 API_VERSION_HEADER = "Cartesia-Version"
@@ -51,8 +63,8 @@ class _TTSOptions:
     encoding: TTSEncoding
     sample_rate: int
     voice: str | list[float]
-    speed: TTSVoiceSpeed | float | None
-    emotion: list[TTSVoiceEmotion | str] | None
+    speed: NotGivenOr[TTSVoiceSpeed | float]
+    emotion: NotGivenOr[list[TTSVoiceEmotion | str]]
     api_key: str
     language: str
     base_url: str
@@ -68,14 +80,14 @@ class TTS(tts.TTS):
     def __init__(
         self,
         *,
-        model: TTSModels | str = "sonic",
+        model: TTSModels | str = "sonic-2",
         language: str = "en",
         encoding: TTSEncoding = "pcm_s16le",
         voice: str | list[float] = TTSDefaultVoiceId,
-        speed: TTSVoiceSpeed | float | None = None,
-        emotion: list[TTSVoiceEmotion | str] | None = None,
+        speed: NotGivenOr[TTSVoiceSpeed | float] = NOT_GIVEN,
+        emotion: NotGivenOr[list[TTSVoiceEmotion | str]] = NOT_GIVEN,
         sample_rate: int = 24000,
-        api_key: str | None = None,
+        api_key: NotGivenOr[str] = NOT_GIVEN,
         http_session: aiohttp.ClientSession | None = None,
         base_url: str = "https://api.cartesia.ai",
     ) -> None:
@@ -85,7 +97,7 @@ class TTS(tts.TTS):
         See https://docs.cartesia.ai/reference/web-socket/stream-speech/stream-speech for more details on the the Cartesia API.
         Args:
-            model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
+            model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-2".
             language (str, optional): The language code for synthesis. Defaults to "en".
             encoding (TTSEncoding, optional): The audio encoding format. Defaults to "pcm_s16le".
             voice (str | list[float], optional): The voice ID or embedding array.
@@ -95,16 +107,15 @@ class TTS(tts.TTS):
             api_key (str, optional): The Cartesia API key. If not provided, it will be read from the CARTESIA_API_KEY environment variable.
             http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
             base_url (str, optional): The base URL for the Cartesia API. Defaults to "https://api.cartesia.ai".
-        """
+        """  # noqa: E501
         super().__init__(
             capabilities=tts.TTSCapabilities(streaming=True),
             sample_rate=sample_rate,
             num_channels=NUM_CHANNELS,
         )
-        api_key = api_key or os.environ.get("CARTESIA_API_KEY")
-        if not api_key:
+        cartesia_api_key = api_key if is_given(api_key) else os.environ.get("CARTESIA_API_KEY")
+        if not cartesia_api_key:
             raise ValueError("CARTESIA_API_KEY must be set")
         self._opts = _TTSOptions(
@@ -115,7 +126,7 @@ class TTS(tts.TTS):
             voice=voice,
             speed=speed,
             emotion=emotion,
-            api_key=api_key,
+            api_key=cartesia_api_key,
             base_url=base_url,
         )
         self._session = http_session
@@ -149,11 +160,11 @@ class TTS(tts.TTS):
     def update_options(
         self,
         *,
-        model: TTSModels | None = None,
-        language: str | None = None,
-        voice: str | list[float] | None = None,
-        speed: TTSVoiceSpeed | float | None = None,
-        emotion: list[TTSVoiceEmotion | str] | None = None,
+        model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
+        language: NotGivenOr[str] = NOT_GIVEN,
+        voice: NotGivenOr[str | list[float]] = NOT_GIVEN,
+        speed: NotGivenOr[TTSVoiceSpeed | float] = NOT_GIVEN,
+        emotion: NotGivenOr[list[TTSVoiceEmotion | str]] = NOT_GIVEN,
     ) -> None:
         """
         Update the Text-to-Speech (TTS) configuration options.
@@ -162,24 +173,28 @@ class TTS(tts.TTS):
         and emotion. If any parameter is not provided, the existing value will be retained.
         Args:
-            model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
+            model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-2".
             language (str, optional): The language code for synthesis. Defaults to "en".
             voice (str | list[float], optional): The voice ID or embedding array.
             speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
             emotion (list[TTSVoiceEmotion], optional): Voice Control - Emotion (https://docs.cartesia.ai/user-guides/voice-control)
         """
-        self._opts.model = model or self._opts.model
-        self._opts.language = language or self._opts.language
-        self._opts.voice = voice or self._opts.voice
-        self._opts.speed = speed or self._opts.speed
-        if emotion is not None:
+        if is_given(model):
+            self._opts.model = model
+        if is_given(language):
+            self._opts.language = language
+        if is_given(voice):
+            self._opts.voice = voice
+        if is_given(speed):
+            self._opts.speed = speed
+        if is_given(emotion):
             self._opts.emotion = emotion
     def synthesize(
         self,
         text: str,
         *,
-        conn_options: APIConnectOptions | None = None,
+        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
     ) -> ChunkedStream:
         return ChunkedStream(
             tts=self,
@@ -189,7 +204,9 @@ class TTS(tts.TTS):
             session=self._ensure_session(),
         )
-    def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
+    def stream(
+        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
+    ) -> SynthesizeStream:
         return SynthesizeStream(
             tts=self,
             pool=self._pool,
@@ -214,7 +231,7 @@ class ChunkedStream(tts.ChunkedStream):
         input_text: str,
         opts: _TTSOptions,
         session: aiohttp.ClientSession,
-        conn_options: APIConnectOptions | None = None,
+        conn_options: APIConnectOptions,
     ) -> None:
         super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
         self._opts, self._session = opts, session
@@ -368,17 +385,18 @@ class SynthesizeStream(tts.SynthesizeStream):
 def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
     voice: dict[str, Any] = {}
-    if isinstance(opts.voice, str):
-        voice["mode"] = "id"
-        voice["id"] = opts.voice
-    else:
-        voice["mode"] = "embedding"
-        voice["embedding"] = opts.voice
+    if is_given(opts.voice):
+        if isinstance(opts.voice, str):
+            voice["mode"] = "id"
+            voice["id"] = opts.voice
+        else:
+            voice["mode"] = "embedding"
+            voice["embedding"] = opts.voice
     voice_controls: dict = {}
-    if opts.speed is not None:
+    if is_given(opts.speed):
         voice_controls["speed"] = opts.speed
-    if opts.emotion is not None:
+    if is_given(opts.emotion):
         voice_controls["emotion"] = opts.emotion
     if voice_controls:

livekit/plugins/cartesia/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.0.0.dev5"
+__version__ = '1.0.0.rc2'

{livekit_plugins_cartesia-1.0.0.dev5.dist-info → livekit_plugins_cartesia-1.0.0rc2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-cartesia
-Version: 1.0.0.dev5
+Version: 1.0.0rc2
 Summary: LiveKit Agents Plugin for Cartesia
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
 Classifier: Topic :: Multimedia :: Video
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Requires-Dist: livekit-agents>=1.0.0.dev5
+Requires-Dist: livekit-agents>=1.0.0.rc2
 Description-Content-Type: text/markdown
 # LiveKit Plugins Cartesia

livekit_plugins_cartesia-1.0.0rc2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
+livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
+livekit/plugins/cartesia/models.py,sha256=KGY-r2luJuUNY6a3nnB0Rx-5Td12hikk-GtYLnqvysE,977
+livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/cartesia/tts.py,sha256=rHDiypiCdn0sKykNvYcl5Cf6SZE2XmPRlnLGQA4m7Ks,14443
+livekit/plugins/cartesia/version.py,sha256=AHsNOknrNG9prN-fv_7X-KI5-O8ZMrUnTDyl9ObQIzY,604
+livekit_plugins_cartesia-1.0.0rc2.dist-info/METADATA,sha256=oy_PfwJoIM-pnM94BvVFtb-C7idxqZzXbXHjApy86WA,1262
+livekit_plugins_cartesia-1.0.0rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_cartesia-1.0.0rc2.dist-info/RECORD,,

livekit_plugins_cartesia-1.0.0.dev5.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
-livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
-livekit/plugins/cartesia/models.py,sha256=56CJgo7my-w-vpedir_ImV_aqKASeLihE5DbcCCgGJI,950
-livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/cartesia/tts.py,sha256=Zub4MXXVXQgV0t6al_uidDWH3BTVaYftyVbAFbkTU-U,13999
-livekit/plugins/cartesia/version.py,sha256=pXgCpV03nQI-5Kk-74NFyAdw1htj2cx6unwQHipEcfE,605
-livekit_plugins_cartesia-1.0.0.dev5.dist-info/METADATA,sha256=1Pjqf3rcfp0L7L93TlgNocQq96krnWo2WFP4rTkCV90,1265
-livekit_plugins_cartesia-1.0.0.dev5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_cartesia-1.0.0.dev5.dist-info/RECORD,,

{livekit_plugins_cartesia-1.0.0.dev5.dist-info → livekit_plugins_cartesia-1.0.0rc2.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-cartesia 1.0.0.dev5__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl

livekit-plugins-cartesia 1.0.0.dev5__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl