PyPI - livekit-plugins-elevenlabs - Versions diffs - 0.7.5__py3-none-any.whl → 0.7.7__py3-none-any.whl - Mend

livekit-plugins-elevenlabs 0.7.5__py3-none-any.whl → 0.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

livekit/plugins/elevenlabs/__init__.py CHANGED Viewed

@@ -37,3 +37,12 @@ class ElevenLabsPlugin(Plugin):
 Plugin.register_plugin(ElevenLabsPlugin())
+# Cleanup docs of unexported modules
+_module = dir()
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+__pdoc__ = {}
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False

livekit/plugins/elevenlabs/tts.py CHANGED Viewed

@@ -24,7 +24,14 @@ from typing import Any, List, Literal
 import aiohttp
 from livekit import rtc
-from livekit.agents import tokenize, tts, utils
+from livekit.agents import (
+    APIConnectionError,
+    APIStatusError,
+    APITimeoutError,
+    tokenize,
+    tts,
+    utils,
+)
 from .log import logger
 from .models import TTSEncoding, TTSModels
@@ -79,7 +86,8 @@ AUTHORIZATION_HEADER = "xi-api-key"
 class _TTSOptions:
     api_key: str
     voice: Voice
-    model_id: TTSModels
+    model: TTSModels | str
+    language: str | None
     base_url: str
     encoding: TTSEncoding
     sample_rate: int
@@ -94,7 +102,7 @@ class TTS(tts.TTS):
         self,
         *,
         voice: Voice = DEFAULT_VOICE,
-        model_id: TTSModels = "eleven_turbo_v2_5",
+        model: TTSModels | str = "eleven_turbo_v2_5",
         api_key: str | None = None,
         base_url: str | None = None,
         encoding: TTSEncoding = "mp3_22050_32",
@@ -105,12 +113,25 @@ class TTS(tts.TTS):
         enable_ssml_parsing: bool = False,
         chunk_length_schedule: list[int] = [80, 120, 200, 260],  # range is [50, 500]
         http_session: aiohttp.ClientSession | None = None,
+        # deprecated
+        model_id: TTSModels | str | None = None,
+        language: str | None = None,
     ) -> None:
         """
         Create a new instance of ElevenLabs TTS.
-        ``api_key`` must be set to your ElevenLabs API key, either using the argument or by setting
-        the ``ELEVEN_API_KEY`` environmental variable.
+        Args:
+            voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
+            model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
+            api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
+            base_url (str | None): Custom base URL for the API. Optional.
+            encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32".
+            streaming_latency (int): Latency in seconds for streaming. Defaults to 3.
+            word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
+            enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
+            chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
+            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
+            language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
         """
         super().__init__(
@@ -120,13 +141,22 @@ class TTS(tts.TTS):
             sample_rate=_sample_rate_from_format(encoding),
             num_channels=1,
         )
+        if model_id is not None:
+            logger.warning(
+                "model_id is deprecated and will be removed in 1.5.0, use model instead",
+            )
+            model = model_id
         api_key = api_key or os.environ.get("ELEVEN_API_KEY")
         if not api_key:
-            raise ValueError("ELEVEN_API_KEY must be set")
+            raise ValueError(
+                "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
+            )
         self._opts = _TTSOptions(
             voice=voice,
-            model_id=model_id,
+            model=model,
             api_key=api_key,
             base_url=base_url or API_BASE_URL_V1,
             encoding=encoding,
@@ -135,6 +165,7 @@ class TTS(tts.TTS):
             word_tokenizer=word_tokenizer,
             chunk_length_schedule=chunk_length_schedule,
             enable_ssml_parsing=enable_ssml_parsing,
+            language=language,
         )
         self._session = http_session
@@ -151,31 +182,43 @@ class TTS(tts.TTS):
         ) as resp:
             return _dict_to_voices_list(await resp.json())
+    def update_options(
+        self,
+        *,
+        voice: Voice = DEFAULT_VOICE,
+        model: TTSModels | str = "eleven_turbo_v2_5",
+    ) -> None:
+        """
+        Args:
+            voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
+            model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
+        """
+        self._opts.model = model or self._opts.model
+        self._opts.voice = voice or self._opts.voice
     def synthesize(self, text: str) -> "ChunkedStream":
-        return ChunkedStream(text, self._opts, self._ensure_session())
+        return ChunkedStream(self, text, self._opts, self._ensure_session())
     def stream(self) -> "SynthesizeStream":
-        return SynthesizeStream(self._ensure_session(), self._opts)
+        return SynthesizeStream(self, self._ensure_session(), self._opts)
 class ChunkedStream(tts.ChunkedStream):
     """Synthesize using the chunked api endpoint"""
     def __init__(
-        self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
+        self, tts: TTS, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
     ) -> None:
-        super().__init__()
-        self._text, self._opts, self._session = text, opts, session
+        super().__init__(tts, text)
+        self._opts, self._session = opts, session
         if _encoding_from_format(self._opts.encoding) == "mp3":
             self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
-    @utils.log_exceptions(logger=logger)
     async def _main_task(self) -> None:
+        request_id = utils.shortuuid()
         bstream = utils.audio.AudioByteStream(
             sample_rate=self._opts.sample_rate, num_channels=1
         )
-        request_id = utils.shortuuid()
-        segment_id = utils.shortuuid()
         voice_settings = (
             _strip_nones(dataclasses.asdict(self._opts.voice.settings))
@@ -183,50 +226,59 @@ class ChunkedStream(tts.ChunkedStream):
             else None
         )
         data = {
-            "text": self._text,
-            "model_id": self._opts.model_id,
+            "text": self._input_text,
+            "model_id": self._opts.model,
             "voice_settings": voice_settings,
         }
-        async with self._session.post(
-            _synthesize_url(self._opts),
-            headers={AUTHORIZATION_HEADER: self._opts.api_key},
-            json=data,
-        ) as resp:
-            if not resp.content_type.startswith("audio/"):
-                content = await resp.text()
-                logger.error("11labs returned non-audio data: %s", content)
-                return
-            encoding = _encoding_from_format(self._opts.encoding)
-            if encoding == "mp3":
-                async for bytes_data, _ in resp.content.iter_chunks():
-                    for frame in self._mp3_decoder.decode_chunk(bytes_data):
-                        for frame in bstream.write(frame.data.tobytes()):
+        try:
+            async with self._session.post(
+                _synthesize_url(self._opts),
+                headers={AUTHORIZATION_HEADER: self._opts.api_key},
+                json=data,
+            ) as resp:
+                if not resp.content_type.startswith("audio/"):
+                    content = await resp.text()
+                    logger.error("11labs returned non-audio data: %s", content)
+                    return
+                encoding = _encoding_from_format(self._opts.encoding)
+                if encoding == "mp3":
+                    async for bytes_data, _ in resp.content.iter_chunks():
+                        for frame in self._mp3_decoder.decode_chunk(bytes_data):
+                            for frame in bstream.write(frame.data.tobytes()):
+                                self._event_ch.send_nowait(
+                                    tts.SynthesizedAudio(
+                                        request_id=request_id,
+                                        frame=frame,
+                                    )
+                                )
+                else:
+                    async for bytes_data, _ in resp.content.iter_chunks():
+                        for frame in bstream.write(bytes_data):
                             self._event_ch.send_nowait(
                                 tts.SynthesizedAudio(
                                     request_id=request_id,
-                                    segment_id=segment_id,
                                     frame=frame,
                                 )
                             )
-            else:
-                async for bytes_data, _ in resp.content.iter_chunks():
-                    for frame in bstream.write(bytes_data):
-                        self._event_ch.send_nowait(
-                            tts.SynthesizedAudio(
-                                request_id=request_id,
-                                segment_id=segment_id,
-                                frame=frame,
-                            )
-                        )
-            for frame in bstream.flush():
-                self._event_ch.send_nowait(
-                    tts.SynthesizedAudio(
-                        request_id=request_id, segment_id=segment_id, frame=frame
+                for frame in bstream.flush():
+                    self._event_ch.send_nowait(
+                        tts.SynthesizedAudio(request_id=request_id, frame=frame)
                     )
-                )
+        except asyncio.TimeoutError as e:
+            raise APITimeoutError() from e
+        except aiohttp.ClientResponseError as e:
+            raise APIStatusError(
+                message=e.message,
+                status_code=e.status,
+                request_id=None,
+                body=None,
+            ) from e
+        except Exception as e:
+            raise APIConnectionError() from e
 class SynthesizeStream(tts.SynthesizeStream):
@@ -234,10 +286,11 @@ class SynthesizeStream(tts.SynthesizeStream):
     def __init__(
         self,
+        tts: TTS,
         session: aiohttp.ClientSession,
         opts: _TTSOptions,
     ):
-        super().__init__()
+        super().__init__(tts)
         self._opts, self._session = opts, session
         self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
@@ -360,6 +413,26 @@ class SynthesizeStream(tts.SynthesizeStream):
         async def recv_task():
             nonlocal eos_sent
+            audio_bstream = utils.audio.AudioByteStream(
+                sample_rate=self._opts.sample_rate,
+                num_channels=1,
+            )
+            last_frame: rtc.AudioFrame | None = None
+            def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
+                nonlocal last_frame
+                if last_frame is not None:
+                    self._event_ch.send_nowait(
+                        tts.SynthesizedAudio(
+                            request_id=request_id,
+                            segment_id=segment_id,
+                            frame=last_frame,
+                            is_final=is_final,
+                        )
+                    )
+                    last_frame = None
             while True:
                 msg = await ws_conn.receive()
@@ -378,11 +451,33 @@ class SynthesizeStream(tts.SynthesizeStream):
                     logger.warning("unexpected 11labs message type %s", msg.type)
                     continue
-                self._process_stream_event(
-                    data=json.loads(msg.data),
-                    request_id=request_id,
-                    segment_id=segment_id,
-                )
+                data = json.loads(msg.data)
+                encoding = _encoding_from_format(self._opts.encoding)
+                if data.get("audio"):
+                    b64data = base64.b64decode(data["audio"])
+                    if encoding == "mp3":
+                        for frame in self._mp3_decoder.decode_chunk(b64data):
+                            for frame in audio_bstream.write(frame.data.tobytes()):
+                                _send_last_frame(segment_id=segment_id, is_final=False)
+                                last_frame = frame
+                    else:
+                        for frame in audio_bstream.write(b64data):
+                            _send_last_frame(segment_id=segment_id, is_final=False)
+                            last_frame = frame
+                elif data.get("isFinal"):
+                    for frame in audio_bstream.flush():
+                        _send_last_frame(segment_id=segment_id, is_final=False)
+                        last_frame = frame
+                    _send_last_frame(segment_id=segment_id, is_final=True)
+                    pass
+                elif data.get("error"):
+                    logger.error("11labs reported an error: %s", data["error"])
+                else:
+                    logger.error("unexpected 11labs message %s", data)
         tasks = [
             asyncio.create_task(send_task()),
@@ -394,40 +489,6 @@ class SynthesizeStream(tts.SynthesizeStream):
         finally:
             await utils.aio.gracefully_cancel(*tasks)
-    def _process_stream_event(
-        self, *, data: dict, request_id: str, segment_id: str
-    ) -> None:
-        encoding = _encoding_from_format(self._opts.encoding)
-        if data.get("audio"):
-            b64data = base64.b64decode(data["audio"])
-            if encoding == "mp3":
-                for frame in self._mp3_decoder.decode_chunk(b64data):
-                    self._event_ch.send_nowait(
-                        tts.SynthesizedAudio(
-                            request_id=request_id,
-                            segment_id=segment_id,
-                            frame=frame,
-                        )
-                    )
-            else:
-                chunk_frame = rtc.AudioFrame(
-                    data=b64data,
-                    sample_rate=self._opts.sample_rate,
-                    num_channels=1,
-                    samples_per_channel=len(b64data) // 2,
-                )
-                self._event_ch.send_nowait(
-                    tts.SynthesizedAudio(
-                        request_id=request_id,
-                        segment_id=segment_id,
-                        frame=chunk_frame,
-                    )
-                )
-        elif data.get("error"):
-            logger.error("11labs reported an error: %s", data["error"])
-        elif not data.get("isFinal"):
-            logger.error("unexpected 11labs message %s", data)
 def _dict_to_voices_list(data: dict[str, Any]):
     voices: List[Voice] = []
@@ -450,7 +511,7 @@ def _strip_nones(data: dict[str, Any]):
 def _synthesize_url(opts: _TTSOptions) -> str:
     base_url = opts.base_url
     voice_id = opts.voice.id
-    model_id = opts.model_id
+    model_id = opts.model
     output_format = opts.encoding
     latency = opts.streaming_latency
     return (
@@ -462,12 +523,16 @@ def _synthesize_url(opts: _TTSOptions) -> str:
 def _stream_url(opts: _TTSOptions) -> str:
     base_url = opts.base_url
     voice_id = opts.voice.id
-    model_id = opts.model_id
+    model_id = opts.model
     output_format = opts.encoding
     latency = opts.streaming_latency
     enable_ssml = str(opts.enable_ssml_parsing).lower()
-    return (
+    language = opts.language
+    url = (
         f"{base_url}/text-to-speech/{voice_id}/stream-input?"
         f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}&"
         f"enable_ssml_parsing={enable_ssml}"
     )
+    if language is not None:
+        url += f"&language_code={language}"
+    return url

livekit/plugins/elevenlabs/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.7.5"
+__version__ = "0.7.7"

{livekit_plugins_elevenlabs-0.7.5.dist-info → livekit_plugins_elevenlabs-0.7.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-elevenlabs
-Version: 0.7.5
+Version: 0.7.7
 Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9.0
 Description-Content-Type: text/markdown
-Requires-Dist: livekit-agents[codecs] >=0.8.0.dev0
+Requires-Dist: livekit-agents[codecs] >=0.11
 # LiveKit Plugins Elevenlabs

livekit_plugins_elevenlabs-0.7.7.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
+livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
+livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
+livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/elevenlabs/tts.py,sha256=GgpXXBumLW2r1vKGZ_k-k8rYCQJVahioPMr2aJeSWwk,18760
+livekit/plugins/elevenlabs/version.py,sha256=78n--2R9Gwuh35Oy92hkYHXCMK_Er2s6VCfDuPQa2Ic,600
+livekit_plugins_elevenlabs-0.7.7.dist-info/METADATA,sha256=nTXxc7ODYH7VljmXYPAeNUjMRTE20XB7fBl0micpQQ4,1305
+livekit_plugins_elevenlabs-0.7.7.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+livekit_plugins_elevenlabs-0.7.7.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_elevenlabs-0.7.7.dist-info/RECORD,,

{livekit_plugins_elevenlabs-0.7.5.dist-info → livekit_plugins_elevenlabs-0.7.7.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (75.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
-livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
-livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
-livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/elevenlabs/tts.py,sha256=L9d4KppfqP9tP-PvaE3YKbezovhSboejmIk97xOmdEA,15868
-livekit/plugins/elevenlabs/version.py,sha256=4VoyPg1xoLZO0SP38sbtfe-ePEx82VqZVWRBBUr1wgA,600
-livekit_plugins_elevenlabs-0.7.5.dist-info/METADATA,sha256=KMqAU3UsRzO4wFl-Y8GfT5-Bb7s_bnm8JmuETbQ2cJo,1311
-livekit_plugins_elevenlabs-0.7.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-livekit_plugins_elevenlabs-0.7.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD,,

{livekit_plugins_elevenlabs-0.7.5.dist-info → livekit_plugins_elevenlabs-0.7.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-elevenlabs 0.7.5__py3-none-any.whl → 0.7.7__py3-none-any.whl

livekit-plugins-elevenlabs 0.7.5__py3-none-any.whl → 0.7.7__py3-none-any.whl