PyPI - livekit-plugins-elevenlabs - Versions diffs - 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl - Mend

livekit-plugins-elevenlabs 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

livekit/plugins/elevenlabs/tts.py CHANGED Viewed

@@ -86,6 +86,7 @@ class _TTSOptions:
     streaming_latency: int
     word_tokenizer: tokenize.WordTokenizer
     chunk_length_schedule: list[int]
+    enable_ssml_parsing: bool
 class TTS(tts.TTS):
@@ -101,9 +102,17 @@ class TTS(tts.TTS):
         word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
             ignore_punctuation=False  # punctuation can help for intonation
         ),
+        enable_ssml_parsing: bool = False,
         chunk_length_schedule: list[int] = [80, 120, 200, 260],  # range is [50, 500]
         http_session: aiohttp.ClientSession | None = None,
     ) -> None:
+        """
+        Create a new instance of ElevenLabs TTS.
+        ``api_key`` must be set to your ElevenLabs API key, either using the argument or by setting
+        the ``ELEVEN_API_KEY`` environmental variable.
+        """
         super().__init__(
             capabilities=tts.TTSCapabilities(
                 streaming=True,
@@ -125,6 +134,7 @@ class TTS(tts.TTS):
             streaming_latency=streaming_latency,
             word_tokenizer=word_tokenizer,
             chunk_length_schedule=chunk_length_schedule,
+            enable_ssml_parsing=enable_ssml_parsing,
         )
         self._session = http_session
@@ -187,17 +197,19 @@ class ChunkedStream(tts.ChunkedStream):
                 content = await resp.text()
                 logger.error("11labs returned non-audio data: %s", content)
                 return
             encoding = _encoding_from_format(self._opts.encoding)
             if encoding == "mp3":
                 async for bytes_data, _ in resp.content.iter_chunks():
                     for frame in self._mp3_decoder.decode_chunk(bytes_data):
-                        self._event_ch.send_nowait(
-                            tts.SynthesizedAudio(
-                                request_id=request_id,
-                                segment_id=segment_id,
-                                frame=frame,
+                        for frame in bstream.write(frame.data.tobytes()):
+                            self._event_ch.send_nowait(
+                                tts.SynthesizedAudio(
+                                    request_id=request_id,
+                                    segment_id=segment_id,
+                                    frame=frame,
+                                )
                             )
-                        )
             else:
                 async for bytes_data, _ in resp.content.iter_chunks():
                     for frame in bstream.write(bytes_data):
@@ -209,12 +221,12 @@ class ChunkedStream(tts.ChunkedStream):
                             )
                         )
-                for frame in bstream.flush():
-                    self._event_ch.send_nowait(
-                        tts.SynthesizedAudio(
-                            request_id=request_id, segment_id=segment_id, frame=frame
-                        )
+            for frame in bstream.flush():
+                self._event_ch.send_nowait(
+                    tts.SynthesizedAudio(
+                        request_id=request_id, segment_id=segment_id, frame=frame
                     )
+                )
 class SynthesizeStream(tts.SynthesizeStream):
@@ -313,15 +325,34 @@ class SynthesizeStream(tts.SynthesizeStream):
         async def send_task():
             nonlocal eos_sent
+            xml_content = []
             async for data in word_stream:
+                text = data.token
+                # send the xml phoneme in one go
+                if (
+                    self._opts.enable_ssml_parsing
+                    and data.token.startswith("<phoneme")
+                    or xml_content
+                ):
+                    xml_content.append(text)
+                    if data.token.find("</phoneme>") > -1:
+                        text = self._opts.word_tokenizer.format_words(xml_content)
+                        xml_content = []
+                    else:
+                        continue
                 # try_trigger_generation=True is a bad practice, we expose
                 # chunk_length_schedule instead
                 data_pkt = dict(
-                    text=f"{data.token} ",  # must always end with a space
+                    text=f"{text} ",  # must always end with a space
                     try_trigger_generation=False,
                 )
                 await ws_conn.send_str(json.dumps(data_pkt))
+            if xml_content:
+                logger.warning("11labs stream ended with incomplete xml content")
             # no more token, mark eos
             eos_pkt = dict(text="")
             await ws_conn.send_str(json.dumps(eos_pkt))
@@ -434,7 +465,9 @@ def _stream_url(opts: _TTSOptions) -> str:
     model_id = opts.model_id
     output_format = opts.encoding
     latency = opts.streaming_latency
+    enable_ssml = str(opts.enable_ssml_parsing).lower()
     return (
         f"{base_url}/text-to-speech/{voice_id}/stream-input?"
-        f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
+        f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}&"
+        f"enable_ssml_parsing={enable_ssml}"
     )

livekit/plugins/elevenlabs/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.7.3"
+__version__ = "0.7.5"

{livekit_plugins_elevenlabs-0.7.3.dist-info → livekit_plugins_elevenlabs-0.7.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-elevenlabs
-Version: 0.7.3
+Version: 0.7.5
 Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0

livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
+livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
+livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
+livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/elevenlabs/tts.py,sha256=L9d4KppfqP9tP-PvaE3YKbezovhSboejmIk97xOmdEA,15868
+livekit/plugins/elevenlabs/version.py,sha256=4VoyPg1xoLZO0SP38sbtfe-ePEx82VqZVWRBBUr1wgA,600
+livekit_plugins_elevenlabs-0.7.5.dist-info/METADATA,sha256=KMqAU3UsRzO4wFl-Y8GfT5-Bb7s_bnm8JmuETbQ2cJo,1311
+livekit_plugins_elevenlabs-0.7.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+livekit_plugins_elevenlabs-0.7.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD,,

{livekit_plugins_elevenlabs-0.7.3.dist-info → livekit_plugins_elevenlabs-0.7.5.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (75.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

livekit_plugins_elevenlabs-0.7.3.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
-livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
-livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
-livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/elevenlabs/tts.py,sha256=ZSR6WxSBhntZmdK4i9U8SKcxHwNk3_4qiZNRZc5jP28,14641
-livekit/plugins/elevenlabs/version.py,sha256=yJeG0VwiekDJAk7GHcIAe43ebagJgloe-ZsqEGZnqzE,600
-livekit_plugins_elevenlabs-0.7.3.dist-info/METADATA,sha256=hdSuPch445_jz_Z-Uzt6CgU0Eb1H0ZVZ9ZA50hHYsBM,1311
-livekit_plugins_elevenlabs-0.7.3.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-livekit_plugins_elevenlabs-0.7.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_elevenlabs-0.7.3.dist-info/RECORD,,

{livekit_plugins_elevenlabs-0.7.3.dist-info → livekit_plugins_elevenlabs-0.7.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-elevenlabs 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

livekit-plugins-elevenlabs 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl