PyPI - livekit-plugins-elevenlabs - Versions diffs - 0.5.dev0__py3-none-any.whl → 0.6.dev0__py3-none-any.whl - Mend

livekit-plugins-elevenlabs 0.5.dev0__py3-none-any.whl → 0.6.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

livekit/plugins/elevenlabs/__init__.py CHANGED Viewed

@@ -12,10 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .models import TTSEncoding, TTSModels
 from .tts import DEFAULT_VOICE, TTS, Voice, VoiceSettings
 from .version import __version__
-__all__ = ["TTS", "Voice", "VoiceSettings", "DEFAULT_VOICE", "__version__"]
+__all__ = [
+    "TTS",
+    "Voice",
+    "VoiceSettings",
+    "TTSEncoding",
+    "TTSModels",
+    "DEFAULT_VOICE",
+    "__version__",
+]
 from livekit.agents import Plugin

livekit/plugins/elevenlabs/models.py CHANGED Viewed

@@ -6,3 +6,15 @@ TTSModels = Literal[
     "eleven_multilingual_v2",
     "eleven_turbo_v2",
 ]
+TTSEncoding = Literal[
+    "mp3_22050_32",
+    "mp3_44100_32",
+    "mp3_44100_64",
+    "mp3_44100_96",
+    "mp3_44100_128",
+    "mp3_44100_192",
+    "pcm_16000",
+    "pcm_22050",
+    "pcm_44100",
+]

livekit/plugins/elevenlabs/tts.py CHANGED Viewed

@@ -21,14 +21,36 @@ import dataclasses
 import json
 import os
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Literal, Optional
 import aiohttp
 from livekit import rtc
-from livekit.agents import aio, tokenize, tts, utils
+from livekit.agents import aio, codecs, tokenize, tts, utils
 from .log import logger
-from .models import TTSModels
+from .models import (
+    TTSEncoding,
+    TTSModels,
+)
+_Encoding = Literal[
+    "mp3",
+    "pcm",
+]
+def _sample_rate_from_format(output_format: TTSEncoding) -> int:
+    split = output_format.split("_")  # e.g: mp3_22050_32
+    return int(split[1])
+def _encoding_from_format(output_format: TTSEncoding) -> _Encoding:
+    if output_format.startswith("mp3"):
+        return "mp3"
+    elif output_format.startswith("pcm"):
+        return "pcm"
+    raise ValueError(f"Unknown format: {output_format}")
 @dataclass
@@ -66,6 +88,7 @@ class _TTSOptions:
     voice: Voice
     model_id: TTSModels
     base_url: str
+    encoding: TTSEncoding
     sample_rate: int
     streaming_latency: int
     word_tokenizer: tokenize.WordTokenizer
@@ -80,7 +103,7 @@ class TTS(tts.TTS):
         model_id: TTSModels = "eleven_turbo_v2",
         api_key: str | None = None,
         base_url: str | None = None,
-        sample_rate: int = 24000,
+        encoding: TTSEncoding = "mp3_22050_32",
         streaming_latency: int = 3,
         word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
             ignore_punctuation=False  # punctuation can help for intonation
@@ -91,7 +114,9 @@ class TTS(tts.TTS):
         http_session: aiohttp.ClientSession | None = None,
     ) -> None:
         super().__init__(
-            streaming_supported=True, sample_rate=sample_rate, num_channels=1
+            streaming_supported=True,
+            sample_rate=_sample_rate_from_format(encoding),
+            num_channels=1,
         )
         api_key = api_key or os.environ.get("ELEVEN_API_KEY")
         if not api_key:
@@ -102,7 +127,8 @@ class TTS(tts.TTS):
             model_id=model_id,
             api_key=api_key,
             base_url=base_url or API_BASE_URL_V1,
-            sample_rate=sample_rate,
+            encoding=encoding,
+            sample_rate=self.sample_rate,
             streaming_latency=streaming_latency,
             word_tokenizer=word_tokenizer,
             chunk_length_schedule=chunk_length_schedule,
@@ -150,7 +176,7 @@ class ChunkedStream(tts.ChunkedStream):
         base_url = self._opts.base_url
         voice_id = self._opts.voice.id
         model_id = self._opts.model_id
-        sample_rate = self._opts.sample_rate
+        sample_rate = _sample_rate_from_format(self._opts.encoding)
         latency = self._opts.streaming_latency
         url = (
             f"{base_url}/text-to-speech/{voice_id}/stream?"
@@ -260,11 +286,11 @@ class SynthesizeStream(tts.SynthesizeStream):
         base_url = self._opts.base_url
         voice_id = self._opts.voice.id
         model_id = self._opts.model_id
-        sample_rate = self._opts.sample_rate
+        output_format = self._opts.encoding
         latency = self._opts.streaming_latency
         url = (
             f"{base_url}/text-to-speech/{voice_id}/stream-input?"
-            f"model_id={model_id}&output_format=pcm_{sample_rate}&optimize_streaming_latency={latency}"
+            f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
         )
         return url
@@ -417,6 +443,8 @@ class SynthesizeStream(tts.SynthesizeStream):
             all_tokens_consumed = True
         async def recv_task():
+            encoding = _encoding_from_format(self._opts.encoding)
+            mp3_decoder = codecs.Mp3StreamDecoder()
             while True:
                 msg = await ws_conn.receive()
                 if msg.type in (
@@ -437,19 +465,32 @@ class SynthesizeStream(tts.SynthesizeStream):
                     continue
                 data: dict = json.loads(msg.data)
-                if data.get("audio"):
-                    b64data = base64.b64decode(data["audio"])
-                    frame = rtc.AudioFrame(
-                        data=b64data,
-                        sample_rate=self._opts.sample_rate,
-                        num_channels=1,
-                        samples_per_channel=len(b64data) // 2,
-                    )
+                audio = data.get("audio")
+                if data.get("error"):
+                    logger.error("11labs error %s", data)
+                    return
+                elif audio is not None:
+                    if audio == "":
+                        # 11labs sometimes sends empty audio, ignore
+                        continue
+                    b64data = base64.b64decode(audio)
+                    frame: rtc.AudioFrame
+                    if encoding == "mp3":
+                        frames = mp3_decoder.decode_chunk(b64data)
+                        frame = utils.merge_frames(frames)
+                    else:
+                        frame = rtc.AudioFrame(
+                            data=b64data,
+                            sample_rate=self._opts.sample_rate,
+                            num_channels=1,
+                            samples_per_channel=len(b64data) // 2,
+                        )
                     text = ""
                     if data.get("alignment"):
-                        text = data["alignment"].get("chars", "")
+                        text = "".join(data["alignment"].get("chars", ""))
                     audio_tx.send_nowait(tts.SynthesizedAudio(text=text, data=frame))
                     continue

livekit/plugins/elevenlabs/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.5.dev0"
+__version__ = "0.6.dev0"

{livekit_plugins_elevenlabs-0.5.dev0.dist-info → livekit_plugins_elevenlabs-0.6.dev0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-elevenlabs
-Version: 0.5.dev0
+Version: 0.6.dev0
 Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9.0
 Description-Content-Type: text/markdown
 Requires-Dist: livekit ~=0.11
-Requires-Dist: livekit-agents ~=0.7.dev0
+Requires-Dist: livekit-agents[codecs] ~=0.8.dev0
 Requires-Dist: aiohttp >=3.8.5
 # LiveKit Plugins Elevenlabs

livekit_plugins_elevenlabs-0.6.dev0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+livekit/plugins/elevenlabs/__init__.py,sha256=ez1ybDPt7GfKAKgPkxZFRB7Vyd-_i-0hfUMI79GQ5w4,1091
+livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
+livekit/plugins/elevenlabs/models.py,sha256=8jTchztgpiTokHEaWUK8PPxWWfvm5SMrOGsJpzxbYAw,362
+livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/elevenlabs/tts.py,sha256=GTcyQwBVVPzCYLgsnw9q5oFOq9cV3hIKndDaBPSFMr4,17738
+livekit/plugins/elevenlabs/version.py,sha256=yB6WnbnD5MFhQDT5ItJ02XWVsNanlDYiOezzwv0IdcM,603
+livekit_plugins_elevenlabs-0.6.dev0.dist-info/METADATA,sha256=kfWET-iNGQYX7TGoo87CiMIoMINIwE28YT4-hbp8NDY,1373
+livekit_plugins_elevenlabs-0.6.dev0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+livekit_plugins_elevenlabs-0.6.dev0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_elevenlabs-0.6.dev0.dist-info/RECORD,,

livekit_plugins_elevenlabs-0.5.dev0.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-livekit/plugins/elevenlabs/__init__.py,sha256=_IMIfE4YA7d3NxrN-iCrdfQ19mwh93SY676RJGEA57c,989
-livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
-livekit/plugins/elevenlabs/models.py,sha256=g46mCMMHP3x3qtHmybHHMcid1UwmjKCcF0T4IWjMjWE,163
-livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/elevenlabs/tts.py,sha256=p7mEiUDR6gbqEUrLp1lgTkJ3ounN6rhnenYoYqWNF2k,16418
-livekit/plugins/elevenlabs/version.py,sha256=h2gCxcJSMvCrVP7h14ON6HaghqLCkbl3--HZKEopR_8,603
-livekit_plugins_elevenlabs-0.5.dev0.dist-info/METADATA,sha256=5uCb2q4zTTGaCSSN448GLqhj9-41bg0jjR2CSeov8ms,1365
-livekit_plugins_elevenlabs-0.5.dev0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-livekit_plugins_elevenlabs-0.5.dev0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_elevenlabs-0.5.dev0.dist-info/RECORD,,

{livekit_plugins_elevenlabs-0.5.dev0.dist-info → livekit_plugins_elevenlabs-0.6.dev0.dist-info}/WHEEL RENAMED Viewed

File without changes

{livekit_plugins_elevenlabs-0.5.dev0.dist-info → livekit_plugins_elevenlabs-0.6.dev0.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-elevenlabs 0.5.dev0__py3-none-any.whl → 0.6.dev0__py3-none-any.whl

livekit-plugins-elevenlabs 0.5.dev0__py3-none-any.whl → 0.6.dev0__py3-none-any.whl