livekit-plugins-elevenlabs 0.5.0__py3-none-any.whl → 0.5.dev0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -12,19 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .models import TTSEncoding, TTSModels
16
15
  from .tts import DEFAULT_VOICE, TTS, Voice, VoiceSettings
17
16
  from .version import __version__
18
17
 
19
- __all__ = [
20
- "TTS",
21
- "Voice",
22
- "VoiceSettings",
23
- "TTSEncoding",
24
- "TTSModels",
25
- "DEFAULT_VOICE",
26
- "__version__",
27
- ]
18
+ __all__ = ["TTS", "Voice", "VoiceSettings", "DEFAULT_VOICE", "__version__"]
28
19
 
29
20
  from livekit.agents import Plugin
30
21
 
@@ -6,15 +6,3 @@ TTSModels = Literal[
6
6
  "eleven_multilingual_v2",
7
7
  "eleven_turbo_v2",
8
8
  ]
9
-
10
- TTSEncoding = Literal[
11
- "mp3_22050_32",
12
- "mp3_44100_32",
13
- "mp3_44100_64",
14
- "mp3_44100_96",
15
- "mp3_44100_128",
16
- "mp3_44100_192",
17
- "pcm_16000",
18
- "pcm_22050",
19
- "pcm_44100",
20
- ]
@@ -21,36 +21,14 @@ import dataclasses
21
21
  import json
22
22
  import os
23
23
  from dataclasses import dataclass
24
- from typing import List, Literal, Optional
24
+ from typing import List, Optional
25
25
 
26
26
  import aiohttp
27
27
  from livekit import rtc
28
- from livekit.agents import aio, codecs, tokenize, tts, utils
28
+ from livekit.agents import aio, tokenize, tts, utils
29
29
 
30
30
  from .log import logger
31
- from .models import (
32
- TTSEncoding,
33
- TTSModels,
34
- )
35
-
36
- _Encoding = Literal[
37
- "mp3",
38
- "pcm",
39
- ]
40
-
41
-
42
- def _sample_rate_from_format(output_format: TTSEncoding) -> int:
43
- split = output_format.split("_") # e.g: mp3_22050_32
44
- return int(split[1])
45
-
46
-
47
- def _encoding_from_format(output_format: TTSEncoding) -> _Encoding:
48
- if output_format.startswith("mp3"):
49
- return "mp3"
50
- elif output_format.startswith("pcm"):
51
- return "pcm"
52
-
53
- raise ValueError(f"Unknown format: {output_format}")
31
+ from .models import TTSModels
54
32
 
55
33
 
56
34
  @dataclass
@@ -88,7 +66,6 @@ class _TTSOptions:
88
66
  voice: Voice
89
67
  model_id: TTSModels
90
68
  base_url: str
91
- encoding: TTSEncoding
92
69
  sample_rate: int
93
70
  streaming_latency: int
94
71
  word_tokenizer: tokenize.WordTokenizer
@@ -103,7 +80,7 @@ class TTS(tts.TTS):
103
80
  model_id: TTSModels = "eleven_turbo_v2",
104
81
  api_key: str | None = None,
105
82
  base_url: str | None = None,
106
- encoding: TTSEncoding = "mp3_22050_32",
83
+ sample_rate: int = 24000,
107
84
  streaming_latency: int = 3,
108
85
  word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
109
86
  ignore_punctuation=False # punctuation can help for intonation
@@ -114,9 +91,7 @@ class TTS(tts.TTS):
114
91
  http_session: aiohttp.ClientSession | None = None,
115
92
  ) -> None:
116
93
  super().__init__(
117
- streaming_supported=True,
118
- sample_rate=_sample_rate_from_format(encoding),
119
- num_channels=1,
94
+ streaming_supported=True, sample_rate=sample_rate, num_channels=1
120
95
  )
121
96
  api_key = api_key or os.environ.get("ELEVEN_API_KEY")
122
97
  if not api_key:
@@ -127,8 +102,7 @@ class TTS(tts.TTS):
127
102
  model_id=model_id,
128
103
  api_key=api_key,
129
104
  base_url=base_url or API_BASE_URL_V1,
130
- encoding=encoding,
131
- sample_rate=self.sample_rate,
105
+ sample_rate=sample_rate,
132
106
  streaming_latency=streaming_latency,
133
107
  word_tokenizer=word_tokenizer,
134
108
  chunk_length_schedule=chunk_length_schedule,
@@ -176,7 +150,7 @@ class ChunkedStream(tts.ChunkedStream):
176
150
  base_url = self._opts.base_url
177
151
  voice_id = self._opts.voice.id
178
152
  model_id = self._opts.model_id
179
- sample_rate = _sample_rate_from_format(self._opts.encoding)
153
+ sample_rate = self._opts.sample_rate
180
154
  latency = self._opts.streaming_latency
181
155
  url = (
182
156
  f"{base_url}/text-to-speech/{voice_id}/stream?"
@@ -286,11 +260,11 @@ class SynthesizeStream(tts.SynthesizeStream):
286
260
  base_url = self._opts.base_url
287
261
  voice_id = self._opts.voice.id
288
262
  model_id = self._opts.model_id
289
- output_format = self._opts.encoding
263
+ sample_rate = self._opts.sample_rate
290
264
  latency = self._opts.streaming_latency
291
265
  url = (
292
266
  f"{base_url}/text-to-speech/{voice_id}/stream-input?"
293
- f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
267
+ f"model_id={model_id}&output_format=pcm_{sample_rate}&optimize_streaming_latency={latency}"
294
268
  )
295
269
 
296
270
  return url
@@ -443,8 +417,6 @@ class SynthesizeStream(tts.SynthesizeStream):
443
417
  all_tokens_consumed = True
444
418
 
445
419
  async def recv_task():
446
- encoding = _encoding_from_format(self._opts.encoding)
447
- mp3_decoder = codecs.Mp3StreamDecoder()
448
420
  while True:
449
421
  msg = await ws_conn.receive()
450
422
  if msg.type in (
@@ -465,32 +437,19 @@ class SynthesizeStream(tts.SynthesizeStream):
465
437
  continue
466
438
 
467
439
  data: dict = json.loads(msg.data)
468
- audio = data.get("audio")
469
-
470
- if data.get("error"):
471
- logger.error("11labs error %s", data)
472
- return
473
- elif audio is not None:
474
- if audio == "":
475
- # 11labs sometimes sends empty audio, ignore
476
- continue
477
-
478
- b64data = base64.b64decode(audio)
479
- frame: rtc.AudioFrame
480
- if encoding == "mp3":
481
- frames = mp3_decoder.decode_chunk(b64data)
482
- frame = utils.merge_frames(frames)
483
- else:
484
- frame = rtc.AudioFrame(
485
- data=b64data,
486
- sample_rate=self._opts.sample_rate,
487
- num_channels=1,
488
- samples_per_channel=len(b64data) // 2,
489
- )
440
+ if data.get("audio"):
441
+ b64data = base64.b64decode(data["audio"])
442
+
443
+ frame = rtc.AudioFrame(
444
+ data=b64data,
445
+ sample_rate=self._opts.sample_rate,
446
+ num_channels=1,
447
+ samples_per_channel=len(b64data) // 2,
448
+ )
490
449
 
491
450
  text = ""
492
451
  if data.get("alignment"):
493
- text = "".join(data["alignment"].get("chars", ""))
452
+ text = data["alignment"].get("chars", "")
494
453
 
495
454
  audio_tx.send_nowait(tts.SynthesizedAudio(text=text, data=frame))
496
455
  continue
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.5.0"
15
+ __version__ = "0.5.dev0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.5.0
3
+ Version: 0.5.dev0
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
22
  Requires-Dist: livekit ~=0.11
23
- Requires-Dist: livekit-agents[codecs] ~=0.7.0
23
+ Requires-Dist: livekit-agents ~=0.7.dev0
24
24
  Requires-Dist: aiohttp >=3.8.5
25
25
 
26
26
  # LiveKit Plugins Elevenlabs
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=_IMIfE4YA7d3NxrN-iCrdfQ19mwh93SY676RJGEA57c,989
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=g46mCMMHP3x3qtHmybHHMcid1UwmjKCcF0T4IWjMjWE,163
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=p7mEiUDR6gbqEUrLp1lgTkJ3ounN6rhnenYoYqWNF2k,16418
6
+ livekit/plugins/elevenlabs/version.py,sha256=h2gCxcJSMvCrVP7h14ON6HaghqLCkbl3--HZKEopR_8,603
7
+ livekit_plugins_elevenlabs-0.5.dev0.dist-info/METADATA,sha256=5uCb2q4zTTGaCSSN448GLqhj9-41bg0jjR2CSeov8ms,1365
8
+ livekit_plugins_elevenlabs-0.5.dev0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
9
+ livekit_plugins_elevenlabs-0.5.dev0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_elevenlabs-0.5.dev0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=ez1ybDPt7GfKAKgPkxZFRB7Vyd-_i-0hfUMI79GQ5w4,1091
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=8jTchztgpiTokHEaWUK8PPxWWfvm5SMrOGsJpzxbYAw,362
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=GTcyQwBVVPzCYLgsnw9q5oFOq9cV3hIKndDaBPSFMr4,17738
6
- livekit/plugins/elevenlabs/version.py,sha256=pZ7bgeWLjw4VCWymU1ntHaHorKRusUkm56y6tZe5gmQ,600
7
- livekit_plugins_elevenlabs-0.5.0.dist-info/METADATA,sha256=nmaTaWHwzuzT9nBjaLsJlzTAanMsxl7lv8wH5Sq7boI,1367
8
- livekit_plugins_elevenlabs-0.5.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
9
- livekit_plugins_elevenlabs-0.5.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.5.0.dist-info/RECORD,,