livekit-plugins-elevenlabs 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,13 +28,12 @@ __all__ = [
28
28
 
29
29
  from livekit.agents import Plugin
30
30
 
31
+ from .log import logger
32
+
31
33
 
32
34
  class ElevenLabsPlugin(Plugin):
33
35
  def __init__(self):
34
- super().__init__(__name__, __version__, __package__)
35
-
36
- def download_files(self):
37
- pass
36
+ super().__init__(__name__, __version__, __package__, logger)
38
37
 
39
38
 
40
39
  Plugin.register_plugin(ElevenLabsPlugin())
@@ -5,6 +5,7 @@ TTSModels = Literal[
5
5
  "eleven_multilingual_v1",
6
6
  "eleven_multilingual_v2",
7
7
  "eleven_turbo_v2",
8
+ "eleven_turbo_v2_5",
8
9
  ]
9
10
 
10
11
  TTSEncoding = Literal[
@@ -93,7 +93,7 @@ class TTS(tts.TTS):
93
93
  self,
94
94
  *,
95
95
  voice: Voice = DEFAULT_VOICE,
96
- model_id: TTSModels = "eleven_turbo_v2",
96
+ model_id: TTSModels = "eleven_turbo_v2_5",
97
97
  api_key: str | None = None,
98
98
  base_url: str | None = None,
99
99
  encoding: TTSEncoding = "mp3_22050_32",
@@ -156,6 +156,8 @@ class ChunkedStream(tts.ChunkedStream):
156
156
  ) -> None:
157
157
  super().__init__()
158
158
  self._text, self._opts, self._session = text, opts, session
159
+ if _encoding_from_format(self._opts.encoding) == "mp3":
160
+ self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
159
161
 
160
162
  @utils.log_exceptions(logger=logger)
161
163
  async def _main_task(self) -> None:
@@ -166,7 +168,7 @@ class ChunkedStream(tts.ChunkedStream):
166
168
  segment_id = utils.shortuuid()
167
169
 
168
170
  voice_settings = (
169
- dataclasses.asdict(self._opts.voice.settings)
171
+ _strip_nones(dataclasses.asdict(self._opts.voice.settings))
170
172
  if self._opts.voice.settings
171
173
  else None
172
174
  )
@@ -181,21 +183,39 @@ class ChunkedStream(tts.ChunkedStream):
181
183
  headers={AUTHORIZATION_HEADER: self._opts.api_key},
182
184
  json=data,
183
185
  ) as resp:
184
- async for bytes_data, _ in resp.content.iter_chunks():
185
- for frame in bstream.write(bytes_data):
186
+ if not resp.content_type.startswith("audio/"):
187
+ content = await resp.text()
188
+ logger.error("11labs returned non-audio data: %s", content)
189
+ return
190
+ encoding = _encoding_from_format(self._opts.encoding)
191
+ if encoding == "mp3":
192
+ async for bytes_data, _ in resp.content.iter_chunks():
193
+ for frame in self._mp3_decoder.decode_chunk(bytes_data):
194
+ self._event_ch.send_nowait(
195
+ tts.SynthesizedAudio(
196
+ request_id=request_id,
197
+ segment_id=segment_id,
198
+ frame=frame,
199
+ )
200
+ )
201
+ else:
202
+ async for bytes_data, _ in resp.content.iter_chunks():
203
+ for frame in bstream.write(bytes_data):
204
+ self._event_ch.send_nowait(
205
+ tts.SynthesizedAudio(
206
+ request_id=request_id,
207
+ segment_id=segment_id,
208
+ frame=frame,
209
+ )
210
+ )
211
+
212
+ for frame in bstream.flush():
186
213
  self._event_ch.send_nowait(
187
214
  tts.SynthesizedAudio(
188
215
  request_id=request_id, segment_id=segment_id, frame=frame
189
216
  )
190
217
  )
191
218
 
192
- for frame in bstream.flush():
193
- self._event_ch.send_nowait(
194
- tts.SynthesizedAudio(
195
- request_id=request_id, segment_id=segment_id, frame=frame
196
- )
197
- )
198
-
199
219
 
200
220
  class SynthesizeStream(tts.SynthesizeStream):
201
221
  """Streamed API using websockets"""
@@ -280,7 +300,7 @@ class SynthesizeStream(tts.SynthesizeStream):
280
300
  init_pkt = dict(
281
301
  text=" ",
282
302
  try_trigger_generation=True,
283
- voice_settings=dataclasses.asdict(self._opts.voice.settings)
303
+ voice_settings=_strip_nones(dataclasses.asdict(self._opts.voice.settings))
284
304
  if self._opts.voice.settings
285
305
  else None,
286
306
  generation_config=dict(
@@ -333,7 +353,15 @@ class SynthesizeStream(tts.SynthesizeStream):
333
353
  segment_id=segment_id,
334
354
  )
335
355
 
336
- await asyncio.gather(send_task(), recv_task())
356
+ tasks = [
357
+ asyncio.create_task(send_task()),
358
+ asyncio.create_task(recv_task()),
359
+ ]
360
+
361
+ try:
362
+ await asyncio.gather(*tasks)
363
+ finally:
364
+ await utils.aio.gracefully_cancel(*tasks)
337
365
 
338
366
  def _process_stream_event(
339
367
  self, *, data: dict, request_id: str, segment_id: str
@@ -384,15 +412,19 @@ def _dict_to_voices_list(data: dict[str, Any]):
384
412
  return voices
385
413
 
386
414
 
415
+ def _strip_nones(data: dict[str, Any]):
416
+ return {k: v for k, v in data.items() if v is not None}
417
+
418
+
387
419
  def _synthesize_url(opts: _TTSOptions) -> str:
388
420
  base_url = opts.base_url
389
421
  voice_id = opts.voice.id
390
422
  model_id = opts.model_id
391
- sample_rate = _sample_rate_from_format(opts.encoding)
423
+ output_format = opts.encoding
392
424
  latency = opts.streaming_latency
393
425
  return (
394
426
  f"{base_url}/text-to-speech/{voice_id}/stream?"
395
- f"model_id={model_id}&output_format=pcm_{sample_rate}&optimize_streaming_latency={latency}"
427
+ f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
396
428
  )
397
429
 
398
430
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.7.1"
15
+ __version__ = "0.7.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=ZSR6WxSBhntZmdK4i9U8SKcxHwNk3_4qiZNRZc5jP28,14641
6
+ livekit/plugins/elevenlabs/version.py,sha256=yJeG0VwiekDJAk7GHcIAe43ebagJgloe-ZsqEGZnqzE,600
7
+ livekit_plugins_elevenlabs-0.7.3.dist-info/METADATA,sha256=hdSuPch445_jz_Z-Uzt6CgU0Eb1H0ZVZ9ZA50hHYsBM,1311
8
+ livekit_plugins_elevenlabs-0.7.3.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
9
+ livekit_plugins_elevenlabs-0.7.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_elevenlabs-0.7.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (71.1.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=ez1ybDPt7GfKAKgPkxZFRB7Vyd-_i-0hfUMI79GQ5w4,1091
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=8jTchztgpiTokHEaWUK8PPxWWfvm5SMrOGsJpzxbYAw,362
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=fEqtmbzvuJ0Pso0kzJ_37_2aCHES7W1kKUwTycLRGpM,13318
6
- livekit/plugins/elevenlabs/version.py,sha256=JOBYrlKcxbTTRXkUKH0921GsmV-i71_KHczg2cgQiLc,600
7
- livekit_plugins_elevenlabs-0.7.1.dist-info/METADATA,sha256=PuFr70N0Y4YzxtzkeMmxwnyLkrQbynCUN0YKFu6gQV0,1311
8
- livekit_plugins_elevenlabs-0.7.1.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
9
- livekit_plugins_elevenlabs-0.7.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.7.1.dist-info/RECORD,,