livekit-plugins-elevenlabs 0.7.5__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,3 +37,12 @@ class ElevenLabsPlugin(Plugin):
37
37
 
38
38
 
39
39
  Plugin.register_plugin(ElevenLabsPlugin())
40
+
41
+ # Hide every name that is not listed in __all__ from the pdoc-generated documentation
42
+ _module = dir()
43
+ NOT_IN_ALL = [m for m in _module if m not in __all__]
44
+
45
+ __pdoc__ = {}
46
+
47
+ for n in NOT_IN_ALL:
48
+ __pdoc__[n] = False
@@ -24,7 +24,14 @@ from typing import Any, List, Literal
24
24
 
25
25
  import aiohttp
26
26
  from livekit import rtc
27
- from livekit.agents import tokenize, tts, utils
27
+ from livekit.agents import (
28
+ APIConnectionError,
29
+ APIStatusError,
30
+ APITimeoutError,
31
+ tokenize,
32
+ tts,
33
+ utils,
34
+ )
28
35
 
29
36
  from .log import logger
30
37
  from .models import TTSEncoding, TTSModels
@@ -79,7 +86,7 @@ AUTHORIZATION_HEADER = "xi-api-key"
79
86
  class _TTSOptions:
80
87
  api_key: str
81
88
  voice: Voice
82
- model_id: TTSModels
89
+ model: TTSModels | str
83
90
  base_url: str
84
91
  encoding: TTSEncoding
85
92
  sample_rate: int
@@ -94,7 +101,7 @@ class TTS(tts.TTS):
94
101
  self,
95
102
  *,
96
103
  voice: Voice = DEFAULT_VOICE,
97
- model_id: TTSModels = "eleven_turbo_v2_5",
104
+ model: TTSModels | str = "eleven_turbo_v2_5",
98
105
  api_key: str | None = None,
99
106
  base_url: str | None = None,
100
107
  encoding: TTSEncoding = "mp3_22050_32",
@@ -105,12 +112,23 @@ class TTS(tts.TTS):
105
112
  enable_ssml_parsing: bool = False,
106
113
  chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
107
114
  http_session: aiohttp.ClientSession | None = None,
115
+ # deprecated: use `model` instead; when given, it overrides `model` and logs a warning
116
+ model_id: TTSModels | str | None = None,
108
117
  ) -> None:
109
118
  """
110
119
  Create a new instance of ElevenLabs TTS.
111
120
 
112
- ``api_key`` must be set to your ElevenLabs API key, either using the argument or by setting
113
- the ``ELEVEN_API_KEY`` environmental variable.
121
+ Args:
122
+ voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
123
+ model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
124
+ api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
125
+ base_url (str | None): Custom base URL for the API. Optional.
126
+ encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32".
127
+ streaming_latency (int): Latency optimization level forwarded to the ElevenLabs API (a level, not a duration in seconds). Defaults to 3.
128
+ word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
129
+ enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
130
+ chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
131
+ http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
114
132
  """
115
133
 
116
134
  super().__init__(
@@ -120,13 +138,22 @@ class TTS(tts.TTS):
120
138
  sample_rate=_sample_rate_from_format(encoding),
121
139
  num_channels=1,
122
140
  )
141
+
142
+ if model_id is not None:
143
+ logger.warning(
144
+ "model_id is deprecated and will be removed in 1.5.0, use model instead",
145
+ )
146
+ model = model_id
147
+
123
148
  api_key = api_key or os.environ.get("ELEVEN_API_KEY")
124
149
  if not api_key:
125
- raise ValueError("ELEVEN_API_KEY must be set")
150
+ raise ValueError(
151
+ "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
152
+ )
126
153
 
127
154
  self._opts = _TTSOptions(
128
155
  voice=voice,
129
- model_id=model_id,
156
+ model=model,
130
157
  api_key=api_key,
131
158
  base_url=base_url or API_BASE_URL_V1,
132
159
  encoding=encoding,
@@ -151,31 +178,43 @@ class TTS(tts.TTS):
151
178
  ) as resp:
152
179
  return _dict_to_voices_list(await resp.json())
153
180
 
181
+ def update_options(
182
+ self,
183
+ *,
184
+ voice: Voice = DEFAULT_VOICE,
185
+ model: TTSModels | str = "eleven_turbo_v2_5",
186
+ ) -> None:
187
+ """
188
+ Args:
189
+ voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
190
+ model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
191
+ """
192
+ self._opts.model = model or self._opts.model
193
+ self._opts.voice = voice or self._opts.voice
194
+
154
195
  def synthesize(self, text: str) -> "ChunkedStream":
155
- return ChunkedStream(text, self._opts, self._ensure_session())
196
+ return ChunkedStream(self, text, self._opts, self._ensure_session())
156
197
 
157
198
  def stream(self) -> "SynthesizeStream":
158
- return SynthesizeStream(self._ensure_session(), self._opts)
199
+ return SynthesizeStream(self, self._ensure_session(), self._opts)
159
200
 
160
201
 
161
202
  class ChunkedStream(tts.ChunkedStream):
162
203
  """Synthesize using the chunked api endpoint"""
163
204
 
164
205
  def __init__(
165
- self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
206
+ self, tts: TTS, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
166
207
  ) -> None:
167
- super().__init__()
168
- self._text, self._opts, self._session = text, opts, session
208
+ super().__init__(tts, text)
209
+ self._opts, self._session = opts, session
169
210
  if _encoding_from_format(self._opts.encoding) == "mp3":
170
211
  self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
171
212
 
172
- @utils.log_exceptions(logger=logger)
173
213
  async def _main_task(self) -> None:
214
+ request_id = utils.shortuuid()
174
215
  bstream = utils.audio.AudioByteStream(
175
216
  sample_rate=self._opts.sample_rate, num_channels=1
176
217
  )
177
- request_id = utils.shortuuid()
178
- segment_id = utils.shortuuid()
179
218
 
180
219
  voice_settings = (
181
220
  _strip_nones(dataclasses.asdict(self._opts.voice.settings))
@@ -183,50 +222,59 @@ class ChunkedStream(tts.ChunkedStream):
183
222
  else None
184
223
  )
185
224
  data = {
186
- "text": self._text,
187
- "model_id": self._opts.model_id,
225
+ "text": self._input_text,
226
+ "model_id": self._opts.model,
188
227
  "voice_settings": voice_settings,
189
228
  }
190
229
 
191
- async with self._session.post(
192
- _synthesize_url(self._opts),
193
- headers={AUTHORIZATION_HEADER: self._opts.api_key},
194
- json=data,
195
- ) as resp:
196
- if not resp.content_type.startswith("audio/"):
197
- content = await resp.text()
198
- logger.error("11labs returned non-audio data: %s", content)
199
- return
200
-
201
- encoding = _encoding_from_format(self._opts.encoding)
202
- if encoding == "mp3":
203
- async for bytes_data, _ in resp.content.iter_chunks():
204
- for frame in self._mp3_decoder.decode_chunk(bytes_data):
205
- for frame in bstream.write(frame.data.tobytes()):
230
+ try:
231
+ async with self._session.post(
232
+ _synthesize_url(self._opts),
233
+ headers={AUTHORIZATION_HEADER: self._opts.api_key},
234
+ json=data,
235
+ ) as resp:
236
+ if not resp.content_type.startswith("audio/"):
237
+ content = await resp.text()
238
+ logger.error("11labs returned non-audio data: %s", content)
239
+ return
240
+
241
+ encoding = _encoding_from_format(self._opts.encoding)
242
+ if encoding == "mp3":
243
+ async for bytes_data, _ in resp.content.iter_chunks():
244
+ for frame in self._mp3_decoder.decode_chunk(bytes_data):
245
+ for frame in bstream.write(frame.data.tobytes()):
246
+ self._event_ch.send_nowait(
247
+ tts.SynthesizedAudio(
248
+ request_id=request_id,
249
+ frame=frame,
250
+ )
251
+ )
252
+ else:
253
+ async for bytes_data, _ in resp.content.iter_chunks():
254
+ for frame in bstream.write(bytes_data):
206
255
  self._event_ch.send_nowait(
207
256
  tts.SynthesizedAudio(
208
257
  request_id=request_id,
209
- segment_id=segment_id,
210
258
  frame=frame,
211
259
  )
212
260
  )
213
- else:
214
- async for bytes_data, _ in resp.content.iter_chunks():
215
- for frame in bstream.write(bytes_data):
216
- self._event_ch.send_nowait(
217
- tts.SynthesizedAudio(
218
- request_id=request_id,
219
- segment_id=segment_id,
220
- frame=frame,
221
- )
222
- )
223
261
 
224
- for frame in bstream.flush():
225
- self._event_ch.send_nowait(
226
- tts.SynthesizedAudio(
227
- request_id=request_id, segment_id=segment_id, frame=frame
262
+ for frame in bstream.flush():
263
+ self._event_ch.send_nowait(
264
+ tts.SynthesizedAudio(request_id=request_id, frame=frame)
228
265
  )
229
- )
266
+
267
+ except asyncio.TimeoutError as e:
268
+ raise APITimeoutError() from e
269
+ except aiohttp.ClientResponseError as e:
270
+ raise APIStatusError(
271
+ message=e.message,
272
+ status_code=e.status,
273
+ request_id=None,
274
+ body=None,
275
+ ) from e
276
+ except Exception as e:
277
+ raise APIConnectionError() from e
230
278
 
231
279
 
232
280
  class SynthesizeStream(tts.SynthesizeStream):
@@ -234,10 +282,11 @@ class SynthesizeStream(tts.SynthesizeStream):
234
282
 
235
283
  def __init__(
236
284
  self,
285
+ tts: TTS,
237
286
  session: aiohttp.ClientSession,
238
287
  opts: _TTSOptions,
239
288
  ):
240
- super().__init__()
289
+ super().__init__(tts)
241
290
  self._opts, self._session = opts, session
242
291
  self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
243
292
 
@@ -360,6 +409,26 @@ class SynthesizeStream(tts.SynthesizeStream):
360
409
 
361
410
  async def recv_task():
362
411
  nonlocal eos_sent
412
+ audio_bstream = utils.audio.AudioByteStream(
413
+ sample_rate=self._opts.sample_rate,
414
+ num_channels=1,
415
+ )
416
+
417
+ last_frame: rtc.AudioFrame | None = None
418
+
419
+ def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
420
+ nonlocal last_frame
421
+ if last_frame is not None:
422
+ self._event_ch.send_nowait(
423
+ tts.SynthesizedAudio(
424
+ request_id=request_id,
425
+ segment_id=segment_id,
426
+ frame=last_frame,
427
+ is_final=is_final,
428
+ )
429
+ )
430
+
431
+ last_frame = None
363
432
 
364
433
  while True:
365
434
  msg = await ws_conn.receive()
@@ -378,11 +447,33 @@ class SynthesizeStream(tts.SynthesizeStream):
378
447
  logger.warning("unexpected 11labs message type %s", msg.type)
379
448
  continue
380
449
 
381
- self._process_stream_event(
382
- data=json.loads(msg.data),
383
- request_id=request_id,
384
- segment_id=segment_id,
385
- )
450
+ data = json.loads(msg.data)
451
+ encoding = _encoding_from_format(self._opts.encoding)
452
+ if data.get("audio"):
453
+ b64data = base64.b64decode(data["audio"])
454
+ if encoding == "mp3":
455
+ for frame in self._mp3_decoder.decode_chunk(b64data):
456
+ for frame in audio_bstream.write(frame.data.tobytes()):
457
+ _send_last_frame(segment_id=segment_id, is_final=False)
458
+ last_frame = frame
459
+
460
+ else:
461
+ for frame in audio_bstream.write(b64data):
462
+ _send_last_frame(segment_id=segment_id, is_final=False)
463
+ last_frame = frame
464
+
465
+ elif data.get("isFinal"):
466
+ for frame in audio_bstream.flush():
467
+ _send_last_frame(segment_id=segment_id, is_final=False)
468
+ last_frame = frame
469
+
470
+ _send_last_frame(segment_id=segment_id, is_final=True)
471
+
472
+ pass
473
+ elif data.get("error"):
474
+ logger.error("11labs reported an error: %s", data["error"])
475
+ else:
476
+ logger.error("unexpected 11labs message %s", data)
386
477
 
387
478
  tasks = [
388
479
  asyncio.create_task(send_task()),
@@ -394,40 +485,6 @@ class SynthesizeStream(tts.SynthesizeStream):
394
485
  finally:
395
486
  await utils.aio.gracefully_cancel(*tasks)
396
487
 
397
- def _process_stream_event(
398
- self, *, data: dict, request_id: str, segment_id: str
399
- ) -> None:
400
- encoding = _encoding_from_format(self._opts.encoding)
401
- if data.get("audio"):
402
- b64data = base64.b64decode(data["audio"])
403
- if encoding == "mp3":
404
- for frame in self._mp3_decoder.decode_chunk(b64data):
405
- self._event_ch.send_nowait(
406
- tts.SynthesizedAudio(
407
- request_id=request_id,
408
- segment_id=segment_id,
409
- frame=frame,
410
- )
411
- )
412
- else:
413
- chunk_frame = rtc.AudioFrame(
414
- data=b64data,
415
- sample_rate=self._opts.sample_rate,
416
- num_channels=1,
417
- samples_per_channel=len(b64data) // 2,
418
- )
419
- self._event_ch.send_nowait(
420
- tts.SynthesizedAudio(
421
- request_id=request_id,
422
- segment_id=segment_id,
423
- frame=chunk_frame,
424
- )
425
- )
426
- elif data.get("error"):
427
- logger.error("11labs reported an error: %s", data["error"])
428
- elif not data.get("isFinal"):
429
- logger.error("unexpected 11labs message %s", data)
430
-
431
488
 
432
489
  def _dict_to_voices_list(data: dict[str, Any]):
433
490
  voices: List[Voice] = []
@@ -450,7 +507,7 @@ def _strip_nones(data: dict[str, Any]):
450
507
  def _synthesize_url(opts: _TTSOptions) -> str:
451
508
  base_url = opts.base_url
452
509
  voice_id = opts.voice.id
453
- model_id = opts.model_id
510
+ model_id = opts.model
454
511
  output_format = opts.encoding
455
512
  latency = opts.streaming_latency
456
513
  return (
@@ -462,7 +519,7 @@ def _synthesize_url(opts: _TTSOptions) -> str:
462
519
  def _stream_url(opts: _TTSOptions) -> str:
463
520
  base_url = opts.base_url
464
521
  voice_id = opts.voice.id
465
- model_id = opts.model_id
522
+ model_id = opts.model
466
523
  output_format = opts.encoding
467
524
  latency = opts.streaming_latency
468
525
  enable_ssml = str(opts.enable_ssml_parsing).lower()
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.7.5"
15
+ __version__ = "0.7.6"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.7.5
3
+ Version: 0.7.6
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents[codecs] >=0.8.0.dev0
22
+ Requires-Dist: livekit-agents[codecs] >=0.11
23
23
 
24
24
  # LiveKit Plugins Elevenlabs
25
25
 
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=mxdypC-zSbS1R15FmztT49ssk_arkKGUPe_d5uVqOUk,18422
6
+ livekit/plugins/elevenlabs/version.py,sha256=vOFNGWowZUhIrmyHBGtCx5dGhCp1T2FPt0h7KU_XKJg,600
7
+ livekit_plugins_elevenlabs-0.7.6.dist-info/METADATA,sha256=DY1JbHdgfNivv0p0xA5ZRenYUGEYC33yX4TcNh__srg,1305
8
+ livekit_plugins_elevenlabs-0.7.6.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
9
+ livekit_plugins_elevenlabs-0.7.6.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_elevenlabs-0.7.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=L9d4KppfqP9tP-PvaE3YKbezovhSboejmIk97xOmdEA,15868
6
- livekit/plugins/elevenlabs/version.py,sha256=4VoyPg1xoLZO0SP38sbtfe-ePEx82VqZVWRBBUr1wgA,600
7
- livekit_plugins_elevenlabs-0.7.5.dist-info/METADATA,sha256=KMqAU3UsRzO4wFl-Y8GfT5-Bb7s_bnm8JmuETbQ2cJo,1311
8
- livekit_plugins_elevenlabs-0.7.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
9
- livekit_plugins_elevenlabs-0.7.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD,,