livekit-plugins-elevenlabs 1.0.23__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,7 +39,7 @@ from .log import logger
39
39
 
40
40
 
41
41
  class ElevenLabsPlugin(Plugin):
42
- def __init__(self):
42
+ def __init__(self) -> None:
43
43
  super().__init__(__name__, __version__, __package__, logger)
44
44
 
45
45
 
@@ -8,6 +8,7 @@ TTSModels = Literal[
8
8
  "eleven_turbo_v2_5",
9
9
  "eleven_flash_v2_5",
10
10
  "eleven_flash_v2",
11
+ "eleven_v3",
11
12
  ]
12
13
 
13
14
  TTSEncoding = Literal[
@@ -72,7 +72,7 @@ class STT(stt.STT):
72
72
  self._opts = _STTOptions(
73
73
  api_key=elevenlabs_api_key,
74
74
  base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
75
- language_code=language_code,
75
+ language_code=language_code if is_given(language_code) else "en",
76
76
  )
77
77
  self._session = http_session
78
78
 
@@ -20,7 +20,7 @@ import dataclasses
20
20
  import json
21
21
  import os
22
22
  import weakref
23
- from dataclasses import dataclass
23
+ from dataclasses import dataclass, replace
24
24
  from typing import Any
25
25
 
26
26
  import aiohttp
@@ -28,17 +28,14 @@ import aiohttp
28
28
  from livekit.agents import (
29
29
  APIConnectionError,
30
30
  APIConnectOptions,
31
+ APIError,
31
32
  APIStatusError,
32
33
  APITimeoutError,
33
34
  tokenize,
34
35
  tts,
35
36
  utils,
36
37
  )
37
- from livekit.agents.types import (
38
- DEFAULT_API_CONNECT_OPTIONS,
39
- NOT_GIVEN,
40
- NotGivenOr,
41
- )
38
+ from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
42
39
  from livekit.agents.utils import is_given
43
40
 
44
41
  from .log import logger
@@ -70,7 +67,7 @@ class Voice:
70
67
  category: str
71
68
 
72
69
 
73
- DEFAULT_VOICE_ID = "EXAVITQu4vr4xnSDxMaL"
70
+ DEFAULT_VOICE_ID = "bIHbv24MWmeRgasZH58o"
74
71
  API_BASE_URL_V1 = "https://api.elevenlabs.io/v1"
75
72
  AUTHORIZATION_HEADER = "xi-api-key"
76
73
  WS_INACTIVITY_TIMEOUT = 300
@@ -207,102 +204,70 @@ class TTS(tts.TTS):
207
204
  self._opts.language = language
208
205
 
209
206
  def synthesize(
210
- self,
211
- text: str,
212
- *,
213
- conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
207
+ self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
214
208
  ) -> ChunkedStream:
215
- return ChunkedStream(
216
- tts=self,
217
- input_text=text,
218
- conn_options=conn_options,
219
- opts=self._opts,
220
- session=self._ensure_session(),
221
- )
209
+ return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
222
210
 
223
211
  def stream(
224
212
  self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
225
213
  ) -> SynthesizeStream:
226
- stream = SynthesizeStream(
227
- tts=self,
228
- conn_options=conn_options,
229
- opts=self._opts,
230
- session=self._ensure_session(),
231
- )
214
+ stream = SynthesizeStream(tts=self, conn_options=conn_options)
232
215
  self._streams.add(stream)
233
216
  return stream
234
217
 
235
218
  async def aclose(self) -> None:
236
219
  for stream in list(self._streams):
237
220
  await stream.aclose()
221
+
238
222
  self._streams.clear()
239
- await super().aclose()
240
223
 
241
224
 
242
225
  class ChunkedStream(tts.ChunkedStream):
243
226
  """Synthesize using the chunked api endpoint"""
244
227
 
245
- def __init__(
246
- self,
247
- *,
248
- tts: TTS,
249
- input_text: str,
250
- opts: _TTSOptions,
251
- conn_options: APIConnectOptions,
252
- session: aiohttp.ClientSession,
253
- ) -> None:
228
+ def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
254
229
  super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
255
- self._opts, self._session = opts, session
230
+ self._tts: TTS = tts
231
+ self._opts = replace(tts._opts)
256
232
 
257
- async def _run(self) -> None:
258
- request_id = utils.shortuuid()
233
+ async def _run(self, output_emitter: tts.AudioEmitter) -> None:
259
234
  voice_settings = (
260
235
  _strip_nones(dataclasses.asdict(self._opts.voice_settings))
261
236
  if is_given(self._opts.voice_settings)
262
237
  else None
263
238
  )
264
- data = {
265
- "text": self._input_text,
266
- "model_id": self._opts.model,
267
- "voice_settings": voice_settings,
268
- }
269
-
270
- decoder = utils.codecs.AudioStreamDecoder(
271
- sample_rate=self._opts.sample_rate,
272
- num_channels=1,
273
- )
274
-
275
- decode_task: asyncio.Task | None = None
276
239
  try:
277
- async with self._session.post(
240
+ async with self._tts._ensure_session().post(
278
241
  _synthesize_url(self._opts),
279
242
  headers={AUTHORIZATION_HEADER: self._opts.api_key},
280
- json=data,
243
+ json={
244
+ "text": self._input_text,
245
+ "model_id": self._opts.model,
246
+ "voice_settings": voice_settings,
247
+ },
281
248
  timeout=aiohttp.ClientTimeout(
282
249
  total=30,
283
250
  sock_connect=self._conn_options.timeout,
284
251
  ),
285
252
  ) as resp:
253
+ resp.raise_for_status()
254
+
286
255
  if not resp.content_type.startswith("audio/"):
287
256
  content = await resp.text()
288
- logger.error("11labs returned non-audio data: %s", content)
289
- return
257
+ raise APIError(message="11labs returned non-audio data", body=content)
290
258
 
291
- async def _decode_loop():
292
- try:
293
- async for bytes_data, _ in resp.content.iter_chunks():
294
- decoder.push(bytes_data)
295
- finally:
296
- decoder.end_input()
297
-
298
- decode_task = asyncio.create_task(_decode_loop())
299
- emitter = tts.SynthesizedAudioEmitter(
300
- event_ch=self._event_ch,
301
- request_id=request_id,
259
+ output_emitter.initialize(
260
+ request_id=utils.shortuuid(),
261
+ sample_rate=self._opts.sample_rate,
262
+ num_channels=1,
263
+ mime_type="audio/mp3",
302
264
  )
303
- async for frame in decoder:
304
- emitter.push(frame)
305
- emitter.flush()
265
+
266
+ async for data, _ in resp.content.iter_chunks():
267
+ output_emitter.push(data)
268
+
269
+ output_emitter.flush()
270
+
306
271
  except asyncio.TimeoutError as e:
307
272
  raise APITimeoutError() from e
308
273
  except aiohttp.ClientResponseError as e:
@@ -314,53 +279,51 @@ class ChunkedStream(tts.ChunkedStream):
314
279
  ) from e
315
280
  except Exception as e:
316
281
  raise APIConnectionError() from e
317
- finally:
318
- if decode_task:
319
- await utils.aio.gracefully_cancel(decode_task)
320
- await decoder.aclose()
321
282
 
322
283
 
323
284
  class SynthesizeStream(tts.SynthesizeStream):
324
285
  """Streamed API using websockets"""
325
286
 
326
- def __init__(
327
- self,
328
- *,
329
- tts: TTS,
330
- session: aiohttp.ClientSession,
331
- opts: _TTSOptions,
332
- conn_options: APIConnectOptions,
333
- ):
287
+ def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
334
288
  super().__init__(tts=tts, conn_options=conn_options)
335
- self._opts, self._session = opts, session
289
+ self._tts: TTS = tts
290
+ self._opts = replace(tts._opts)
291
+ self._segments_ch = utils.aio.Chan[tokenize.WordStream]()
336
292
 
337
- async def _run(self) -> None:
293
+ async def _run(self, output_emitter: tts.AudioEmitter) -> None:
338
294
  request_id = utils.shortuuid()
339
- self._segments_ch = utils.aio.Chan[tokenize.WordStream]()
295
+ output_emitter.initialize(
296
+ request_id=request_id,
297
+ sample_rate=self._opts.sample_rate,
298
+ num_channels=1,
299
+ stream=True,
300
+ mime_type="audio/mp3",
301
+ )
340
302
 
341
- @utils.log_exceptions(logger=logger)
342
- async def _tokenize_input():
303
+ async def _tokenize_input() -> None:
343
304
  """tokenize text from the input_ch to words"""
344
305
  word_stream = None
345
306
  async for input in self._input_ch:
346
307
  if isinstance(input, str):
347
308
  if word_stream is None:
348
- # new segment (after flush for e.g)
349
309
  word_stream = self._opts.word_tokenizer.stream()
350
310
  self._segments_ch.send_nowait(word_stream)
311
+
351
312
  word_stream.push_text(input)
352
313
  elif isinstance(input, self._FlushSentinel):
353
314
  if word_stream is not None:
354
315
  word_stream.end_input()
316
+
355
317
  word_stream = None
318
+
356
319
  if word_stream is not None:
357
320
  word_stream.end_input()
321
+
358
322
  self._segments_ch.close()
359
323
 
360
- @utils.log_exceptions(logger=logger)
361
- async def _process_segments():
324
+ async def _process_segments() -> None:
362
325
  async for word_stream in self._segments_ch:
363
- await self._run_ws(word_stream, request_id)
326
+ await self._run_ws(word_stream, output_emitter)
364
327
 
365
328
  tasks = [
366
329
  asyncio.create_task(_tokenize_input()),
@@ -368,38 +331,32 @@ class SynthesizeStream(tts.SynthesizeStream):
368
331
  ]
369
332
  try:
370
333
  await asyncio.gather(*tasks)
371
- except asyncio.TimeoutError as e:
372
- raise APITimeoutError() from e
334
+ except asyncio.TimeoutError:
335
+ raise APITimeoutError() from None
373
336
  except aiohttp.ClientResponseError as e:
374
337
  raise APIStatusError(
375
- message=e.message,
376
- status_code=e.status,
377
- request_id=request_id,
378
- body=None,
379
- ) from e
338
+ message=e.message, status_code=e.status, request_id=request_id, body=None
339
+ ) from None
380
340
  except Exception as e:
381
341
  raise APIConnectionError() from e
382
342
  finally:
383
343
  await utils.aio.gracefully_cancel(*tasks)
384
344
 
385
345
  async def _run_ws(
386
- self,
387
- word_stream: tokenize.WordStream,
388
- request_id: str,
346
+ self, word_stream: tokenize.WordStream, output_emitter: tts.AudioEmitter
389
347
  ) -> None:
390
- ws_conn = await self._session.ws_connect(
391
- _stream_url(self._opts),
392
- headers={AUTHORIZATION_HEADER: self._opts.api_key},
393
- )
394
-
395
348
  segment_id = utils.shortuuid()
396
- decoder = utils.codecs.AudioStreamDecoder(
397
- sample_rate=self._opts.sample_rate,
398
- num_channels=1,
349
+ output_emitter.start_segment(segment_id=segment_id)
350
+
351
+ ws_conn = await asyncio.wait_for(
352
+ self._tts._ensure_session().ws_connect(
353
+ _stream_url(self._opts), headers={AUTHORIZATION_HEADER: self._opts.api_key}
354
+ ),
355
+ timeout=self._conn_options.timeout,
399
356
  )
400
357
 
401
358
  # 11labs protocol expects the first message to be an "init msg"
402
- init_pkt = {
359
+ init_pkt: dict = {
403
360
  "text": " ",
404
361
  }
405
362
  if is_given(self._opts.chunk_length_schedule):
@@ -412,9 +369,9 @@ class SynthesizeStream(tts.SynthesizeStream):
412
369
  eos_sent = False
413
370
 
414
371
  @utils.log_exceptions(logger=logger)
415
- async def send_task():
372
+ async def send_task() -> None:
416
373
  nonlocal eos_sent
417
- xml_content = []
374
+ xml_content: list[str] = []
418
375
  async for data in word_stream:
419
376
  text = data.token
420
377
  # send xml tags fully formed
@@ -446,21 +403,9 @@ class SynthesizeStream(tts.SynthesizeStream):
446
403
  await ws_conn.send_str(json.dumps(eos_pkt))
447
404
  eos_sent = True
448
405
 
449
- # consumes from decoder and generates events
450
- @utils.log_exceptions(logger=logger)
451
- async def generate_task():
452
- emitter = tts.SynthesizedAudioEmitter(
453
- event_ch=self._event_ch,
454
- request_id=request_id,
455
- segment_id=segment_id,
456
- )
457
- async for frame in decoder:
458
- emitter.push(frame)
459
- emitter.flush()
460
-
461
406
  # receives from ws and decodes audio
462
407
  @utils.log_exceptions(logger=logger)
463
- async def recv_task():
408
+ async def recv_task() -> None:
464
409
  nonlocal eos_sent
465
410
 
466
411
  while True:
@@ -473,7 +418,6 @@ class SynthesizeStream(tts.SynthesizeStream):
473
418
  if not eos_sent:
474
419
  raise APIStatusError(
475
420
  "11labs connection closed unexpectedly, not all tokens have been consumed", # noqa: E501
476
- request_id=request_id,
477
421
  )
478
422
  return
479
423
 
@@ -484,61 +428,35 @@ class SynthesizeStream(tts.SynthesizeStream):
484
428
  data = json.loads(msg.data)
485
429
  if data.get("audio"):
486
430
  b64data = base64.b64decode(data["audio"])
487
- decoder.push(b64data)
431
+ output_emitter.push(b64data)
488
432
  elif data.get("isFinal"):
489
- decoder.end_input()
490
- break
433
+ output_emitter.end_input()
434
+ return # 11labs only allow one segment per connection
491
435
  elif data.get("error"):
492
- raise APIStatusError(
493
- message=data["error"],
494
- status_code=500,
495
- request_id=request_id,
496
- body=None,
497
- )
436
+ raise APIError(message=data["error"])
498
437
  else:
499
- logger.warning(f"unexpected 11labs message {data}")
438
+ raise APIError("unexpected 11labs message {data}")
500
439
 
501
440
  tasks = [
502
441
  asyncio.create_task(send_task()),
503
442
  asyncio.create_task(recv_task()),
504
- asyncio.create_task(generate_task()),
505
443
  ]
506
444
  try:
507
445
  await asyncio.gather(*tasks)
508
- except asyncio.TimeoutError as e:
509
- raise APITimeoutError() from e
510
- except aiohttp.ClientResponseError as e:
511
- raise APIStatusError(
512
- message=e.message,
513
- status_code=e.status,
514
- request_id=request_id,
515
- body=None,
516
- ) from e
517
- except APIStatusError:
518
- raise
519
- except Exception as e:
520
- raise APIConnectionError() from e
521
446
  finally:
522
447
  await utils.aio.gracefully_cancel(*tasks)
523
- await decoder.aclose()
524
- if ws_conn is not None:
525
- await ws_conn.close()
448
+ await ws_conn.close()
526
449
 
527
450
 
528
- def _dict_to_voices_list(data: dict[str, Any]):
451
+ def _dict_to_voices_list(data: dict[str, Any]) -> list[Voice]:
529
452
  voices: list[Voice] = []
530
453
  for voice in data["voices"]:
531
- voices.append(
532
- Voice(
533
- id=voice["voice_id"],
534
- name=voice["name"],
535
- category=voice["category"],
536
- )
537
- )
454
+ voices.append(Voice(id=voice["voice_id"], name=voice["name"], category=voice["category"]))
455
+
538
456
  return voices
539
457
 
540
458
 
541
- def _strip_nones(data: dict[str, Any]):
459
+ def _strip_nones(data: dict[str, Any]) -> dict[str, Any]:
542
460
  return {k: v for k, v in data.items() if is_given(v) and v is not None}
543
461
 
544
462
 
@@ -573,4 +491,5 @@ def _stream_url(opts: _TTSOptions) -> str:
573
491
  url += f"&language_code={language}"
574
492
  if is_given(opts.streaming_latency):
575
493
  url += f"&optimize_streaming_latency={opts.streaming_latency}"
494
+
576
495
  return url
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.23"
15
+ __version__ = "1.1.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 1.0.23
3
+ Version: 1.1.1
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
18
18
  Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
- Requires-Dist: livekit-agents[codecs]>=1.0.23
21
+ Requires-Dist: livekit-agents[codecs]>=1.1.1
22
22
  Description-Content-Type: text/markdown
23
23
 
24
24
  # ElevenLabs plugin for LiveKit Agents
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=g3O0QnvcHwntPA_Wz0iKBPiL5xYeIYcMGc9UYI7No80,1427
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=fGZhu8IW1nKVWyeU-L3MFzhVjsRXqUhJblO9jt7jplY,418
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/stt.py,sha256=xgGF3M15Wd_xb5Oh8YVSrQkeB7BkDAQ2lZi86n7Ecdk,4565
6
+ livekit/plugins/elevenlabs/tts.py,sha256=GObEz_g74Dh79UHrKl1XTXQjjwcArZua2LD0mBmAzlc,18063
7
+ livekit/plugins/elevenlabs/version.py,sha256=E83fn58yMTAPuXx54IpvCWS3c9SBNF9zNIhbism7Hz0,600
8
+ livekit_plugins_elevenlabs-1.1.1.dist-info/METADATA,sha256=nL78CaTKgZas0_WM2zUytLiEhSYL9nUvs4KHg5bNx7Y,1453
9
+ livekit_plugins_elevenlabs-1.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ livekit_plugins_elevenlabs-1.1.1.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=wDHAYf2M89U-wTdW8UjDPI_TQaO9XK03Y0mJpsXvobk,1419
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=p_wHEz15bdsNEqwzN831ysm70PNWQ-xeN__BKvGPZxA,401
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/stt.py,sha256=1B8c7t_52GIbnPSFLq44Fkm0gnnFUZs7xX9nIWbsAQM,4528
6
- livekit/plugins/elevenlabs/tts.py,sha256=MxIt8FR0V9vs6VEKkeC_LNBnksa1f-U3YHKh243m9fo,20109
7
- livekit/plugins/elevenlabs/version.py,sha256=BRUqwxRBnPVqEcIODJdaZHGAanu4zkwM4NsAQjNtUEM,601
8
- livekit_plugins_elevenlabs-1.0.23.dist-info/METADATA,sha256=8Jk3H2h12K4Rbdoykcb_so8CKqh13PtTMOhs0BM1cds,1455
9
- livekit_plugins_elevenlabs-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- livekit_plugins_elevenlabs-1.0.23.dist-info/RECORD,,