livekit-plugins-elevenlabs 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -171,36 +171,14 @@ class TTS(tts.TTS):
171
171
  inactivity_timeout=inactivity_timeout,
172
172
  )
173
173
  self._session = http_session
174
- self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
175
- connect_cb=self._connect_ws,
176
- close_cb=self._close_ws,
177
- max_session_duration=inactivity_timeout,
178
- mark_refreshed_on_get=True,
179
- )
180
174
  self._streams = weakref.WeakSet[SynthesizeStream]()
181
175
 
182
- async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
183
- session = self._ensure_session()
184
- return await asyncio.wait_for(
185
- session.ws_connect(
186
- _stream_url(self._opts),
187
- headers={AUTHORIZATION_HEADER: self._opts.api_key},
188
- ),
189
- self._conn_options.timeout,
190
- )
191
-
192
- async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
193
- await ws.close()
194
-
195
176
  def _ensure_session(self) -> aiohttp.ClientSession:
196
177
  if not self._session:
197
178
  self._session = utils.http_context.http_session()
198
179
 
199
180
  return self._session
200
181
 
201
- def prewarm(self) -> None:
202
- self._pool.prewarm()
203
-
204
182
  async def list_voices(self) -> List[Voice]:
205
183
  async with self._ensure_session().get(
206
184
  f"{self._opts.base_url}/voices",
@@ -242,7 +220,12 @@ class TTS(tts.TTS):
242
220
  def stream(
243
221
  self, *, conn_options: Optional[APIConnectOptions] = None
244
222
  ) -> "SynthesizeStream":
245
- stream = SynthesizeStream(tts=self, pool=self._pool, opts=self._opts)
223
+ stream = SynthesizeStream(
224
+ tts=self,
225
+ conn_options=conn_options,
226
+ opts=self._opts,
227
+ session=self._ensure_session(),
228
+ )
246
229
  self._streams.add(stream)
247
230
  return stream
248
231
 
@@ -250,7 +233,6 @@ class TTS(tts.TTS):
250
233
  for stream in list(self._streams):
251
234
  await stream.aclose()
252
235
  self._streams.clear()
253
- await self._pool.aclose()
254
236
  await super().aclose()
255
237
 
256
238
 
@@ -338,11 +320,12 @@ class SynthesizeStream(tts.SynthesizeStream):
338
320
  self,
339
321
  *,
340
322
  tts: TTS,
341
- pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
323
+ session: aiohttp.ClientSession,
342
324
  opts: _TTSOptions,
325
+ conn_options: Optional[APIConnectOptions] = None,
343
326
  ):
344
- super().__init__(tts=tts)
345
- self._opts, self._pool = opts, pool
327
+ super().__init__(tts=tts, conn_options=conn_options)
328
+ self._opts, self._session = opts, session
346
329
 
347
330
  async def _run(self) -> None:
348
331
  request_id = utils.shortuuid()
@@ -397,140 +380,143 @@ class SynthesizeStream(tts.SynthesizeStream):
397
380
  word_stream: tokenize.WordStream,
398
381
  request_id: str,
399
382
  ) -> None:
400
- async with self._pool.connection() as ws_conn:
401
- segment_id = utils.shortuuid()
402
- expected_text = "" # accumulate all tokens sent
383
+ ws_conn = await self._session.ws_connect(
384
+ _stream_url(self._opts),
385
+ headers={AUTHORIZATION_HEADER: self._opts.api_key},
386
+ )
403
387
 
404
- decoder = utils.codecs.AudioStreamDecoder(
405
- sample_rate=self._opts.sample_rate,
406
- num_channels=1,
407
- )
388
+ segment_id = utils.shortuuid()
389
+ decoder = utils.codecs.AudioStreamDecoder(
390
+ sample_rate=self._opts.sample_rate,
391
+ num_channels=1,
392
+ )
408
393
 
409
- # 11labs protocol expects the first message to be an "init msg"
410
- init_pkt = dict(
411
- text=" ",
412
- voice_settings=_strip_nones(
413
- dataclasses.asdict(self._opts.voice.settings)
414
- )
415
- if self._opts.voice.settings
416
- else None,
417
- generation_config=dict(
418
- chunk_length_schedule=self._opts.chunk_length_schedule
419
- ),
394
+ # 11labs protocol expects the first message to be an "init msg"
395
+ init_pkt = dict(
396
+ text=" ",
397
+ voice_settings=_strip_nones(dataclasses.asdict(self._opts.voice.settings))
398
+ if self._opts.voice.settings
399
+ else None,
400
+ generation_config=dict(
401
+ chunk_length_schedule=self._opts.chunk_length_schedule
402
+ ),
403
+ )
404
+ await ws_conn.send_str(json.dumps(init_pkt))
405
+ eos_sent = False
406
+
407
+ @utils.log_exceptions(logger=logger)
408
+ async def send_task():
409
+ nonlocal eos_sent
410
+ xml_content = []
411
+ async for data in word_stream:
412
+ text = data.token
413
+ # send the xml phoneme in one go
414
+ if (
415
+ self._opts.enable_ssml_parsing
416
+ and data.token.startswith("<phoneme")
417
+ or xml_content
418
+ ):
419
+ xml_content.append(text)
420
+ if data.token.find("</phoneme>") > -1:
421
+ text = self._opts.word_tokenizer.format_words(xml_content)
422
+ xml_content = []
423
+ else:
424
+ continue
425
+
426
+ data_pkt = dict(text=f"{text} ") # must always end with a space
427
+ self._mark_started()
428
+ await ws_conn.send_str(json.dumps(data_pkt))
429
+ if xml_content:
430
+ logger.warning("11labs stream ended with incomplete xml content")
431
+
432
+ # no more token, mark eos
433
+ eos_pkt = dict(text="")
434
+ await ws_conn.send_str(json.dumps(eos_pkt))
435
+ eos_sent = True
436
+
437
+ # consumes from decoder and generates events
438
+ @utils.log_exceptions(logger=logger)
439
+ async def generate_task():
440
+ emitter = tts.SynthesizedAudioEmitter(
441
+ event_ch=self._event_ch,
442
+ request_id=request_id,
443
+ segment_id=segment_id,
420
444
  )
421
- await ws_conn.send_str(json.dumps(init_pkt))
422
-
423
- @utils.log_exceptions(logger=logger)
424
- async def send_task():
425
- nonlocal expected_text
426
- xml_content = []
427
- async for data in word_stream:
428
- text = data.token
429
- expected_text += text
430
- # send the xml phoneme in one go
431
- if (
432
- self._opts.enable_ssml_parsing
433
- and data.token.startswith("<phoneme")
434
- or xml_content
435
- ):
436
- xml_content.append(text)
437
- if text.find("</phoneme>") > -1:
438
- text = self._opts.word_tokenizer.format_words(xml_content)
439
- xml_content = []
440
- else:
441
- continue
442
-
443
- data_pkt = dict(text=f"{text} ") # must always end with a space
444
- self._mark_started()
445
- await ws_conn.send_str(json.dumps(data_pkt))
446
- if xml_content:
447
- logger.warning("11labs stream ended with incomplete xml content")
448
- await ws_conn.send_str(json.dumps({"flush": True}))
449
-
450
- # consumes from decoder and generates events
451
- @utils.log_exceptions(logger=logger)
452
- async def generate_task():
453
- emitter = tts.SynthesizedAudioEmitter(
454
- event_ch=self._event_ch,
455
- request_id=request_id,
456
- segment_id=segment_id,
457
- )
458
- async for frame in decoder:
459
- emitter.push(frame)
460
- emitter.flush()
445
+ async for frame in decoder:
446
+ emitter.push(frame)
447
+ emitter.flush()
461
448
 
462
- # receives from ws and decodes audio
463
- @utils.log_exceptions(logger=logger)
464
- async def recv_task():
465
- nonlocal expected_text
466
- received_text = ""
467
-
468
- while True:
469
- msg = await ws_conn.receive()
470
- if msg.type in (
471
- aiohttp.WSMsgType.CLOSED,
472
- aiohttp.WSMsgType.CLOSE,
473
- aiohttp.WSMsgType.CLOSING,
474
- ):
449
+ # receives from ws and decodes audio
450
+ @utils.log_exceptions(logger=logger)
451
+ async def recv_task():
452
+ nonlocal eos_sent
453
+
454
+ while True:
455
+ msg = await ws_conn.receive()
456
+ if msg.type in (
457
+ aiohttp.WSMsgType.CLOSED,
458
+ aiohttp.WSMsgType.CLOSE,
459
+ aiohttp.WSMsgType.CLOSING,
460
+ ):
461
+ if not eos_sent:
475
462
  raise APIStatusError(
476
463
  "11labs connection closed unexpectedly, not all tokens have been consumed",
477
464
  request_id=request_id,
478
465
  )
466
+ return
479
467
 
480
- if msg.type != aiohttp.WSMsgType.TEXT:
481
- logger.warning("unexpected 11labs message type %s", msg.type)
482
- continue
483
-
484
- data = json.loads(msg.data)
485
- if data.get("audio"):
486
- b64data = base64.b64decode(data["audio"])
487
- decoder.push(b64data)
488
-
489
- if alignment := data.get("normalizedAlignment"):
490
- received_text += "".join(
491
- alignment.get("chars", [])
492
- ).replace(" ", "")
493
- if received_text == expected_text:
494
- decoder.end_input()
495
- break
496
- elif data.get("error"):
497
- raise APIStatusError(
498
- message=data["error"],
499
- status_code=500,
500
- request_id=request_id,
501
- body=None,
502
- )
503
- else:
504
- raise APIStatusError(
505
- message=f"unexpected 11labs message {data}",
506
- status_code=500,
507
- request_id=request_id,
508
- body=None,
509
- )
468
+ if msg.type != aiohttp.WSMsgType.TEXT:
469
+ logger.warning("unexpected 11labs message type %s", msg.type)
470
+ continue
471
+
472
+ data = json.loads(msg.data)
473
+ if data.get("audio"):
474
+ b64data = base64.b64decode(data["audio"])
475
+ decoder.push(b64data)
476
+
477
+ elif data.get("isFinal"):
478
+ decoder.end_input()
479
+ break
480
+ elif data.get("error"):
481
+ raise APIStatusError(
482
+ message=data["error"],
483
+ status_code=500,
484
+ request_id=request_id,
485
+ body=None,
486
+ )
487
+ else:
488
+ raise APIStatusError(
489
+ message=f"unexpected 11labs message {data}",
490
+ status_code=500,
491
+ request_id=request_id,
492
+ body=None,
493
+ )
510
494
 
511
- tasks = [
512
- asyncio.create_task(send_task()),
513
- asyncio.create_task(recv_task()),
514
- asyncio.create_task(generate_task()),
515
- ]
516
- try:
517
- await asyncio.gather(*tasks)
518
- except asyncio.TimeoutError as e:
519
- raise APITimeoutError() from e
520
- except aiohttp.ClientResponseError as e:
521
- raise APIStatusError(
522
- message=e.message,
523
- status_code=e.status,
524
- request_id=request_id,
525
- body=None,
526
- ) from e
527
- except APIStatusError:
528
- raise
529
- except Exception as e:
530
- raise APIConnectionError() from e
531
- finally:
532
- await utils.aio.gracefully_cancel(*tasks)
533
- await decoder.aclose()
495
+ tasks = [
496
+ asyncio.create_task(send_task()),
497
+ asyncio.create_task(recv_task()),
498
+ asyncio.create_task(generate_task()),
499
+ ]
500
+ try:
501
+ await asyncio.gather(*tasks)
502
+ except asyncio.TimeoutError as e:
503
+ raise APITimeoutError() from e
504
+ except aiohttp.ClientResponseError as e:
505
+ raise APIStatusError(
506
+ message=e.message,
507
+ status_code=e.status,
508
+ request_id=request_id,
509
+ body=None,
510
+ ) from e
511
+ except APIStatusError:
512
+ raise
513
+ except Exception as e:
514
+ raise APIConnectionError() from e
515
+ finally:
516
+ await utils.aio.gracefully_cancel(*tasks)
517
+ await decoder.aclose()
518
+ if ws_conn is not None:
519
+ await ws_conn.close()
534
520
 
535
521
 
536
522
  def _dict_to_voices_list(data: dict[str, Any]):
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.8.0"
15
+ __version__ = "0.8.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.8.0
3
+ Version: 0.8.1
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=-w8IeAvyQER4PLraajJz6OWDufvKpD_fPM8oPsYtX9s,19335
6
+ livekit/plugins/elevenlabs/version.py,sha256=PoHw-_DNE2B5SpeoQ-r6HSfVmbDgYuGamg0dN2jhayQ,600
7
+ livekit_plugins_elevenlabs-0.8.1.dist-info/METADATA,sha256=l8gbEDr8EsedqYQiqBhx6K9XwAdTtnQWVCxmlyjVG9w,1529
8
+ livekit_plugins_elevenlabs-0.8.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
9
+ livekit_plugins_elevenlabs-0.8.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (76.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=KCZnuAngDZck4zIMMgp0BLV0GS31kKChMvdvXUVZ8vY,20491
6
- livekit/plugins/elevenlabs/version.py,sha256=fObgfvFfJb5Vj0qY1hgEiVKSo6z6atjrJvwAVl4KvR4,600
7
- livekit_plugins_elevenlabs-0.8.0.dist-info/METADATA,sha256=BwddENtvF9zqxTgjgIsHyavyRfA82TBISYEVwFfo2vs,1529
8
- livekit_plugins_elevenlabs-0.8.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
9
- livekit_plugins_elevenlabs-0.8.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.8.0.dist-info/RECORD,,