livekit-plugins-elevenlabs 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/elevenlabs/tts.py +137 -151
- livekit/plugins/elevenlabs/version.py +1 -1
- {livekit_plugins_elevenlabs-0.8.0.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/METADATA +1 -1
- livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD +10 -0
- {livekit_plugins_elevenlabs-0.8.0.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/WHEEL +1 -1
- livekit_plugins_elevenlabs-0.8.0.dist-info/RECORD +0 -10
- {livekit_plugins_elevenlabs-0.8.0.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/top_level.txt +0 -0
@@ -171,36 +171,14 @@ class TTS(tts.TTS):
|
|
171
171
|
inactivity_timeout=inactivity_timeout,
|
172
172
|
)
|
173
173
|
self._session = http_session
|
174
|
-
self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
|
175
|
-
connect_cb=self._connect_ws,
|
176
|
-
close_cb=self._close_ws,
|
177
|
-
max_session_duration=inactivity_timeout,
|
178
|
-
mark_refreshed_on_get=True,
|
179
|
-
)
|
180
174
|
self._streams = weakref.WeakSet[SynthesizeStream]()
|
181
175
|
|
182
|
-
async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
|
183
|
-
session = self._ensure_session()
|
184
|
-
return await asyncio.wait_for(
|
185
|
-
session.ws_connect(
|
186
|
-
_stream_url(self._opts),
|
187
|
-
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
188
|
-
),
|
189
|
-
self._conn_options.timeout,
|
190
|
-
)
|
191
|
-
|
192
|
-
async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
|
193
|
-
await ws.close()
|
194
|
-
|
195
176
|
def _ensure_session(self) -> aiohttp.ClientSession:
|
196
177
|
if not self._session:
|
197
178
|
self._session = utils.http_context.http_session()
|
198
179
|
|
199
180
|
return self._session
|
200
181
|
|
201
|
-
def prewarm(self) -> None:
|
202
|
-
self._pool.prewarm()
|
203
|
-
|
204
182
|
async def list_voices(self) -> List[Voice]:
|
205
183
|
async with self._ensure_session().get(
|
206
184
|
f"{self._opts.base_url}/voices",
|
@@ -242,7 +220,12 @@ class TTS(tts.TTS):
|
|
242
220
|
def stream(
|
243
221
|
self, *, conn_options: Optional[APIConnectOptions] = None
|
244
222
|
) -> "SynthesizeStream":
|
245
|
-
stream = SynthesizeStream(
|
223
|
+
stream = SynthesizeStream(
|
224
|
+
tts=self,
|
225
|
+
conn_options=conn_options,
|
226
|
+
opts=self._opts,
|
227
|
+
session=self._ensure_session(),
|
228
|
+
)
|
246
229
|
self._streams.add(stream)
|
247
230
|
return stream
|
248
231
|
|
@@ -250,7 +233,6 @@ class TTS(tts.TTS):
|
|
250
233
|
for stream in list(self._streams):
|
251
234
|
await stream.aclose()
|
252
235
|
self._streams.clear()
|
253
|
-
await self._pool.aclose()
|
254
236
|
await super().aclose()
|
255
237
|
|
256
238
|
|
@@ -338,11 +320,12 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
338
320
|
self,
|
339
321
|
*,
|
340
322
|
tts: TTS,
|
341
|
-
|
323
|
+
session: aiohttp.ClientSession,
|
342
324
|
opts: _TTSOptions,
|
325
|
+
conn_options: Optional[APIConnectOptions] = None,
|
343
326
|
):
|
344
|
-
super().__init__(tts=tts)
|
345
|
-
self._opts, self.
|
327
|
+
super().__init__(tts=tts, conn_options=conn_options)
|
328
|
+
self._opts, self._session = opts, session
|
346
329
|
|
347
330
|
async def _run(self) -> None:
|
348
331
|
request_id = utils.shortuuid()
|
@@ -397,140 +380,143 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
397
380
|
word_stream: tokenize.WordStream,
|
398
381
|
request_id: str,
|
399
382
|
) -> None:
|
400
|
-
|
401
|
-
|
402
|
-
|
383
|
+
ws_conn = await self._session.ws_connect(
|
384
|
+
_stream_url(self._opts),
|
385
|
+
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
386
|
+
)
|
403
387
|
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
388
|
+
segment_id = utils.shortuuid()
|
389
|
+
decoder = utils.codecs.AudioStreamDecoder(
|
390
|
+
sample_rate=self._opts.sample_rate,
|
391
|
+
num_channels=1,
|
392
|
+
)
|
408
393
|
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
394
|
+
# 11labs protocol expects the first message to be an "init msg"
|
395
|
+
init_pkt = dict(
|
396
|
+
text=" ",
|
397
|
+
voice_settings=_strip_nones(dataclasses.asdict(self._opts.voice.settings))
|
398
|
+
if self._opts.voice.settings
|
399
|
+
else None,
|
400
|
+
generation_config=dict(
|
401
|
+
chunk_length_schedule=self._opts.chunk_length_schedule
|
402
|
+
),
|
403
|
+
)
|
404
|
+
await ws_conn.send_str(json.dumps(init_pkt))
|
405
|
+
eos_sent = False
|
406
|
+
|
407
|
+
@utils.log_exceptions(logger=logger)
|
408
|
+
async def send_task():
|
409
|
+
nonlocal eos_sent
|
410
|
+
xml_content = []
|
411
|
+
async for data in word_stream:
|
412
|
+
text = data.token
|
413
|
+
# send the xml phoneme in one go
|
414
|
+
if (
|
415
|
+
self._opts.enable_ssml_parsing
|
416
|
+
and data.token.startswith("<phoneme")
|
417
|
+
or xml_content
|
418
|
+
):
|
419
|
+
xml_content.append(text)
|
420
|
+
if data.token.find("</phoneme>") > -1:
|
421
|
+
text = self._opts.word_tokenizer.format_words(xml_content)
|
422
|
+
xml_content = []
|
423
|
+
else:
|
424
|
+
continue
|
425
|
+
|
426
|
+
data_pkt = dict(text=f"{text} ") # must always end with a space
|
427
|
+
self._mark_started()
|
428
|
+
await ws_conn.send_str(json.dumps(data_pkt))
|
429
|
+
if xml_content:
|
430
|
+
logger.warning("11labs stream ended with incomplete xml content")
|
431
|
+
|
432
|
+
# no more token, mark eos
|
433
|
+
eos_pkt = dict(text="")
|
434
|
+
await ws_conn.send_str(json.dumps(eos_pkt))
|
435
|
+
eos_sent = True
|
436
|
+
|
437
|
+
# consumes from decoder and generates events
|
438
|
+
@utils.log_exceptions(logger=logger)
|
439
|
+
async def generate_task():
|
440
|
+
emitter = tts.SynthesizedAudioEmitter(
|
441
|
+
event_ch=self._event_ch,
|
442
|
+
request_id=request_id,
|
443
|
+
segment_id=segment_id,
|
420
444
|
)
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
async def send_task():
|
425
|
-
nonlocal expected_text
|
426
|
-
xml_content = []
|
427
|
-
async for data in word_stream:
|
428
|
-
text = data.token
|
429
|
-
expected_text += text
|
430
|
-
# send the xml phoneme in one go
|
431
|
-
if (
|
432
|
-
self._opts.enable_ssml_parsing
|
433
|
-
and data.token.startswith("<phoneme")
|
434
|
-
or xml_content
|
435
|
-
):
|
436
|
-
xml_content.append(text)
|
437
|
-
if text.find("</phoneme>") > -1:
|
438
|
-
text = self._opts.word_tokenizer.format_words(xml_content)
|
439
|
-
xml_content = []
|
440
|
-
else:
|
441
|
-
continue
|
442
|
-
|
443
|
-
data_pkt = dict(text=f"{text} ") # must always end with a space
|
444
|
-
self._mark_started()
|
445
|
-
await ws_conn.send_str(json.dumps(data_pkt))
|
446
|
-
if xml_content:
|
447
|
-
logger.warning("11labs stream ended with incomplete xml content")
|
448
|
-
await ws_conn.send_str(json.dumps({"flush": True}))
|
449
|
-
|
450
|
-
# consumes from decoder and generates events
|
451
|
-
@utils.log_exceptions(logger=logger)
|
452
|
-
async def generate_task():
|
453
|
-
emitter = tts.SynthesizedAudioEmitter(
|
454
|
-
event_ch=self._event_ch,
|
455
|
-
request_id=request_id,
|
456
|
-
segment_id=segment_id,
|
457
|
-
)
|
458
|
-
async for frame in decoder:
|
459
|
-
emitter.push(frame)
|
460
|
-
emitter.flush()
|
445
|
+
async for frame in decoder:
|
446
|
+
emitter.push(frame)
|
447
|
+
emitter.flush()
|
461
448
|
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
449
|
+
# receives from ws and decodes audio
|
450
|
+
@utils.log_exceptions(logger=logger)
|
451
|
+
async def recv_task():
|
452
|
+
nonlocal eos_sent
|
453
|
+
|
454
|
+
while True:
|
455
|
+
msg = await ws_conn.receive()
|
456
|
+
if msg.type in (
|
457
|
+
aiohttp.WSMsgType.CLOSED,
|
458
|
+
aiohttp.WSMsgType.CLOSE,
|
459
|
+
aiohttp.WSMsgType.CLOSING,
|
460
|
+
):
|
461
|
+
if not eos_sent:
|
475
462
|
raise APIStatusError(
|
476
463
|
"11labs connection closed unexpectedly, not all tokens have been consumed",
|
477
464
|
request_id=request_id,
|
478
465
|
)
|
466
|
+
return
|
479
467
|
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
status_code=500,
|
507
|
-
request_id=request_id,
|
508
|
-
body=None,
|
509
|
-
)
|
468
|
+
if msg.type != aiohttp.WSMsgType.TEXT:
|
469
|
+
logger.warning("unexpected 11labs message type %s", msg.type)
|
470
|
+
continue
|
471
|
+
|
472
|
+
data = json.loads(msg.data)
|
473
|
+
if data.get("audio"):
|
474
|
+
b64data = base64.b64decode(data["audio"])
|
475
|
+
decoder.push(b64data)
|
476
|
+
|
477
|
+
elif data.get("isFinal"):
|
478
|
+
decoder.end_input()
|
479
|
+
break
|
480
|
+
elif data.get("error"):
|
481
|
+
raise APIStatusError(
|
482
|
+
message=data["error"],
|
483
|
+
status_code=500,
|
484
|
+
request_id=request_id,
|
485
|
+
body=None,
|
486
|
+
)
|
487
|
+
else:
|
488
|
+
raise APIStatusError(
|
489
|
+
message=f"unexpected 11labs message {data}",
|
490
|
+
status_code=500,
|
491
|
+
request_id=request_id,
|
492
|
+
body=None,
|
493
|
+
)
|
510
494
|
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
495
|
+
tasks = [
|
496
|
+
asyncio.create_task(send_task()),
|
497
|
+
asyncio.create_task(recv_task()),
|
498
|
+
asyncio.create_task(generate_task()),
|
499
|
+
]
|
500
|
+
try:
|
501
|
+
await asyncio.gather(*tasks)
|
502
|
+
except asyncio.TimeoutError as e:
|
503
|
+
raise APITimeoutError() from e
|
504
|
+
except aiohttp.ClientResponseError as e:
|
505
|
+
raise APIStatusError(
|
506
|
+
message=e.message,
|
507
|
+
status_code=e.status,
|
508
|
+
request_id=request_id,
|
509
|
+
body=None,
|
510
|
+
) from e
|
511
|
+
except APIStatusError:
|
512
|
+
raise
|
513
|
+
except Exception as e:
|
514
|
+
raise APIConnectionError() from e
|
515
|
+
finally:
|
516
|
+
await utils.aio.gracefully_cancel(*tasks)
|
517
|
+
await decoder.aclose()
|
518
|
+
if ws_conn is not None:
|
519
|
+
await ws_conn.close()
|
534
520
|
|
535
521
|
|
536
522
|
def _dict_to_voices_list(data: dict[str, Any]):
|
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
|
2
|
+
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
+
livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
|
4
|
+
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/elevenlabs/tts.py,sha256=-w8IeAvyQER4PLraajJz6OWDufvKpD_fPM8oPsYtX9s,19335
|
6
|
+
livekit/plugins/elevenlabs/version.py,sha256=PoHw-_DNE2B5SpeoQ-r6HSfVmbDgYuGamg0dN2jhayQ,600
|
7
|
+
livekit_plugins_elevenlabs-0.8.1.dist-info/METADATA,sha256=l8gbEDr8EsedqYQiqBhx6K9XwAdTtnQWVCxmlyjVG9w,1529
|
8
|
+
livekit_plugins_elevenlabs-0.8.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
9
|
+
livekit_plugins_elevenlabs-0.8.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
+
livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
|
2
|
-
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
-
livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
|
4
|
-
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/elevenlabs/tts.py,sha256=KCZnuAngDZck4zIMMgp0BLV0GS31kKChMvdvXUVZ8vY,20491
|
6
|
-
livekit/plugins/elevenlabs/version.py,sha256=fObgfvFfJb5Vj0qY1hgEiVKSo6z6atjrJvwAVl4KvR4,600
|
7
|
-
livekit_plugins_elevenlabs-0.8.0.dist-info/METADATA,sha256=BwddENtvF9zqxTgjgIsHyavyRfA82TBISYEVwFfo2vs,1529
|
8
|
-
livekit_plugins_elevenlabs-0.8.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
9
|
-
livekit_plugins_elevenlabs-0.8.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_elevenlabs-0.8.0.dist-info/RECORD,,
|
File without changes
|