livekit-plugins-elevenlabs 0.8.1__py3-none-any.whl → 1.0.0.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,9 +21,10 @@ import json
21
21
  import os
22
22
  import weakref
23
23
  from dataclasses import dataclass
24
- from typing import Any, List, Optional
24
+ from typing import Any
25
25
 
26
26
  import aiohttp
27
+
27
28
  from livekit.agents import (
28
29
  APIConnectionError,
29
30
  APIConnectOptions,
@@ -106,9 +107,9 @@ class TTS(tts.TTS):
106
107
  base_url: str | None = None,
107
108
  streaming_latency: int = 0,
108
109
  inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
109
- word_tokenizer: Optional[tokenize.WordTokenizer] = None,
110
+ word_tokenizer: tokenize.WordTokenizer | None = None,
110
111
  enable_ssml_parsing: bool = False,
111
- chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
112
+ chunk_length_schedule: list[int] = None, # range is [50, 500]
112
113
  http_session: aiohttp.ClientSession | None = None,
113
114
  # deprecated
114
115
  model_id: TTSModels | str | None = None,
@@ -131,6 +132,8 @@ class TTS(tts.TTS):
131
132
  language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
132
133
  """
133
134
 
135
+ if chunk_length_schedule is None:
136
+ chunk_length_schedule = [80, 120, 200, 260]
134
137
  super().__init__(
135
138
  capabilities=tts.TTSCapabilities(
136
139
  streaming=True,
@@ -171,15 +174,37 @@ class TTS(tts.TTS):
171
174
  inactivity_timeout=inactivity_timeout,
172
175
  )
173
176
  self._session = http_session
177
+ self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
178
+ connect_cb=self._connect_ws,
179
+ close_cb=self._close_ws,
180
+ max_session_duration=inactivity_timeout,
181
+ mark_refreshed_on_get=True,
182
+ )
174
183
  self._streams = weakref.WeakSet[SynthesizeStream]()
175
184
 
185
+ async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
186
+ session = self._ensure_session()
187
+ return await asyncio.wait_for(
188
+ session.ws_connect(
189
+ _stream_url(self._opts),
190
+ headers={AUTHORIZATION_HEADER: self._opts.api_key},
191
+ ),
192
+ self._conn_options.timeout,
193
+ )
194
+
195
+ async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
196
+ await ws.close()
197
+
176
198
  def _ensure_session(self) -> aiohttp.ClientSession:
177
199
  if not self._session:
178
200
  self._session = utils.http_context.http_session()
179
201
 
180
202
  return self._session
181
203
 
182
- async def list_voices(self) -> List[Voice]:
204
+ def prewarm(self) -> None:
205
+ self._pool.prewarm()
206
+
207
+ async def list_voices(self) -> list[Voice]:
183
208
  async with self._ensure_session().get(
184
209
  f"{self._opts.base_url}/voices",
185
210
  headers={AUTHORIZATION_HEADER: self._opts.api_key},
@@ -207,8 +232,8 @@ class TTS(tts.TTS):
207
232
  self,
208
233
  text: str,
209
234
  *,
210
- conn_options: Optional[APIConnectOptions] = None,
211
- ) -> "ChunkedStream":
235
+ conn_options: APIConnectOptions | None = None,
236
+ ) -> ChunkedStream:
212
237
  return ChunkedStream(
213
238
  tts=self,
214
239
  input_text=text,
@@ -217,15 +242,8 @@ class TTS(tts.TTS):
217
242
  session=self._ensure_session(),
218
243
  )
219
244
 
220
- def stream(
221
- self, *, conn_options: Optional[APIConnectOptions] = None
222
- ) -> "SynthesizeStream":
223
- stream = SynthesizeStream(
224
- tts=self,
225
- conn_options=conn_options,
226
- opts=self._opts,
227
- session=self._ensure_session(),
228
- )
245
+ def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
246
+ stream = SynthesizeStream(tts=self, pool=self._pool, opts=self._opts)
229
247
  self._streams.add(stream)
230
248
  return stream
231
249
 
@@ -233,6 +251,7 @@ class TTS(tts.TTS):
233
251
  for stream in list(self._streams):
234
252
  await stream.aclose()
235
253
  self._streams.clear()
254
+ await self._pool.aclose()
236
255
  await super().aclose()
237
256
 
238
257
 
@@ -245,7 +264,7 @@ class ChunkedStream(tts.ChunkedStream):
245
264
  tts: TTS,
246
265
  input_text: str,
247
266
  opts: _TTSOptions,
248
- conn_options: Optional[APIConnectOptions] = None,
267
+ conn_options: APIConnectOptions | None = None,
249
268
  session: aiohttp.ClientSession,
250
269
  ) -> None:
251
270
  super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
@@ -320,12 +339,11 @@ class SynthesizeStream(tts.SynthesizeStream):
320
339
  self,
321
340
  *,
322
341
  tts: TTS,
323
- session: aiohttp.ClientSession,
342
+ pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
324
343
  opts: _TTSOptions,
325
- conn_options: Optional[APIConnectOptions] = None,
326
344
  ):
327
- super().__init__(tts=tts, conn_options=conn_options)
328
- self._opts, self._session = opts, session
345
+ super().__init__(tts=tts)
346
+ self._opts, self._pool = opts, pool
329
347
 
330
348
  async def _run(self) -> None:
331
349
  request_id = utils.shortuuid()
@@ -380,147 +398,138 @@ class SynthesizeStream(tts.SynthesizeStream):
380
398
  word_stream: tokenize.WordStream,
381
399
  request_id: str,
382
400
  ) -> None:
383
- ws_conn = await self._session.ws_connect(
384
- _stream_url(self._opts),
385
- headers={AUTHORIZATION_HEADER: self._opts.api_key},
386
- )
387
-
388
- segment_id = utils.shortuuid()
389
- decoder = utils.codecs.AudioStreamDecoder(
390
- sample_rate=self._opts.sample_rate,
391
- num_channels=1,
392
- )
393
-
394
- # 11labs protocol expects the first message to be an "init msg"
395
- init_pkt = dict(
396
- text=" ",
397
- voice_settings=_strip_nones(dataclasses.asdict(self._opts.voice.settings))
398
- if self._opts.voice.settings
399
- else None,
400
- generation_config=dict(
401
- chunk_length_schedule=self._opts.chunk_length_schedule
402
- ),
403
- )
404
- await ws_conn.send_str(json.dumps(init_pkt))
405
- eos_sent = False
406
-
407
- @utils.log_exceptions(logger=logger)
408
- async def send_task():
409
- nonlocal eos_sent
410
- xml_content = []
411
- async for data in word_stream:
412
- text = data.token
413
- # send the xml phoneme in one go
414
- if (
415
- self._opts.enable_ssml_parsing
416
- and data.token.startswith("<phoneme")
417
- or xml_content
418
- ):
419
- xml_content.append(text)
420
- if data.token.find("</phoneme>") > -1:
421
- text = self._opts.word_tokenizer.format_words(xml_content)
422
- xml_content = []
423
- else:
424
- continue
425
-
426
- data_pkt = dict(text=f"{text} ") # must always end with a space
427
- self._mark_started()
428
- await ws_conn.send_str(json.dumps(data_pkt))
429
- if xml_content:
430
- logger.warning("11labs stream ended with incomplete xml content")
431
-
432
- # no more token, mark eos
433
- eos_pkt = dict(text="")
434
- await ws_conn.send_str(json.dumps(eos_pkt))
435
- eos_sent = True
401
+ async with self._pool.connection() as ws_conn:
402
+ segment_id = utils.shortuuid()
403
+ expected_text = "" # accumulate all tokens sent
436
404
 
437
- # consumes from decoder and generates events
438
- @utils.log_exceptions(logger=logger)
439
- async def generate_task():
440
- emitter = tts.SynthesizedAudioEmitter(
441
- event_ch=self._event_ch,
442
- request_id=request_id,
443
- segment_id=segment_id,
405
+ decoder = utils.codecs.AudioStreamDecoder(
406
+ sample_rate=self._opts.sample_rate,
407
+ num_channels=1,
444
408
  )
445
- async for frame in decoder:
446
- emitter.push(frame)
447
- emitter.flush()
448
409
 
449
- # receives from ws and decodes audio
450
- @utils.log_exceptions(logger=logger)
451
- async def recv_task():
452
- nonlocal eos_sent
453
-
454
- while True:
455
- msg = await ws_conn.receive()
456
- if msg.type in (
457
- aiohttp.WSMsgType.CLOSED,
458
- aiohttp.WSMsgType.CLOSE,
459
- aiohttp.WSMsgType.CLOSING,
460
- ):
461
- if not eos_sent:
410
+ # 11labs protocol expects the first message to be an "init msg"
411
+ init_pkt = {
412
+ "text": " ",
413
+ "voice_settings": _strip_nones(dataclasses.asdict(self._opts.voice.settings))
414
+ if self._opts.voice.settings
415
+ else None,
416
+ "generation_config": {"chunk_length_schedule": self._opts.chunk_length_schedule},
417
+ }
418
+ await ws_conn.send_str(json.dumps(init_pkt))
419
+
420
+ @utils.log_exceptions(logger=logger)
421
+ async def send_task():
422
+ nonlocal expected_text
423
+ xml_content = []
424
+ async for data in word_stream:
425
+ text = data.token
426
+ expected_text += text
427
+ # send the xml phoneme in one go
428
+ if (
429
+ self._opts.enable_ssml_parsing
430
+ and data.token.startswith("<phoneme")
431
+ or xml_content
432
+ ):
433
+ xml_content.append(text)
434
+ if text.find("</phoneme>") > -1:
435
+ text = self._opts.word_tokenizer.format_words(xml_content)
436
+ xml_content = []
437
+ else:
438
+ continue
439
+
440
+ data_pkt = {"text": f"{text} "} # must always end with a space
441
+ self._mark_started()
442
+ await ws_conn.send_str(json.dumps(data_pkt))
443
+ if xml_content:
444
+ logger.warning("11labs stream ended with incomplete xml content")
445
+ await ws_conn.send_str(json.dumps({"flush": True}))
446
+
447
+ # consumes from decoder and generates events
448
+ @utils.log_exceptions(logger=logger)
449
+ async def generate_task():
450
+ emitter = tts.SynthesizedAudioEmitter(
451
+ event_ch=self._event_ch,
452
+ request_id=request_id,
453
+ segment_id=segment_id,
454
+ )
455
+ async for frame in decoder:
456
+ emitter.push(frame)
457
+ emitter.flush()
458
+
459
+ # receives from ws and decodes audio
460
+ @utils.log_exceptions(logger=logger)
461
+ async def recv_task():
462
+ nonlocal expected_text
463
+ received_text = ""
464
+
465
+ while True:
466
+ msg = await ws_conn.receive()
467
+ if msg.type in (
468
+ aiohttp.WSMsgType.CLOSED,
469
+ aiohttp.WSMsgType.CLOSE,
470
+ aiohttp.WSMsgType.CLOSING,
471
+ ):
462
472
  raise APIStatusError(
463
473
  "11labs connection closed unexpectedly, not all tokens have been consumed",
464
474
  request_id=request_id,
465
475
  )
466
- return
467
476
 
468
- if msg.type != aiohttp.WSMsgType.TEXT:
469
- logger.warning("unexpected 11labs message type %s", msg.type)
470
- continue
471
-
472
- data = json.loads(msg.data)
473
- if data.get("audio"):
474
- b64data = base64.b64decode(data["audio"])
475
- decoder.push(b64data)
476
-
477
- elif data.get("isFinal"):
478
- decoder.end_input()
479
- break
480
- elif data.get("error"):
481
- raise APIStatusError(
482
- message=data["error"],
483
- status_code=500,
484
- request_id=request_id,
485
- body=None,
486
- )
487
- else:
488
- raise APIStatusError(
489
- message=f"unexpected 11labs message {data}",
490
- status_code=500,
491
- request_id=request_id,
492
- body=None,
493
- )
477
+ if msg.type != aiohttp.WSMsgType.TEXT:
478
+ logger.warning("unexpected 11labs message type %s", msg.type)
479
+ continue
494
480
 
495
- tasks = [
496
- asyncio.create_task(send_task()),
497
- asyncio.create_task(recv_task()),
498
- asyncio.create_task(generate_task()),
499
- ]
500
- try:
501
- await asyncio.gather(*tasks)
502
- except asyncio.TimeoutError as e:
503
- raise APITimeoutError() from e
504
- except aiohttp.ClientResponseError as e:
505
- raise APIStatusError(
506
- message=e.message,
507
- status_code=e.status,
508
- request_id=request_id,
509
- body=None,
510
- ) from e
511
- except APIStatusError:
512
- raise
513
- except Exception as e:
514
- raise APIConnectionError() from e
515
- finally:
516
- await utils.aio.gracefully_cancel(*tasks)
517
- await decoder.aclose()
518
- if ws_conn is not None:
519
- await ws_conn.close()
481
+ data = json.loads(msg.data)
482
+ if data.get("audio"):
483
+ b64data = base64.b64decode(data["audio"])
484
+ decoder.push(b64data)
485
+
486
+ if alignment := data.get("normalizedAlignment"):
487
+ received_text += "".join(alignment.get("chars", [])).replace(" ", "")
488
+ if received_text == expected_text:
489
+ decoder.end_input()
490
+ break
491
+ elif data.get("error"):
492
+ raise APIStatusError(
493
+ message=data["error"],
494
+ status_code=500,
495
+ request_id=request_id,
496
+ body=None,
497
+ )
498
+ else:
499
+ raise APIStatusError(
500
+ message=f"unexpected 11labs message {data}",
501
+ status_code=500,
502
+ request_id=request_id,
503
+ body=None,
504
+ )
505
+
506
+ tasks = [
507
+ asyncio.create_task(send_task()),
508
+ asyncio.create_task(recv_task()),
509
+ asyncio.create_task(generate_task()),
510
+ ]
511
+ try:
512
+ await asyncio.gather(*tasks)
513
+ except asyncio.TimeoutError as e:
514
+ raise APITimeoutError() from e
515
+ except aiohttp.ClientResponseError as e:
516
+ raise APIStatusError(
517
+ message=e.message,
518
+ status_code=e.status,
519
+ request_id=request_id,
520
+ body=None,
521
+ ) from e
522
+ except APIStatusError:
523
+ raise
524
+ except Exception as e:
525
+ raise APIConnectionError() from e
526
+ finally:
527
+ await utils.aio.gracefully_cancel(*tasks)
528
+ await decoder.aclose()
520
529
 
521
530
 
522
531
  def _dict_to_voices_list(data: dict[str, Any]):
523
- voices: List[Voice] = []
532
+ voices: list[Voice] = []
524
533
  for voice in data["voices"]:
525
534
  voices.append(
526
535
  Voice(
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.8.1"
15
+ __version__ = "1.0.0.dev4"
@@ -1,35 +1,25 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.8.1
3
+ Version: 1.0.0.dev4
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
- Home-page: https://github.com/livekit/agents
6
- License: Apache-2.0
7
5
  Project-URL: Documentation, https://docs.livekit.io
8
6
  Project-URL: Website, https://livekit.io/
9
7
  Project-URL: Source, https://github.com/livekit/agents
10
- Keywords: webrtc,realtime,audio,video,livekit,elevenlabs
8
+ Author-email: LiveKit <support@livekit.io>
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,elevenlabs,livekit,realtime,video,webrtc
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
- Classifier: Topic :: Multimedia :: Sound/Audio
14
- Classifier: Topic :: Multimedia :: Video
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
13
  Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
17
15
  Classifier: Programming Language :: Python :: 3.9
18
16
  Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Topic :: Multimedia :: Sound/Audio
18
+ Classifier: Topic :: Multimedia :: Video
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
+ Requires-Dist: livekit-agents>=1.0.0.dev4
21
22
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
23
- Dynamic: classifier
24
- Dynamic: description
25
- Dynamic: description-content-type
26
- Dynamic: home-page
27
- Dynamic: keywords
28
- Dynamic: license
29
- Dynamic: project-url
30
- Dynamic: requires-dist
31
- Dynamic: requires-python
32
- Dynamic: summary
33
23
 
34
24
  # LiveKit Plugins Elevenlabs
35
25
 
@@ -0,0 +1,9 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=eJ66yP3ta2FH0LgQ64wHdjOHEoavwguOg6GeaMIr9IU,20394
6
+ livekit/plugins/elevenlabs/version.py,sha256=koM_bT4QbztrKQ60Gjg7V4oe99CuxgGcpuUtWMOEKqU,605
7
+ livekit_plugins_elevenlabs-1.0.0.dev4.dist-info/METADATA,sha256=1YSGTLIaJURkWYEOIl2LqZLdgU3y1KFM4YvGvd8s4G8,1316
8
+ livekit_plugins_elevenlabs-1.0.0.dev4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_elevenlabs-1.0.0.dev4.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.1.0)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=-w8IeAvyQER4PLraajJz6OWDufvKpD_fPM8oPsYtX9s,19335
6
- livekit/plugins/elevenlabs/version.py,sha256=PoHw-_DNE2B5SpeoQ-r6HSfVmbDgYuGamg0dN2jhayQ,600
7
- livekit_plugins_elevenlabs-0.8.1.dist-info/METADATA,sha256=l8gbEDr8EsedqYQiqBhx6K9XwAdTtnQWVCxmlyjVG9w,1529
8
- livekit_plugins_elevenlabs-0.8.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
9
- livekit_plugins_elevenlabs-0.8.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- livekit