livekit-plugins-elevenlabs 0.7.14__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,14 +10,4 @@ TTSModels = Literal[
10
10
  "eleven_flash_v2",
11
11
  ]
12
12
 
13
- TTSEncoding = Literal[
14
- "mp3_22050_32",
15
- "mp3_44100_32",
16
- "mp3_44100_64",
17
- "mp3_44100_96",
18
- "mp3_44100_128",
19
- "mp3_44100_192",
20
- "pcm_16000",
21
- "pcm_22050",
22
- "pcm_44100",
23
- ]
13
+ TTSEncoding = Literal["mp3_44100",]
@@ -21,10 +21,9 @@ import json
21
21
  import os
22
22
  import weakref
23
23
  from dataclasses import dataclass
24
- from typing import Any, List, Literal, Optional
24
+ from typing import Any, List, Optional
25
25
 
26
26
  import aiohttp
27
- from livekit import rtc
28
27
  from livekit.agents import (
29
28
  APIConnectionError,
30
29
  APIConnectOptions,
@@ -38,28 +37,20 @@ from livekit.agents import (
38
37
  from .log import logger
39
38
  from .models import TTSEncoding, TTSModels
40
39
 
41
- _Encoding = Literal["mp3", "pcm"]
40
+ _DefaultEncoding: TTSEncoding = "mp3_44100"
42
41
 
43
42
 
44
43
  def _sample_rate_from_format(output_format: TTSEncoding) -> int:
45
- split = output_format.split("_") # e.g: mp3_22050_32
44
+ split = output_format.split("_") # e.g: mp3_44100
46
45
  return int(split[1])
47
46
 
48
47
 
49
- def _encoding_from_format(output_format: TTSEncoding) -> _Encoding:
50
- if output_format.startswith("mp3"):
51
- return "mp3"
52
- elif output_format.startswith("pcm"):
53
- return "pcm"
54
-
55
- raise ValueError(f"Unknown format: {output_format}")
56
-
57
-
58
48
  @dataclass
59
49
  class VoiceSettings:
60
50
  stability: float # [0.0 - 1.0]
61
51
  similarity_boost: float # [0.0 - 1.0]
62
52
  style: float | None = None # [0.0 - 1.0]
53
+ speed: float | None = 1.0 # [0.8 - 1.2]
63
54
  use_speaker_boost: bool | None = False
64
55
 
65
56
 
@@ -76,12 +67,17 @@ DEFAULT_VOICE = Voice(
76
67
  name="Bella",
77
68
  category="premade",
78
69
  settings=VoiceSettings(
79
- stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True
70
+ stability=0.71,
71
+ speed=1.0,
72
+ similarity_boost=0.5,
73
+ style=0.0,
74
+ use_speaker_boost=True,
80
75
  ),
81
76
  )
82
77
 
83
78
  API_BASE_URL_V1 = "https://api.elevenlabs.io/v1"
84
79
  AUTHORIZATION_HEADER = "xi-api-key"
80
+ WS_INACTIVITY_TIMEOUT = 300
85
81
 
86
82
 
87
83
  @dataclass
@@ -97,6 +93,7 @@ class _TTSOptions:
97
93
  word_tokenizer: tokenize.WordTokenizer
98
94
  chunk_length_schedule: list[int]
99
95
  enable_ssml_parsing: bool
96
+ inactivity_timeout: int
100
97
 
101
98
 
102
99
  class TTS(tts.TTS):
@@ -107,11 +104,9 @@ class TTS(tts.TTS):
107
104
  model: TTSModels | str = "eleven_flash_v2_5",
108
105
  api_key: str | None = None,
109
106
  base_url: str | None = None,
110
- encoding: TTSEncoding = "mp3_22050_32",
111
- streaming_latency: int = 3,
112
- word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
113
- ignore_punctuation=False # punctuation can help for intonation
114
- ),
107
+ streaming_latency: int = 0,
108
+ inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
109
+ word_tokenizer: Optional[tokenize.WordTokenizer] = None,
115
110
  enable_ssml_parsing: bool = False,
116
111
  chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
117
112
  http_session: aiohttp.ClientSession | None = None,
@@ -127,8 +122,8 @@ class TTS(tts.TTS):
127
122
  model (TTSModels | str): TTS model to use. Defaults to "eleven_flash_v2_5".
128
123
  api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
129
124
  base_url (str | None): Custom base URL for the API. Optional.
130
- encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32".
131
- streaming_latency (int): Latency in seconds for streaming. Defaults to 3.
125
+ streaming_latency (int): Deprecated. Streaming-latency optimization level; defaults to 0 (disabled), while 4 applies the maximum latency optimizations.
126
+ inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
132
127
  word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
133
128
  enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
134
129
  chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
@@ -140,7 +135,7 @@ class TTS(tts.TTS):
140
135
  capabilities=tts.TTSCapabilities(
141
136
  streaming=True,
142
137
  ),
143
- sample_rate=_sample_rate_from_format(encoding),
138
+ sample_rate=_sample_rate_from_format(_DefaultEncoding),
144
139
  num_channels=1,
145
140
  )
146
141
 
@@ -156,39 +151,28 @@ class TTS(tts.TTS):
156
151
  "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
157
152
  )
158
153
 
154
+ if word_tokenizer is None:
155
+ word_tokenizer = tokenize.basic.WordTokenizer(
156
+ ignore_punctuation=False # punctuation can help for intonation
157
+ )
158
+
159
159
  self._opts = _TTSOptions(
160
160
  voice=voice,
161
161
  model=model,
162
162
  api_key=api_key,
163
163
  base_url=base_url or API_BASE_URL_V1,
164
- encoding=encoding,
164
+ encoding=_DefaultEncoding,
165
165
  sample_rate=self.sample_rate,
166
166
  streaming_latency=streaming_latency,
167
167
  word_tokenizer=word_tokenizer,
168
168
  chunk_length_schedule=chunk_length_schedule,
169
169
  enable_ssml_parsing=enable_ssml_parsing,
170
170
  language=language,
171
+ inactivity_timeout=inactivity_timeout,
171
172
  )
172
173
  self._session = http_session
173
- self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
174
- connect_cb=self._connect_ws,
175
- close_cb=self._close_ws,
176
- )
177
174
  self._streams = weakref.WeakSet[SynthesizeStream]()
178
175
 
179
- async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
180
- session = self._ensure_session()
181
- return await asyncio.wait_for(
182
- session.ws_connect(
183
- _stream_url(self._opts),
184
- headers={AUTHORIZATION_HEADER: self._opts.api_key},
185
- ),
186
- self._conn_options.timeout,
187
- )
188
-
189
- async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
190
- await ws.close()
191
-
192
176
  def _ensure_session(self) -> aiohttp.ClientSession:
193
177
  if not self._session:
194
178
  self._session = utils.http_context.http_session()
@@ -236,7 +220,12 @@ class TTS(tts.TTS):
236
220
  def stream(
237
221
  self, *, conn_options: Optional[APIConnectOptions] = None
238
222
  ) -> "SynthesizeStream":
239
- stream = SynthesizeStream(tts=self, pool=self._pool, opts=self._opts)
223
+ stream = SynthesizeStream(
224
+ tts=self,
225
+ conn_options=conn_options,
226
+ opts=self._opts,
227
+ session=self._ensure_session(),
228
+ )
240
229
  self._streams.add(stream)
241
230
  return stream
242
231
 
@@ -244,7 +233,6 @@ class TTS(tts.TTS):
244
233
  for stream in list(self._streams):
245
234
  await stream.aclose()
246
235
  self._streams.clear()
247
- await self._pool.aclose()
248
236
  await super().aclose()
249
237
 
250
238
 
@@ -262,15 +250,9 @@ class ChunkedStream(tts.ChunkedStream):
262
250
  ) -> None:
263
251
  super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
264
252
  self._opts, self._session = opts, session
265
- if _encoding_from_format(self._opts.encoding) == "mp3":
266
- self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
267
253
 
268
254
  async def _run(self) -> None:
269
255
  request_id = utils.shortuuid()
270
- bstream = utils.audio.AudioByteStream(
271
- sample_rate=self._opts.sample_rate, num_channels=1
272
- )
273
-
274
256
  voice_settings = (
275
257
  _strip_nones(dataclasses.asdict(self._opts.voice.settings))
276
258
  if self._opts.voice.settings
@@ -282,6 +264,12 @@ class ChunkedStream(tts.ChunkedStream):
282
264
  "voice_settings": voice_settings,
283
265
  }
284
266
 
267
+ decoder = utils.codecs.AudioStreamDecoder(
268
+ sample_rate=self._opts.sample_rate,
269
+ num_channels=1,
270
+ )
271
+
272
+ decode_task: asyncio.Task | None = None
285
273
  try:
286
274
  async with self._session.post(
287
275
  _synthesize_url(self._opts),
@@ -293,32 +281,21 @@ class ChunkedStream(tts.ChunkedStream):
293
281
  logger.error("11labs returned non-audio data: %s", content)
294
282
  return
295
283
 
296
- encoding = _encoding_from_format(self._opts.encoding)
297
- if encoding == "mp3":
298
- async for bytes_data, _ in resp.content.iter_chunks():
299
- for frame in self._mp3_decoder.decode_chunk(bytes_data):
300
- for frame in bstream.write(frame.data.tobytes()):
301
- self._event_ch.send_nowait(
302
- tts.SynthesizedAudio(
303
- request_id=request_id,
304
- frame=frame,
305
- )
306
- )
307
- else:
308
- async for bytes_data, _ in resp.content.iter_chunks():
309
- for frame in bstream.write(bytes_data):
310
- self._event_ch.send_nowait(
311
- tts.SynthesizedAudio(
312
- request_id=request_id,
313
- frame=frame,
314
- )
315
- )
316
-
317
- for frame in bstream.flush():
318
- self._event_ch.send_nowait(
319
- tts.SynthesizedAudio(request_id=request_id, frame=frame)
320
- )
284
+ async def _decode_loop():
285
+ try:
286
+ async for bytes_data, _ in resp.content.iter_chunks():
287
+ decoder.push(bytes_data)
288
+ finally:
289
+ decoder.end_input()
321
290
 
291
+ decode_task = asyncio.create_task(_decode_loop())
292
+ emitter = tts.SynthesizedAudioEmitter(
293
+ event_ch=self._event_ch,
294
+ request_id=request_id,
295
+ )
296
+ async for frame in decoder:
297
+ emitter.push(frame)
298
+ emitter.flush()
322
299
  except asyncio.TimeoutError as e:
323
300
  raise APITimeoutError() from e
324
301
  except aiohttp.ClientResponseError as e:
@@ -330,6 +307,10 @@ class ChunkedStream(tts.ChunkedStream):
330
307
  ) from e
331
308
  except Exception as e:
332
309
  raise APIConnectionError() from e
310
+ finally:
311
+ if decode_task:
312
+ await utils.aio.gracefully_cancel(decode_task)
313
+ await decoder.aclose()
333
314
 
334
315
 
335
316
  class SynthesizeStream(tts.SynthesizeStream):
@@ -339,12 +320,12 @@ class SynthesizeStream(tts.SynthesizeStream):
339
320
  self,
340
321
  *,
341
322
  tts: TTS,
342
- pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
323
+ session: aiohttp.ClientSession,
343
324
  opts: _TTSOptions,
325
+ conn_options: Optional[APIConnectOptions] = None,
344
326
  ):
345
- super().__init__(tts=tts)
346
- self._opts, self._pool = opts, pool
347
- self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
327
+ super().__init__(tts=tts, conn_options=conn_options)
328
+ self._opts, self._session = opts, session
348
329
 
349
330
  async def _run(self) -> None:
350
331
  request_id = utils.shortuuid()
@@ -360,12 +341,13 @@ class SynthesizeStream(tts.SynthesizeStream):
360
341
  # new segment (after flush for e.g)
361
342
  word_stream = self._opts.word_tokenizer.stream()
362
343
  self._segments_ch.send_nowait(word_stream)
363
-
364
344
  word_stream.push_text(input)
365
345
  elif isinstance(input, self._FlushSentinel):
366
346
  if word_stream is not None:
367
347
  word_stream.end_input()
368
348
  word_stream = None
349
+ if word_stream is not None:
350
+ word_stream.end_input()
369
351
  self._segments_ch.close()
370
352
 
371
353
  @utils.log_exceptions(logger=logger)
@@ -398,144 +380,143 @@ class SynthesizeStream(tts.SynthesizeStream):
398
380
  word_stream: tokenize.WordStream,
399
381
  request_id: str,
400
382
  ) -> None:
401
- async with self._pool.connection() as ws_conn:
402
- segment_id = utils.shortuuid()
403
- expected_text = "" # accumulate all tokens sent
404
-
405
- # 11labs protocol expects the first message to be an "init msg"
406
- init_pkt = dict(
407
- text=" ",
408
- voice_settings=_strip_nones(
409
- dataclasses.asdict(self._opts.voice.settings)
410
- )
411
- if self._opts.voice.settings
412
- else None,
413
- generation_config=dict(
414
- chunk_length_schedule=self._opts.chunk_length_schedule
415
- ),
383
+ ws_conn = await self._session.ws_connect(
384
+ _stream_url(self._opts),
385
+ headers={AUTHORIZATION_HEADER: self._opts.api_key},
386
+ )
387
+
388
+ segment_id = utils.shortuuid()
389
+ decoder = utils.codecs.AudioStreamDecoder(
390
+ sample_rate=self._opts.sample_rate,
391
+ num_channels=1,
392
+ )
393
+
394
+ # 11labs protocol expects the first message to be an "init msg"
395
+ init_pkt = dict(
396
+ text=" ",
397
+ voice_settings=_strip_nones(dataclasses.asdict(self._opts.voice.settings))
398
+ if self._opts.voice.settings
399
+ else None,
400
+ generation_config=dict(
401
+ chunk_length_schedule=self._opts.chunk_length_schedule
402
+ ),
403
+ )
404
+ await ws_conn.send_str(json.dumps(init_pkt))
405
+ eos_sent = False
406
+
407
+ @utils.log_exceptions(logger=logger)
408
+ async def send_task():
409
+ nonlocal eos_sent
410
+ xml_content = []
411
+ async for data in word_stream:
412
+ text = data.token
413
+ # send the xml phoneme in one go
414
+ if (
415
+ self._opts.enable_ssml_parsing
416
+ and data.token.startswith("<phoneme")
417
+ or xml_content
418
+ ):
419
+ xml_content.append(text)
420
+ if data.token.find("</phoneme>") > -1:
421
+ text = self._opts.word_tokenizer.format_words(xml_content)
422
+ xml_content = []
423
+ else:
424
+ continue
425
+
426
+ data_pkt = dict(text=f"{text} ") # must always end with a space
427
+ self._mark_started()
428
+ await ws_conn.send_str(json.dumps(data_pkt))
429
+ if xml_content:
430
+ logger.warning("11labs stream ended with incomplete xml content")
431
+
432
+ # no more token, mark eos
433
+ eos_pkt = dict(text="")
434
+ await ws_conn.send_str(json.dumps(eos_pkt))
435
+ eos_sent = True
436
+
437
+ # consumes from decoder and generates events
438
+ @utils.log_exceptions(logger=logger)
439
+ async def generate_task():
440
+ emitter = tts.SynthesizedAudioEmitter(
441
+ event_ch=self._event_ch,
442
+ request_id=request_id,
443
+ segment_id=segment_id,
416
444
  )
417
- await ws_conn.send_str(json.dumps(init_pkt))
418
-
419
- async def send_task():
420
- nonlocal expected_text
421
- xml_content = []
422
- async for data in word_stream:
423
- text = data.token
424
- expected_text += text
425
- # send the xml phoneme in one go
426
- if (
427
- self._opts.enable_ssml_parsing
428
- and data.token.startswith("<phoneme")
429
- or xml_content
430
- ):
431
- xml_content.append(text)
432
- if text.find("</phoneme>") > -1:
433
- text = self._opts.word_tokenizer.format_words(xml_content)
434
- xml_content = []
435
- else:
436
- continue
437
-
438
- data_pkt = dict(text=f"{text} ") # must always end with a space
439
- self._mark_started()
440
- await ws_conn.send_str(json.dumps(data_pkt))
441
- if xml_content:
442
- logger.warning("11labs stream ended with incomplete xml content")
443
- await ws_conn.send_str(json.dumps({"flush": True}))
444
-
445
- async def recv_task():
446
- nonlocal expected_text
447
- received_text = ""
448
- audio_bstream = utils.audio.AudioByteStream(
449
- sample_rate=self._opts.sample_rate,
450
- num_channels=1,
451
- )
452
- last_frame: rtc.AudioFrame | None = None
453
-
454
- def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
455
- nonlocal last_frame
456
- if last_frame is not None:
457
- self._event_ch.send_nowait(
458
- tts.SynthesizedAudio(
459
- request_id=request_id,
460
- segment_id=segment_id,
461
- frame=last_frame,
462
- is_final=is_final,
463
- )
464
- )
465
- last_frame = None
466
-
467
- while True:
468
- msg = await ws_conn.receive()
469
- if msg.type in (
470
- aiohttp.WSMsgType.CLOSED,
471
- aiohttp.WSMsgType.CLOSE,
472
- aiohttp.WSMsgType.CLOSING,
473
- ):
445
+ async for frame in decoder:
446
+ emitter.push(frame)
447
+ emitter.flush()
448
+
449
+ # receives from ws and decodes audio
450
+ @utils.log_exceptions(logger=logger)
451
+ async def recv_task():
452
+ nonlocal eos_sent
453
+
454
+ while True:
455
+ msg = await ws_conn.receive()
456
+ if msg.type in (
457
+ aiohttp.WSMsgType.CLOSED,
458
+ aiohttp.WSMsgType.CLOSE,
459
+ aiohttp.WSMsgType.CLOSING,
460
+ ):
461
+ if not eos_sent:
474
462
  raise APIStatusError(
475
463
  "11labs connection closed unexpectedly, not all tokens have been consumed",
476
464
  request_id=request_id,
477
465
  )
466
+ return
478
467
 
479
- if msg.type != aiohttp.WSMsgType.TEXT:
480
- logger.warning("unexpected 11labs message type %s", msg.type)
481
- continue
482
-
483
- data = json.loads(msg.data)
484
- encoding = _encoding_from_format(self._opts.encoding)
485
- if data.get("audio"):
486
- b64data = base64.b64decode(data["audio"])
487
- if encoding == "mp3":
488
- for frame in self._mp3_decoder.decode_chunk(b64data):
489
- for frame in audio_bstream.write(frame.data.tobytes()):
490
- _send_last_frame(
491
- segment_id=segment_id, is_final=False
492
- )
493
- last_frame = frame
494
- else:
495
- for frame in audio_bstream.write(b64data):
496
- _send_last_frame(segment_id=segment_id, is_final=False)
497
- last_frame = frame
498
- elif data.get("isFinal"):
499
- for frame in audio_bstream.flush():
500
- _send_last_frame(segment_id=segment_id, is_final=False)
501
- last_frame = frame
502
- _send_last_frame(segment_id=segment_id, is_final=True)
503
- break
504
- elif data.get("error"):
505
- logger.error("11labs reported an error: %s", data["error"])
506
- else:
507
- logger.error("unexpected 11labs message %s", data)
468
+ if msg.type != aiohttp.WSMsgType.TEXT:
469
+ logger.warning("unexpected 11labs message type %s", msg.type)
470
+ continue
471
+
472
+ data = json.loads(msg.data)
473
+ if data.get("audio"):
474
+ b64data = base64.b64decode(data["audio"])
475
+ decoder.push(b64data)
476
+
477
+ elif data.get("isFinal"):
478
+ decoder.end_input()
479
+ break
480
+ elif data.get("error"):
481
+ raise APIStatusError(
482
+ message=data["error"],
483
+ status_code=500,
484
+ request_id=request_id,
485
+ body=None,
486
+ )
487
+ else:
488
+ raise APIStatusError(
489
+ message=f"unexpected 11labs message {data}",
490
+ status_code=500,
491
+ request_id=request_id,
492
+ body=None,
493
+ )
508
494
 
509
- if alignment := data.get("normalizedAlignment"):
510
- received_text += "".join(alignment.get("chars", [])).replace(
511
- " ", ""
512
- )
513
- if received_text == expected_text:
514
- for frame in audio_bstream.flush():
515
- _send_last_frame(segment_id=segment_id, is_final=False)
516
- last_frame = frame
517
- _send_last_frame(segment_id=segment_id, is_final=True)
518
- break
519
-
520
- tasks = [
521
- asyncio.create_task(send_task()),
522
- asyncio.create_task(recv_task()),
523
- ]
524
- try:
525
- await asyncio.gather(*tasks)
526
- except asyncio.TimeoutError as e:
527
- raise APITimeoutError() from e
528
- except aiohttp.ClientResponseError as e:
529
- raise APIStatusError(
530
- message=e.message,
531
- status_code=e.status,
532
- request_id=request_id,
533
- body=None,
534
- ) from e
535
- except Exception as e:
536
- raise APIConnectionError() from e
537
- finally:
538
- await utils.aio.gracefully_cancel(*tasks)
495
+ tasks = [
496
+ asyncio.create_task(send_task()),
497
+ asyncio.create_task(recv_task()),
498
+ asyncio.create_task(generate_task()),
499
+ ]
500
+ try:
501
+ await asyncio.gather(*tasks)
502
+ except asyncio.TimeoutError as e:
503
+ raise APITimeoutError() from e
504
+ except aiohttp.ClientResponseError as e:
505
+ raise APIStatusError(
506
+ message=e.message,
507
+ status_code=e.status,
508
+ request_id=request_id,
509
+ body=None,
510
+ ) from e
511
+ except APIStatusError:
512
+ raise
513
+ except Exception as e:
514
+ raise APIConnectionError() from e
515
+ finally:
516
+ await utils.aio.gracefully_cancel(*tasks)
517
+ await decoder.aclose()
518
+ if ws_conn is not None:
519
+ await ws_conn.close()
539
520
 
540
521
 
541
522
  def _dict_to_voices_list(data: dict[str, Any]):
@@ -561,11 +542,13 @@ def _synthesize_url(opts: _TTSOptions) -> str:
561
542
  voice_id = opts.voice.id
562
543
  model_id = opts.model
563
544
  output_format = opts.encoding
564
- latency = opts.streaming_latency
565
- return (
545
+ url = (
566
546
  f"{base_url}/text-to-speech/{voice_id}/stream?"
567
- f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
547
+ f"model_id={model_id}&output_format={output_format}"
568
548
  )
549
+ if opts.streaming_latency:
550
+ url += f"&optimize_streaming_latency={opts.streaming_latency}"
551
+ return url
569
552
 
570
553
 
571
554
  def _stream_url(opts: _TTSOptions) -> str:
@@ -573,14 +556,16 @@ def _stream_url(opts: _TTSOptions) -> str:
573
556
  voice_id = opts.voice.id
574
557
  model_id = opts.model
575
558
  output_format = opts.encoding
576
- latency = opts.streaming_latency
577
559
  enable_ssml = str(opts.enable_ssml_parsing).lower()
578
560
  language = opts.language
561
+ inactivity_timeout = opts.inactivity_timeout
579
562
  url = (
580
563
  f"{base_url}/text-to-speech/{voice_id}/stream-input?"
581
- f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}&"
582
- f"enable_ssml_parsing={enable_ssml}"
564
+ f"model_id={model_id}&output_format={output_format}&"
565
+ f"enable_ssml_parsing={enable_ssml}&inactivity_timeout={inactivity_timeout}"
583
566
  )
584
567
  if language is not None:
585
568
  url += f"&language_code={language}"
569
+ if opts.streaming_latency:
570
+ url += f"&optimize_streaming_latency={opts.streaming_latency}"
586
571
  return url
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.7.14"
15
+ __version__ = "0.8.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.7.14
3
+ Version: 0.8.1
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents[codecs]>=0.12.11
22
+ Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
23
23
  Dynamic: classifier
24
24
  Dynamic: description
25
25
  Dynamic: description-content-type
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=-w8IeAvyQER4PLraajJz6OWDufvKpD_fPM8oPsYtX9s,19335
6
+ livekit/plugins/elevenlabs/version.py,sha256=PoHw-_DNE2B5SpeoQ-r6HSfVmbDgYuGamg0dN2jhayQ,600
7
+ livekit_plugins_elevenlabs-0.8.1.dist-info/METADATA,sha256=l8gbEDr8EsedqYQiqBhx6K9XwAdTtnQWVCxmlyjVG9w,1529
8
+ livekit_plugins_elevenlabs-0.8.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
9
+ livekit_plugins_elevenlabs-0.8.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (76.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=cVoaMYNlUXZzP-HOpbtU16OM9m-bACnSat8-o87tTyk,435
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=_d8V_YLx1tuScKtmDipoKHhqF3y68lXg03phixEHU3M,21419
6
- livekit/plugins/elevenlabs/version.py,sha256=1Trenk6kp4J1gdS0z55hdro60GNOnD1s0F3-AoNr4VM,601
7
- livekit_plugins_elevenlabs-0.7.14.dist-info/METADATA,sha256=WGgcKpZb9PYymh1pNvF7B5dhLXUlQj3n0ALlwJmfYfE,1523
8
- livekit_plugins_elevenlabs-0.7.14.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
9
- livekit_plugins_elevenlabs-0.7.14.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.7.14.dist-info/RECORD,,