livekit-plugins-elevenlabs 0.7.14__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/elevenlabs/models.py +1 -11
- livekit/plugins/elevenlabs/tts.py +200 -215
- livekit/plugins/elevenlabs/version.py +1 -1
- {livekit_plugins_elevenlabs-0.7.14.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/METADATA +2 -2
- livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD +10 -0
- {livekit_plugins_elevenlabs-0.7.14.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/WHEEL +1 -1
- livekit_plugins_elevenlabs-0.7.14.dist-info/RECORD +0 -10
- {livekit_plugins_elevenlabs-0.7.14.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/top_level.txt +0 -0
livekit/plugins/elevenlabs/models.py
CHANGED
@@ -10,14 +10,4 @@ TTSModels = Literal[
     "eleven_flash_v2",
 ]
 
-TTSEncoding = Literal[
-    "mp3_22050_32",
-    "mp3_44100_32",
-    "mp3_44100_64",
-    "mp3_44100_96",
-    "mp3_44100_128",
-    "mp3_44100_192",
-    "pcm_16000",
-    "pcm_22050",
-    "pcm_44100",
-]
+TTSEncoding = Literal["mp3_44100",]
livekit/plugins/elevenlabs/tts.py
CHANGED
@@ -21,10 +21,9 @@ import json
 import os
 import weakref
 from dataclasses import dataclass
-from typing import Any, List,
+from typing import Any, List, Optional
 
 import aiohttp
-from livekit import rtc
 from livekit.agents import (
     APIConnectionError,
     APIConnectOptions,
@@ -38,28 +37,20 @@ from livekit.agents import (
 from .log import logger
 from .models import TTSEncoding, TTSModels
 
-
+_DefaultEncoding: TTSEncoding = "mp3_44100"
 
 
 def _sample_rate_from_format(output_format: TTSEncoding) -> int:
-    split = output_format.split("_")  # e.g:
+    split = output_format.split("_")  # e.g: mp3_44100
     return int(split[1])
 
 
-def _encoding_from_format(output_format: TTSEncoding) -> _Encoding:
-    if output_format.startswith("mp3"):
-        return "mp3"
-    elif output_format.startswith("pcm"):
-        return "pcm"
-
-    raise ValueError(f"Unknown format: {output_format}")
-
-
 @dataclass
 class VoiceSettings:
     stability: float  # [0.0 - 1.0]
     similarity_boost: float  # [0.0 - 1.0]
     style: float | None = None  # [0.0 - 1.0]
+    speed: float | None = 1.0  # [0.8 - 1.2]
     use_speaker_boost: bool | None = False
 
 
@@ -76,12 +67,17 @@ DEFAULT_VOICE = Voice(
     name="Bella",
     category="premade",
     settings=VoiceSettings(
-        stability=0.71,
+        stability=0.71,
+        speed=1.0,
+        similarity_boost=0.5,
+        style=0.0,
+        use_speaker_boost=True,
     ),
 )
 
 API_BASE_URL_V1 = "https://api.elevenlabs.io/v1"
 AUTHORIZATION_HEADER = "xi-api-key"
+WS_INACTIVITY_TIMEOUT = 300
 
 
 @dataclass
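For callers, the practical effect of this hunk is that a voice's speaking rate can now be tuned. A minimal sketch, assuming the plugin keeps exporting `Voice` and `VoiceSettings` from `livekit.plugins.elevenlabs` with the fields shown above (the voice id is a placeholder):

```python
from livekit.plugins import elevenlabs

custom_voice = elevenlabs.Voice(
    id="your-voice-id",        # placeholder, substitute a real ElevenLabs voice id
    name="Bella",
    category="premade",
    settings=elevenlabs.VoiceSettings(
        stability=0.71,
        similarity_boost=0.5,
        style=0.0,
        speed=1.1,             # new in 0.8.x, documented range [0.8 - 1.2]
        use_speaker_boost=True,
    ),
)
```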
@@ -97,6 +93,7 @@ class _TTSOptions:
     word_tokenizer: tokenize.WordTokenizer
     chunk_length_schedule: list[int]
     enable_ssml_parsing: bool
+    inactivity_timeout: int
 
 
 class TTS(tts.TTS):
@@ -107,11 +104,9 @@ class TTS(tts.TTS):
         model: TTSModels | str = "eleven_flash_v2_5",
         api_key: str | None = None,
         base_url: str | None = None,
-
-
-        word_tokenizer: tokenize.WordTokenizer =
-            ignore_punctuation=False  # punctuation can help for intonation
-        ),
+        streaming_latency: int = 0,
+        inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
+        word_tokenizer: Optional[tokenize.WordTokenizer] = None,
         enable_ssml_parsing: bool = False,
         chunk_length_schedule: list[int] = [80, 120, 200, 260],  # range is [50, 500]
         http_session: aiohttp.ClientSession | None = None,
@@ -127,8 +122,8 @@ class TTS(tts.TTS):
             model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
             api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
             base_url (str | None): Custom base URL for the API. Optional.
-
-
+            streaming_latency (int): Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
+            inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
             word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
             enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
             chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
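The two new keyword arguments documented above slot into the constructor like this. A hedged usage sketch (import path assumed; values are the defaults spelled out in the hunk, and the API key is read from `ELEVEN_API_KEY` when not passed explicitly):

```python
from livekit.plugins import elevenlabs

tts = elevenlabs.TTS(
    model="eleven_flash_v2_5",
    streaming_latency=0,        # deprecated latency optimization, 0 = disabled
    inactivity_timeout=300,     # websocket inactivity timeout in seconds, new in 0.8.x
    enable_ssml_parsing=False,
    chunk_length_schedule=[80, 120, 200, 260],
    # word_tokenizer now defaults to None; a basic WordTokenizer is built in __init__
)
```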
@@ -140,7 +135,7 @@ class TTS(tts.TTS):
             capabilities=tts.TTSCapabilities(
                 streaming=True,
             ),
-            sample_rate=_sample_rate_from_format(
+            sample_rate=_sample_rate_from_format(_DefaultEncoding),
             num_channels=1,
         )
 
@@ -156,39 +151,28 @@ class TTS(tts.TTS):
                 "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
             )
 
+        if word_tokenizer is None:
+            word_tokenizer = tokenize.basic.WordTokenizer(
+                ignore_punctuation=False  # punctuation can help for intonation
+            )
+
         self._opts = _TTSOptions(
             voice=voice,
             model=model,
             api_key=api_key,
             base_url=base_url or API_BASE_URL_V1,
-            encoding=
+            encoding=_DefaultEncoding,
             sample_rate=self.sample_rate,
             streaming_latency=streaming_latency,
             word_tokenizer=word_tokenizer,
             chunk_length_schedule=chunk_length_schedule,
             enable_ssml_parsing=enable_ssml_parsing,
             language=language,
+            inactivity_timeout=inactivity_timeout,
         )
         self._session = http_session
-        self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
-            connect_cb=self._connect_ws,
-            close_cb=self._close_ws,
-        )
         self._streams = weakref.WeakSet[SynthesizeStream]()
 
-    async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
-        session = self._ensure_session()
-        return await asyncio.wait_for(
-            session.ws_connect(
-                _stream_url(self._opts),
-                headers={AUTHORIZATION_HEADER: self._opts.api_key},
-            ),
-            self._conn_options.timeout,
-        )
-
-    async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
-        await ws.close()
-
     def _ensure_session(self) -> aiohttp.ClientSession:
         if not self._session:
             self._session = utils.http_context.http_session()
@@ -236,7 +220,12 @@ class TTS(tts.TTS):
     def stream(
         self, *, conn_options: Optional[APIConnectOptions] = None
     ) -> "SynthesizeStream":
-        stream = SynthesizeStream(
+        stream = SynthesizeStream(
+            tts=self,
+            conn_options=conn_options,
+            opts=self._opts,
+            session=self._ensure_session(),
+        )
         self._streams.add(stream)
         return stream
 
@@ -244,7 +233,6 @@ class TTS(tts.TTS):
         for stream in list(self._streams):
             await stream.aclose()
         self._streams.clear()
-        await self._pool.aclose()
         await super().aclose()
 
 
@@ -262,15 +250,9 @@ class ChunkedStream(tts.ChunkedStream):
     ) -> None:
         super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
         self._opts, self._session = opts, session
-        if _encoding_from_format(self._opts.encoding) == "mp3":
-            self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
 
     async def _run(self) -> None:
         request_id = utils.shortuuid()
-        bstream = utils.audio.AudioByteStream(
-            sample_rate=self._opts.sample_rate, num_channels=1
-        )
-
         voice_settings = (
             _strip_nones(dataclasses.asdict(self._opts.voice.settings))
             if self._opts.voice.settings
@@ -282,6 +264,12 @@ class ChunkedStream(tts.ChunkedStream):
             "voice_settings": voice_settings,
         }
 
+        decoder = utils.codecs.AudioStreamDecoder(
+            sample_rate=self._opts.sample_rate,
+            num_channels=1,
+        )
+
+        decode_task: asyncio.Task | None = None
         try:
             async with self._session.post(
                 _synthesize_url(self._opts),
@@ -293,32 +281,21 @@ class ChunkedStream(tts.ChunkedStream):
                     logger.error("11labs returned non-audio data: %s", content)
                     return
 
-
-
-
-
-
-
-                                tts.SynthesizedAudio(
-                                    request_id=request_id,
-                                    frame=frame,
-                                )
-                            )
-                else:
-                    async for bytes_data, _ in resp.content.iter_chunks():
-                        for frame in bstream.write(bytes_data):
-                            self._event_ch.send_nowait(
-                                tts.SynthesizedAudio(
-                                    request_id=request_id,
-                                    frame=frame,
-                                )
-                            )
-
-                for frame in bstream.flush():
-                    self._event_ch.send_nowait(
-                        tts.SynthesizedAudio(request_id=request_id, frame=frame)
-                    )
+                async def _decode_loop():
+                    try:
+                        async for bytes_data, _ in resp.content.iter_chunks():
+                            decoder.push(bytes_data)
+                    finally:
+                        decoder.end_input()
 
+                decode_task = asyncio.create_task(_decode_loop())
+                emitter = tts.SynthesizedAudioEmitter(
+                    event_ch=self._event_ch,
+                    request_id=request_id,
+                )
+                async for frame in decoder:
+                    emitter.push(frame)
+                emitter.flush()
         except asyncio.TimeoutError as e:
             raise APITimeoutError() from e
         except aiohttp.ClientResponseError as e:
@@ -330,6 +307,10 @@ class ChunkedStream(tts.ChunkedStream):
             ) from e
         except Exception as e:
             raise APIConnectionError() from e
+        finally:
+            if decode_task:
+                await utils.aio.gracefully_cancel(decode_task)
+            await decoder.aclose()
 
 
 class SynthesizeStream(tts.SynthesizeStream):
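The rewritten `ChunkedStream._run` above replaces the old `AudioByteStream`/`Mp3StreamDecoder` handling with a push-based decoder: a background task feeds raw HTTP chunks in, the main coroutine iterates decoded frames out, and the `finally` block tears both down. The same producer/consumer shape, sketched with a stand-in queue-backed decoder (plain asyncio, not the livekit `utils.codecs` API):

```python
import asyncio


class FakeDecoder:
    """Stand-in for a push/iterate streaming decoder (illustrative only)."""

    def __init__(self) -> None:
        self._queue: asyncio.Queue = asyncio.Queue()

    def push(self, data: bytes) -> None:
        # accept another compressed chunk
        self._queue.put_nowait(data)

    def end_input(self) -> None:
        # sentinel: no more input will arrive
        self._queue.put_nowait(None)

    def __aiter__(self):
        return self

    async def __anext__(self) -> bytes:
        item = await self._queue.get()
        if item is None:
            raise StopAsyncIteration
        return item


async def main() -> None:
    decoder = FakeDecoder()

    async def _decode_loop() -> None:
        # producer: feed response chunks into the decoder, always close its input
        try:
            for chunk in (b"chunk-1", b"chunk-2", b"chunk-3"):
                decoder.push(chunk)
        finally:
            decoder.end_input()

    decode_task = asyncio.create_task(_decode_loop())
    try:
        async for frame in decoder:  # consumer: emit frames as they arrive
            print("frame:", frame)
    finally:
        await decode_task


asyncio.run(main())
```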
@@ -339,12 +320,12 @@ class SynthesizeStream(tts.SynthesizeStream):
         self,
         *,
         tts: TTS,
-
+        session: aiohttp.ClientSession,
         opts: _TTSOptions,
+        conn_options: Optional[APIConnectOptions] = None,
     ):
-        super().__init__(tts=tts)
-        self._opts, self.
-        self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
+        super().__init__(tts=tts, conn_options=conn_options)
+        self._opts, self._session = opts, session
 
     async def _run(self) -> None:
         request_id = utils.shortuuid()
@@ -360,12 +341,13 @@ class SynthesizeStream(tts.SynthesizeStream):
                     # new segment (after flush for e.g)
                     word_stream = self._opts.word_tokenizer.stream()
                     self._segments_ch.send_nowait(word_stream)
-
                 word_stream.push_text(input)
             elif isinstance(input, self._FlushSentinel):
                 if word_stream is not None:
                     word_stream.end_input()
                 word_stream = None
+        if word_stream is not None:
+            word_stream.end_input()
         self._segments_ch.close()
 
     @utils.log_exceptions(logger=logger)
@@ -398,144 +380,143 @@ class SynthesizeStream(tts.SynthesizeStream):
         word_stream: tokenize.WordStream,
         request_id: str,
     ) -> None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        ws_conn = await self._session.ws_connect(
+            _stream_url(self._opts),
+            headers={AUTHORIZATION_HEADER: self._opts.api_key},
+        )
+
+        segment_id = utils.shortuuid()
+        decoder = utils.codecs.AudioStreamDecoder(
+            sample_rate=self._opts.sample_rate,
+            num_channels=1,
+        )
+
+        # 11labs protocol expects the first message to be an "init msg"
+        init_pkt = dict(
+            text=" ",
+            voice_settings=_strip_nones(dataclasses.asdict(self._opts.voice.settings))
+            if self._opts.voice.settings
+            else None,
+            generation_config=dict(
+                chunk_length_schedule=self._opts.chunk_length_schedule
+            ),
+        )
+        await ws_conn.send_str(json.dumps(init_pkt))
+        eos_sent = False
+
+        @utils.log_exceptions(logger=logger)
+        async def send_task():
+            nonlocal eos_sent
+            xml_content = []
+            async for data in word_stream:
+                text = data.token
+                # send the xml phoneme in one go
+                if (
+                    self._opts.enable_ssml_parsing
+                    and data.token.startswith("<phoneme")
+                    or xml_content
+                ):
+                    xml_content.append(text)
+                    if data.token.find("</phoneme>") > -1:
+                        text = self._opts.word_tokenizer.format_words(xml_content)
+                        xml_content = []
+                    else:
+                        continue
+
+                data_pkt = dict(text=f"{text} ")  # must always end with a space
+                self._mark_started()
+                await ws_conn.send_str(json.dumps(data_pkt))
+            if xml_content:
+                logger.warning("11labs stream ended with incomplete xml content")
+
+            # no more token, mark eos
+            eos_pkt = dict(text="")
+            await ws_conn.send_str(json.dumps(eos_pkt))
+            eos_sent = True
+
+        # consumes from decoder and generates events
+        @utils.log_exceptions(logger=logger)
+        async def generate_task():
+            emitter = tts.SynthesizedAudioEmitter(
+                event_ch=self._event_ch,
+                request_id=request_id,
+                segment_id=segment_id,
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        xml_content = []
-                    else:
-                        continue
-
-                data_pkt = dict(text=f"{text} ")  # must always end with a space
-                self._mark_started()
-                await ws_conn.send_str(json.dumps(data_pkt))
-            if xml_content:
-                logger.warning("11labs stream ended with incomplete xml content")
-            await ws_conn.send_str(json.dumps({"flush": True}))
-
-        async def recv_task():
-            nonlocal expected_text
-            received_text = ""
-            audio_bstream = utils.audio.AudioByteStream(
-                sample_rate=self._opts.sample_rate,
-                num_channels=1,
-            )
-            last_frame: rtc.AudioFrame | None = None
-
-            def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
-                nonlocal last_frame
-                if last_frame is not None:
-                    self._event_ch.send_nowait(
-                        tts.SynthesizedAudio(
-                            request_id=request_id,
-                            segment_id=segment_id,
-                            frame=last_frame,
-                            is_final=is_final,
-                        )
-                    )
-                last_frame = None
-
-            while True:
-                msg = await ws_conn.receive()
-                if msg.type in (
-                    aiohttp.WSMsgType.CLOSED,
-                    aiohttp.WSMsgType.CLOSE,
-                    aiohttp.WSMsgType.CLOSING,
-                ):
+            async for frame in decoder:
+                emitter.push(frame)
+            emitter.flush()
+
+        # receives from ws and decodes audio
+        @utils.log_exceptions(logger=logger)
+        async def recv_task():
+            nonlocal eos_sent
+
+            while True:
+                msg = await ws_conn.receive()
+                if msg.type in (
+                    aiohttp.WSMsgType.CLOSED,
+                    aiohttp.WSMsgType.CLOSE,
+                    aiohttp.WSMsgType.CLOSING,
+                ):
+                    if not eos_sent:
                         raise APIStatusError(
                             "11labs connection closed unexpectedly, not all tokens have been consumed",
                             request_id=request_id,
                         )
+                    return
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    logger.error("11labs reported an error: %s", data["error"])
-                else:
-                    logger.error("unexpected 11labs message %s", data)
+                if msg.type != aiohttp.WSMsgType.TEXT:
+                    logger.warning("unexpected 11labs message type %s", msg.type)
+                    continue
+
+                data = json.loads(msg.data)
+                if data.get("audio"):
+                    b64data = base64.b64decode(data["audio"])
+                    decoder.push(b64data)
+
+                elif data.get("isFinal"):
+                    decoder.end_input()
+                    break
+                elif data.get("error"):
+                    raise APIStatusError(
+                        message=data["error"],
+                        status_code=500,
+                        request_id=request_id,
+                        body=None,
+                    )
+                else:
+                    raise APIStatusError(
+                        message=f"unexpected 11labs message {data}",
+                        status_code=500,
+                        request_id=request_id,
+                        body=None,
+                    )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            ) from e
-        except Exception as e:
-            raise APIConnectionError() from e
-        finally:
-            await utils.aio.gracefully_cancel(*tasks)
+        tasks = [
+            asyncio.create_task(send_task()),
+            asyncio.create_task(recv_task()),
+            asyncio.create_task(generate_task()),
+        ]
+        try:
+            await asyncio.gather(*tasks)
+        except asyncio.TimeoutError as e:
+            raise APITimeoutError() from e
+        except aiohttp.ClientResponseError as e:
+            raise APIStatusError(
+                message=e.message,
+                status_code=e.status,
+                request_id=request_id,
+                body=None,
+            ) from e
+        except APIStatusError:
+            raise
+        except Exception as e:
+            raise APIConnectionError() from e
+        finally:
+            await utils.aio.gracefully_cancel(*tasks)
+            await decoder.aclose()
+            if ws_conn is not None:
+                await ws_conn.close()
 
 
 def _dict_to_voices_list(data: dict[str, Any]):
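In plain terms, the rewritten websocket runner above drives the ElevenLabs `stream-input` endpoint directly: an init message (a single space plus voice settings and a generation config), one message per token with a trailing space, and an empty-text message to mark end of stream; audio comes back base64 encoded in `"audio"` fields until `"isFinal"`. A standalone sketch of just those JSON payloads (no network, values illustrative):

```python
import base64
import json

# Outgoing messages; shapes follow the hunk above, values are illustrative.
init_pkt = {
    "text": " ",  # init message: a single space plus settings
    "voice_settings": {"stability": 0.71, "similarity_boost": 0.5},
    "generation_config": {"chunk_length_schedule": [80, 120, 200, 260]},
}
data_pkt = {"text": "Hello world "}  # every token must end with a space
eos_pkt = {"text": ""}  # empty text tells the server there is no more input

for pkt in (init_pkt, data_pkt, eos_pkt):
    print(json.dumps(pkt))

# Incoming messages are JSON too: audio arrives base64 encoded until "isFinal".
incoming = json.dumps({"audio": base64.b64encode(b"\x00\x01").decode(), "isFinal": False})
msg = json.loads(incoming)
if msg.get("audio"):
    audio_bytes = base64.b64decode(msg["audio"])
    print(len(audio_bytes), "bytes of audio")
```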
@@ -561,11 +542,13 @@ def _synthesize_url(opts: _TTSOptions) -> str:
     voice_id = opts.voice.id
     model_id = opts.model
     output_format = opts.encoding
-
-    return (
+    url = (
         f"{base_url}/text-to-speech/{voice_id}/stream?"
-        f"model_id={model_id}&output_format={output_format}
+        f"model_id={model_id}&output_format={output_format}"
     )
+    if opts.streaming_latency:
+        url += f"&optimize_streaming_latency={opts.streaming_latency}"
+    return url
 
 
 def _stream_url(opts: _TTSOptions) -> str:
@@ -573,14 +556,16 @@ def _stream_url(opts: _TTSOptions) -> str:
     voice_id = opts.voice.id
     model_id = opts.model
     output_format = opts.encoding
-    latency = opts.streaming_latency
     enable_ssml = str(opts.enable_ssml_parsing).lower()
     language = opts.language
+    inactivity_timeout = opts.inactivity_timeout
     url = (
         f"{base_url}/text-to-speech/{voice_id}/stream-input?"
-        f"model_id={model_id}&output_format={output_format}&
-        f"enable_ssml_parsing={enable_ssml}"
+        f"model_id={model_id}&output_format={output_format}&"
+        f"enable_ssml_parsing={enable_ssml}&inactivity_timeout={inactivity_timeout}"
     )
     if language is not None:
         url += f"&language_code={language}"
+    if opts.streaming_latency:
+        url += f"&optimize_streaming_latency={opts.streaming_latency}"
     return url
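The URL builders above now append `inactivity_timeout` to the stream query and only add `optimize_streaming_latency` when it is non-zero. A standalone sketch mirroring `_stream_url`'s query-string logic (plain Python; the helper name is hypothetical):

```python
from typing import Optional


def stream_url(
    base_url: str,
    voice_id: str,
    model_id: str,
    output_format: str = "mp3_44100",
    enable_ssml_parsing: bool = False,
    inactivity_timeout: int = 300,
    streaming_latency: int = 0,
    language: Optional[str] = None,
) -> str:
    enable_ssml = str(enable_ssml_parsing).lower()
    url = (
        f"{base_url}/text-to-speech/{voice_id}/stream-input?"
        f"model_id={model_id}&output_format={output_format}&"
        f"enable_ssml_parsing={enable_ssml}&inactivity_timeout={inactivity_timeout}"
    )
    if language is not None:
        url += f"&language_code={language}"
    if streaming_latency:
        url += f"&optimize_streaming_latency={streaming_latency}"
    return url


print(stream_url("https://api.elevenlabs.io/v1", "voice-id-123", "eleven_flash_v2_5"))
```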
{livekit_plugins_elevenlabs-0.7.14.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: livekit-plugins-elevenlabs
-Version: 0.7.14
+Version: 0.8.1
 Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9.0
 Description-Content-Type: text/markdown
-Requires-Dist: livekit-agents[codecs]
+Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
 Dynamic: classifier
 Dynamic: description
 Dynamic: description-content-type
livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@
+livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
+livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
+livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
+livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/elevenlabs/tts.py,sha256=-w8IeAvyQER4PLraajJz6OWDufvKpD_fPM8oPsYtX9s,19335
+livekit/plugins/elevenlabs/version.py,sha256=PoHw-_DNE2B5SpeoQ-r6HSfVmbDgYuGamg0dN2jhayQ,600
+livekit_plugins_elevenlabs-0.8.1.dist-info/METADATA,sha256=l8gbEDr8EsedqYQiqBhx6K9XwAdTtnQWVCxmlyjVG9w,1529
+livekit_plugins_elevenlabs-0.8.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+livekit_plugins_elevenlabs-0.8.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_elevenlabs-0.8.1.dist-info/RECORD,,
livekit_plugins_elevenlabs-0.7.14.dist-info/RECORD
DELETED
@@ -1,10 +0,0 @@
-livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
-livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
-livekit/plugins/elevenlabs/models.py,sha256=cVoaMYNlUXZzP-HOpbtU16OM9m-bACnSat8-o87tTyk,435
-livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/elevenlabs/tts.py,sha256=_d8V_YLx1tuScKtmDipoKHhqF3y68lXg03phixEHU3M,21419
-livekit/plugins/elevenlabs/version.py,sha256=1Trenk6kp4J1gdS0z55hdro60GNOnD1s0F3-AoNr4VM,601
-livekit_plugins_elevenlabs-0.7.14.dist-info/METADATA,sha256=WGgcKpZb9PYymh1pNvF7B5dhLXUlQj3n0ALlwJmfYfE,1523
-livekit_plugins_elevenlabs-0.7.14.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-livekit_plugins_elevenlabs-0.7.14.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_elevenlabs-0.7.14.dist-info/RECORD,,
{livekit_plugins_elevenlabs-0.7.14.dist-info → livekit_plugins_elevenlabs-0.8.1.dist-info}/top_level.txt
RENAMED
File without changes