livekit-plugins-elevenlabs 0.7.5__py3-none-any.whl → 0.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/elevenlabs/__init__.py +9 -0
- livekit/plugins/elevenlabs/tts.py +147 -90
- livekit/plugins/elevenlabs/version.py +1 -1
- {livekit_plugins_elevenlabs-0.7.5.dist-info → livekit_plugins_elevenlabs-0.7.6.dist-info}/METADATA +2 -2
- livekit_plugins_elevenlabs-0.7.6.dist-info/RECORD +10 -0
- {livekit_plugins_elevenlabs-0.7.5.dist-info → livekit_plugins_elevenlabs-0.7.6.dist-info}/WHEEL +1 -1
- livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD +0 -10
- {livekit_plugins_elevenlabs-0.7.5.dist-info → livekit_plugins_elevenlabs-0.7.6.dist-info}/top_level.txt +0 -0
@@ -37,3 +37,12 @@ class ElevenLabsPlugin(Plugin):
|
|
37
37
|
|
38
38
|
|
39
39
|
Plugin.register_plugin(ElevenLabsPlugin())
|
40
|
+
|
41
|
+
# Cleanup docs of unexported modules
|
42
|
+
_module = dir()
|
43
|
+
NOT_IN_ALL = [m for m in _module if m not in __all__]
|
44
|
+
|
45
|
+
__pdoc__ = {}
|
46
|
+
|
47
|
+
for n in NOT_IN_ALL:
|
48
|
+
__pdoc__[n] = False
|
@@ -24,7 +24,14 @@ from typing import Any, List, Literal
|
|
24
24
|
|
25
25
|
import aiohttp
|
26
26
|
from livekit import rtc
|
27
|
-
from livekit.agents import
|
27
|
+
from livekit.agents import (
|
28
|
+
APIConnectionError,
|
29
|
+
APIStatusError,
|
30
|
+
APITimeoutError,
|
31
|
+
tokenize,
|
32
|
+
tts,
|
33
|
+
utils,
|
34
|
+
)
|
28
35
|
|
29
36
|
from .log import logger
|
30
37
|
from .models import TTSEncoding, TTSModels
|
@@ -79,7 +86,7 @@ AUTHORIZATION_HEADER = "xi-api-key"
|
|
79
86
|
class _TTSOptions:
|
80
87
|
api_key: str
|
81
88
|
voice: Voice
|
82
|
-
|
89
|
+
model: TTSModels | str
|
83
90
|
base_url: str
|
84
91
|
encoding: TTSEncoding
|
85
92
|
sample_rate: int
|
@@ -94,7 +101,7 @@ class TTS(tts.TTS):
|
|
94
101
|
self,
|
95
102
|
*,
|
96
103
|
voice: Voice = DEFAULT_VOICE,
|
97
|
-
|
104
|
+
model: TTSModels | str = "eleven_turbo_v2_5",
|
98
105
|
api_key: str | None = None,
|
99
106
|
base_url: str | None = None,
|
100
107
|
encoding: TTSEncoding = "mp3_22050_32",
|
@@ -105,12 +112,23 @@ class TTS(tts.TTS):
|
|
105
112
|
enable_ssml_parsing: bool = False,
|
106
113
|
chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
|
107
114
|
http_session: aiohttp.ClientSession | None = None,
|
115
|
+
# deprecated
|
116
|
+
model_id: TTSModels | str | None = None,
|
108
117
|
) -> None:
|
109
118
|
"""
|
110
119
|
Create a new instance of ElevenLabs TTS.
|
111
120
|
|
112
|
-
|
113
|
-
|
121
|
+
Args:
|
122
|
+
voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
|
123
|
+
model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
|
124
|
+
api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
|
125
|
+
base_url (str | None): Custom base URL for the API. Optional.
|
126
|
+
encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32".
|
127
|
+
streaming_latency (int): Latency in seconds for streaming. Defaults to 3.
|
128
|
+
word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
|
129
|
+
enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
|
130
|
+
chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
|
131
|
+
http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
|
114
132
|
"""
|
115
133
|
|
116
134
|
super().__init__(
|
@@ -120,13 +138,22 @@ class TTS(tts.TTS):
|
|
120
138
|
sample_rate=_sample_rate_from_format(encoding),
|
121
139
|
num_channels=1,
|
122
140
|
)
|
141
|
+
|
142
|
+
if model_id is not None:
|
143
|
+
logger.warning(
|
144
|
+
"model_id is deprecated and will be removed in 1.5.0, use model instead",
|
145
|
+
)
|
146
|
+
model = model_id
|
147
|
+
|
123
148
|
api_key = api_key or os.environ.get("ELEVEN_API_KEY")
|
124
149
|
if not api_key:
|
125
|
-
raise ValueError(
|
150
|
+
raise ValueError(
|
151
|
+
"ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
|
152
|
+
)
|
126
153
|
|
127
154
|
self._opts = _TTSOptions(
|
128
155
|
voice=voice,
|
129
|
-
|
156
|
+
model=model,
|
130
157
|
api_key=api_key,
|
131
158
|
base_url=base_url or API_BASE_URL_V1,
|
132
159
|
encoding=encoding,
|
@@ -151,31 +178,43 @@ class TTS(tts.TTS):
|
|
151
178
|
) as resp:
|
152
179
|
return _dict_to_voices_list(await resp.json())
|
153
180
|
|
181
|
+
def update_options(
|
182
|
+
self,
|
183
|
+
*,
|
184
|
+
voice: Voice = DEFAULT_VOICE,
|
185
|
+
model: TTSModels | str = "eleven_turbo_v2_5",
|
186
|
+
) -> None:
|
187
|
+
"""
|
188
|
+
Args:
|
189
|
+
voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
|
190
|
+
model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
|
191
|
+
"""
|
192
|
+
self._opts.model = model or self._opts.model
|
193
|
+
self._opts.voice = voice or self._opts.voice
|
194
|
+
|
154
195
|
def synthesize(self, text: str) -> "ChunkedStream":
|
155
|
-
return ChunkedStream(text, self._opts, self._ensure_session())
|
196
|
+
return ChunkedStream(self, text, self._opts, self._ensure_session())
|
156
197
|
|
157
198
|
def stream(self) -> "SynthesizeStream":
|
158
|
-
return SynthesizeStream(self._ensure_session(), self._opts)
|
199
|
+
return SynthesizeStream(self, self._ensure_session(), self._opts)
|
159
200
|
|
160
201
|
|
161
202
|
class ChunkedStream(tts.ChunkedStream):
|
162
203
|
"""Synthesize using the chunked api endpoint"""
|
163
204
|
|
164
205
|
def __init__(
|
165
|
-
self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
|
206
|
+
self, tts: TTS, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
|
166
207
|
) -> None:
|
167
|
-
super().__init__()
|
168
|
-
self.
|
208
|
+
super().__init__(tts, text)
|
209
|
+
self._opts, self._session = opts, session
|
169
210
|
if _encoding_from_format(self._opts.encoding) == "mp3":
|
170
211
|
self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
|
171
212
|
|
172
|
-
@utils.log_exceptions(logger=logger)
|
173
213
|
async def _main_task(self) -> None:
|
214
|
+
request_id = utils.shortuuid()
|
174
215
|
bstream = utils.audio.AudioByteStream(
|
175
216
|
sample_rate=self._opts.sample_rate, num_channels=1
|
176
217
|
)
|
177
|
-
request_id = utils.shortuuid()
|
178
|
-
segment_id = utils.shortuuid()
|
179
218
|
|
180
219
|
voice_settings = (
|
181
220
|
_strip_nones(dataclasses.asdict(self._opts.voice.settings))
|
@@ -183,50 +222,59 @@ class ChunkedStream(tts.ChunkedStream):
|
|
183
222
|
else None
|
184
223
|
)
|
185
224
|
data = {
|
186
|
-
"text": self.
|
187
|
-
"model_id": self._opts.
|
225
|
+
"text": self._input_text,
|
226
|
+
"model_id": self._opts.model,
|
188
227
|
"voice_settings": voice_settings,
|
189
228
|
}
|
190
229
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
for
|
205
|
-
for frame in
|
230
|
+
try:
|
231
|
+
async with self._session.post(
|
232
|
+
_synthesize_url(self._opts),
|
233
|
+
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
234
|
+
json=data,
|
235
|
+
) as resp:
|
236
|
+
if not resp.content_type.startswith("audio/"):
|
237
|
+
content = await resp.text()
|
238
|
+
logger.error("11labs returned non-audio data: %s", content)
|
239
|
+
return
|
240
|
+
|
241
|
+
encoding = _encoding_from_format(self._opts.encoding)
|
242
|
+
if encoding == "mp3":
|
243
|
+
async for bytes_data, _ in resp.content.iter_chunks():
|
244
|
+
for frame in self._mp3_decoder.decode_chunk(bytes_data):
|
245
|
+
for frame in bstream.write(frame.data.tobytes()):
|
246
|
+
self._event_ch.send_nowait(
|
247
|
+
tts.SynthesizedAudio(
|
248
|
+
request_id=request_id,
|
249
|
+
frame=frame,
|
250
|
+
)
|
251
|
+
)
|
252
|
+
else:
|
253
|
+
async for bytes_data, _ in resp.content.iter_chunks():
|
254
|
+
for frame in bstream.write(bytes_data):
|
206
255
|
self._event_ch.send_nowait(
|
207
256
|
tts.SynthesizedAudio(
|
208
257
|
request_id=request_id,
|
209
|
-
segment_id=segment_id,
|
210
258
|
frame=frame,
|
211
259
|
)
|
212
260
|
)
|
213
|
-
else:
|
214
|
-
async for bytes_data, _ in resp.content.iter_chunks():
|
215
|
-
for frame in bstream.write(bytes_data):
|
216
|
-
self._event_ch.send_nowait(
|
217
|
-
tts.SynthesizedAudio(
|
218
|
-
request_id=request_id,
|
219
|
-
segment_id=segment_id,
|
220
|
-
frame=frame,
|
221
|
-
)
|
222
|
-
)
|
223
261
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
request_id=request_id, segment_id=segment_id, frame=frame
|
262
|
+
for frame in bstream.flush():
|
263
|
+
self._event_ch.send_nowait(
|
264
|
+
tts.SynthesizedAudio(request_id=request_id, frame=frame)
|
228
265
|
)
|
229
|
-
|
266
|
+
|
267
|
+
except asyncio.TimeoutError as e:
|
268
|
+
raise APITimeoutError() from e
|
269
|
+
except aiohttp.ClientResponseError as e:
|
270
|
+
raise APIStatusError(
|
271
|
+
message=e.message,
|
272
|
+
status_code=e.status,
|
273
|
+
request_id=None,
|
274
|
+
body=None,
|
275
|
+
) from e
|
276
|
+
except Exception as e:
|
277
|
+
raise APIConnectionError() from e
|
230
278
|
|
231
279
|
|
232
280
|
class SynthesizeStream(tts.SynthesizeStream):
|
@@ -234,10 +282,11 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
234
282
|
|
235
283
|
def __init__(
|
236
284
|
self,
|
285
|
+
tts: TTS,
|
237
286
|
session: aiohttp.ClientSession,
|
238
287
|
opts: _TTSOptions,
|
239
288
|
):
|
240
|
-
super().__init__()
|
289
|
+
super().__init__(tts)
|
241
290
|
self._opts, self._session = opts, session
|
242
291
|
self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
|
243
292
|
|
@@ -360,6 +409,26 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
360
409
|
|
361
410
|
async def recv_task():
|
362
411
|
nonlocal eos_sent
|
412
|
+
audio_bstream = utils.audio.AudioByteStream(
|
413
|
+
sample_rate=self._opts.sample_rate,
|
414
|
+
num_channels=1,
|
415
|
+
)
|
416
|
+
|
417
|
+
last_frame: rtc.AudioFrame | None = None
|
418
|
+
|
419
|
+
def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
|
420
|
+
nonlocal last_frame
|
421
|
+
if last_frame is not None:
|
422
|
+
self._event_ch.send_nowait(
|
423
|
+
tts.SynthesizedAudio(
|
424
|
+
request_id=request_id,
|
425
|
+
segment_id=segment_id,
|
426
|
+
frame=last_frame,
|
427
|
+
is_final=is_final,
|
428
|
+
)
|
429
|
+
)
|
430
|
+
|
431
|
+
last_frame = None
|
363
432
|
|
364
433
|
while True:
|
365
434
|
msg = await ws_conn.receive()
|
@@ -378,11 +447,33 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
378
447
|
logger.warning("unexpected 11labs message type %s", msg.type)
|
379
448
|
continue
|
380
449
|
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
450
|
+
data = json.loads(msg.data)
|
451
|
+
encoding = _encoding_from_format(self._opts.encoding)
|
452
|
+
if data.get("audio"):
|
453
|
+
b64data = base64.b64decode(data["audio"])
|
454
|
+
if encoding == "mp3":
|
455
|
+
for frame in self._mp3_decoder.decode_chunk(b64data):
|
456
|
+
for frame in audio_bstream.write(frame.data.tobytes()):
|
457
|
+
_send_last_frame(segment_id=segment_id, is_final=False)
|
458
|
+
last_frame = frame
|
459
|
+
|
460
|
+
else:
|
461
|
+
for frame in audio_bstream.write(b64data):
|
462
|
+
_send_last_frame(segment_id=segment_id, is_final=False)
|
463
|
+
last_frame = frame
|
464
|
+
|
465
|
+
elif data.get("isFinal"):
|
466
|
+
for frame in audio_bstream.flush():
|
467
|
+
_send_last_frame(segment_id=segment_id, is_final=False)
|
468
|
+
last_frame = frame
|
469
|
+
|
470
|
+
_send_last_frame(segment_id=segment_id, is_final=True)
|
471
|
+
|
472
|
+
pass
|
473
|
+
elif data.get("error"):
|
474
|
+
logger.error("11labs reported an error: %s", data["error"])
|
475
|
+
else:
|
476
|
+
logger.error("unexpected 11labs message %s", data)
|
386
477
|
|
387
478
|
tasks = [
|
388
479
|
asyncio.create_task(send_task()),
|
@@ -394,40 +485,6 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
394
485
|
finally:
|
395
486
|
await utils.aio.gracefully_cancel(*tasks)
|
396
487
|
|
397
|
-
def _process_stream_event(
|
398
|
-
self, *, data: dict, request_id: str, segment_id: str
|
399
|
-
) -> None:
|
400
|
-
encoding = _encoding_from_format(self._opts.encoding)
|
401
|
-
if data.get("audio"):
|
402
|
-
b64data = base64.b64decode(data["audio"])
|
403
|
-
if encoding == "mp3":
|
404
|
-
for frame in self._mp3_decoder.decode_chunk(b64data):
|
405
|
-
self._event_ch.send_nowait(
|
406
|
-
tts.SynthesizedAudio(
|
407
|
-
request_id=request_id,
|
408
|
-
segment_id=segment_id,
|
409
|
-
frame=frame,
|
410
|
-
)
|
411
|
-
)
|
412
|
-
else:
|
413
|
-
chunk_frame = rtc.AudioFrame(
|
414
|
-
data=b64data,
|
415
|
-
sample_rate=self._opts.sample_rate,
|
416
|
-
num_channels=1,
|
417
|
-
samples_per_channel=len(b64data) // 2,
|
418
|
-
)
|
419
|
-
self._event_ch.send_nowait(
|
420
|
-
tts.SynthesizedAudio(
|
421
|
-
request_id=request_id,
|
422
|
-
segment_id=segment_id,
|
423
|
-
frame=chunk_frame,
|
424
|
-
)
|
425
|
-
)
|
426
|
-
elif data.get("error"):
|
427
|
-
logger.error("11labs reported an error: %s", data["error"])
|
428
|
-
elif not data.get("isFinal"):
|
429
|
-
logger.error("unexpected 11labs message %s", data)
|
430
|
-
|
431
488
|
|
432
489
|
def _dict_to_voices_list(data: dict[str, Any]):
|
433
490
|
voices: List[Voice] = []
|
@@ -450,7 +507,7 @@ def _strip_nones(data: dict[str, Any]):
|
|
450
507
|
def _synthesize_url(opts: _TTSOptions) -> str:
|
451
508
|
base_url = opts.base_url
|
452
509
|
voice_id = opts.voice.id
|
453
|
-
model_id = opts.
|
510
|
+
model_id = opts.model
|
454
511
|
output_format = opts.encoding
|
455
512
|
latency = opts.streaming_latency
|
456
513
|
return (
|
@@ -462,7 +519,7 @@ def _synthesize_url(opts: _TTSOptions) -> str:
|
|
462
519
|
def _stream_url(opts: _TTSOptions) -> str:
|
463
520
|
base_url = opts.base_url
|
464
521
|
voice_id = opts.voice.id
|
465
|
-
model_id = opts.
|
522
|
+
model_id = opts.model
|
466
523
|
output_format = opts.encoding
|
467
524
|
latency = opts.streaming_latency
|
468
525
|
enable_ssml = str(opts.enable_ssml_parsing).lower()
|
{livekit_plugins_elevenlabs-0.7.5.dist-info → livekit_plugins_elevenlabs-0.7.6.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: livekit-plugins-elevenlabs
|
3
|
-
Version: 0.7.5
|
3
|
+
Version: 0.7.6
|
4
4
|
Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: >=3.9.0
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents[codecs] >=0.
|
22
|
+
Requires-Dist: livekit-agents[codecs] >=0.11
|
23
23
|
|
24
24
|
# LiveKit Plugins Elevenlabs
|
25
25
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
|
2
|
+
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
+
livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
|
4
|
+
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/elevenlabs/tts.py,sha256=mxdypC-zSbS1R15FmztT49ssk_arkKGUPe_d5uVqOUk,18422
|
6
|
+
livekit/plugins/elevenlabs/version.py,sha256=vOFNGWowZUhIrmyHBGtCx5dGhCp1T2FPt0h7KU_XKJg,600
|
7
|
+
livekit_plugins_elevenlabs-0.7.6.dist-info/METADATA,sha256=DY1JbHdgfNivv0p0xA5ZRenYUGEYC33yX4TcNh__srg,1305
|
8
|
+
livekit_plugins_elevenlabs-0.7.6.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
9
|
+
livekit_plugins_elevenlabs-0.7.6.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
+
livekit_plugins_elevenlabs-0.7.6.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
|
2
|
-
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
-
livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
|
4
|
-
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/elevenlabs/tts.py,sha256=L9d4KppfqP9tP-PvaE3YKbezovhSboejmIk97xOmdEA,15868
|
6
|
-
livekit/plugins/elevenlabs/version.py,sha256=4VoyPg1xoLZO0SP38sbtfe-ePEx82VqZVWRBBUr1wgA,600
|
7
|
-
livekit_plugins_elevenlabs-0.7.5.dist-info/METADATA,sha256=KMqAU3UsRzO4wFl-Y8GfT5-Bb7s_bnm8JmuETbQ2cJo,1311
|
8
|
-
livekit_plugins_elevenlabs-0.7.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
9
|
-
livekit_plugins_elevenlabs-0.7.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD,,
|
File without changes
|