livekit-plugins-elevenlabs 0.7.0.dev7__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/elevenlabs/__init__.py +3 -4
- livekit/plugins/elevenlabs/models.py +1 -0
- livekit/plugins/elevenlabs/tts.py +55 -25
- livekit/plugins/elevenlabs/version.py +1 -1
- {livekit_plugins_elevenlabs-0.7.0.dev7.dist-info → livekit_plugins_elevenlabs-0.7.2.dist-info}/METADATA +1 -1
- livekit_plugins_elevenlabs-0.7.2.dist-info/RECORD +10 -0
- {livekit_plugins_elevenlabs-0.7.0.dev7.dist-info → livekit_plugins_elevenlabs-0.7.2.dist-info}/WHEEL +1 -1
- livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/RECORD +0 -10
- {livekit_plugins_elevenlabs-0.7.0.dev7.dist-info → livekit_plugins_elevenlabs-0.7.2.dist-info}/top_level.txt +0 -0
@@ -28,13 +28,12 @@ __all__ = [
|
|
28
28
|
|
29
29
|
from livekit.agents import Plugin
|
30
30
|
|
31
|
+
from .log import logger
|
32
|
+
|
31
33
|
|
32
34
|
class ElevenLabsPlugin(Plugin):
|
33
35
|
def __init__(self):
|
34
|
-
super().__init__(__name__, __version__, __package__)
|
35
|
-
|
36
|
-
def download_files(self):
|
37
|
-
pass
|
36
|
+
super().__init__(__name__, __version__, __package__, logger)
|
38
37
|
|
39
38
|
|
40
39
|
Plugin.register_plugin(ElevenLabsPlugin())
|
@@ -93,7 +93,7 @@ class TTS(tts.TTS):
|
|
93
93
|
self,
|
94
94
|
*,
|
95
95
|
voice: Voice = DEFAULT_VOICE,
|
96
|
-
model_id: TTSModels = "
|
96
|
+
model_id: TTSModels = "eleven_turbo_v2_5",
|
97
97
|
api_key: str | None = None,
|
98
98
|
base_url: str | None = None,
|
99
99
|
encoding: TTSEncoding = "mp3_22050_32",
|
@@ -101,9 +101,7 @@ class TTS(tts.TTS):
|
|
101
101
|
word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
|
102
102
|
ignore_punctuation=False # punctuation can help for intonation
|
103
103
|
),
|
104
|
-
|
105
|
-
# (range is 50-500)
|
106
|
-
chunk_length_schedule: list[int] = [80, 120, 200, 260],
|
104
|
+
chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
|
107
105
|
http_session: aiohttp.ClientSession | None = None,
|
108
106
|
) -> None:
|
109
107
|
super().__init__(
|
@@ -158,6 +156,8 @@ class ChunkedStream(tts.ChunkedStream):
|
|
158
156
|
) -> None:
|
159
157
|
super().__init__()
|
160
158
|
self._text, self._opts, self._session = text, opts, session
|
159
|
+
if _encoding_from_format(self._opts.encoding) == "mp3":
|
160
|
+
self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
|
161
161
|
|
162
162
|
@utils.log_exceptions(logger=logger)
|
163
163
|
async def _main_task(self) -> None:
|
@@ -183,21 +183,39 @@ class ChunkedStream(tts.ChunkedStream):
|
|
183
183
|
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
184
184
|
json=data,
|
185
185
|
) as resp:
|
186
|
-
|
187
|
-
|
186
|
+
if not resp.content_type.startswith("audio/"):
|
187
|
+
content = await resp.text()
|
188
|
+
logger.error("11labs returned non-audio data: %s", content)
|
189
|
+
return
|
190
|
+
encoding = _encoding_from_format(self._opts.encoding)
|
191
|
+
if encoding == "mp3":
|
192
|
+
async for bytes_data, _ in resp.content.iter_chunks():
|
193
|
+
for frame in self._mp3_decoder.decode_chunk(bytes_data):
|
194
|
+
self._event_ch.send_nowait(
|
195
|
+
tts.SynthesizedAudio(
|
196
|
+
request_id=request_id,
|
197
|
+
segment_id=segment_id,
|
198
|
+
frame=frame,
|
199
|
+
)
|
200
|
+
)
|
201
|
+
else:
|
202
|
+
async for bytes_data, _ in resp.content.iter_chunks():
|
203
|
+
for frame in bstream.write(bytes_data):
|
204
|
+
self._event_ch.send_nowait(
|
205
|
+
tts.SynthesizedAudio(
|
206
|
+
request_id=request_id,
|
207
|
+
segment_id=segment_id,
|
208
|
+
frame=frame,
|
209
|
+
)
|
210
|
+
)
|
211
|
+
|
212
|
+
for frame in bstream.flush():
|
188
213
|
self._event_ch.send_nowait(
|
189
214
|
tts.SynthesizedAudio(
|
190
215
|
request_id=request_id, segment_id=segment_id, frame=frame
|
191
216
|
)
|
192
217
|
)
|
193
218
|
|
194
|
-
for frame in bstream.flush():
|
195
|
-
self._event_ch.send_nowait(
|
196
|
-
tts.SynthesizedAudio(
|
197
|
-
request_id=request_id, segment_id=segment_id, frame=frame
|
198
|
-
)
|
199
|
-
)
|
200
|
-
|
201
219
|
|
202
220
|
class SynthesizeStream(tts.SynthesizeStream):
|
203
221
|
"""Streamed API using websockets"""
|
@@ -208,8 +226,7 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
208
226
|
opts: _TTSOptions,
|
209
227
|
):
|
210
228
|
super().__init__()
|
211
|
-
self._opts = opts
|
212
|
-
self._session = session
|
229
|
+
self._opts, self._session = opts, session
|
213
230
|
self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
|
214
231
|
|
215
232
|
@utils.log_exceptions(logger=logger)
|
@@ -222,31 +239,39 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
222
239
|
word_stream = None
|
223
240
|
async for input in self._input_ch:
|
224
241
|
if isinstance(input, str):
|
225
|
-
if
|
242
|
+
if word_stream is None:
|
243
|
+
# new segment (after flush for e.g)
|
226
244
|
word_stream = self._opts.word_tokenizer.stream()
|
227
245
|
self._segments_ch.send_nowait(word_stream)
|
228
246
|
|
229
247
|
word_stream.push_text(input)
|
230
248
|
elif isinstance(input, self._FlushSentinel):
|
231
|
-
word_stream
|
249
|
+
if word_stream is not None:
|
250
|
+
word_stream.end_input()
|
251
|
+
|
232
252
|
word_stream = None
|
233
253
|
|
234
254
|
self._segments_ch.close()
|
235
255
|
|
256
|
+
@utils.log_exceptions(logger=logger)
|
236
257
|
async def _run():
|
237
258
|
async for word_stream in self._segments_ch:
|
238
259
|
await self._run_ws(word_stream)
|
239
260
|
|
240
|
-
|
261
|
+
tasks = [
|
262
|
+
asyncio.create_task(_tokenize_input()),
|
263
|
+
asyncio.create_task(_run()),
|
264
|
+
]
|
265
|
+
try:
|
266
|
+
await asyncio.gather(*tasks)
|
267
|
+
finally:
|
268
|
+
await utils.aio.gracefully_cancel(*tasks)
|
241
269
|
|
242
270
|
async def _run_ws(
|
243
271
|
self,
|
244
272
|
word_stream: tokenize.WordStream,
|
245
|
-
max_retry: int =
|
273
|
+
max_retry: int = 3,
|
246
274
|
) -> None:
|
247
|
-
request_id = utils.shortuuid()
|
248
|
-
segment_id = utils.shortuuid()
|
249
|
-
|
250
275
|
ws_conn: aiohttp.ClientWebSocketResponse | None = None
|
251
276
|
for try_i in range(max_retry):
|
252
277
|
retry_delay = 5
|
@@ -268,6 +293,10 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
268
293
|
if ws_conn is None:
|
269
294
|
raise Exception(f"failed to connect to 11labs after {max_retry} retries")
|
270
295
|
|
296
|
+
request_id = utils.shortuuid()
|
297
|
+
segment_id = utils.shortuuid()
|
298
|
+
|
299
|
+
# 11labs protocol expects the first message to be an "init msg"
|
271
300
|
init_pkt = dict(
|
272
301
|
text=" ",
|
273
302
|
try_trigger_generation=True,
|
@@ -291,7 +320,6 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
291
320
|
text=f"{data.token} ", # must always end with a space
|
292
321
|
try_trigger_generation=False,
|
293
322
|
)
|
294
|
-
print(data_pkt)
|
295
323
|
await ws_conn.send_str(json.dumps(data_pkt))
|
296
324
|
|
297
325
|
# no more token, mark eos
|
@@ -300,6 +328,8 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
300
328
|
eos_sent = True
|
301
329
|
|
302
330
|
async def recv_task():
|
331
|
+
nonlocal eos_sent
|
332
|
+
|
303
333
|
while True:
|
304
334
|
msg = await ws_conn.receive()
|
305
335
|
if msg.type in (
|
@@ -378,11 +408,11 @@ def _synthesize_url(opts: _TTSOptions) -> str:
|
|
378
408
|
base_url = opts.base_url
|
379
409
|
voice_id = opts.voice.id
|
380
410
|
model_id = opts.model_id
|
381
|
-
|
411
|
+
output_format = opts.encoding
|
382
412
|
latency = opts.streaming_latency
|
383
413
|
return (
|
384
414
|
f"{base_url}/text-to-speech/{voice_id}/stream?"
|
385
|
-
f"model_id={model_id}&output_format=
|
415
|
+
f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
|
386
416
|
)
|
387
417
|
|
388
418
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
|
2
|
+
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
+
livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
|
4
|
+
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/elevenlabs/tts.py,sha256=g5w__bq5OdtZDjjclw3zYq4mAPMpkVgWPqlhkb_qpBg,14320
|
6
|
+
livekit/plugins/elevenlabs/version.py,sha256=wNTnO8L3jrMdUjS-xAEFoMTKPaPYiFY9Kxnvzm4hTBc,600
|
7
|
+
livekit_plugins_elevenlabs-0.7.2.dist-info/METADATA,sha256=WdOaTQBGsLgrjKQIM2_pgXLyPUqzBfTml14OFRv2qLQ,1311
|
8
|
+
livekit_plugins_elevenlabs-0.7.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
9
|
+
livekit_plugins_elevenlabs-0.7.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
+
livekit_plugins_elevenlabs-0.7.2.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/elevenlabs/__init__.py,sha256=ez1ybDPt7GfKAKgPkxZFRB7Vyd-_i-0hfUMI79GQ5w4,1091
|
2
|
-
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
-
livekit/plugins/elevenlabs/models.py,sha256=8jTchztgpiTokHEaWUK8PPxWWfvm5SMrOGsJpzxbYAw,362
|
4
|
-
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/elevenlabs/tts.py,sha256=HpaHJQysUhThDdlYDHpQxroo9L2_m6G6QBAaNXs04K4,13032
|
6
|
-
livekit/plugins/elevenlabs/version.py,sha256=z7YbosRr6jiTE2IIvHSRWSl4-yyS21CiHE5WD547wJo,606
|
7
|
-
livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/METADATA,sha256=7yOB_7fjkuBAXd6RRWzO6n8FnERRNUT3FP1uhn3-JtY,1316
|
8
|
-
livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
9
|
-
livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/RECORD,,
|
File without changes
|