livekit-plugins-elevenlabs 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/elevenlabs/tts.py +46 -13
- livekit/plugins/elevenlabs/version.py +1 -1
- {livekit_plugins_elevenlabs-0.7.3.dist-info → livekit_plugins_elevenlabs-0.7.5.dist-info}/METADATA +1 -1
- livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD +10 -0
- {livekit_plugins_elevenlabs-0.7.3.dist-info → livekit_plugins_elevenlabs-0.7.5.dist-info}/WHEEL +1 -1
- livekit_plugins_elevenlabs-0.7.3.dist-info/RECORD +0 -10
- {livekit_plugins_elevenlabs-0.7.3.dist-info → livekit_plugins_elevenlabs-0.7.5.dist-info}/top_level.txt +0 -0
@@ -86,6 +86,7 @@ class _TTSOptions:
|
|
86
86
|
streaming_latency: int
|
87
87
|
word_tokenizer: tokenize.WordTokenizer
|
88
88
|
chunk_length_schedule: list[int]
|
89
|
+
enable_ssml_parsing: bool
|
89
90
|
|
90
91
|
|
91
92
|
class TTS(tts.TTS):
|
@@ -101,9 +102,17 @@ class TTS(tts.TTS):
|
|
101
102
|
word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
|
102
103
|
ignore_punctuation=False # punctuation can help for intonation
|
103
104
|
),
|
105
|
+
enable_ssml_parsing: bool = False,
|
104
106
|
chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
|
105
107
|
http_session: aiohttp.ClientSession | None = None,
|
106
108
|
) -> None:
|
109
|
+
"""
|
110
|
+
Create a new instance of ElevenLabs TTS.
|
111
|
+
|
112
|
+
``api_key`` must be set to your ElevenLabs API key, either using the argument or by setting
|
113
|
+
the ``ELEVEN_API_KEY`` environmental variable.
|
114
|
+
"""
|
115
|
+
|
107
116
|
super().__init__(
|
108
117
|
capabilities=tts.TTSCapabilities(
|
109
118
|
streaming=True,
|
@@ -125,6 +134,7 @@ class TTS(tts.TTS):
|
|
125
134
|
streaming_latency=streaming_latency,
|
126
135
|
word_tokenizer=word_tokenizer,
|
127
136
|
chunk_length_schedule=chunk_length_schedule,
|
137
|
+
enable_ssml_parsing=enable_ssml_parsing,
|
128
138
|
)
|
129
139
|
self._session = http_session
|
130
140
|
|
@@ -187,17 +197,19 @@ class ChunkedStream(tts.ChunkedStream):
|
|
187
197
|
content = await resp.text()
|
188
198
|
logger.error("11labs returned non-audio data: %s", content)
|
189
199
|
return
|
200
|
+
|
190
201
|
encoding = _encoding_from_format(self._opts.encoding)
|
191
202
|
if encoding == "mp3":
|
192
203
|
async for bytes_data, _ in resp.content.iter_chunks():
|
193
204
|
for frame in self._mp3_decoder.decode_chunk(bytes_data):
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
205
|
+
for frame in bstream.write(frame.data.tobytes()):
|
206
|
+
self._event_ch.send_nowait(
|
207
|
+
tts.SynthesizedAudio(
|
208
|
+
request_id=request_id,
|
209
|
+
segment_id=segment_id,
|
210
|
+
frame=frame,
|
211
|
+
)
|
199
212
|
)
|
200
|
-
)
|
201
213
|
else:
|
202
214
|
async for bytes_data, _ in resp.content.iter_chunks():
|
203
215
|
for frame in bstream.write(bytes_data):
|
@@ -209,12 +221,12 @@ class ChunkedStream(tts.ChunkedStream):
|
|
209
221
|
)
|
210
222
|
)
|
211
223
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
)
|
224
|
+
for frame in bstream.flush():
|
225
|
+
self._event_ch.send_nowait(
|
226
|
+
tts.SynthesizedAudio(
|
227
|
+
request_id=request_id, segment_id=segment_id, frame=frame
|
217
228
|
)
|
229
|
+
)
|
218
230
|
|
219
231
|
|
220
232
|
class SynthesizeStream(tts.SynthesizeStream):
|
@@ -313,15 +325,34 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
313
325
|
async def send_task():
|
314
326
|
nonlocal eos_sent
|
315
327
|
|
328
|
+
xml_content = []
|
316
329
|
async for data in word_stream:
|
330
|
+
text = data.token
|
331
|
+
|
332
|
+
# send the xml phoneme in one go
|
333
|
+
if (
|
334
|
+
self._opts.enable_ssml_parsing
|
335
|
+
and data.token.startswith("<phoneme")
|
336
|
+
or xml_content
|
337
|
+
):
|
338
|
+
xml_content.append(text)
|
339
|
+
if data.token.find("</phoneme>") > -1:
|
340
|
+
text = self._opts.word_tokenizer.format_words(xml_content)
|
341
|
+
xml_content = []
|
342
|
+
else:
|
343
|
+
continue
|
344
|
+
|
317
345
|
# try_trigger_generation=True is a bad practice, we expose
|
318
346
|
# chunk_length_schedule instead
|
319
347
|
data_pkt = dict(
|
320
|
-
text=f"{data.token} ", # must always end with a space
|
348
|
+
text=f"{text} ", # must always end with a space
|
321
349
|
try_trigger_generation=False,
|
322
350
|
)
|
323
351
|
await ws_conn.send_str(json.dumps(data_pkt))
|
324
352
|
|
353
|
+
if xml_content:
|
354
|
+
logger.warning("11labs stream ended with incomplete xml content")
|
355
|
+
|
325
356
|
# no more token, mark eos
|
326
357
|
eos_pkt = dict(text="")
|
327
358
|
await ws_conn.send_str(json.dumps(eos_pkt))
|
@@ -434,7 +465,9 @@ def _stream_url(opts: _TTSOptions) -> str:
|
|
434
465
|
model_id = opts.model_id
|
435
466
|
output_format = opts.encoding
|
436
467
|
latency = opts.streaming_latency
|
468
|
+
enable_ssml = str(opts.enable_ssml_parsing).lower()
|
437
469
|
return (
|
438
470
|
f"{base_url}/text-to-speech/{voice_id}/stream-input?"
|
439
|
-
f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
|
471
|
+
f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}&"
|
472
|
+
f"enable_ssml_parsing={enable_ssml}"
|
440
473
|
)
|
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
|
2
|
+
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
+
livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
|
4
|
+
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/elevenlabs/tts.py,sha256=L9d4KppfqP9tP-PvaE3YKbezovhSboejmIk97xOmdEA,15868
|
6
|
+
livekit/plugins/elevenlabs/version.py,sha256=4VoyPg1xoLZO0SP38sbtfe-ePEx82VqZVWRBBUr1wgA,600
|
7
|
+
livekit_plugins_elevenlabs-0.7.5.dist-info/METADATA,sha256=KMqAU3UsRzO4wFl-Y8GfT5-Bb7s_bnm8JmuETbQ2cJo,1311
|
8
|
+
livekit_plugins_elevenlabs-0.7.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
9
|
+
livekit_plugins_elevenlabs-0.7.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
+
livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
|
2
|
-
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
-
livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
|
4
|
-
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/elevenlabs/tts.py,sha256=ZSR6WxSBhntZmdK4i9U8SKcxHwNk3_4qiZNRZc5jP28,14641
|
6
|
-
livekit/plugins/elevenlabs/version.py,sha256=yJeG0VwiekDJAk7GHcIAe43ebagJgloe-ZsqEGZnqzE,600
|
7
|
-
livekit_plugins_elevenlabs-0.7.3.dist-info/METADATA,sha256=hdSuPch445_jz_Z-Uzt6CgU0Eb1H0ZVZ9ZA50hHYsBM,1311
|
8
|
-
livekit_plugins_elevenlabs-0.7.3.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
9
|
-
livekit_plugins_elevenlabs-0.7.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_elevenlabs-0.7.3.dist-info/RECORD,,
|
File without changes
|