livekit-plugins-elevenlabs 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -86,6 +86,7 @@ class _TTSOptions:
86
86
  streaming_latency: int
87
87
  word_tokenizer: tokenize.WordTokenizer
88
88
  chunk_length_schedule: list[int]
89
+ enable_ssml_parsing: bool
89
90
 
90
91
 
91
92
  class TTS(tts.TTS):
@@ -101,9 +102,17 @@ class TTS(tts.TTS):
101
102
  word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
102
103
  ignore_punctuation=False # punctuation can help for intonation
103
104
  ),
105
+ enable_ssml_parsing: bool = False,
104
106
  chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
105
107
  http_session: aiohttp.ClientSession | None = None,
106
108
  ) -> None:
109
+ """
110
+ Create a new instance of ElevenLabs TTS.
111
+
112
+ ``api_key`` must be set to your ElevenLabs API key, either using the argument or by setting
113
+ the ``ELEVEN_API_KEY`` environmental variable.
114
+ """
115
+
107
116
  super().__init__(
108
117
  capabilities=tts.TTSCapabilities(
109
118
  streaming=True,
@@ -125,6 +134,7 @@ class TTS(tts.TTS):
125
134
  streaming_latency=streaming_latency,
126
135
  word_tokenizer=word_tokenizer,
127
136
  chunk_length_schedule=chunk_length_schedule,
137
+ enable_ssml_parsing=enable_ssml_parsing,
128
138
  )
129
139
  self._session = http_session
130
140
 
@@ -187,17 +197,19 @@ class ChunkedStream(tts.ChunkedStream):
187
197
  content = await resp.text()
188
198
  logger.error("11labs returned non-audio data: %s", content)
189
199
  return
200
+
190
201
  encoding = _encoding_from_format(self._opts.encoding)
191
202
  if encoding == "mp3":
192
203
  async for bytes_data, _ in resp.content.iter_chunks():
193
204
  for frame in self._mp3_decoder.decode_chunk(bytes_data):
194
- self._event_ch.send_nowait(
195
- tts.SynthesizedAudio(
196
- request_id=request_id,
197
- segment_id=segment_id,
198
- frame=frame,
205
+ for frame in bstream.write(frame.data.tobytes()):
206
+ self._event_ch.send_nowait(
207
+ tts.SynthesizedAudio(
208
+ request_id=request_id,
209
+ segment_id=segment_id,
210
+ frame=frame,
211
+ )
199
212
  )
200
- )
201
213
  else:
202
214
  async for bytes_data, _ in resp.content.iter_chunks():
203
215
  for frame in bstream.write(bytes_data):
@@ -209,12 +221,12 @@ class ChunkedStream(tts.ChunkedStream):
209
221
  )
210
222
  )
211
223
 
212
- for frame in bstream.flush():
213
- self._event_ch.send_nowait(
214
- tts.SynthesizedAudio(
215
- request_id=request_id, segment_id=segment_id, frame=frame
216
- )
224
+ for frame in bstream.flush():
225
+ self._event_ch.send_nowait(
226
+ tts.SynthesizedAudio(
227
+ request_id=request_id, segment_id=segment_id, frame=frame
217
228
  )
229
+ )
218
230
 
219
231
 
220
232
  class SynthesizeStream(tts.SynthesizeStream):
@@ -313,15 +325,34 @@ class SynthesizeStream(tts.SynthesizeStream):
313
325
  async def send_task():
314
326
  nonlocal eos_sent
315
327
 
328
+ xml_content = []
316
329
  async for data in word_stream:
330
+ text = data.token
331
+
332
+ # send the xml phoneme in one go
333
+ if (
334
+ self._opts.enable_ssml_parsing
335
+ and data.token.startswith("<phoneme")
336
+ or xml_content
337
+ ):
338
+ xml_content.append(text)
339
+ if data.token.find("</phoneme>") > -1:
340
+ text = self._opts.word_tokenizer.format_words(xml_content)
341
+ xml_content = []
342
+ else:
343
+ continue
344
+
317
345
  # try_trigger_generation=True is a bad practice, we expose
318
346
  # chunk_length_schedule instead
319
347
  data_pkt = dict(
320
- text=f"{data.token} ", # must always end with a space
348
+ text=f"{text} ", # must always end with a space
321
349
  try_trigger_generation=False,
322
350
  )
323
351
  await ws_conn.send_str(json.dumps(data_pkt))
324
352
 
353
+ if xml_content:
354
+ logger.warning("11labs stream ended with incomplete xml content")
355
+
325
356
  # no more token, mark eos
326
357
  eos_pkt = dict(text="")
327
358
  await ws_conn.send_str(json.dumps(eos_pkt))
@@ -434,7 +465,9 @@ def _stream_url(opts: _TTSOptions) -> str:
434
465
  model_id = opts.model_id
435
466
  output_format = opts.encoding
436
467
  latency = opts.streaming_latency
468
+ enable_ssml = str(opts.enable_ssml_parsing).lower()
437
469
  return (
438
470
  f"{base_url}/text-to-speech/{voice_id}/stream-input?"
439
- f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
471
+ f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}&"
472
+ f"enable_ssml_parsing={enable_ssml}"
440
473
  )
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.7.3"
15
+ __version__ = "0.7.5"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.7.3
3
+ Version: 0.7.5
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=L9d4KppfqP9tP-PvaE3YKbezovhSboejmIk97xOmdEA,15868
6
+ livekit/plugins/elevenlabs/version.py,sha256=4VoyPg1xoLZO0SP38sbtfe-ePEx82VqZVWRBBUr1wgA,600
7
+ livekit_plugins_elevenlabs-0.7.5.dist-info/METADATA,sha256=KMqAU3UsRzO4wFl-Y8GfT5-Bb7s_bnm8JmuETbQ2cJo,1311
8
+ livekit_plugins_elevenlabs-0.7.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
9
+ livekit_plugins_elevenlabs-0.7.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_elevenlabs-0.7.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.1.0)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=ZSR6WxSBhntZmdK4i9U8SKcxHwNk3_4qiZNRZc5jP28,14641
6
- livekit/plugins/elevenlabs/version.py,sha256=yJeG0VwiekDJAk7GHcIAe43ebagJgloe-ZsqEGZnqzE,600
7
- livekit_plugins_elevenlabs-0.7.3.dist-info/METADATA,sha256=hdSuPch445_jz_Z-Uzt6CgU0Eb1H0ZVZ9ZA50hHYsBM,1311
8
- livekit_plugins_elevenlabs-0.7.3.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
9
- livekit_plugins_elevenlabs-0.7.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.7.3.dist-info/RECORD,,