livekit-plugins-elevenlabs 0.7.0.dev7__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -28,13 +28,12 @@ __all__ = [
28
28
 
29
29
  from livekit.agents import Plugin
30
30
 
31
+ from .log import logger
32
+
31
33
 
32
34
  class ElevenLabsPlugin(Plugin):
33
35
  def __init__(self):
34
- super().__init__(__name__, __version__, __package__)
35
-
36
- def download_files(self):
37
- pass
36
+ super().__init__(__name__, __version__, __package__, logger)
38
37
 
39
38
 
40
39
  Plugin.register_plugin(ElevenLabsPlugin())
@@ -5,6 +5,7 @@ TTSModels = Literal[
5
5
  "eleven_multilingual_v1",
6
6
  "eleven_multilingual_v2",
7
7
  "eleven_turbo_v2",
8
+ "eleven_turbo_v2_5",
8
9
  ]
9
10
 
10
11
  TTSEncoding = Literal[
@@ -93,7 +93,7 @@ class TTS(tts.TTS):
93
93
  self,
94
94
  *,
95
95
  voice: Voice = DEFAULT_VOICE,
96
- model_id: TTSModels = "eleven_turbo_v2",
96
+ model_id: TTSModels = "eleven_turbo_v2_5",
97
97
  api_key: str | None = None,
98
98
  base_url: str | None = None,
99
99
  encoding: TTSEncoding = "mp3_22050_32",
@@ -101,9 +101,7 @@ class TTS(tts.TTS):
101
101
  word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
102
102
  ignore_punctuation=False # punctuation can help for intonation
103
103
  ),
104
- # default value of 11labs is [120, 160, 250, 290], but we want faster responses by default
105
- # (range is 50-500)
106
- chunk_length_schedule: list[int] = [80, 120, 200, 260],
104
+ chunk_length_schedule: list[int] = [80, 120, 200, 260], # range is [50, 500]
107
105
  http_session: aiohttp.ClientSession | None = None,
108
106
  ) -> None:
109
107
  super().__init__(
@@ -158,6 +156,8 @@ class ChunkedStream(tts.ChunkedStream):
158
156
  ) -> None:
159
157
  super().__init__()
160
158
  self._text, self._opts, self._session = text, opts, session
159
+ if _encoding_from_format(self._opts.encoding) == "mp3":
160
+ self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
161
161
 
162
162
  @utils.log_exceptions(logger=logger)
163
163
  async def _main_task(self) -> None:
@@ -183,21 +183,39 @@ class ChunkedStream(tts.ChunkedStream):
183
183
  headers={AUTHORIZATION_HEADER: self._opts.api_key},
184
184
  json=data,
185
185
  ) as resp:
186
- async for bytes_data, _ in resp.content.iter_chunks():
187
- for frame in bstream.write(bytes_data):
186
+ if not resp.content_type.startswith("audio/"):
187
+ content = await resp.text()
188
+ logger.error("11labs returned non-audio data: %s", content)
189
+ return
190
+ encoding = _encoding_from_format(self._opts.encoding)
191
+ if encoding == "mp3":
192
+ async for bytes_data, _ in resp.content.iter_chunks():
193
+ for frame in self._mp3_decoder.decode_chunk(bytes_data):
194
+ self._event_ch.send_nowait(
195
+ tts.SynthesizedAudio(
196
+ request_id=request_id,
197
+ segment_id=segment_id,
198
+ frame=frame,
199
+ )
200
+ )
201
+ else:
202
+ async for bytes_data, _ in resp.content.iter_chunks():
203
+ for frame in bstream.write(bytes_data):
204
+ self._event_ch.send_nowait(
205
+ tts.SynthesizedAudio(
206
+ request_id=request_id,
207
+ segment_id=segment_id,
208
+ frame=frame,
209
+ )
210
+ )
211
+
212
+ for frame in bstream.flush():
188
213
  self._event_ch.send_nowait(
189
214
  tts.SynthesizedAudio(
190
215
  request_id=request_id, segment_id=segment_id, frame=frame
191
216
  )
192
217
  )
193
218
 
194
- for frame in bstream.flush():
195
- self._event_ch.send_nowait(
196
- tts.SynthesizedAudio(
197
- request_id=request_id, segment_id=segment_id, frame=frame
198
- )
199
- )
200
-
201
219
 
202
220
  class SynthesizeStream(tts.SynthesizeStream):
203
221
  """Streamed API using websockets"""
@@ -208,8 +226,7 @@ class SynthesizeStream(tts.SynthesizeStream):
208
226
  opts: _TTSOptions,
209
227
  ):
210
228
  super().__init__()
211
- self._opts = opts
212
- self._session = session
229
+ self._opts, self._session = opts, session
213
230
  self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
214
231
 
215
232
  @utils.log_exceptions(logger=logger)
@@ -222,31 +239,39 @@ class SynthesizeStream(tts.SynthesizeStream):
222
239
  word_stream = None
223
240
  async for input in self._input_ch:
224
241
  if isinstance(input, str):
225
- if not word_stream:
242
+ if word_stream is None:
243
+ # new segment (after flush for e.g)
226
244
  word_stream = self._opts.word_tokenizer.stream()
227
245
  self._segments_ch.send_nowait(word_stream)
228
246
 
229
247
  word_stream.push_text(input)
230
248
  elif isinstance(input, self._FlushSentinel):
231
- word_stream.end_input()
249
+ if word_stream is not None:
250
+ word_stream.end_input()
251
+
232
252
  word_stream = None
233
253
 
234
254
  self._segments_ch.close()
235
255
 
256
+ @utils.log_exceptions(logger=logger)
236
257
  async def _run():
237
258
  async for word_stream in self._segments_ch:
238
259
  await self._run_ws(word_stream)
239
260
 
240
- await asyncio.gather(_tokenize_input(), _run(), return_exceptions=True)
261
+ tasks = [
262
+ asyncio.create_task(_tokenize_input()),
263
+ asyncio.create_task(_run()),
264
+ ]
265
+ try:
266
+ await asyncio.gather(*tasks)
267
+ finally:
268
+ await utils.aio.gracefully_cancel(*tasks)
241
269
 
242
270
  async def _run_ws(
243
271
  self,
244
272
  word_stream: tokenize.WordStream,
245
- max_retry: int = 1,
273
+ max_retry: int = 3,
246
274
  ) -> None:
247
- request_id = utils.shortuuid()
248
- segment_id = utils.shortuuid()
249
-
250
275
  ws_conn: aiohttp.ClientWebSocketResponse | None = None
251
276
  for try_i in range(max_retry):
252
277
  retry_delay = 5
@@ -268,6 +293,10 @@ class SynthesizeStream(tts.SynthesizeStream):
268
293
  if ws_conn is None:
269
294
  raise Exception(f"failed to connect to 11labs after {max_retry} retries")
270
295
 
296
+ request_id = utils.shortuuid()
297
+ segment_id = utils.shortuuid()
298
+
299
+ # 11labs protocol expects the first message to be an "init msg"
271
300
  init_pkt = dict(
272
301
  text=" ",
273
302
  try_trigger_generation=True,
@@ -291,7 +320,6 @@ class SynthesizeStream(tts.SynthesizeStream):
291
320
  text=f"{data.token} ", # must always end with a space
292
321
  try_trigger_generation=False,
293
322
  )
294
- print(data_pkt)
295
323
  await ws_conn.send_str(json.dumps(data_pkt))
296
324
 
297
325
  # no more token, mark eos
@@ -300,6 +328,8 @@ class SynthesizeStream(tts.SynthesizeStream):
300
328
  eos_sent = True
301
329
 
302
330
  async def recv_task():
331
+ nonlocal eos_sent
332
+
303
333
  while True:
304
334
  msg = await ws_conn.receive()
305
335
  if msg.type in (
@@ -378,11 +408,11 @@ def _synthesize_url(opts: _TTSOptions) -> str:
378
408
  base_url = opts.base_url
379
409
  voice_id = opts.voice.id
380
410
  model_id = opts.model_id
381
- sample_rate = _sample_rate_from_format(opts.encoding)
411
+ output_format = opts.encoding
382
412
  latency = opts.streaming_latency
383
413
  return (
384
414
  f"{base_url}/text-to-speech/{voice_id}/stream?"
385
- f"model_id={model_id}&output_format=pcm_{sample_rate}&optimize_streaming_latency={latency}"
415
+ f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
386
416
  )
387
417
 
388
418
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.7.0-dev.7"
15
+ __version__ = "0.7.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.7.0.dev7
3
+ Version: 0.7.2
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=cYRVIPXkRvB3-jK9bKZ9rYiMBACytWlCSq6yoZXaSgA,1080
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=ddBUlDT4707f64WDJASR0B60X0yQ-LRHK1ZpTuBJXK8,387
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/tts.py,sha256=g5w__bq5OdtZDjjclw3zYq4mAPMpkVgWPqlhkb_qpBg,14320
6
+ livekit/plugins/elevenlabs/version.py,sha256=wNTnO8L3jrMdUjS-xAEFoMTKPaPYiFY9Kxnvzm4hTBc,600
7
+ livekit_plugins_elevenlabs-0.7.2.dist-info/METADATA,sha256=WdOaTQBGsLgrjKQIM2_pgXLyPUqzBfTml14OFRv2qLQ,1311
8
+ livekit_plugins_elevenlabs-0.7.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
9
+ livekit_plugins_elevenlabs-0.7.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_elevenlabs-0.7.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (71.1.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=ez1ybDPt7GfKAKgPkxZFRB7Vyd-_i-0hfUMI79GQ5w4,1091
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=8jTchztgpiTokHEaWUK8PPxWWfvm5SMrOGsJpzxbYAw,362
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=HpaHJQysUhThDdlYDHpQxroo9L2_m6G6QBAaNXs04K4,13032
6
- livekit/plugins/elevenlabs/version.py,sha256=z7YbosRr6jiTE2IIvHSRWSl4-yyS21CiHE5WD547wJo,606
7
- livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/METADATA,sha256=7yOB_7fjkuBAXd6RRWzO6n8FnERRNUT3FP1uhn3-JtY,1316
8
- livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
9
- livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_elevenlabs-0.7.0.dev7.dist-info/RECORD,,