livekit-plugins-elevenlabs 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/livekit/plugins/elevenlabs/tts.py
+++ b/livekit/plugins/elevenlabs/tts.py
@@ -21,7 +21,7 @@ import json
 import os
 import weakref
 from dataclasses import dataclass, replace
-from typing import Any
+from typing import Any, Union

 import aiohttp

@@ -84,10 +84,11 @@ class _TTSOptions:
     encoding: TTSEncoding
     sample_rate: int
     streaming_latency: NotGivenOr[int]
-    word_tokenizer: tokenize.WordTokenizer
+    word_tokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer
     chunk_length_schedule: NotGivenOr[list[int]]
     enable_ssml_parsing: bool
     inactivity_timeout: int
+    auto_mode: NotGivenOr[bool]


 class TTS(tts.TTS):
@@ -102,7 +103,8 @@ class TTS(tts.TTS):
         base_url: NotGivenOr[str] = NOT_GIVEN,
         streaming_latency: NotGivenOr[int] = NOT_GIVEN,
         inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
-        word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
+        auto_mode: NotGivenOr[bool] = NOT_GIVEN,
+        word_tokenizer: NotGivenOr[tokenize.WordTokenizer | tokenize.SentenceTokenizer] = NOT_GIVEN,
         enable_ssml_parsing: bool = False,
         chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN,  # range is [50, 500]
         http_session: aiohttp.ClientSession | None = None,
@@ -119,7 +121,8 @@ class TTS(tts.TTS):
             base_url (NotGivenOr[str]): Custom base URL for the API. Optional.
             streaming_latency (NotGivenOr[int]): Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
             inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
-            word_tokenizer (NotGivenOr[tokenize.WordTokenizer]): Tokenizer for processing text. Defaults to basic WordTokenizer.
+            auto_mode (bool): Reduces latency by disabling chunk schedule and buffers. Recommended for full sentences/phrases. Defaults to False.
+            word_tokenizer (NotGivenOr[tokenize.WordTokenizer | tokenize.SentenceTokenizer]): Tokenizer for processing text. Defaults to basic WordTokenizer.
             enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
             chunk_length_schedule (NotGivenOr[list[int]]): Schedule for chunk lengths, ranging from 50 to 500. Defaults are [120, 160, 250, 290].
             http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
@@ -144,8 +147,15 @@ class TTS(tts.TTS):
         )

         if not is_given(word_tokenizer):
-            word_tokenizer = tokenize.basic.WordTokenizer(
-                ignore_punctuation=False  # punctuation can help for intonation
+            word_tokenizer = (
+                tokenize.basic.WordTokenizer(ignore_punctuation=False)
+                if not auto_mode
+                else tokenize.basic.SentenceTokenizer()
+            )
+        elif auto_mode and not isinstance(word_tokenizer, tokenize.SentenceTokenizer):
+            logger.warning(
+                "auto_mode is enabled, it expects full sentences or phrases, "
+                "please provide a SentenceTokenizer instead of a WordTokenizer."
             )

         self._opts = _TTSOptions(
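
Taken together with the constructor changes above, callers can now opt into ElevenLabs' auto_mode and the plugin picks a sentence tokenizer for them when none is supplied. A minimal usage sketch, assuming the usual LiveKit Agents import paths (livekit.plugins.elevenlabs, livekit.agents.tokenize) and an ELEVEN_API_KEY in the environment; it is an illustration, not part of the diff:

    from livekit.agents import tokenize
    from livekit.plugins import elevenlabs

    # auto_mode disables the chunk-length schedule and buffering on the ElevenLabs
    # side, so the stream should be fed full sentences or phrases. With
    # word_tokenizer left unset, the constructor falls back to
    # tokenize.basic.SentenceTokenizer() as shown in the hunk above.
    tts = elevenlabs.TTS(auto_mode=True)

    # Supplying a WordTokenizer together with auto_mode=True is still accepted,
    # but triggers the logger.warning added in this hunk.
    tts_mismatched = elevenlabs.TTS(
        auto_mode=True,
        word_tokenizer=tokenize.basic.WordTokenizer(ignore_punctuation=False),
    )
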
@@ -162,6 +172,7 @@ class TTS(tts.TTS):
             enable_ssml_parsing=enable_ssml_parsing,
             language=language,
             inactivity_timeout=inactivity_timeout,
+            auto_mode=auto_mode,
         )
         self._session = http_session
         self._streams = weakref.WeakSet[SynthesizeStream]()
@@ -288,7 +299,7 @@ class SynthesizeStream(tts.SynthesizeStream):
         super().__init__(tts=tts, conn_options=conn_options)
         self._tts: TTS = tts
         self._opts = replace(tts._opts)
-        self._segments_ch = utils.aio.Chan[tokenize.WordStream]()
+        self._segments_ch = utils.aio.Chan[Union[tokenize.WordStream, tokenize.SentenceStream]]()

     async def _run(self, output_emitter: tts.AudioEmitter) -> None:
         request_id = utils.shortuuid()
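
A note on why hunk 1 adds Union instead of reusing the newer "X | Y" spelling here: the Chan[...] subscription above is evaluated at runtime, and the package still supports Python 3.9 (see Requires-Python: >=3.9.0 in the METADATA below), where | between ordinary classes raises TypeError. A small stand-alone illustration of that constraint, using stand-in classes rather than the real tokenize streams:

    from typing import Union

    class WordStream: ...        # stand-ins for tokenize.WordStream and
    class SentenceStream: ...    # tokenize.SentenceStream, illustration only

    ok = Union[WordStream, SentenceStream]      # valid on Python 3.9+
    # broken = WordStream | SentenceStream      # TypeError on 3.9, fine on 3.10+
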
@@ -343,7 +354,9 @@
         await utils.aio.gracefully_cancel(*tasks)

     async def _run_ws(
-        self, word_stream: tokenize.WordStream, output_emitter: tts.AudioEmitter
+        self,
+        word_stream: tokenize.WordStream | tokenize.SentenceStream,
+        output_emitter: tts.AudioEmitter,
     ) -> None:
         segment_id = utils.shortuuid()
         output_emitter.start_segment(segment_id=segment_id)
@@ -386,7 +399,11 @@
                 xml_content.append(text)

                 if any(data.token.find(end) > -1 for end in xml_end_tokens):
-                    text = self._opts.word_tokenizer.format_words(xml_content)
+                    text = (
+                        self._opts.word_tokenizer.format_words(xml_content)
+                        if isinstance(self._opts.word_tokenizer, tokenize.WordTokenizer)
+                        else " ".join(xml_content)
+                    )
                     xml_content = []
                 else:
                     continue
@@ -491,5 +508,7 @@ def _stream_url(opts: _TTSOptions) -> str:
         url += f"&language_code={language}"
     if is_given(opts.streaming_latency):
         url += f"&optimize_streaming_latency={opts.streaming_latency}"
+    if is_given(opts.auto_mode):
+        url += f"&auto_mode={opts.auto_mode}"

     return url
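
One detail about the new query parameter: the f-string interpolates a Python bool, so the URL ends up carrying auto_mode=True or auto_mode=False with a leading capital. A tiny illustration with a placeholder URL (the real endpoint is assembled earlier in _stream_url and is not part of this hunk):

    auto_mode = True
    url = "wss://example.invalid/stream-input?sample=1"  # placeholder, not the real endpoint
    url += f"&auto_mode={auto_mode}"
    print(url)  # wss://example.invalid/stream-input?sample=1&auto_mode=True
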
--- a/livekit/plugins/elevenlabs/version.py
+++ b/livekit/plugins/elevenlabs/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "1.1.3"
+__version__ = "1.1.5"
--- a/livekit_plugins_elevenlabs-1.1.3.dist-info/METADATA
+++ b/livekit_plugins_elevenlabs-1.1.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-elevenlabs
-Version: 1.1.3
+Version: 1.1.5
 Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
 Classifier: Topic :: Multimedia :: Video
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Requires-Dist: livekit-agents[codecs]>=1.1.3
+Requires-Dist: livekit-agents[codecs]>=1.1.5
 Description-Content-Type: text/markdown

 # ElevenLabs plugin for LiveKit Agents
--- a/livekit_plugins_elevenlabs-1.1.3.dist-info/RECORD
+++ b/livekit_plugins_elevenlabs-1.1.5.dist-info/RECORD
@@ -3,8 +3,8 @@ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz
 livekit/plugins/elevenlabs/models.py,sha256=fGZhu8IW1nKVWyeU-L3MFzhVjsRXqUhJblO9jt7jplY,418
 livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 livekit/plugins/elevenlabs/stt.py,sha256=xgGF3M15Wd_xb5Oh8YVSrQkeB7BkDAQ2lZi86n7Ecdk,4565
-livekit/plugins/elevenlabs/tts.py,sha256=GObEz_g74Dh79UHrKl1XTXQjjwcArZua2LD0mBmAzlc,18063
-livekit/plugins/elevenlabs/version.py,sha256=CX0B8KLm54mWslofdGA4Ue0sqe3NNbkDowPcEG7tAXA,600
-livekit_plugins_elevenlabs-1.1.3.dist-info/METADATA,sha256=Wxh6F4fUQpSwdTwHLu00_Ufv3mSaLPebUgawuN1Xffg,1453
-livekit_plugins_elevenlabs-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_elevenlabs-1.1.3.dist-info/RECORD,,
+livekit/plugins/elevenlabs/tts.py,sha256=NZmVIzrfNvrjTW33MQmoXHU1KFU6dLNy_qNePMDIBsk,19116
+livekit/plugins/elevenlabs/version.py,sha256=OKtayGMVDYKyoKBO2yNM4kfRbH-PODJqECIiYhUzNWg,600
+livekit_plugins_elevenlabs-1.1.5.dist-info/METADATA,sha256=MLsiLnNG9HB2bBeKvEqdjxueK9ASbZjlazWRD1ZXMw4,1453
+livekit_plugins_elevenlabs-1.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_elevenlabs-1.1.5.dist-info/RECORD,,