livekit-plugins-elevenlabs 1.0.0.dev5__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/elevenlabs/tts.py +185 -192
- livekit/plugins/elevenlabs/version.py +1 -1
- {livekit_plugins_elevenlabs-1.0.0.dev5.dist-info → livekit_plugins_elevenlabs-1.0.0rc1.dist-info}/METADATA +2 -2
- livekit_plugins_elevenlabs-1.0.0rc1.dist-info/RECORD +9 -0
- livekit_plugins_elevenlabs-1.0.0.dev5.dist-info/RECORD +0 -9
- {livekit_plugins_elevenlabs-1.0.0.dev5.dist-info → livekit_plugins_elevenlabs-1.0.0rc1.dist-info}/WHEEL +0 -0
@@ -34,6 +34,12 @@ from livekit.agents import (
|
|
34
34
|
tts,
|
35
35
|
utils,
|
36
36
|
)
|
37
|
+
from livekit.agents.types import (
|
38
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
39
|
+
NOT_GIVEN,
|
40
|
+
NotGivenOr,
|
41
|
+
)
|
42
|
+
from livekit.agents.utils import is_given
|
37
43
|
|
38
44
|
from .log import logger
|
39
45
|
from .models import TTSEncoding, TTSModels
|
@@ -50,9 +56,9 @@ def _sample_rate_from_format(output_format: TTSEncoding) -> int:
|
|
50
56
|
class VoiceSettings:
|
51
57
|
stability: float # [0.0 - 1.0]
|
52
58
|
similarity_boost: float # [0.0 - 1.0]
|
53
|
-
style: float
|
54
|
-
speed: float
|
55
|
-
use_speaker_boost: bool
|
59
|
+
style: NotGivenOr[float] = NOT_GIVEN # [0.0 - 1.0]
|
60
|
+
speed: NotGivenOr[float] = NOT_GIVEN # [0.8 - 1.2]
|
61
|
+
use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN
|
56
62
|
|
57
63
|
|
58
64
|
@dataclass
|
@@ -60,7 +66,7 @@ class Voice:
|
|
60
66
|
id: str
|
61
67
|
name: str
|
62
68
|
category: str
|
63
|
-
settings: VoiceSettings
|
69
|
+
settings: NotGivenOr[VoiceSettings] = NOT_GIVEN
|
64
70
|
|
65
71
|
|
66
72
|
DEFAULT_VOICE = Voice(
|
@@ -86,11 +92,11 @@ class _TTSOptions:
|
|
86
92
|
api_key: str
|
87
93
|
voice: Voice
|
88
94
|
model: TTSModels | str
|
89
|
-
language: str
|
95
|
+
language: NotGivenOr[str]
|
90
96
|
base_url: str
|
91
97
|
encoding: TTSEncoding
|
92
98
|
sample_rate: int
|
93
|
-
streaming_latency: int
|
99
|
+
streaming_latency: NotGivenOr[int]
|
94
100
|
word_tokenizer: tokenize.WordTokenizer
|
95
101
|
chunk_length_schedule: list[int]
|
96
102
|
enable_ssml_parsing: bool
|
@@ -103,17 +109,15 @@ class TTS(tts.TTS):
|
|
103
109
|
*,
|
104
110
|
voice: Voice = DEFAULT_VOICE,
|
105
111
|
model: TTSModels | str = "eleven_flash_v2_5",
|
106
|
-
api_key: str
|
107
|
-
base_url: str
|
108
|
-
streaming_latency: int =
|
112
|
+
api_key: NotGivenOr[str] = NOT_GIVEN,
|
113
|
+
base_url: NotGivenOr[str] = NOT_GIVEN,
|
114
|
+
streaming_latency: NotGivenOr[int] = NOT_GIVEN,
|
109
115
|
inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
|
110
|
-
word_tokenizer: tokenize.WordTokenizer
|
116
|
+
word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
|
111
117
|
enable_ssml_parsing: bool = False,
|
112
|
-
chunk_length_schedule: list[int] =
|
118
|
+
chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN, # range is [50, 500]
|
113
119
|
http_session: aiohttp.ClientSession | None = None,
|
114
|
-
|
115
|
-
model_id: TTSModels | str | None = None,
|
116
|
-
language: str | None = None,
|
120
|
+
language: NotGivenOr[str] = NOT_GIVEN,
|
117
121
|
) -> None:
|
118
122
|
"""
|
119
123
|
Create a new instance of ElevenLabs TTS.
|
@@ -129,10 +133,10 @@ class TTS(tts.TTS):
|
|
129
133
|
enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
|
130
134
|
chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
|
131
135
|
http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
|
132
|
-
language (str
|
133
|
-
"""
|
136
|
+
language (NotGivenOr[str]): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5".
|
137
|
+
""" # noqa: E501
|
134
138
|
|
135
|
-
if chunk_length_schedule
|
139
|
+
if not is_given(chunk_length_schedule):
|
136
140
|
chunk_length_schedule = [80, 120, 200, 260]
|
137
141
|
super().__init__(
|
138
142
|
capabilities=tts.TTSCapabilities(
|
@@ -142,19 +146,13 @@ class TTS(tts.TTS):
|
|
142
146
|
num_channels=1,
|
143
147
|
)
|
144
148
|
|
145
|
-
if
|
146
|
-
|
147
|
-
"model_id is deprecated and will be removed in 1.5.0, use model instead",
|
148
|
-
)
|
149
|
-
model = model_id
|
150
|
-
|
151
|
-
api_key = api_key or os.environ.get("ELEVEN_API_KEY")
|
152
|
-
if not api_key:
|
149
|
+
elevenlabs_api_key = api_key if is_given(api_key) else os.environ.get("ELEVEN_API_KEY")
|
150
|
+
if not elevenlabs_api_key:
|
153
151
|
raise ValueError(
|
154
|
-
"ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
|
152
|
+
"ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable" # noqa: E501
|
155
153
|
)
|
156
154
|
|
157
|
-
if word_tokenizer
|
155
|
+
if not is_given(word_tokenizer):
|
158
156
|
word_tokenizer = tokenize.basic.WordTokenizer(
|
159
157
|
ignore_punctuation=False # punctuation can help for intonation
|
160
158
|
)
|
@@ -162,8 +160,8 @@ class TTS(tts.TTS):
|
|
162
160
|
self._opts = _TTSOptions(
|
163
161
|
voice=voice,
|
164
162
|
model=model,
|
165
|
-
api_key=
|
166
|
-
base_url=base_url
|
163
|
+
api_key=elevenlabs_api_key,
|
164
|
+
base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
|
167
165
|
encoding=_DefaultEncoding,
|
168
166
|
sample_rate=self.sample_rate,
|
169
167
|
streaming_latency=streaming_latency,
|
@@ -174,36 +172,14 @@ class TTS(tts.TTS):
|
|
174
172
|
inactivity_timeout=inactivity_timeout,
|
175
173
|
)
|
176
174
|
self._session = http_session
|
177
|
-
self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
|
178
|
-
connect_cb=self._connect_ws,
|
179
|
-
close_cb=self._close_ws,
|
180
|
-
max_session_duration=inactivity_timeout,
|
181
|
-
mark_refreshed_on_get=True,
|
182
|
-
)
|
183
175
|
self._streams = weakref.WeakSet[SynthesizeStream]()
|
184
176
|
|
185
|
-
async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
|
186
|
-
session = self._ensure_session()
|
187
|
-
return await asyncio.wait_for(
|
188
|
-
session.ws_connect(
|
189
|
-
_stream_url(self._opts),
|
190
|
-
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
191
|
-
),
|
192
|
-
self._conn_options.timeout,
|
193
|
-
)
|
194
|
-
|
195
|
-
async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
|
196
|
-
await ws.close()
|
197
|
-
|
198
177
|
def _ensure_session(self) -> aiohttp.ClientSession:
|
199
178
|
if not self._session:
|
200
179
|
self._session = utils.http_context.http_session()
|
201
180
|
|
202
181
|
return self._session
|
203
182
|
|
204
|
-
def prewarm(self) -> None:
|
205
|
-
self._pool.prewarm()
|
206
|
-
|
207
183
|
async def list_voices(self) -> list[Voice]:
|
208
184
|
async with self._ensure_session().get(
|
209
185
|
f"{self._opts.base_url}/voices",
|
@@ -214,25 +190,28 @@ class TTS(tts.TTS):
|
|
214
190
|
def update_options(
|
215
191
|
self,
|
216
192
|
*,
|
217
|
-
voice: Voice =
|
218
|
-
model: TTSModels | str =
|
219
|
-
language: str
|
193
|
+
voice: NotGivenOr[Voice] = NOT_GIVEN,
|
194
|
+
model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
|
195
|
+
language: NotGivenOr[str] = NOT_GIVEN,
|
220
196
|
) -> None:
|
221
197
|
"""
|
222
198
|
Args:
|
223
|
-
voice (Voice): Voice configuration.
|
224
|
-
model (TTSModels | str): TTS model to use.
|
225
|
-
language (str
|
199
|
+
voice (NotGivenOr[Voice]): Voice configuration.
|
200
|
+
model (NotGivenOr[TTSModels | str]): TTS model to use.
|
201
|
+
language (NotGivenOr[str]): Language code for the TTS model.
|
226
202
|
"""
|
227
|
-
|
228
|
-
|
229
|
-
|
203
|
+
if is_given(model):
|
204
|
+
self._opts.model = model
|
205
|
+
if is_given(voice):
|
206
|
+
self._opts.voice = voice
|
207
|
+
if is_given(language):
|
208
|
+
self._opts.language = language
|
230
209
|
|
231
210
|
def synthesize(
|
232
211
|
self,
|
233
212
|
text: str,
|
234
213
|
*,
|
235
|
-
conn_options: APIConnectOptions
|
214
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
236
215
|
) -> ChunkedStream:
|
237
216
|
return ChunkedStream(
|
238
217
|
tts=self,
|
@@ -242,8 +221,15 @@ class TTS(tts.TTS):
|
|
242
221
|
session=self._ensure_session(),
|
243
222
|
)
|
244
223
|
|
245
|
-
def stream(
|
246
|
-
|
224
|
+
def stream(
|
225
|
+
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
226
|
+
) -> SynthesizeStream:
|
227
|
+
stream = SynthesizeStream(
|
228
|
+
tts=self,
|
229
|
+
conn_options=conn_options,
|
230
|
+
opts=self._opts,
|
231
|
+
session=self._ensure_session(),
|
232
|
+
)
|
247
233
|
self._streams.add(stream)
|
248
234
|
return stream
|
249
235
|
|
@@ -251,7 +237,6 @@ class TTS(tts.TTS):
|
|
251
237
|
for stream in list(self._streams):
|
252
238
|
await stream.aclose()
|
253
239
|
self._streams.clear()
|
254
|
-
await self._pool.aclose()
|
255
240
|
await super().aclose()
|
256
241
|
|
257
242
|
|
@@ -264,7 +249,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
264
249
|
tts: TTS,
|
265
250
|
input_text: str,
|
266
251
|
opts: _TTSOptions,
|
267
|
-
conn_options: APIConnectOptions
|
252
|
+
conn_options: APIConnectOptions,
|
268
253
|
session: aiohttp.ClientSession,
|
269
254
|
) -> None:
|
270
255
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
@@ -274,7 +259,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
274
259
|
request_id = utils.shortuuid()
|
275
260
|
voice_settings = (
|
276
261
|
_strip_nones(dataclasses.asdict(self._opts.voice.settings))
|
277
|
-
if self._opts.voice.settings
|
262
|
+
if is_given(self._opts.voice.settings)
|
278
263
|
else None
|
279
264
|
)
|
280
265
|
data = {
|
@@ -339,11 +324,12 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
339
324
|
self,
|
340
325
|
*,
|
341
326
|
tts: TTS,
|
342
|
-
|
327
|
+
session: aiohttp.ClientSession,
|
343
328
|
opts: _TTSOptions,
|
329
|
+
conn_options: APIConnectOptions,
|
344
330
|
):
|
345
|
-
super().__init__(tts=tts)
|
346
|
-
self._opts, self.
|
331
|
+
super().__init__(tts=tts, conn_options=conn_options)
|
332
|
+
self._opts, self._session = opts, session
|
347
333
|
|
348
334
|
async def _run(self) -> None:
|
349
335
|
request_id = utils.shortuuid()
|
@@ -398,134 +384,141 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
398
384
|
word_stream: tokenize.WordStream,
|
399
385
|
request_id: str,
|
400
386
|
) -> None:
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
decoder = utils.codecs.AudioStreamDecoder(
|
406
|
-
sample_rate=self._opts.sample_rate,
|
407
|
-
num_channels=1,
|
408
|
-
)
|
387
|
+
ws_conn = await self._session.ws_connect(
|
388
|
+
_stream_url(self._opts),
|
389
|
+
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
390
|
+
)
|
409
391
|
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
else None,
|
416
|
-
"generation_config": {"chunk_length_schedule": self._opts.chunk_length_schedule},
|
417
|
-
}
|
418
|
-
await ws_conn.send_str(json.dumps(init_pkt))
|
419
|
-
|
420
|
-
@utils.log_exceptions(logger=logger)
|
421
|
-
async def send_task():
|
422
|
-
nonlocal expected_text
|
423
|
-
xml_content = []
|
424
|
-
async for data in word_stream:
|
425
|
-
text = data.token
|
426
|
-
expected_text += text
|
427
|
-
# send the xml phoneme in one go
|
428
|
-
if (
|
429
|
-
self._opts.enable_ssml_parsing
|
430
|
-
and data.token.startswith("<phoneme")
|
431
|
-
or xml_content
|
432
|
-
):
|
433
|
-
xml_content.append(text)
|
434
|
-
if text.find("</phoneme>") > -1:
|
435
|
-
text = self._opts.word_tokenizer.format_words(xml_content)
|
436
|
-
xml_content = []
|
437
|
-
else:
|
438
|
-
continue
|
439
|
-
|
440
|
-
data_pkt = {"text": f"{text} "} # must always end with a space
|
441
|
-
self._mark_started()
|
442
|
-
await ws_conn.send_str(json.dumps(data_pkt))
|
443
|
-
if xml_content:
|
444
|
-
logger.warning("11labs stream ended with incomplete xml content")
|
445
|
-
await ws_conn.send_str(json.dumps({"flush": True}))
|
446
|
-
|
447
|
-
# consumes from decoder and generates events
|
448
|
-
@utils.log_exceptions(logger=logger)
|
449
|
-
async def generate_task():
|
450
|
-
emitter = tts.SynthesizedAudioEmitter(
|
451
|
-
event_ch=self._event_ch,
|
452
|
-
request_id=request_id,
|
453
|
-
segment_id=segment_id,
|
454
|
-
)
|
455
|
-
async for frame in decoder:
|
456
|
-
emitter.push(frame)
|
457
|
-
emitter.flush()
|
392
|
+
segment_id = utils.shortuuid()
|
393
|
+
decoder = utils.codecs.AudioStreamDecoder(
|
394
|
+
sample_rate=self._opts.sample_rate,
|
395
|
+
num_channels=1,
|
396
|
+
)
|
458
397
|
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
aiohttp.WSMsgType.CLOSE,
|
470
|
-
aiohttp.WSMsgType.CLOSING,
|
471
|
-
):
|
472
|
-
raise APIStatusError(
|
473
|
-
"11labs connection closed unexpectedly, not all tokens have been consumed",
|
474
|
-
request_id=request_id,
|
475
|
-
)
|
398
|
+
# 11labs protocol expects the first message to be an "init msg"
|
399
|
+
init_pkt = {
|
400
|
+
"text": " ",
|
401
|
+
"voice_settings": _strip_nones(dataclasses.asdict(self._opts.voice.settings))
|
402
|
+
if is_given(self._opts.voice.settings)
|
403
|
+
else None,
|
404
|
+
"generation_config": {"chunk_length_schedule": self._opts.chunk_length_schedule},
|
405
|
+
}
|
406
|
+
await ws_conn.send_str(json.dumps(init_pkt))
|
407
|
+
eos_sent = False
|
476
408
|
|
477
|
-
|
478
|
-
|
409
|
+
@utils.log_exceptions(logger=logger)
|
410
|
+
async def send_task():
|
411
|
+
nonlocal eos_sent
|
412
|
+
xml_content = []
|
413
|
+
async for data in word_stream:
|
414
|
+
text = data.token
|
415
|
+
# send the xml phoneme in one go
|
416
|
+
if (
|
417
|
+
self._opts.enable_ssml_parsing
|
418
|
+
and data.token.startswith("<phoneme")
|
419
|
+
or xml_content
|
420
|
+
):
|
421
|
+
xml_content.append(text)
|
422
|
+
if data.token.find("</phoneme>") > -1:
|
423
|
+
text = self._opts.word_tokenizer.format_words(xml_content)
|
424
|
+
xml_content = []
|
425
|
+
else:
|
479
426
|
continue
|
480
427
|
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
428
|
+
data_pkt = {"text": f"{text} "} # must always end with a space
|
429
|
+
self._mark_started()
|
430
|
+
await ws_conn.send_str(json.dumps(data_pkt))
|
431
|
+
if xml_content:
|
432
|
+
logger.warning("11labs stream ended with incomplete xml content")
|
433
|
+
|
434
|
+
# no more token, mark eos
|
435
|
+
eos_pkt = {"text": ""}
|
436
|
+
await ws_conn.send_str(json.dumps(eos_pkt))
|
437
|
+
eos_sent = True
|
438
|
+
|
439
|
+
# consumes from decoder and generates events
|
440
|
+
@utils.log_exceptions(logger=logger)
|
441
|
+
async def generate_task():
|
442
|
+
emitter = tts.SynthesizedAudioEmitter(
|
443
|
+
event_ch=self._event_ch,
|
444
|
+
request_id=request_id,
|
445
|
+
segment_id=segment_id,
|
446
|
+
)
|
447
|
+
async for frame in decoder:
|
448
|
+
emitter.push(frame)
|
449
|
+
emitter.flush()
|
450
|
+
|
451
|
+
# receives from ws and decodes audio
|
452
|
+
@utils.log_exceptions(logger=logger)
|
453
|
+
async def recv_task():
|
454
|
+
nonlocal eos_sent
|
455
|
+
|
456
|
+
while True:
|
457
|
+
msg = await ws_conn.receive()
|
458
|
+
if msg.type in (
|
459
|
+
aiohttp.WSMsgType.CLOSED,
|
460
|
+
aiohttp.WSMsgType.CLOSE,
|
461
|
+
aiohttp.WSMsgType.CLOSING,
|
462
|
+
):
|
463
|
+
if not eos_sent:
|
499
464
|
raise APIStatusError(
|
500
|
-
|
501
|
-
status_code=500,
|
465
|
+
"11labs connection closed unexpectedly, not all tokens have been consumed", # noqa: E501
|
502
466
|
request_id=request_id,
|
503
|
-
body=None,
|
504
467
|
)
|
468
|
+
return
|
505
469
|
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
470
|
+
if msg.type != aiohttp.WSMsgType.TEXT:
|
471
|
+
logger.warning("unexpected 11labs message type %s", msg.type)
|
472
|
+
continue
|
473
|
+
|
474
|
+
data = json.loads(msg.data)
|
475
|
+
if data.get("audio"):
|
476
|
+
b64data = base64.b64decode(data["audio"])
|
477
|
+
decoder.push(b64data)
|
478
|
+
|
479
|
+
elif data.get("isFinal"):
|
480
|
+
decoder.end_input()
|
481
|
+
break
|
482
|
+
elif data.get("error"):
|
483
|
+
raise APIStatusError(
|
484
|
+
message=data["error"],
|
485
|
+
status_code=500,
|
486
|
+
request_id=request_id,
|
487
|
+
body=None,
|
488
|
+
)
|
489
|
+
else:
|
490
|
+
raise APIStatusError(
|
491
|
+
message=f"unexpected 11labs message {data}",
|
492
|
+
status_code=500,
|
493
|
+
request_id=request_id,
|
494
|
+
body=None,
|
495
|
+
)
|
496
|
+
|
497
|
+
tasks = [
|
498
|
+
asyncio.create_task(send_task()),
|
499
|
+
asyncio.create_task(recv_task()),
|
500
|
+
asyncio.create_task(generate_task()),
|
501
|
+
]
|
502
|
+
try:
|
503
|
+
await asyncio.gather(*tasks)
|
504
|
+
except asyncio.TimeoutError as e:
|
505
|
+
raise APITimeoutError() from e
|
506
|
+
except aiohttp.ClientResponseError as e:
|
507
|
+
raise APIStatusError(
|
508
|
+
message=e.message,
|
509
|
+
status_code=e.status,
|
510
|
+
request_id=request_id,
|
511
|
+
body=None,
|
512
|
+
) from e
|
513
|
+
except APIStatusError:
|
514
|
+
raise
|
515
|
+
except Exception as e:
|
516
|
+
raise APIConnectionError() from e
|
517
|
+
finally:
|
518
|
+
await utils.aio.gracefully_cancel(*tasks)
|
519
|
+
await decoder.aclose()
|
520
|
+
if ws_conn is not None:
|
521
|
+
await ws_conn.close()
|
529
522
|
|
530
523
|
|
531
524
|
def _dict_to_voices_list(data: dict[str, Any]):
|
@@ -543,7 +536,7 @@ def _dict_to_voices_list(data: dict[str, Any]):
|
|
543
536
|
|
544
537
|
|
545
538
|
def _strip_nones(data: dict[str, Any]):
|
546
|
-
return {k: v for k, v in data.items() if v is not None}
|
539
|
+
return {k: v for k, v in data.items() if is_given(v) and v is not None}
|
547
540
|
|
548
541
|
|
549
542
|
def _synthesize_url(opts: _TTSOptions) -> str:
|
@@ -555,7 +548,7 @@ def _synthesize_url(opts: _TTSOptions) -> str:
|
|
555
548
|
f"{base_url}/text-to-speech/{voice_id}/stream?"
|
556
549
|
f"model_id={model_id}&output_format={output_format}"
|
557
550
|
)
|
558
|
-
if opts.streaming_latency:
|
551
|
+
if is_given(opts.streaming_latency):
|
559
552
|
url += f"&optimize_streaming_latency={opts.streaming_latency}"
|
560
553
|
return url
|
561
554
|
|
@@ -573,8 +566,8 @@ def _stream_url(opts: _TTSOptions) -> str:
|
|
573
566
|
f"model_id={model_id}&output_format={output_format}&"
|
574
567
|
f"enable_ssml_parsing={enable_ssml}&inactivity_timeout={inactivity_timeout}"
|
575
568
|
)
|
576
|
-
if language
|
569
|
+
if is_given(language):
|
577
570
|
url += f"&language_code={language}"
|
578
|
-
if opts.streaming_latency:
|
571
|
+
if is_given(opts.streaming_latency):
|
579
572
|
url += f"&optimize_streaming_latency={opts.streaming_latency}"
|
580
573
|
return url
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-elevenlabs
|
3
|
-
Version: 1.0.0.dev5
|
3
|
+
Version: 1.0.0rc1
|
4
4
|
Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
18
|
Classifier: Topic :: Multimedia :: Video
|
19
19
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
20
|
Requires-Python: >=3.9.0
|
21
|
-
Requires-Dist: livekit-agents[codecs]>=1.0.0.
|
21
|
+
Requires-Dist: livekit-agents[codecs]>=1.0.0.rc1
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
24
24
|
# LiveKit Plugins Elevenlabs
|
@@ -0,0 +1,9 @@
|
|
1
|
+
livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
|
2
|
+
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
+
livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
|
4
|
+
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/elevenlabs/tts.py,sha256=NrW-xONOPOHXycmX9mL1m0wrQjwfb_3ZIxy9Jf0oVf4,19568
|
6
|
+
livekit/plugins/elevenlabs/version.py,sha256=pF0lh6G9GYL7Mj7EnfhjFifzlzdWx6u3RvB0Itch4UE,604
|
7
|
+
livekit_plugins_elevenlabs-1.0.0rc1.dist-info/METADATA,sha256=KaLRiJ7Q7Ei1YH5mlkRmiNAR9nmln-kQsuINM_QeAM4,1321
|
8
|
+
livekit_plugins_elevenlabs-1.0.0rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
+
livekit_plugins_elevenlabs-1.0.0rc1.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
livekit/plugins/elevenlabs/__init__.py,sha256=YZVadomFq3JWiZN6GWXJbuE4vaNNWq1CmdH25du8qwg,1249
|
2
|
-
livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
|
3
|
-
livekit/plugins/elevenlabs/models.py,sha256=nB43wLS1ilzS7IxLYVSQxBjKPnbiPl4AHpHAOlG2i00,273
|
4
|
-
livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/elevenlabs/tts.py,sha256=eJ66yP3ta2FH0LgQ64wHdjOHEoavwguOg6GeaMIr9IU,20394
|
6
|
-
livekit/plugins/elevenlabs/version.py,sha256=pXgCpV03nQI-5Kk-74NFyAdw1htj2cx6unwQHipEcfE,605
|
7
|
-
livekit_plugins_elevenlabs-1.0.0.dev5.dist-info/METADATA,sha256=wOs4IhcM1fxfVMU3hg3F1mycvv_8pYQiG3vQHkTQ6wc,1324
|
8
|
-
livekit_plugins_elevenlabs-1.0.0.dev5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
-
livekit_plugins_elevenlabs-1.0.0.dev5.dist-info/RECORD,,
|
File without changes
|