livekit-plugins-elevenlabs 0.8.1__tar.gz → 1.0.0.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit_plugins_elevenlabs-1.0.0.dev5/.gitignore +168 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/PKG-INFO +10 -20
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/livekit/plugins/elevenlabs/tts.py +158 -149
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/livekit/plugins/elevenlabs/version.py +1 -1
- livekit_plugins_elevenlabs-1.0.0.dev5/pyproject.toml +39 -0
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/PKG-INFO +0 -46
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/SOURCES.txt +0 -14
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/dependency_links.txt +0 -1
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/requires.txt +0 -1
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/top_level.txt +0 -1
- livekit_plugins_elevenlabs-0.8.1/pyproject.toml +0 -3
- livekit_plugins_elevenlabs-0.8.1/setup.cfg +0 -4
- livekit_plugins_elevenlabs-0.8.1/setup.py +0 -59
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/README.md +0 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/livekit/plugins/elevenlabs/__init__.py +0 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/livekit/plugins/elevenlabs/log.py +0 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/livekit/plugins/elevenlabs/models.py +0 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0.dev5}/livekit/plugins/elevenlabs/py.typed +0 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
**/.vscode
|
2
|
+
**/.DS_Store
|
3
|
+
|
4
|
+
# Byte-compiled / optimized / DLL files
|
5
|
+
__pycache__/
|
6
|
+
*.py[cod]
|
7
|
+
*$py.class
|
8
|
+
|
9
|
+
# C extensions
|
10
|
+
*.so
|
11
|
+
|
12
|
+
# Distribution / packaging
|
13
|
+
.Python
|
14
|
+
build/
|
15
|
+
develop-eggs/
|
16
|
+
dist/
|
17
|
+
downloads/
|
18
|
+
eggs/
|
19
|
+
.eggs/
|
20
|
+
lib/
|
21
|
+
lib64/
|
22
|
+
parts/
|
23
|
+
sdist/
|
24
|
+
var/
|
25
|
+
wheels/
|
26
|
+
share/python-wheels/
|
27
|
+
*.egg-info/
|
28
|
+
.installed.cfg
|
29
|
+
*.egg
|
30
|
+
MANIFEST
|
31
|
+
|
32
|
+
# PyInstaller
|
33
|
+
# Usually these files are written by a python script from a template
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
35
|
+
*.manifest
|
36
|
+
*.spec
|
37
|
+
|
38
|
+
# Installer logs
|
39
|
+
pip-log.txt
|
40
|
+
pip-delete-this-directory.txt
|
41
|
+
|
42
|
+
# Unit test / coverage reports
|
43
|
+
htmlcov/
|
44
|
+
.tox/
|
45
|
+
.nox/
|
46
|
+
.coverage
|
47
|
+
.coverage.*
|
48
|
+
.cache
|
49
|
+
nosetests.xml
|
50
|
+
coverage.xml
|
51
|
+
*.cover
|
52
|
+
*.py,cover
|
53
|
+
.hypothesis/
|
54
|
+
.pytest_cache/
|
55
|
+
cover/
|
56
|
+
|
57
|
+
# Translations
|
58
|
+
*.mo
|
59
|
+
*.pot
|
60
|
+
|
61
|
+
# Django stuff:
|
62
|
+
*.log
|
63
|
+
local_settings.py
|
64
|
+
db.sqlite3
|
65
|
+
db.sqlite3-journal
|
66
|
+
|
67
|
+
# Flask stuff:
|
68
|
+
instance/
|
69
|
+
.webassets-cache
|
70
|
+
|
71
|
+
# Scrapy stuff:
|
72
|
+
.scrapy
|
73
|
+
|
74
|
+
# Sphinx documentation
|
75
|
+
docs/_build/
|
76
|
+
|
77
|
+
# PyBuilder
|
78
|
+
.pybuilder/
|
79
|
+
target/
|
80
|
+
|
81
|
+
# Jupyter Notebook
|
82
|
+
.ipynb_checkpoints
|
83
|
+
|
84
|
+
# IPython
|
85
|
+
profile_default/
|
86
|
+
ipython_config.py
|
87
|
+
|
88
|
+
# pyenv
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
91
|
+
# .python-version
|
92
|
+
|
93
|
+
# pipenv
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
97
|
+
# install all needed dependencies.
|
98
|
+
#Pipfile.lock
|
99
|
+
|
100
|
+
# poetry
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
103
|
+
# commonly ignored for libraries.
|
104
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
105
|
+
#poetry.lock
|
106
|
+
|
107
|
+
# pdm
|
108
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
109
|
+
#pdm.lock
|
110
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
111
|
+
# in version control.
|
112
|
+
# https://pdm.fming.dev/#use-with-ide
|
113
|
+
.pdm.toml
|
114
|
+
|
115
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
116
|
+
__pypackages__/
|
117
|
+
|
118
|
+
# Celery stuff
|
119
|
+
celerybeat-schedule
|
120
|
+
celerybeat.pid
|
121
|
+
|
122
|
+
# SageMath parsed files
|
123
|
+
*.sage.py
|
124
|
+
|
125
|
+
# Environments
|
126
|
+
.env
|
127
|
+
.venv
|
128
|
+
env/
|
129
|
+
venv/
|
130
|
+
ENV/
|
131
|
+
env.bak/
|
132
|
+
venv.bak/
|
133
|
+
|
134
|
+
# Spyder project settings
|
135
|
+
.spyderproject
|
136
|
+
.spyproject
|
137
|
+
|
138
|
+
# Rope project settings
|
139
|
+
.ropeproject
|
140
|
+
|
141
|
+
# mkdocs documentation
|
142
|
+
/site
|
143
|
+
|
144
|
+
# mypy
|
145
|
+
.mypy_cache/
|
146
|
+
.dmypy.json
|
147
|
+
dmypy.json
|
148
|
+
|
149
|
+
# Pyre type checker
|
150
|
+
.pyre/
|
151
|
+
|
152
|
+
# pytype static type analyzer
|
153
|
+
.pytype/
|
154
|
+
|
155
|
+
# Cython debug symbols
|
156
|
+
cython_debug/
|
157
|
+
|
158
|
+
# PyCharm
|
159
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
160
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
161
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
162
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
163
|
+
.idea/
|
164
|
+
|
165
|
+
node_modules
|
166
|
+
|
167
|
+
credentials.json
|
168
|
+
pyrightconfig.json
|
@@ -1,35 +1,25 @@
|
|
1
|
-
Metadata-Version: 2.2
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-elevenlabs
|
3
|
-
Version: 0.8.1
|
3
|
+
Version: 1.0.0.dev5
|
4
4
|
Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
7
5
|
Project-URL: Documentation, https://docs.livekit.io
|
8
6
|
Project-URL: Website, https://livekit.io/
|
9
7
|
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
Keywords: webrtc,realtime,audio,video,livekit,elevenlabs
|
8
|
+
Author-email: LiveKit <support@livekit.io>
|
9
|
+
License-Expression: Apache-2.0
|
10
|
+
Keywords: audio,elevenlabs,livekit,realtime,video,webrtc
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
13
|
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
15
|
Classifier: Programming Language :: Python :: 3.9
|
18
16
|
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
20
|
Requires-Python: >=3.9.0
|
21
|
+
Requires-Dist: livekit-agents[codecs]>=1.0.0.dev5
|
21
22
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
|
23
|
-
Dynamic: classifier
|
24
|
-
Dynamic: description
|
25
|
-
Dynamic: description-content-type
|
26
|
-
Dynamic: home-page
|
27
|
-
Dynamic: keywords
|
28
|
-
Dynamic: license
|
29
|
-
Dynamic: project-url
|
30
|
-
Dynamic: requires-dist
|
31
|
-
Dynamic: requires-python
|
32
|
-
Dynamic: summary
|
33
23
|
|
34
24
|
# LiveKit Plugins Elevenlabs
|
35
25
|
|
@@ -21,9 +21,10 @@ import json
|
|
21
21
|
import os
|
22
22
|
import weakref
|
23
23
|
from dataclasses import dataclass
|
24
|
-
from typing import Any
|
24
|
+
from typing import Any
|
25
25
|
|
26
26
|
import aiohttp
|
27
|
+
|
27
28
|
from livekit.agents import (
|
28
29
|
APIConnectionError,
|
29
30
|
APIConnectOptions,
|
@@ -106,9 +107,9 @@ class TTS(tts.TTS):
|
|
106
107
|
base_url: str | None = None,
|
107
108
|
streaming_latency: int = 0,
|
108
109
|
inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
|
109
|
-
word_tokenizer:
|
110
|
+
word_tokenizer: tokenize.WordTokenizer | None = None,
|
110
111
|
enable_ssml_parsing: bool = False,
|
111
|
-
chunk_length_schedule: list[int] =
|
112
|
+
chunk_length_schedule: list[int] = None, # range is [50, 500]
|
112
113
|
http_session: aiohttp.ClientSession | None = None,
|
113
114
|
# deprecated
|
114
115
|
model_id: TTSModels | str | None = None,
|
@@ -131,6 +132,8 @@ class TTS(tts.TTS):
|
|
131
132
|
language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
|
132
133
|
"""
|
133
134
|
|
135
|
+
if chunk_length_schedule is None:
|
136
|
+
chunk_length_schedule = [80, 120, 200, 260]
|
134
137
|
super().__init__(
|
135
138
|
capabilities=tts.TTSCapabilities(
|
136
139
|
streaming=True,
|
@@ -171,15 +174,37 @@ class TTS(tts.TTS):
|
|
171
174
|
inactivity_timeout=inactivity_timeout,
|
172
175
|
)
|
173
176
|
self._session = http_session
|
177
|
+
self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
|
178
|
+
connect_cb=self._connect_ws,
|
179
|
+
close_cb=self._close_ws,
|
180
|
+
max_session_duration=inactivity_timeout,
|
181
|
+
mark_refreshed_on_get=True,
|
182
|
+
)
|
174
183
|
self._streams = weakref.WeakSet[SynthesizeStream]()
|
175
184
|
|
185
|
+
async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
|
186
|
+
session = self._ensure_session()
|
187
|
+
return await asyncio.wait_for(
|
188
|
+
session.ws_connect(
|
189
|
+
_stream_url(self._opts),
|
190
|
+
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
191
|
+
),
|
192
|
+
self._conn_options.timeout,
|
193
|
+
)
|
194
|
+
|
195
|
+
async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
|
196
|
+
await ws.close()
|
197
|
+
|
176
198
|
def _ensure_session(self) -> aiohttp.ClientSession:
|
177
199
|
if not self._session:
|
178
200
|
self._session = utils.http_context.http_session()
|
179
201
|
|
180
202
|
return self._session
|
181
203
|
|
182
|
-
|
204
|
+
def prewarm(self) -> None:
|
205
|
+
self._pool.prewarm()
|
206
|
+
|
207
|
+
async def list_voices(self) -> list[Voice]:
|
183
208
|
async with self._ensure_session().get(
|
184
209
|
f"{self._opts.base_url}/voices",
|
185
210
|
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
@@ -207,8 +232,8 @@ class TTS(tts.TTS):
|
|
207
232
|
self,
|
208
233
|
text: str,
|
209
234
|
*,
|
210
|
-
conn_options:
|
211
|
-
) ->
|
235
|
+
conn_options: APIConnectOptions | None = None,
|
236
|
+
) -> ChunkedStream:
|
212
237
|
return ChunkedStream(
|
213
238
|
tts=self,
|
214
239
|
input_text=text,
|
@@ -217,15 +242,8 @@ class TTS(tts.TTS):
|
|
217
242
|
session=self._ensure_session(),
|
218
243
|
)
|
219
244
|
|
220
|
-
def stream(
|
221
|
-
self,
|
222
|
-
) -> "SynthesizeStream":
|
223
|
-
stream = SynthesizeStream(
|
224
|
-
tts=self,
|
225
|
-
conn_options=conn_options,
|
226
|
-
opts=self._opts,
|
227
|
-
session=self._ensure_session(),
|
228
|
-
)
|
245
|
+
def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
|
246
|
+
stream = SynthesizeStream(tts=self, pool=self._pool, opts=self._opts)
|
229
247
|
self._streams.add(stream)
|
230
248
|
return stream
|
231
249
|
|
@@ -233,6 +251,7 @@ class TTS(tts.TTS):
|
|
233
251
|
for stream in list(self._streams):
|
234
252
|
await stream.aclose()
|
235
253
|
self._streams.clear()
|
254
|
+
await self._pool.aclose()
|
236
255
|
await super().aclose()
|
237
256
|
|
238
257
|
|
@@ -245,7 +264,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
245
264
|
tts: TTS,
|
246
265
|
input_text: str,
|
247
266
|
opts: _TTSOptions,
|
248
|
-
conn_options:
|
267
|
+
conn_options: APIConnectOptions | None = None,
|
249
268
|
session: aiohttp.ClientSession,
|
250
269
|
) -> None:
|
251
270
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
@@ -320,12 +339,11 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
320
339
|
self,
|
321
340
|
*,
|
322
341
|
tts: TTS,
|
323
|
-
|
342
|
+
pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
|
324
343
|
opts: _TTSOptions,
|
325
|
-
conn_options: Optional[APIConnectOptions] = None,
|
326
344
|
):
|
327
|
-
super().__init__(tts=tts
|
328
|
-
self._opts, self.
|
345
|
+
super().__init__(tts=tts)
|
346
|
+
self._opts, self._pool = opts, pool
|
329
347
|
|
330
348
|
async def _run(self) -> None:
|
331
349
|
request_id = utils.shortuuid()
|
@@ -380,147 +398,138 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
380
398
|
word_stream: tokenize.WordStream,
|
381
399
|
request_id: str,
|
382
400
|
) -> None:
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
)
|
387
|
-
|
388
|
-
segment_id = utils.shortuuid()
|
389
|
-
decoder = utils.codecs.AudioStreamDecoder(
|
390
|
-
sample_rate=self._opts.sample_rate,
|
391
|
-
num_channels=1,
|
392
|
-
)
|
393
|
-
|
394
|
-
# 11labs protocol expects the first message to be an "init msg"
|
395
|
-
init_pkt = dict(
|
396
|
-
text=" ",
|
397
|
-
voice_settings=_strip_nones(dataclasses.asdict(self._opts.voice.settings))
|
398
|
-
if self._opts.voice.settings
|
399
|
-
else None,
|
400
|
-
generation_config=dict(
|
401
|
-
chunk_length_schedule=self._opts.chunk_length_schedule
|
402
|
-
),
|
403
|
-
)
|
404
|
-
await ws_conn.send_str(json.dumps(init_pkt))
|
405
|
-
eos_sent = False
|
406
|
-
|
407
|
-
@utils.log_exceptions(logger=logger)
|
408
|
-
async def send_task():
|
409
|
-
nonlocal eos_sent
|
410
|
-
xml_content = []
|
411
|
-
async for data in word_stream:
|
412
|
-
text = data.token
|
413
|
-
# send the xml phoneme in one go
|
414
|
-
if (
|
415
|
-
self._opts.enable_ssml_parsing
|
416
|
-
and data.token.startswith("<phoneme")
|
417
|
-
or xml_content
|
418
|
-
):
|
419
|
-
xml_content.append(text)
|
420
|
-
if data.token.find("</phoneme>") > -1:
|
421
|
-
text = self._opts.word_tokenizer.format_words(xml_content)
|
422
|
-
xml_content = []
|
423
|
-
else:
|
424
|
-
continue
|
425
|
-
|
426
|
-
data_pkt = dict(text=f"{text} ") # must always end with a space
|
427
|
-
self._mark_started()
|
428
|
-
await ws_conn.send_str(json.dumps(data_pkt))
|
429
|
-
if xml_content:
|
430
|
-
logger.warning("11labs stream ended with incomplete xml content")
|
431
|
-
|
432
|
-
# no more token, mark eos
|
433
|
-
eos_pkt = dict(text="")
|
434
|
-
await ws_conn.send_str(json.dumps(eos_pkt))
|
435
|
-
eos_sent = True
|
401
|
+
async with self._pool.connection() as ws_conn:
|
402
|
+
segment_id = utils.shortuuid()
|
403
|
+
expected_text = "" # accumulate all tokens sent
|
436
404
|
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
emitter = tts.SynthesizedAudioEmitter(
|
441
|
-
event_ch=self._event_ch,
|
442
|
-
request_id=request_id,
|
443
|
-
segment_id=segment_id,
|
405
|
+
decoder = utils.codecs.AudioStreamDecoder(
|
406
|
+
sample_rate=self._opts.sample_rate,
|
407
|
+
num_channels=1,
|
444
408
|
)
|
445
|
-
async for frame in decoder:
|
446
|
-
emitter.push(frame)
|
447
|
-
emitter.flush()
|
448
409
|
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
410
|
+
# 11labs protocol expects the first message to be an "init msg"
|
411
|
+
init_pkt = {
|
412
|
+
"text": " ",
|
413
|
+
"voice_settings": _strip_nones(dataclasses.asdict(self._opts.voice.settings))
|
414
|
+
if self._opts.voice.settings
|
415
|
+
else None,
|
416
|
+
"generation_config": {"chunk_length_schedule": self._opts.chunk_length_schedule},
|
417
|
+
}
|
418
|
+
await ws_conn.send_str(json.dumps(init_pkt))
|
419
|
+
|
420
|
+
@utils.log_exceptions(logger=logger)
|
421
|
+
async def send_task():
|
422
|
+
nonlocal expected_text
|
423
|
+
xml_content = []
|
424
|
+
async for data in word_stream:
|
425
|
+
text = data.token
|
426
|
+
expected_text += text
|
427
|
+
# send the xml phoneme in one go
|
428
|
+
if (
|
429
|
+
self._opts.enable_ssml_parsing
|
430
|
+
and data.token.startswith("<phoneme")
|
431
|
+
or xml_content
|
432
|
+
):
|
433
|
+
xml_content.append(text)
|
434
|
+
if text.find("</phoneme>") > -1:
|
435
|
+
text = self._opts.word_tokenizer.format_words(xml_content)
|
436
|
+
xml_content = []
|
437
|
+
else:
|
438
|
+
continue
|
439
|
+
|
440
|
+
data_pkt = {"text": f"{text} "} # must always end with a space
|
441
|
+
self._mark_started()
|
442
|
+
await ws_conn.send_str(json.dumps(data_pkt))
|
443
|
+
if xml_content:
|
444
|
+
logger.warning("11labs stream ended with incomplete xml content")
|
445
|
+
await ws_conn.send_str(json.dumps({"flush": True}))
|
446
|
+
|
447
|
+
# consumes from decoder and generates events
|
448
|
+
@utils.log_exceptions(logger=logger)
|
449
|
+
async def generate_task():
|
450
|
+
emitter = tts.SynthesizedAudioEmitter(
|
451
|
+
event_ch=self._event_ch,
|
452
|
+
request_id=request_id,
|
453
|
+
segment_id=segment_id,
|
454
|
+
)
|
455
|
+
async for frame in decoder:
|
456
|
+
emitter.push(frame)
|
457
|
+
emitter.flush()
|
458
|
+
|
459
|
+
# receives from ws and decodes audio
|
460
|
+
@utils.log_exceptions(logger=logger)
|
461
|
+
async def recv_task():
|
462
|
+
nonlocal expected_text
|
463
|
+
received_text = ""
|
464
|
+
|
465
|
+
while True:
|
466
|
+
msg = await ws_conn.receive()
|
467
|
+
if msg.type in (
|
468
|
+
aiohttp.WSMsgType.CLOSED,
|
469
|
+
aiohttp.WSMsgType.CLOSE,
|
470
|
+
aiohttp.WSMsgType.CLOSING,
|
471
|
+
):
|
462
472
|
raise APIStatusError(
|
463
473
|
"11labs connection closed unexpectedly, not all tokens have been consumed",
|
464
474
|
request_id=request_id,
|
465
475
|
)
|
466
|
-
return
|
467
476
|
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
data = json.loads(msg.data)
|
473
|
-
if data.get("audio"):
|
474
|
-
b64data = base64.b64decode(data["audio"])
|
475
|
-
decoder.push(b64data)
|
476
|
-
|
477
|
-
elif data.get("isFinal"):
|
478
|
-
decoder.end_input()
|
479
|
-
break
|
480
|
-
elif data.get("error"):
|
481
|
-
raise APIStatusError(
|
482
|
-
message=data["error"],
|
483
|
-
status_code=500,
|
484
|
-
request_id=request_id,
|
485
|
-
body=None,
|
486
|
-
)
|
487
|
-
else:
|
488
|
-
raise APIStatusError(
|
489
|
-
message=f"unexpected 11labs message {data}",
|
490
|
-
status_code=500,
|
491
|
-
request_id=request_id,
|
492
|
-
body=None,
|
493
|
-
)
|
477
|
+
if msg.type != aiohttp.WSMsgType.TEXT:
|
478
|
+
logger.warning("unexpected 11labs message type %s", msg.type)
|
479
|
+
continue
|
494
480
|
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
481
|
+
data = json.loads(msg.data)
|
482
|
+
if data.get("audio"):
|
483
|
+
b64data = base64.b64decode(data["audio"])
|
484
|
+
decoder.push(b64data)
|
485
|
+
|
486
|
+
if alignment := data.get("normalizedAlignment"):
|
487
|
+
received_text += "".join(alignment.get("chars", [])).replace(" ", "")
|
488
|
+
if received_text == expected_text:
|
489
|
+
decoder.end_input()
|
490
|
+
break
|
491
|
+
elif data.get("error"):
|
492
|
+
raise APIStatusError(
|
493
|
+
message=data["error"],
|
494
|
+
status_code=500,
|
495
|
+
request_id=request_id,
|
496
|
+
body=None,
|
497
|
+
)
|
498
|
+
else:
|
499
|
+
raise APIStatusError(
|
500
|
+
message=f"unexpected 11labs message {data}",
|
501
|
+
status_code=500,
|
502
|
+
request_id=request_id,
|
503
|
+
body=None,
|
504
|
+
)
|
505
|
+
|
506
|
+
tasks = [
|
507
|
+
asyncio.create_task(send_task()),
|
508
|
+
asyncio.create_task(recv_task()),
|
509
|
+
asyncio.create_task(generate_task()),
|
510
|
+
]
|
511
|
+
try:
|
512
|
+
await asyncio.gather(*tasks)
|
513
|
+
except asyncio.TimeoutError as e:
|
514
|
+
raise APITimeoutError() from e
|
515
|
+
except aiohttp.ClientResponseError as e:
|
516
|
+
raise APIStatusError(
|
517
|
+
message=e.message,
|
518
|
+
status_code=e.status,
|
519
|
+
request_id=request_id,
|
520
|
+
body=None,
|
521
|
+
) from e
|
522
|
+
except APIStatusError:
|
523
|
+
raise
|
524
|
+
except Exception as e:
|
525
|
+
raise APIConnectionError() from e
|
526
|
+
finally:
|
527
|
+
await utils.aio.gracefully_cancel(*tasks)
|
528
|
+
await decoder.aclose()
|
520
529
|
|
521
530
|
|
522
531
|
def _dict_to_voices_list(data: dict[str, Any]):
|
523
|
-
voices:
|
532
|
+
voices: list[Voice] = []
|
524
533
|
for voice in data["voices"]:
|
525
534
|
voices.append(
|
526
535
|
Voice(
|
@@ -0,0 +1,39 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["hatchling"]
|
3
|
+
build-backend = "hatchling.build"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "livekit-plugins-elevenlabs"
|
7
|
+
dynamic = ["version"]
|
8
|
+
description = "Agent Framework plugin for voice synthesis with ElevenLabs' API."
|
9
|
+
readme = "README.md"
|
10
|
+
license = "Apache-2.0"
|
11
|
+
requires-python = ">=3.9.0"
|
12
|
+
authors = [{ name = "LiveKit", email = "support@livekit.io" }]
|
13
|
+
keywords = ["webrtc", "realtime", "audio", "video", "livekit", "elevenlabs"]
|
14
|
+
classifiers = [
|
15
|
+
"Intended Audience :: Developers",
|
16
|
+
"License :: OSI Approved :: Apache Software License",
|
17
|
+
"Topic :: Multimedia :: Sound/Audio",
|
18
|
+
"Topic :: Multimedia :: Video",
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
20
|
+
"Programming Language :: Python :: 3",
|
21
|
+
"Programming Language :: Python :: 3.9",
|
22
|
+
"Programming Language :: Python :: 3.10",
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
24
|
+
]
|
25
|
+
dependencies = ["livekit-agents[codecs]>=1.0.0.dev5"]
|
26
|
+
|
27
|
+
[project.urls]
|
28
|
+
Documentation = "https://docs.livekit.io"
|
29
|
+
Website = "https://livekit.io/"
|
30
|
+
Source = "https://github.com/livekit/agents"
|
31
|
+
|
32
|
+
[tool.hatch.version]
|
33
|
+
path = "livekit/plugins/elevenlabs/version.py"
|
34
|
+
|
35
|
+
[tool.hatch.build.targets.wheel]
|
36
|
+
packages = ["livekit"]
|
37
|
+
|
38
|
+
[tool.hatch.build.targets.sdist]
|
39
|
+
include = ["/livekit"]
|
@@ -1,46 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.2
|
2
|
-
Name: livekit-plugins-elevenlabs
|
3
|
-
Version: 0.8.1
|
4
|
-
Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
7
|
-
Project-URL: Documentation, https://docs.livekit.io
|
8
|
-
Project-URL: Website, https://livekit.io/
|
9
|
-
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
Keywords: webrtc,realtime,audio,video,livekit,elevenlabs
|
11
|
-
Classifier: Intended Audience :: Developers
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
20
|
-
Requires-Python: >=3.9.0
|
21
|
-
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
|
23
|
-
Dynamic: classifier
|
24
|
-
Dynamic: description
|
25
|
-
Dynamic: description-content-type
|
26
|
-
Dynamic: home-page
|
27
|
-
Dynamic: keywords
|
28
|
-
Dynamic: license
|
29
|
-
Dynamic: project-url
|
30
|
-
Dynamic: requires-dist
|
31
|
-
Dynamic: requires-python
|
32
|
-
Dynamic: summary
|
33
|
-
|
34
|
-
# LiveKit Plugins Elevenlabs
|
35
|
-
|
36
|
-
Agent Framework plugin for voice synthesis with [ElevenLabs](https://elevenlabs.io/) API.
|
37
|
-
|
38
|
-
## Installation
|
39
|
-
|
40
|
-
```bash
|
41
|
-
pip install livekit-plugins-elevenlabs
|
42
|
-
```
|
43
|
-
|
44
|
-
## Pre-requisites
|
45
|
-
|
46
|
-
You'll need an API key from ElevenLabs. It can be set as an environment variable: `ELEVEN_API_KEY`
|
@@ -1,14 +0,0 @@
|
|
1
|
-
README.md
|
2
|
-
pyproject.toml
|
3
|
-
setup.py
|
4
|
-
livekit/plugins/elevenlabs/__init__.py
|
5
|
-
livekit/plugins/elevenlabs/log.py
|
6
|
-
livekit/plugins/elevenlabs/models.py
|
7
|
-
livekit/plugins/elevenlabs/py.typed
|
8
|
-
livekit/plugins/elevenlabs/tts.py
|
9
|
-
livekit/plugins/elevenlabs/version.py
|
10
|
-
livekit_plugins_elevenlabs.egg-info/PKG-INFO
|
11
|
-
livekit_plugins_elevenlabs.egg-info/SOURCES.txt
|
12
|
-
livekit_plugins_elevenlabs.egg-info/dependency_links.txt
|
13
|
-
livekit_plugins_elevenlabs.egg-info/requires.txt
|
14
|
-
livekit_plugins_elevenlabs.egg-info/top_level.txt
|
@@ -1 +0,0 @@
|
|
1
|
-
|
@@ -1 +0,0 @@
|
|
1
|
-
livekit-agents[codecs]<1.0.0,>=0.12.16
|
@@ -1 +0,0 @@
|
|
1
|
-
livekit
|
@@ -1,59 +0,0 @@
|
|
1
|
-
# Copyright 2023 LiveKit, Inc.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import os
|
16
|
-
import pathlib
|
17
|
-
|
18
|
-
import setuptools
|
19
|
-
import setuptools.command.build_py
|
20
|
-
|
21
|
-
here = pathlib.Path(__file__).parent.resolve()
|
22
|
-
about = {}
|
23
|
-
with open(
|
24
|
-
os.path.join(here, "livekit", "plugins", "elevenlabs", "version.py"), "r"
|
25
|
-
) as f:
|
26
|
-
exec(f.read(), about)
|
27
|
-
|
28
|
-
|
29
|
-
setuptools.setup(
|
30
|
-
name="livekit-plugins-elevenlabs",
|
31
|
-
version=about["__version__"],
|
32
|
-
description="Agent Framework plugin for voice synthesis with ElevenLabs' API.",
|
33
|
-
long_description=(here / "README.md").read_text(encoding="utf-8"),
|
34
|
-
long_description_content_type="text/markdown",
|
35
|
-
url="https://github.com/livekit/agents",
|
36
|
-
cmdclass={},
|
37
|
-
classifiers=[
|
38
|
-
"Intended Audience :: Developers",
|
39
|
-
"License :: OSI Approved :: Apache Software License",
|
40
|
-
"Topic :: Multimedia :: Sound/Audio",
|
41
|
-
"Topic :: Multimedia :: Video",
|
42
|
-
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
43
|
-
"Programming Language :: Python :: 3",
|
44
|
-
"Programming Language :: Python :: 3.9",
|
45
|
-
"Programming Language :: Python :: 3.10",
|
46
|
-
"Programming Language :: Python :: 3 :: Only",
|
47
|
-
],
|
48
|
-
keywords=["webrtc", "realtime", "audio", "video", "livekit", "elevenlabs"],
|
49
|
-
license="Apache-2.0",
|
50
|
-
packages=setuptools.find_namespace_packages(include=["livekit.*"]),
|
51
|
-
python_requires=">=3.9.0",
|
52
|
-
install_requires=["livekit-agents[codecs]>=0.12.16,<1.0.0"],
|
53
|
-
package_data={"livekit.plugins.elevenlabs": ["py.typed"]},
|
54
|
-
project_urls={
|
55
|
-
"Documentation": "https://docs.livekit.io",
|
56
|
-
"Website": "https://livekit.io/",
|
57
|
-
"Source": "https://github.com/livekit/agents",
|
58
|
-
},
|
59
|
-
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|