livekit-plugins-elevenlabs 0.8.1__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit_plugins_elevenlabs-1.0.0/.gitignore +168 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/PKG-INFO +10 -20
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/livekit/plugins/elevenlabs/__init__.py +2 -2
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/livekit/plugins/elevenlabs/models.py +9 -1
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/livekit/plugins/elevenlabs/tts.py +95 -92
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/livekit/plugins/elevenlabs/version.py +1 -1
- livekit_plugins_elevenlabs-1.0.0/pyproject.toml +39 -0
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/PKG-INFO +0 -46
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/SOURCES.txt +0 -14
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/dependency_links.txt +0 -1
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/requires.txt +0 -1
- livekit_plugins_elevenlabs-0.8.1/livekit_plugins_elevenlabs.egg-info/top_level.txt +0 -1
- livekit_plugins_elevenlabs-0.8.1/pyproject.toml +0 -3
- livekit_plugins_elevenlabs-0.8.1/setup.cfg +0 -4
- livekit_plugins_elevenlabs-0.8.1/setup.py +0 -59
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/README.md +0 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/livekit/plugins/elevenlabs/log.py +0 -0
- {livekit_plugins_elevenlabs-0.8.1 → livekit_plugins_elevenlabs-1.0.0}/livekit/plugins/elevenlabs/py.typed +0 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
**/.vscode
|
2
|
+
**/.DS_Store
|
3
|
+
|
4
|
+
# Byte-compiled / optimized / DLL files
|
5
|
+
__pycache__/
|
6
|
+
*.py[cod]
|
7
|
+
*$py.class
|
8
|
+
|
9
|
+
# C extensions
|
10
|
+
*.so
|
11
|
+
|
12
|
+
# Distribution / packaging
|
13
|
+
.Python
|
14
|
+
build/
|
15
|
+
develop-eggs/
|
16
|
+
dist/
|
17
|
+
downloads/
|
18
|
+
eggs/
|
19
|
+
.eggs/
|
20
|
+
lib/
|
21
|
+
lib64/
|
22
|
+
parts/
|
23
|
+
sdist/
|
24
|
+
var/
|
25
|
+
wheels/
|
26
|
+
share/python-wheels/
|
27
|
+
*.egg-info/
|
28
|
+
.installed.cfg
|
29
|
+
*.egg
|
30
|
+
MANIFEST
|
31
|
+
|
32
|
+
# PyInstaller
|
33
|
+
# Usually these files are written by a python script from a template
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
35
|
+
*.manifest
|
36
|
+
*.spec
|
37
|
+
|
38
|
+
# Installer logs
|
39
|
+
pip-log.txt
|
40
|
+
pip-delete-this-directory.txt
|
41
|
+
|
42
|
+
# Unit test / coverage reports
|
43
|
+
htmlcov/
|
44
|
+
.tox/
|
45
|
+
.nox/
|
46
|
+
.coverage
|
47
|
+
.coverage.*
|
48
|
+
.cache
|
49
|
+
nosetests.xml
|
50
|
+
coverage.xml
|
51
|
+
*.cover
|
52
|
+
*.py,cover
|
53
|
+
.hypothesis/
|
54
|
+
.pytest_cache/
|
55
|
+
cover/
|
56
|
+
|
57
|
+
# Translations
|
58
|
+
*.mo
|
59
|
+
*.pot
|
60
|
+
|
61
|
+
# Django stuff:
|
62
|
+
*.log
|
63
|
+
local_settings.py
|
64
|
+
db.sqlite3
|
65
|
+
db.sqlite3-journal
|
66
|
+
|
67
|
+
# Flask stuff:
|
68
|
+
instance/
|
69
|
+
.webassets-cache
|
70
|
+
|
71
|
+
# Scrapy stuff:
|
72
|
+
.scrapy
|
73
|
+
|
74
|
+
# Sphinx documentation
|
75
|
+
docs/_build/
|
76
|
+
|
77
|
+
# PyBuilder
|
78
|
+
.pybuilder/
|
79
|
+
target/
|
80
|
+
|
81
|
+
# Jupyter Notebook
|
82
|
+
.ipynb_checkpoints
|
83
|
+
|
84
|
+
# IPython
|
85
|
+
profile_default/
|
86
|
+
ipython_config.py
|
87
|
+
|
88
|
+
# pyenv
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
91
|
+
# .python-version
|
92
|
+
|
93
|
+
# pipenv
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
97
|
+
# install all needed dependencies.
|
98
|
+
#Pipfile.lock
|
99
|
+
|
100
|
+
# poetry
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
103
|
+
# commonly ignored for libraries.
|
104
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
105
|
+
#poetry.lock
|
106
|
+
|
107
|
+
# pdm
|
108
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
109
|
+
#pdm.lock
|
110
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
111
|
+
# in version control.
|
112
|
+
# https://pdm.fming.dev/#use-with-ide
|
113
|
+
.pdm.toml
|
114
|
+
|
115
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
116
|
+
__pypackages__/
|
117
|
+
|
118
|
+
# Celery stuff
|
119
|
+
celerybeat-schedule
|
120
|
+
celerybeat.pid
|
121
|
+
|
122
|
+
# SageMath parsed files
|
123
|
+
*.sage.py
|
124
|
+
|
125
|
+
# Environments
|
126
|
+
.env
|
127
|
+
.venv
|
128
|
+
env/
|
129
|
+
venv/
|
130
|
+
ENV/
|
131
|
+
env.bak/
|
132
|
+
venv.bak/
|
133
|
+
|
134
|
+
# Spyder project settings
|
135
|
+
.spyderproject
|
136
|
+
.spyproject
|
137
|
+
|
138
|
+
# Rope project settings
|
139
|
+
.ropeproject
|
140
|
+
|
141
|
+
# mkdocs documentation
|
142
|
+
/site
|
143
|
+
|
144
|
+
# mypy
|
145
|
+
.mypy_cache/
|
146
|
+
.dmypy.json
|
147
|
+
dmypy.json
|
148
|
+
|
149
|
+
# Pyre type checker
|
150
|
+
.pyre/
|
151
|
+
|
152
|
+
# pytype static type analyzer
|
153
|
+
.pytype/
|
154
|
+
|
155
|
+
# Cython debug symbols
|
156
|
+
cython_debug/
|
157
|
+
|
158
|
+
# PyCharm
|
159
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
160
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
161
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
162
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
163
|
+
.idea/
|
164
|
+
|
165
|
+
node_modules
|
166
|
+
|
167
|
+
credentials.json
|
168
|
+
pyrightconfig.json
|
@@ -1,35 +1,25 @@
|
|
1
|
-
Metadata-Version: 2.2
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-elevenlabs
|
3
|
-
Version: 0.8.1
|
3
|
+
Version: 1.0.0
|
4
4
|
Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
7
5
|
Project-URL: Documentation, https://docs.livekit.io
|
8
6
|
Project-URL: Website, https://livekit.io/
|
9
7
|
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
Keywords: webrtc,realtime,audio,video,livekit,elevenlabs
|
8
|
+
Author-email: LiveKit <hello@livekit.io>
|
9
|
+
License-Expression: Apache-2.0
|
10
|
+
Keywords: audio,elevenlabs,livekit,realtime,video,webrtc
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
13
|
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
15
|
Classifier: Programming Language :: Python :: 3.9
|
18
16
|
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
20
|
Requires-Python: >=3.9.0
|
21
|
+
Requires-Dist: livekit-agents[codecs]>=1.0.0
|
21
22
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
|
23
|
-
Dynamic: classifier
|
24
|
-
Dynamic: description
|
25
|
-
Dynamic: description-content-type
|
26
|
-
Dynamic: home-page
|
27
|
-
Dynamic: keywords
|
28
|
-
Dynamic: license
|
29
|
-
Dynamic: project-url
|
30
|
-
Dynamic: requires-dist
|
31
|
-
Dynamic: requires-python
|
32
|
-
Dynamic: summary
|
33
23
|
|
34
24
|
# LiveKit Plugins Elevenlabs
|
35
25
|
|
@@ -13,7 +13,7 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
from .models import TTSEncoding, TTSModels
|
16
|
-
from .tts import
|
16
|
+
from .tts import DEFAULT_VOICE_ID, TTS, Voice, VoiceSettings
|
17
17
|
from .version import __version__
|
18
18
|
|
19
19
|
__all__ = [
|
@@ -22,7 +22,7 @@ __all__ = [
|
|
22
22
|
"VoiceSettings",
|
23
23
|
"TTSEncoding",
|
24
24
|
"TTSModels",
|
25
|
-
"
|
25
|
+
"DEFAULT_VOICE_ID",
|
26
26
|
"__version__",
|
27
27
|
]
|
28
28
|
|
@@ -10,4 +10,12 @@ TTSModels = Literal[
|
|
10
10
|
"eleven_flash_v2",
|
11
11
|
]
|
12
12
|
|
13
|
-
TTSEncoding = Literal[
|
13
|
+
TTSEncoding = Literal[
|
14
|
+
"mp3_22050_32",
|
15
|
+
"mp3_44100",
|
16
|
+
"mp3_44100_32",
|
17
|
+
"mp3_44100_64",
|
18
|
+
"mp3_44100_96",
|
19
|
+
"mp3_44100_128",
|
20
|
+
"mp3_44100_192",
|
21
|
+
]
|
@@ -21,9 +21,10 @@ import json
|
|
21
21
|
import os
|
22
22
|
import weakref
|
23
23
|
from dataclasses import dataclass
|
24
|
-
from typing import Any
|
24
|
+
from typing import Any
|
25
25
|
|
26
26
|
import aiohttp
|
27
|
+
|
27
28
|
from livekit.agents import (
|
28
29
|
APIConnectionError,
|
29
30
|
APIConnectOptions,
|
@@ -33,11 +34,19 @@ from livekit.agents import (
|
|
33
34
|
tts,
|
34
35
|
utils,
|
35
36
|
)
|
37
|
+
from livekit.agents.types import (
|
38
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
39
|
+
NOT_GIVEN,
|
40
|
+
NotGivenOr,
|
41
|
+
)
|
42
|
+
from livekit.agents.utils import is_given
|
36
43
|
|
37
44
|
from .log import logger
|
38
45
|
from .models import TTSEncoding, TTSModels
|
39
46
|
|
40
|
-
|
47
|
+
# by default, use 22.05kHz sample rate at 32kbps
|
48
|
+
# in our testing, reduce TTFB by about ~110ms
|
49
|
+
_DefaultEncoding: TTSEncoding = "mp3_22050_32"
|
41
50
|
|
42
51
|
|
43
52
|
def _sample_rate_from_format(output_format: TTSEncoding) -> int:
|
@@ -49,9 +58,9 @@ def _sample_rate_from_format(output_format: TTSEncoding) -> int:
|
|
49
58
|
class VoiceSettings:
|
50
59
|
stability: float # [0.0 - 1.0]
|
51
60
|
similarity_boost: float # [0.0 - 1.0]
|
52
|
-
style: float
|
53
|
-
speed: float
|
54
|
-
use_speaker_boost: bool
|
61
|
+
style: NotGivenOr[float] = NOT_GIVEN # [0.0 - 1.0]
|
62
|
+
speed: NotGivenOr[float] = NOT_GIVEN # [0.8 - 1.2]
|
63
|
+
use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN
|
55
64
|
|
56
65
|
|
57
66
|
@dataclass
|
@@ -59,22 +68,9 @@ class Voice:
|
|
59
68
|
id: str
|
60
69
|
name: str
|
61
70
|
category: str
|
62
|
-
settings: VoiceSettings | None = None
|
63
|
-
|
64
|
-
|
65
|
-
DEFAULT_VOICE = Voice(
|
66
|
-
id="EXAVITQu4vr4xnSDxMaL",
|
67
|
-
name="Bella",
|
68
|
-
category="premade",
|
69
|
-
settings=VoiceSettings(
|
70
|
-
stability=0.71,
|
71
|
-
speed=1.0,
|
72
|
-
similarity_boost=0.5,
|
73
|
-
style=0.0,
|
74
|
-
use_speaker_boost=True,
|
75
|
-
),
|
76
|
-
)
|
77
71
|
|
72
|
+
|
73
|
+
DEFAULT_VOICE_ID = "EXAVITQu4vr4xnSDxMaL"
|
78
74
|
API_BASE_URL_V1 = "https://api.elevenlabs.io/v1"
|
79
75
|
AUTHORIZATION_HEADER = "xi-api-key"
|
80
76
|
WS_INACTIVITY_TIMEOUT = 300
|
@@ -83,13 +79,14 @@ WS_INACTIVITY_TIMEOUT = 300
|
|
83
79
|
@dataclass
|
84
80
|
class _TTSOptions:
|
85
81
|
api_key: str
|
86
|
-
|
82
|
+
voice_id: str
|
83
|
+
voice_settings: NotGivenOr[VoiceSettings]
|
87
84
|
model: TTSModels | str
|
88
|
-
language: str
|
85
|
+
language: NotGivenOr[str]
|
89
86
|
base_url: str
|
90
87
|
encoding: TTSEncoding
|
91
88
|
sample_rate: int
|
92
|
-
streaming_latency: int
|
89
|
+
streaming_latency: NotGivenOr[int]
|
93
90
|
word_tokenizer: tokenize.WordTokenizer
|
94
91
|
chunk_length_schedule: list[int]
|
95
92
|
enable_ssml_parsing: bool
|
@@ -100,68 +97,70 @@ class TTS(tts.TTS):
|
|
100
97
|
def __init__(
|
101
98
|
self,
|
102
99
|
*,
|
103
|
-
|
100
|
+
voice_id: str = DEFAULT_VOICE_ID,
|
101
|
+
voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
|
104
102
|
model: TTSModels | str = "eleven_flash_v2_5",
|
105
|
-
|
106
|
-
|
107
|
-
|
103
|
+
encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN,
|
104
|
+
api_key: NotGivenOr[str] = NOT_GIVEN,
|
105
|
+
base_url: NotGivenOr[str] = NOT_GIVEN,
|
106
|
+
streaming_latency: NotGivenOr[int] = NOT_GIVEN,
|
108
107
|
inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
|
109
|
-
word_tokenizer:
|
108
|
+
word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
|
110
109
|
enable_ssml_parsing: bool = False,
|
111
|
-
chunk_length_schedule: list[int] =
|
110
|
+
chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN, # range is [50, 500]
|
112
111
|
http_session: aiohttp.ClientSession | None = None,
|
113
|
-
|
114
|
-
model_id: TTSModels | str | None = None,
|
115
|
-
language: str | None = None,
|
112
|
+
language: NotGivenOr[str] = NOT_GIVEN,
|
116
113
|
) -> None:
|
117
114
|
"""
|
118
115
|
Create a new instance of ElevenLabs TTS.
|
119
116
|
|
120
117
|
Args:
|
121
|
-
|
118
|
+
voice_id (str): Voice ID. Defaults to `DEFAULT_VOICE_ID`.
|
119
|
+
voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
|
122
120
|
model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
|
123
|
-
api_key (str
|
124
|
-
base_url (str
|
125
|
-
streaming_latency (int): Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
|
121
|
+
api_key (NotGivenOr[str]): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
|
122
|
+
base_url (NotGivenOr[str]): Custom base URL for the API. Optional.
|
123
|
+
streaming_latency (NotGivenOr[int]): Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
|
126
124
|
inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
|
127
|
-
word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
|
125
|
+
word_tokenizer (NotGivenOr[tokenize.WordTokenizer]): Tokenizer for processing text. Defaults to basic WordTokenizer.
|
128
126
|
enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
|
129
|
-
chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
|
127
|
+
chunk_length_schedule (NotGivenOr[list[int]]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
|
130
128
|
http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
|
131
|
-
language (str
|
132
|
-
"""
|
129
|
+
language (NotGivenOr[str]): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5".
|
130
|
+
""" # noqa: E501
|
131
|
+
|
132
|
+
if not is_given(chunk_length_schedule):
|
133
|
+
chunk_length_schedule = [80, 120, 200, 260]
|
134
|
+
|
135
|
+
if not is_given(encoding):
|
136
|
+
encoding = _DefaultEncoding
|
133
137
|
|
134
138
|
super().__init__(
|
135
139
|
capabilities=tts.TTSCapabilities(
|
136
140
|
streaming=True,
|
137
141
|
),
|
138
|
-
sample_rate=_sample_rate_from_format(
|
142
|
+
sample_rate=_sample_rate_from_format(encoding),
|
139
143
|
num_channels=1,
|
140
144
|
)
|
141
145
|
|
142
|
-
if
|
143
|
-
|
144
|
-
"model_id is deprecated and will be removed in 1.5.0, use model instead",
|
145
|
-
)
|
146
|
-
model = model_id
|
147
|
-
|
148
|
-
api_key = api_key or os.environ.get("ELEVEN_API_KEY")
|
149
|
-
if not api_key:
|
146
|
+
elevenlabs_api_key = api_key if is_given(api_key) else os.environ.get("ELEVEN_API_KEY")
|
147
|
+
if not elevenlabs_api_key:
|
150
148
|
raise ValueError(
|
151
|
-
"ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
|
149
|
+
"ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable" # noqa: E501
|
152
150
|
)
|
153
151
|
|
154
|
-
if word_tokenizer
|
152
|
+
if not is_given(word_tokenizer):
|
155
153
|
word_tokenizer = tokenize.basic.WordTokenizer(
|
156
154
|
ignore_punctuation=False # punctuation can help for intonation
|
157
155
|
)
|
158
156
|
|
159
157
|
self._opts = _TTSOptions(
|
160
|
-
|
158
|
+
voice_id=voice_id,
|
159
|
+
voice_settings=voice_settings,
|
161
160
|
model=model,
|
162
|
-
api_key=
|
163
|
-
base_url=base_url
|
164
|
-
encoding=
|
161
|
+
api_key=elevenlabs_api_key,
|
162
|
+
base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
|
163
|
+
encoding=encoding,
|
165
164
|
sample_rate=self.sample_rate,
|
166
165
|
streaming_latency=streaming_latency,
|
167
166
|
word_tokenizer=word_tokenizer,
|
@@ -179,7 +178,7 @@ class TTS(tts.TTS):
|
|
179
178
|
|
180
179
|
return self._session
|
181
180
|
|
182
|
-
async def list_voices(self) ->
|
181
|
+
async def list_voices(self) -> list[Voice]:
|
183
182
|
async with self._ensure_session().get(
|
184
183
|
f"{self._opts.base_url}/voices",
|
185
184
|
headers={AUTHORIZATION_HEADER: self._opts.api_key},
|
@@ -189,26 +188,33 @@ class TTS(tts.TTS):
|
|
189
188
|
def update_options(
|
190
189
|
self,
|
191
190
|
*,
|
192
|
-
|
193
|
-
|
194
|
-
|
191
|
+
voice_id: NotGivenOr[str] = NOT_GIVEN,
|
192
|
+
voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
|
193
|
+
model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
|
194
|
+
language: NotGivenOr[str] = NOT_GIVEN,
|
195
195
|
) -> None:
|
196
196
|
"""
|
197
197
|
Args:
|
198
|
-
|
199
|
-
|
200
|
-
|
198
|
+
voice_id (NotGivenOr[str]): Voice ID.
|
199
|
+
voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
|
200
|
+
model (NotGivenOr[TTSModels | str]): TTS model to use.
|
201
|
+
language (NotGivenOr[str]): Language code for the TTS model.
|
201
202
|
"""
|
202
|
-
|
203
|
-
|
204
|
-
|
203
|
+
if is_given(model):
|
204
|
+
self._opts.model = model
|
205
|
+
if is_given(voice_id):
|
206
|
+
self._opts.voice_id = voice_id
|
207
|
+
if is_given(voice_settings):
|
208
|
+
self._opts.voice_settings = voice_settings
|
209
|
+
if is_given(language):
|
210
|
+
self._opts.language = language
|
205
211
|
|
206
212
|
def synthesize(
|
207
213
|
self,
|
208
214
|
text: str,
|
209
215
|
*,
|
210
|
-
conn_options:
|
211
|
-
) ->
|
216
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
217
|
+
) -> ChunkedStream:
|
212
218
|
return ChunkedStream(
|
213
219
|
tts=self,
|
214
220
|
input_text=text,
|
@@ -218,8 +224,8 @@ class TTS(tts.TTS):
|
|
218
224
|
)
|
219
225
|
|
220
226
|
def stream(
|
221
|
-
self, *, conn_options:
|
222
|
-
) ->
|
227
|
+
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
228
|
+
) -> SynthesizeStream:
|
223
229
|
stream = SynthesizeStream(
|
224
230
|
tts=self,
|
225
231
|
conn_options=conn_options,
|
@@ -245,7 +251,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
245
251
|
tts: TTS,
|
246
252
|
input_text: str,
|
247
253
|
opts: _TTSOptions,
|
248
|
-
conn_options:
|
254
|
+
conn_options: APIConnectOptions,
|
249
255
|
session: aiohttp.ClientSession,
|
250
256
|
) -> None:
|
251
257
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
@@ -254,8 +260,8 @@ class ChunkedStream(tts.ChunkedStream):
|
|
254
260
|
async def _run(self) -> None:
|
255
261
|
request_id = utils.shortuuid()
|
256
262
|
voice_settings = (
|
257
|
-
_strip_nones(dataclasses.asdict(self._opts.
|
258
|
-
if self._opts.
|
263
|
+
_strip_nones(dataclasses.asdict(self._opts.voice_settings))
|
264
|
+
if is_given(self._opts.voice_settings)
|
259
265
|
else None
|
260
266
|
)
|
261
267
|
data = {
|
@@ -322,7 +328,7 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
322
328
|
tts: TTS,
|
323
329
|
session: aiohttp.ClientSession,
|
324
330
|
opts: _TTSOptions,
|
325
|
-
conn_options:
|
331
|
+
conn_options: APIConnectOptions,
|
326
332
|
):
|
327
333
|
super().__init__(tts=tts, conn_options=conn_options)
|
328
334
|
self._opts, self._session = opts, session
|
@@ -392,15 +398,13 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
392
398
|
)
|
393
399
|
|
394
400
|
# 11labs protocol expects the first message to be an "init msg"
|
395
|
-
init_pkt =
|
396
|
-
text
|
397
|
-
voice_settings
|
398
|
-
if self._opts.
|
401
|
+
init_pkt = {
|
402
|
+
"text": " ",
|
403
|
+
"voice_settings": _strip_nones(dataclasses.asdict(self._opts.voice_settings))
|
404
|
+
if is_given(self._opts.voice_settings)
|
399
405
|
else None,
|
400
|
-
generation_config
|
401
|
-
|
402
|
-
),
|
403
|
-
)
|
406
|
+
"generation_config": {"chunk_length_schedule": self._opts.chunk_length_schedule},
|
407
|
+
}
|
404
408
|
await ws_conn.send_str(json.dumps(init_pkt))
|
405
409
|
eos_sent = False
|
406
410
|
|
@@ -423,14 +427,14 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
423
427
|
else:
|
424
428
|
continue
|
425
429
|
|
426
|
-
data_pkt =
|
430
|
+
data_pkt = {"text": f"{text} "} # must always end with a space
|
427
431
|
self._mark_started()
|
428
432
|
await ws_conn.send_str(json.dumps(data_pkt))
|
429
433
|
if xml_content:
|
430
434
|
logger.warning("11labs stream ended with incomplete xml content")
|
431
435
|
|
432
436
|
# no more token, mark eos
|
433
|
-
eos_pkt =
|
437
|
+
eos_pkt = {"text": ""}
|
434
438
|
await ws_conn.send_str(json.dumps(eos_pkt))
|
435
439
|
eos_sent = True
|
436
440
|
|
@@ -460,7 +464,7 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
460
464
|
):
|
461
465
|
if not eos_sent:
|
462
466
|
raise APIStatusError(
|
463
|
-
"11labs connection closed unexpectedly, not all tokens have been consumed",
|
467
|
+
"11labs connection closed unexpectedly, not all tokens have been consumed", # noqa: E501
|
464
468
|
request_id=request_id,
|
465
469
|
)
|
466
470
|
return
|
@@ -520,40 +524,39 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
520
524
|
|
521
525
|
|
522
526
|
def _dict_to_voices_list(data: dict[str, Any]):
|
523
|
-
voices:
|
527
|
+
voices: list[Voice] = []
|
524
528
|
for voice in data["voices"]:
|
525
529
|
voices.append(
|
526
530
|
Voice(
|
527
531
|
id=voice["voice_id"],
|
528
532
|
name=voice["name"],
|
529
533
|
category=voice["category"],
|
530
|
-
settings=None,
|
531
534
|
)
|
532
535
|
)
|
533
536
|
return voices
|
534
537
|
|
535
538
|
|
536
539
|
def _strip_nones(data: dict[str, Any]):
|
537
|
-
return {k: v for k, v in data.items() if v is not None}
|
540
|
+
return {k: v for k, v in data.items() if is_given(v) and v is not None}
|
538
541
|
|
539
542
|
|
540
543
|
def _synthesize_url(opts: _TTSOptions) -> str:
|
541
544
|
base_url = opts.base_url
|
542
|
-
voice_id = opts.
|
545
|
+
voice_id = opts.voice_id
|
543
546
|
model_id = opts.model
|
544
547
|
output_format = opts.encoding
|
545
548
|
url = (
|
546
549
|
f"{base_url}/text-to-speech/{voice_id}/stream?"
|
547
550
|
f"model_id={model_id}&output_format={output_format}"
|
548
551
|
)
|
549
|
-
if opts.streaming_latency:
|
552
|
+
if is_given(opts.streaming_latency):
|
550
553
|
url += f"&optimize_streaming_latency={opts.streaming_latency}"
|
551
554
|
return url
|
552
555
|
|
553
556
|
|
554
557
|
def _stream_url(opts: _TTSOptions) -> str:
|
555
558
|
base_url = opts.base_url
|
556
|
-
voice_id = opts.
|
559
|
+
voice_id = opts.voice_id
|
557
560
|
model_id = opts.model
|
558
561
|
output_format = opts.encoding
|
559
562
|
enable_ssml = str(opts.enable_ssml_parsing).lower()
|
@@ -564,8 +567,8 @@ def _stream_url(opts: _TTSOptions) -> str:
|
|
564
567
|
f"model_id={model_id}&output_format={output_format}&"
|
565
568
|
f"enable_ssml_parsing={enable_ssml}&inactivity_timeout={inactivity_timeout}"
|
566
569
|
)
|
567
|
-
if language
|
570
|
+
if is_given(language):
|
568
571
|
url += f"&language_code={language}"
|
569
|
-
if opts.streaming_latency:
|
572
|
+
if is_given(opts.streaming_latency):
|
570
573
|
url += f"&optimize_streaming_latency={opts.streaming_latency}"
|
571
574
|
return url
|
@@ -0,0 +1,39 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["hatchling"]
|
3
|
+
build-backend = "hatchling.build"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "livekit-plugins-elevenlabs"
|
7
|
+
dynamic = ["version"]
|
8
|
+
description = "Agent Framework plugin for voice synthesis with ElevenLabs' API."
|
9
|
+
readme = "README.md"
|
10
|
+
license = "Apache-2.0"
|
11
|
+
requires-python = ">=3.9.0"
|
12
|
+
authors = [{ name = "LiveKit", email = "hello@livekit.io" }]
|
13
|
+
keywords = ["webrtc", "realtime", "audio", "video", "livekit", "elevenlabs"]
|
14
|
+
classifiers = [
|
15
|
+
"Intended Audience :: Developers",
|
16
|
+
"License :: OSI Approved :: Apache Software License",
|
17
|
+
"Topic :: Multimedia :: Sound/Audio",
|
18
|
+
"Topic :: Multimedia :: Video",
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
20
|
+
"Programming Language :: Python :: 3",
|
21
|
+
"Programming Language :: Python :: 3.9",
|
22
|
+
"Programming Language :: Python :: 3.10",
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
24
|
+
]
|
25
|
+
dependencies = ["livekit-agents[codecs]>=1.0.0"]
|
26
|
+
|
27
|
+
[project.urls]
|
28
|
+
Documentation = "https://docs.livekit.io"
|
29
|
+
Website = "https://livekit.io/"
|
30
|
+
Source = "https://github.com/livekit/agents"
|
31
|
+
|
32
|
+
[tool.hatch.version]
|
33
|
+
path = "livekit/plugins/elevenlabs/version.py"
|
34
|
+
|
35
|
+
[tool.hatch.build.targets.wheel]
|
36
|
+
packages = ["livekit"]
|
37
|
+
|
38
|
+
[tool.hatch.build.targets.sdist]
|
39
|
+
include = ["/livekit"]
|
@@ -1,46 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.2
|
2
|
-
Name: livekit-plugins-elevenlabs
|
3
|
-
Version: 0.8.1
|
4
|
-
Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
7
|
-
Project-URL: Documentation, https://docs.livekit.io
|
8
|
-
Project-URL: Website, https://livekit.io/
|
9
|
-
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
Keywords: webrtc,realtime,audio,video,livekit,elevenlabs
|
11
|
-
Classifier: Intended Audience :: Developers
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
20
|
-
Requires-Python: >=3.9.0
|
21
|
-
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
|
23
|
-
Dynamic: classifier
|
24
|
-
Dynamic: description
|
25
|
-
Dynamic: description-content-type
|
26
|
-
Dynamic: home-page
|
27
|
-
Dynamic: keywords
|
28
|
-
Dynamic: license
|
29
|
-
Dynamic: project-url
|
30
|
-
Dynamic: requires-dist
|
31
|
-
Dynamic: requires-python
|
32
|
-
Dynamic: summary
|
33
|
-
|
34
|
-
# LiveKit Plugins Elevenlabs
|
35
|
-
|
36
|
-
Agent Framework plugin for voice synthesis with [ElevenLabs](https://elevenlabs.io/) API.
|
37
|
-
|
38
|
-
## Installation
|
39
|
-
|
40
|
-
```bash
|
41
|
-
pip install livekit-plugins-elevenlabs
|
42
|
-
```
|
43
|
-
|
44
|
-
## Pre-requisites
|
45
|
-
|
46
|
-
You'll need an API key from ElevenLabs. It can be set as an environment variable: `ELEVEN_API_KEY`
|
@@ -1,14 +0,0 @@
|
|
1
|
-
README.md
|
2
|
-
pyproject.toml
|
3
|
-
setup.py
|
4
|
-
livekit/plugins/elevenlabs/__init__.py
|
5
|
-
livekit/plugins/elevenlabs/log.py
|
6
|
-
livekit/plugins/elevenlabs/models.py
|
7
|
-
livekit/plugins/elevenlabs/py.typed
|
8
|
-
livekit/plugins/elevenlabs/tts.py
|
9
|
-
livekit/plugins/elevenlabs/version.py
|
10
|
-
livekit_plugins_elevenlabs.egg-info/PKG-INFO
|
11
|
-
livekit_plugins_elevenlabs.egg-info/SOURCES.txt
|
12
|
-
livekit_plugins_elevenlabs.egg-info/dependency_links.txt
|
13
|
-
livekit_plugins_elevenlabs.egg-info/requires.txt
|
14
|
-
livekit_plugins_elevenlabs.egg-info/top_level.txt
|
@@ -1 +0,0 @@
|
|
1
|
-
|
@@ -1 +0,0 @@
|
|
1
|
-
livekit-agents[codecs]<1.0.0,>=0.12.16
|
@@ -1 +0,0 @@
|
|
1
|
-
livekit
|
@@ -1,59 +0,0 @@
|
|
1
|
-
# Copyright 2023 LiveKit, Inc.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import os
|
16
|
-
import pathlib
|
17
|
-
|
18
|
-
import setuptools
|
19
|
-
import setuptools.command.build_py
|
20
|
-
|
21
|
-
here = pathlib.Path(__file__).parent.resolve()
|
22
|
-
about = {}
|
23
|
-
with open(
|
24
|
-
os.path.join(here, "livekit", "plugins", "elevenlabs", "version.py"), "r"
|
25
|
-
) as f:
|
26
|
-
exec(f.read(), about)
|
27
|
-
|
28
|
-
|
29
|
-
setuptools.setup(
|
30
|
-
name="livekit-plugins-elevenlabs",
|
31
|
-
version=about["__version__"],
|
32
|
-
description="Agent Framework plugin for voice synthesis with ElevenLabs' API.",
|
33
|
-
long_description=(here / "README.md").read_text(encoding="utf-8"),
|
34
|
-
long_description_content_type="text/markdown",
|
35
|
-
url="https://github.com/livekit/agents",
|
36
|
-
cmdclass={},
|
37
|
-
classifiers=[
|
38
|
-
"Intended Audience :: Developers",
|
39
|
-
"License :: OSI Approved :: Apache Software License",
|
40
|
-
"Topic :: Multimedia :: Sound/Audio",
|
41
|
-
"Topic :: Multimedia :: Video",
|
42
|
-
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
43
|
-
"Programming Language :: Python :: 3",
|
44
|
-
"Programming Language :: Python :: 3.9",
|
45
|
-
"Programming Language :: Python :: 3.10",
|
46
|
-
"Programming Language :: Python :: 3 :: Only",
|
47
|
-
],
|
48
|
-
keywords=["webrtc", "realtime", "audio", "video", "livekit", "elevenlabs"],
|
49
|
-
license="Apache-2.0",
|
50
|
-
packages=setuptools.find_namespace_packages(include=["livekit.*"]),
|
51
|
-
python_requires=">=3.9.0",
|
52
|
-
install_requires=["livekit-agents[codecs]>=0.12.16,<1.0.0"],
|
53
|
-
package_data={"livekit.plugins.elevenlabs": ["py.typed"]},
|
54
|
-
project_urls={
|
55
|
-
"Documentation": "https://docs.livekit.io",
|
56
|
-
"Website": "https://livekit.io/",
|
57
|
-
"Source": "https://github.com/livekit/agents",
|
58
|
-
},
|
59
|
-
)
|
File without changes
|
File without changes
|
File without changes
|