livekit-plugins-upliftai 1.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit_plugins_upliftai-1.2.7/.gitignore +175 -0
- livekit_plugins_upliftai-1.2.7/PKG-INFO +44 -0
- livekit_plugins_upliftai-1.2.7/README.md +20 -0
- livekit_plugins_upliftai-1.2.7/livekit/plugins/upliftai/__init__.py +49 -0
- livekit_plugins_upliftai-1.2.7/livekit/plugins/upliftai/log.py +3 -0
- livekit_plugins_upliftai-1.2.7/livekit/plugins/upliftai/py.typed +0 -0
- livekit_plugins_upliftai-1.2.7/livekit/plugins/upliftai/tts.py +515 -0
- livekit_plugins_upliftai-1.2.7/livekit/plugins/upliftai/version.py +15 -0
- livekit_plugins_upliftai-1.2.7/pyproject.toml +39 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
**/.vscode
|
|
2
|
+
**/.DS_Store
|
|
3
|
+
|
|
4
|
+
# Byte-compiled / optimized / DLL files
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[cod]
|
|
7
|
+
*$py.class
|
|
8
|
+
|
|
9
|
+
# C extensions
|
|
10
|
+
*.so
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py,cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
cover/
|
|
56
|
+
|
|
57
|
+
# Translations
|
|
58
|
+
*.mo
|
|
59
|
+
*.pot
|
|
60
|
+
|
|
61
|
+
# Django stuff:
|
|
62
|
+
*.log
|
|
63
|
+
local_settings.py
|
|
64
|
+
db.sqlite3
|
|
65
|
+
db.sqlite3-journal
|
|
66
|
+
|
|
67
|
+
# Flask stuff:
|
|
68
|
+
instance/
|
|
69
|
+
.webassets-cache
|
|
70
|
+
|
|
71
|
+
# Scrapy stuff:
|
|
72
|
+
.scrapy
|
|
73
|
+
|
|
74
|
+
# Sphinx documentation
|
|
75
|
+
docs/_build/
|
|
76
|
+
|
|
77
|
+
# PyBuilder
|
|
78
|
+
.pybuilder/
|
|
79
|
+
target/
|
|
80
|
+
|
|
81
|
+
# Jupyter Notebook
|
|
82
|
+
.ipynb_checkpoints
|
|
83
|
+
|
|
84
|
+
# IPython
|
|
85
|
+
profile_default/
|
|
86
|
+
ipython_config.py
|
|
87
|
+
|
|
88
|
+
# pyenv
|
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
91
|
+
# .python-version
|
|
92
|
+
|
|
93
|
+
# pipenv
|
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
97
|
+
# install all needed dependencies.
|
|
98
|
+
#Pipfile.lock
|
|
99
|
+
|
|
100
|
+
# poetry
|
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
103
|
+
# commonly ignored for libraries.
|
|
104
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
105
|
+
#poetry.lock
|
|
106
|
+
|
|
107
|
+
# pdm
|
|
108
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
109
|
+
#pdm.lock
|
|
110
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
111
|
+
# in version control.
|
|
112
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
113
|
+
.pdm.toml
|
|
114
|
+
|
|
115
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
116
|
+
__pypackages__/
|
|
117
|
+
|
|
118
|
+
# Celery stuff
|
|
119
|
+
celerybeat-schedule
|
|
120
|
+
celerybeat.pid
|
|
121
|
+
|
|
122
|
+
# SageMath parsed files
|
|
123
|
+
*.sage.py
|
|
124
|
+
|
|
125
|
+
# Environments
|
|
126
|
+
.env
|
|
127
|
+
.venv
|
|
128
|
+
env/
|
|
129
|
+
venv/
|
|
130
|
+
ENV/
|
|
131
|
+
env.bak/
|
|
132
|
+
venv.bak/
|
|
133
|
+
|
|
134
|
+
# Spyder project settings
|
|
135
|
+
.spyderproject
|
|
136
|
+
.spyproject
|
|
137
|
+
|
|
138
|
+
# Rope project settings
|
|
139
|
+
.ropeproject
|
|
140
|
+
|
|
141
|
+
# mkdocs documentation
|
|
142
|
+
/site
|
|
143
|
+
|
|
144
|
+
# mypy
|
|
145
|
+
.mypy_cache/
|
|
146
|
+
.dmypy.json
|
|
147
|
+
dmypy.json
|
|
148
|
+
|
|
149
|
+
# trunk
|
|
150
|
+
.trunk/
|
|
151
|
+
|
|
152
|
+
# Pyre type checker
|
|
153
|
+
.pyre/
|
|
154
|
+
|
|
155
|
+
# pytype static type analyzer
|
|
156
|
+
.pytype/
|
|
157
|
+
|
|
158
|
+
# Cython debug symbols
|
|
159
|
+
cython_debug/
|
|
160
|
+
|
|
161
|
+
# PyCharm
|
|
162
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
163
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
164
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
165
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
166
|
+
.idea/
|
|
167
|
+
|
|
168
|
+
node_modules
|
|
169
|
+
|
|
170
|
+
credentials.json
|
|
171
|
+
pyrightconfig.json
|
|
172
|
+
docs/
|
|
173
|
+
|
|
174
|
+
# Database files
|
|
175
|
+
*.db
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: livekit-plugins-upliftai
|
|
3
|
+
Version: 1.2.7
|
|
4
|
+
Summary: Agent Framework plugin for speech synthesis with the Uplift AI.
|
|
5
|
+
Project-URL: Documentation, https://docs.livekit.io
|
|
6
|
+
Project-URL: Website, https://livekit.io/
|
|
7
|
+
Project-URL: Source, https://github.com/livekit/agents
|
|
8
|
+
Author-email: LiveKit <hello@livekit.io>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Keywords: audio,livekit,multilingual,realtime,tts,upliftai,urdu,video,webrtc
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.9.0
|
|
21
|
+
Requires-Dist: livekit-agents[codecs]>=1.2.7
|
|
22
|
+
Requires-Dist: numpy>=1.26
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# Uplift AI plugin for LiveKit Agents
|
|
26
|
+
|
|
27
|
+
Support for voice synthesis with [Uplift AI](https://upliftai.org) for underserved languages.
|
|
28
|
+
|
|
29
|
+
See [https://docs.upliftai.org/orator_voices](https://docs.upliftai.org/orator_voices) for supported voices and languages.
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install livekit-plugins-upliftai
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Pre-requisites
|
|
38
|
+
|
|
39
|
+
You'll need an API key from Uplift AI. It can be set as an environment variable: `UPLIFTAI_API_KEY`. You can get your API key by signing up at [https://upliftai.org](https://upliftai.org).
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
## Tutorial
|
|
43
|
+
|
|
44
|
+
Follow along at [https://docs.upliftai.org/tutorials/livekit-voice-agent](https://docs.upliftai.org/tutorials/livekit-voice-agent) where we build a voice agent using LiveKit and Uplift AI.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Uplift AI plugin for LiveKit Agents
|
|
2
|
+
|
|
3
|
+
Support for voice synthesis with [Uplift AI](https://upliftai.org) for underserved languages.
|
|
4
|
+
|
|
5
|
+
See [https://docs.upliftai.org/orator_voices](https://docs.upliftai.org/orator_voices) for supported voices and languages.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install livekit-plugins-upliftai
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Pre-requisites
|
|
14
|
+
|
|
15
|
+
You'll need an API key from Uplift AI. It can be set as an environment variable: `UPLIFTAI_API_KEY`. You can get your API key by signing up at [https://upliftai.org](https://upliftai.org).
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## Tutorial
|
|
19
|
+
|
|
20
|
+
Follow along at [https://docs.upliftai.org/tutorials/livekit-voice-agent](https://docs.upliftai.org/tutorials/livekit-voice-agent) where we build a voice agent using LiveKit and Uplift AI.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Copyright 2023 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Uplift AI plugin for LiveKit Agents
|
|
16
|
+
|
|
17
|
+
See https://docs.livekit.io/agents/integrations/tts/upliftai/ for more information.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from .tts import DEFAULT_VOICE_ID, TTS, OutputFormat
|
|
21
|
+
from .version import __version__
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"TTS",
|
|
25
|
+
"OutputFormat",
|
|
26
|
+
"DEFAULT_VOICE_ID",
|
|
27
|
+
"__version__",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
from livekit.agents import Plugin
|
|
31
|
+
|
|
32
|
+
from .log import logger
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class UpliftAIPlugin(Plugin):
    """Plugin descriptor that registers the Uplift AI integration with LiveKit Agents."""

    def __init__(self) -> None:
        # Hand this package's module name, version, package name and logger to the base Plugin.
        super().__init__(__name__, __version__, __package__, logger)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
Plugin.register_plugin(UpliftAIPlugin())
|
|
41
|
+
|
|
42
|
+
# Cleanup docs of unexported modules
# Snapshot of every name defined in this module's namespace at this point.
_module = dir()
# Names that are present in the module but are not part of the public API in __all__.
NOT_IN_ALL = [m for m in _module if m not in __all__]

# Maps each non-exported name to False.
# NOTE(review): presumably consumed by pdoc to hide these names from generated docs — confirm.
__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__[n] = False
|
|
File without changes
|
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Uplift TTS plugin for LiveKit. This will soon be available as a Python library.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import base64
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
import uuid
|
|
12
|
+
import weakref
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import Literal
|
|
15
|
+
|
|
16
|
+
import socketio
|
|
17
|
+
|
|
18
|
+
from livekit.agents import (
|
|
19
|
+
APIConnectionError,
|
|
20
|
+
APIConnectOptions,
|
|
21
|
+
APIError,
|
|
22
|
+
APITimeoutError,
|
|
23
|
+
tokenize,
|
|
24
|
+
tts,
|
|
25
|
+
utils,
|
|
26
|
+
)
|
|
27
|
+
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
|
|
28
|
+
from livekit.agents.utils import is_given
|
|
29
|
+
|
|
30
|
+
from .log import logger
|
|
31
|
+
|
|
32
|
+
# Output format options
# Identifiers accepted for the "outputFormat" field of a synthesis request.
OutputFormat = Literal[
    "PCM_22050_16",
    "WAV_22050_16",
    "WAV_22050_32",
    "MP3_22050_32",
    "MP3_22050_64",
    "MP3_22050_128",
    "OGG_22050_16",
    "ULAW_8000_8",
]

# Default configuration
# Service URL used when neither the constructor argument nor UPLIFTAI_BASE_URL is set.
DEFAULT_BASE_URL = "wss://api.upliftai.org"
# Sample rate reported by the TTS instance and passed to the audio emitter.
DEFAULT_SAMPLE_RATE = 22050
DEFAULT_NUM_CHANNELS = 1
DEFAULT_VOICE_ID = "v_meklc281"
DEFAULT_OUTPUT_FORMAT: OutputFormat = "MP3_22050_32"
# Socket.IO namespace on which all handlers are registered and all requests are emitted.
WEBSOCKET_NAMESPACE = "/text-to-speech/multi-stream"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_content_type_from_output_format(output_format: OutputFormat):
    """Return the MIME type that corresponds to an Uplift AI output format.

    Args:
        output_format: One of the ``OutputFormat`` identifiers. Any value that
            starts with ``"MP3"`` or ``"OGG"`` is accepted for those codecs.

    Returns:
        The MIME type string for the given format.

    Raises:
        ValueError: If the format is not recognised.
    """
    if output_format == "PCM_22050_16":
        return "audio/pcm"

    if output_format in ("WAV_22050_16", "WAV_22050_32"):
        return "audio/wav"

    # Compressed codecs are matched on their prefix only; the bitrate suffix is ignored.
    prefix_table = (("MP3", "audio/mpeg"), ("OGG", "audio/ogg"))
    for codec_prefix, mime_type in prefix_table:
        if output_format.startswith(codec_prefix):
            return mime_type

    if output_format == "ULAW_8000_8":
        return "audio/x-mulaw"

    raise ValueError(f"Unsupported output format: {output_format}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class VoiceSettings:
    """Voice configuration settings"""

    # Identifier of the voice; sent as "voiceId" in each synthesis request.
    voice_id: str = DEFAULT_VOICE_ID
    # Requested audio encoding; sent as "outputFormat" and used to pick the emitter MIME type.
    output_format: OutputFormat = DEFAULT_OUTPUT_FORMAT
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class _TTSOptions:
    """Internal TTS options"""

    # URL handed to the Socket.IO client when connecting.
    base_url: str
    # Sent as the "token" entry of the connection auth payload.
    api_key: str
    # Voice id and output format applied to every synthesis request.
    voice_settings: VoiceSettings
    # Tokenizer whose stream() is used to split streamed input text.
    word_tokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer
    # Values passed to the audio emitter on initialize().
    sample_rate: int
    num_channels: int
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TTS(tts.TTS):
    """Uplift TTS implementation for LiveKit"""

    def __init__(
        self,
        *,
        base_url: NotGivenOr[str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice_id: str = DEFAULT_VOICE_ID,
        output_format: OutputFormat = DEFAULT_OUTPUT_FORMAT,
        num_channels: int = DEFAULT_NUM_CHANNELS,
        word_tokenizer: NotGivenOr[tokenize.WordTokenizer | tokenize.SentenceTokenizer] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of Uplift TTS.

        The sample rate is not configurable: DEFAULT_SAMPLE_RATE (22050) is always used.

        Args:
            base_url: Base URL for TTS service. Falls back to the UPLIFTAI_BASE_URL
                environment variable, then to wss://api.upliftai.org
            api_key: API key for authentication. Falls back to the UPLIFTAI_API_KEY
                environment variable
            voice_id: Voice ID to use. Defaults to DEFAULT_VOICE_ID ("v_meklc281")
            output_format: Audio output format. Options:
                - 'PCM_22050_16': PCM format, 22.05kHz, 16-bit
                - 'WAV_22050_16': WAV format, 22.05kHz, 16-bit
                - 'WAV_22050_32': WAV format, 22.05kHz, 32-bit
                - 'MP3_22050_32': MP3 format, 22.05kHz, 32kbps (default)
                - 'MP3_22050_64': MP3 format, 22.05kHz, 64kbps
                - 'MP3_22050_128': MP3 format, 22.05kHz, 128kbps
                - 'OGG_22050_16': OGG format, 22.05kHz, 16-bit
                - 'ULAW_8000_8': μ-law format, 8kHz, 8-bit
            num_channels: Number of audio channels. Defaults to 1 (mono)
            word_tokenizer: Tokenizer for processing text. Defaults to
                tokenize.basic.WordTokenizer(ignore_punctuation=False)

        Raises:
            ValueError: If no API key is given and UPLIFTAI_API_KEY is not set.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
                aligned_transcript=False,
            ),
            sample_rate=DEFAULT_SAMPLE_RATE,
            num_channels=num_channels,
        )

        # Get configuration from environment if not provided
        base_url = (
            base_url
            if is_given(base_url)
            else os.environ.get("UPLIFTAI_BASE_URL", DEFAULT_BASE_URL)
        )
        api_key = api_key if is_given(api_key) else os.environ.get("UPLIFTAI_API_KEY")

        if not api_key:
            raise ValueError(
                "API key is required, either as argument or set UPLIFTAI_API_KEY environment variable"
            )

        # Use provided tokenizer or create default
        if not is_given(word_tokenizer):
            word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)

        self._opts = _TTSOptions(
            base_url=base_url,
            api_key=api_key,
            voice_settings=VoiceSettings(voice_id=voice_id, output_format=output_format),
            word_tokenizer=word_tokenizer,
            sample_rate=DEFAULT_SAMPLE_RATE,
            num_channels=num_channels,
        )

        # The client is created lazily by the first stream that needs it and then shared.
        self._client: WebSocketClient | None = None
        # Weak references, so streams that are no longer referenced elsewhere drop out on their own.
        self._streams = weakref.WeakSet[SynthesizeStream]()

    def update_options(
        self,
        *,
        voice_id: NotGivenOr[str] = NOT_GIVEN,
        output_format: NotGivenOr[OutputFormat] = NOT_GIVEN,
    ) -> None:
        """
        Update TTS configuration options.

        Only the arguments that are given are changed; the shared options object
        is mutated in place.

        Args:
            voice_id: New voice ID
            output_format: New output format (see __init__ for options)
        """
        if is_given(voice_id):
            self._opts.voice_settings.voice_id = voice_id
        if is_given(output_format):
            self._opts.voice_settings.output_format = output_format

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> ChunkedStream:
        """Synthesize text to speech using chunked stream."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        """Create a streaming synthesis session."""
        stream = SynthesizeStream(tts=self, conn_options=conn_options)
        # Track the stream so aclose() can close it later.
        self._streams.add(stream)
        return stream

    async def aclose(self) -> None:
        """Clean up resources"""
        # Iterate over a copy: the weak set may change while streams are being closed.
        for stream in list(self._streams):
            await stream.aclose()

        self._streams.clear()

        if self._client:
            await self._client.disconnect()
            self._client = None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class WebSocketClient:
    """Manages WebSocket connection to TTS service

    One Socket.IO connection is shared by all synthesis requests. Each request
    gets its own ``asyncio.Queue`` that receives decoded audio ``bytes`` and,
    finally, ``None`` as the end-of-stream marker (also sent on error and on
    disconnect).
    """

    def __init__(self, opts: _TTSOptions):
        self.opts = opts
        self.sio: socketio.AsyncClient | None = None
        self.connected = False
        # request id -> queue that receives the audio for that request
        self.audio_callbacks: dict[str, asyncio.Queue] = {}
        # request id -> True while audio for that request should still be delivered
        self.active_requests: dict[str, bool] = {}

    async def connect(self) -> bool:
        """Establish WebSocket connection

        Returns:
            True when the connection is usable, False otherwise. Connection
            errors are logged and reported as False rather than raised.
        """
        if self.connected:
            return True

        try:
            self.sio = socketio.AsyncClient(
                reconnection=True,
                reconnection_attempts=3,
                reconnection_delay=1,
                logger=False,
                engineio_logger=False,
            )

            # Register handlers
            self.sio.on("message", self._on_message, namespace=WEBSOCKET_NAMESPACE)
            self.sio.on("connect", self._on_connect, namespace=WEBSOCKET_NAMESPACE)
            self.sio.on("disconnect", self._on_disconnect, namespace=WEBSOCKET_NAMESPACE)

            # Prepare auth
            auth_data = {"token": self.opts.api_key}

            # Connect
            await self.sio.connect(
                self.opts.base_url,
                auth=auth_data,
                namespaces=[WEBSOCKET_NAMESPACE],
                transports=["websocket"],
                wait_timeout=10,
            )

            # Wait for the "ready" message, which sets ``self.connected``.
            # A monotonic clock is used so wall-clock adjustments cannot
            # shorten or extend the wait.
            max_wait = 5.0
            deadline = time.monotonic() + max_wait
            while not self.connected and time.monotonic() < deadline:
                await asyncio.sleep(0.1)

            # No "ready" message arrived, but the socket itself reports being
            # connected: treat the connection as usable.
            if not self.connected and self.sio.connected:
                self.connected = True

            return self.connected

        except Exception as e:
            logger.error(f"Connection failed: {e}")
            return False

    async def synthesize(self, text: str, request_id: str | None = None) -> asyncio.Queue:
        """Send synthesis request and return audio queue

        Args:
            text: Text to synthesize.
            request_id: Identifier for the request; a UUID4 string is generated
                when omitted or empty.

        Returns:
            The queue that will receive audio ``bytes`` followed by ``None``.

        Raises:
            ConnectionError: If the connection cannot be established.
            Exception: Whatever the Socket.IO client raises while emitting.
        """
        if not self.sio or not self.connected:
            if not await self.connect():
                raise ConnectionError("Failed to connect to TTS service")

        if not request_id:
            request_id = str(uuid.uuid4())

        # Register the queue before emitting so no early audio message is lost.
        audio_queue: asyncio.Queue = asyncio.Queue()
        self.audio_callbacks[request_id] = audio_queue
        self.active_requests[request_id] = True

        # Build message
        message = {
            "type": "synthesize",
            "requestId": request_id,
            "text": text,
            "voiceId": self.opts.voice_settings.voice_id,
            "outputFormat": self.opts.voice_settings.output_format,
        }

        logger.debug(f"Sending synthesis request {request_id[:8]} for text: '{text[:50]}...'")

        try:
            await self.sio.emit("synthesize", message, namespace=WEBSOCKET_NAMESPACE)
        except Exception as e:
            logger.error(f"Failed to emit synthesis: {e}")
            # The disconnect handler may already have cleared the registries
            # while emit() was pending; pop() keeps the cleanup from raising
            # KeyError and hiding the original error.
            self.audio_callbacks.pop(request_id, None)
            self.active_requests.pop(request_id, None)
            raise

        return audio_queue

    async def disconnect(self):
        """Disconnect from service"""
        if self.sio and self.connected:
            await self.sio.disconnect()
        self.connected = False

    async def _finish_request(self, request_id) -> None:
        """Send the end-of-stream marker for ``request_id`` and forget the request."""
        queue = self.audio_callbacks.pop(request_id, None)
        if queue is not None:
            await queue.put(None)
        self.active_requests.pop(request_id, None)

    async def _on_connect(self):
        """Handle connection"""
        logger.debug("WebSocket connected")

    async def _on_message(self, data):
        """Handle messages

        Dispatches on ``data["type"]``: "ready" marks the client connected,
        "audio" delivers a base64-encoded chunk to the request's queue, and
        "audio_end" / "error" terminate the request.
        """
        message_type = data.get("type")

        if message_type == "ready":
            self.connected = True
            logger.debug(f"Ready with session: {data.get('sessionId')}")

        elif message_type == "audio":
            request_id = data.get("requestId")
            audio_b64 = data.get("audio")

            if audio_b64 and request_id in self.audio_callbacks:
                audio_bytes = base64.b64decode(audio_b64)
                if self.active_requests.get(request_id, False):
                    await self.audio_callbacks[request_id].put(audio_bytes)

        elif message_type == "audio_end":
            await self._finish_request(data.get("requestId"))

        elif message_type == "error":
            request_id = data.get("requestId", "unknown")
            error_msg = data.get("message", str(data))
            logger.error(f"Error for {request_id}: {error_msg}")
            await self._finish_request(request_id)

    async def _on_disconnect(self):
        """Handle disconnection"""
        self.connected = False
        # Work on a snapshot so the registries are never mutated while being iterated.
        pending = list(self.audio_callbacks.values())
        self.audio_callbacks.clear()
        self.active_requests.clear()
        for queue in pending:
            await queue.put(None)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
class ChunkedStream(tts.ChunkedStream):
    """Chunked synthesis implementation

    Sends the whole input text as one synthesis request and forwards the
    returned audio chunks to the emitter.
    """

    def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts: TTS = tts

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Execute synthesis

        Args:
            output_emitter: Emitter that receives the encoded audio chunks.

        Raises:
            APITimeoutError: If an ``asyncio.TimeoutError`` escapes the request.
            APIConnectionError: For any other failure.
        """
        request_id = utils.shortuuid()
        client: WebSocketClient | None = None

        try:
            # Initialize emitter
            output_emitter.initialize(
                request_id=request_id,
                sample_rate=self._tts._opts.sample_rate,
                num_channels=self._tts._opts.num_channels,
                mime_type=get_content_type_from_output_format(
                    self._tts._opts.voice_settings.output_format
                ),
            )

            # Create client if needed; it is stored on the TTS instance and shared.
            if not self._tts._client:
                self._tts._client = WebSocketClient(self._tts._opts)
            client = self._tts._client

            # Get audio queue
            audio_queue = await client.synthesize(self._input_text, request_id)

            # Stream audio until the end-of-stream marker (None) arrives.
            while True:
                try:
                    audio_data = await asyncio.wait_for(audio_queue.get(), timeout=30.0)
                except asyncio.TimeoutError:
                    # Best effort: keep whatever audio was received so far.
                    logger.warning("Audio timeout")
                    break

                if audio_data is None:
                    break

                output_emitter.push(audio_data)

            output_emitter.flush()

        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except Exception as e:
            raise APIConnectionError(f"TTS synthesis failed: {str(e)}") from e
        finally:
            # After a timeout, cancellation or error the request would stay
            # registered on the shared client and keep collecting audio that
            # nobody reads. A normally finished request is already gone, so
            # pop() is a no-op there.
            if client is not None:
                client.audio_callbacks.pop(request_id, None)
                client.active_requests.pop(request_id, None)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
class SynthesizeStream(tts.SynthesizeStream):
    """Streaming synthesis implementation

    Incoming text is tokenized into segments (one per flush); each segment is
    sent as its own synthesis request and its audio is forwarded to the emitter.
    """

    def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
        super().__init__(tts=tts, conn_options=conn_options)
        self._tts: TTS = tts
        # The element type lives in the (never evaluated) annotation only:
        # evaluating ``WordStream | SentenceStream`` at runtime raises
        # TypeError on Python 3.9, which this package declares as supported.
        self._segments_ch: utils.aio.Chan[
            tokenize.WordStream | tokenize.SentenceStream
        ] = utils.aio.Chan()

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Execute streaming synthesis

        Args:
            output_emitter: Emitter that receives the encoded audio chunks.

        Raises:
            APITimeoutError: If an ``asyncio.TimeoutError`` escapes a task.
            APIConnectionError: For any other failure.
        """
        request_id = utils.shortuuid()

        output_emitter.initialize(
            request_id=request_id,
            sample_rate=self._tts._opts.sample_rate,
            num_channels=self._tts._opts.num_channels,
            stream=True,
            mime_type=get_content_type_from_output_format(
                self._tts._opts.voice_settings.output_format
            ),
        )

        async def _tokenize_input() -> None:
            """Tokenize input text"""
            word_stream = None
            async for item in self._input_ch:
                if isinstance(item, str):
                    if word_stream is None:
                        # First text after a flush opens a new segment.
                        word_stream = self._tts._opts.word_tokenizer.stream()
                        self._segments_ch.send_nowait(word_stream)

                    word_stream.push_text(item)
                elif isinstance(item, self._FlushSentinel):
                    if word_stream is not None:
                        word_stream.end_input()
                    word_stream = None

            if word_stream is not None:
                word_stream.end_input()

            self._segments_ch.close()

        async def _process_segments() -> None:
            """Process segments"""
            async for word_stream in self._segments_ch:
                await self._run_segment(word_stream, output_emitter)

        tasks = [
            asyncio.create_task(_tokenize_input()),
            asyncio.create_task(_process_segments()),
        ]

        try:
            await asyncio.gather(*tasks)
        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except Exception as e:
            raise APIConnectionError() from e
        finally:
            await utils.aio.gracefully_cancel(*tasks)

    async def _run_segment(
        self,
        word_stream: tokenize.WordStream | tokenize.SentenceStream,
        output_emitter: tts.AudioEmitter,
    ) -> None:
        """Process a single segment

        Collects all tokens of the segment, sends them as one synthesis
        request and pushes the returned audio to the emitter.

        Raises:
            APIError: If anything fails while processing the segment.
        """
        segment_id = utils.shortuuid()
        request_id = str(uuid.uuid4())
        client: WebSocketClient | None = None

        try:
            # Create client if needed; it is stored on the TTS instance and shared.
            if not self._tts._client:
                self._tts._client = WebSocketClient(self._tts._opts)
            client = self._tts._client

            # Collect text
            text_parts = [data.token async for data in word_stream]

            if not text_parts:
                # Nothing to synthesize, so no emitter segment is opened at all.
                return

            # Format text
            if isinstance(self._tts._opts.word_tokenizer, tokenize.WordTokenizer):
                full_text = self._tts._opts.word_tokenizer.format_words(text_parts)
            else:
                full_text = " ".join(text_parts)

            # Open the segment only once there is text, so it is never left dangling.
            output_emitter.start_segment(segment_id=segment_id)
            self._mark_started()

            # Synthesize
            audio_queue = await client.synthesize(full_text, request_id)

            # Stream audio until the end-of-stream marker (None) arrives.
            while True:
                try:
                    audio_data = await asyncio.wait_for(audio_queue.get(), timeout=30.0)
                except asyncio.TimeoutError:
                    # Best effort: keep whatever audio was received so far.
                    break

                if audio_data is None:
                    break

                output_emitter.push(audio_data)

            # Close this segment only. Calling end_input() here would end the
            # whole emitter after the first segment and drop later segments.
            # NOTE(review): relies on AudioEmitter.end_segment() and on the base
            # class ending the input after _run() returns — confirm against livekit-agents.
            output_emitter.end_segment()

        except Exception as e:
            logger.error(f"Segment synthesis error: {e}")
            raise APIError(f"Segment synthesis failed: {str(e)}") from e
        finally:
            # Drop a request that timed out, failed or was cancelled so the
            # shared client stops queueing audio for it. A normally finished
            # request is already gone, so pop() is a no-op there.
            if client is not None:
                client.audio_callbacks.pop(request_id, None)
                client.active_requests.pop(request_id, None)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2025 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
__version__ = "1.2.7"
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "livekit-plugins-upliftai"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Agent Framework plugin for speech synthesis with the Uplift AI."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
requires-python = ">=3.9.0"
|
|
12
|
+
authors = [{ name = "LiveKit", email = "hello@livekit.io" }]
|
|
13
|
+
keywords = ["webrtc", "realtime", "audio", "video", "livekit", "upliftai", "multilingual", "tts", "urdu"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: Apache Software License",
|
|
17
|
+
"Topic :: Multimedia :: Sound/Audio",
|
|
18
|
+
"Topic :: Multimedia :: Video",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
24
|
+
]
|
|
25
|
+
dependencies = ["livekit-agents[codecs]>=1.2.7", "numpy>=1.26"]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Documentation = "https://docs.livekit.io"
|
|
29
|
+
Website = "https://livekit.io/"
|
|
30
|
+
Source = "https://github.com/livekit/agents"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.version]
|
|
33
|
+
path = "livekit/plugins/upliftai/version.py"
|
|
34
|
+
|
|
35
|
+
[tool.hatch.build.targets.wheel]
|
|
36
|
+
packages = ["livekit"]
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.sdist]
|
|
39
|
+
include = ["/livekit"]
|