livekit-plugins-resemble 0.1.0__tar.gz → 0.1.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-resemble might be problematic. Click here for more details.
- livekit_plugins_resemble-0.1.0rc1/.gitignore +168 -0
- {livekit_plugins_resemble-0.1.0 → livekit_plugins_resemble-0.1.0rc1}/PKG-INFO +12 -21
- {livekit_plugins_resemble-0.1.0 → livekit_plugins_resemble-0.1.0rc1}/livekit/plugins/resemble/models.py +0 -5
- livekit_plugins_resemble-0.1.0rc1/livekit/plugins/resemble/tts.py +452 -0
- {livekit_plugins_resemble-0.1.0 → livekit_plugins_resemble-0.1.0rc1}/livekit/plugins/resemble/version.py +1 -1
- livekit_plugins_resemble-0.1.0rc1/pyproject.toml +41 -0
- livekit_plugins_resemble-0.1.0/livekit/plugins/resemble/tts.py +0 -620
- livekit_plugins_resemble-0.1.0/livekit_plugins_resemble.egg-info/PKG-INFO +0 -150
- livekit_plugins_resemble-0.1.0/livekit_plugins_resemble.egg-info/SOURCES.txt +0 -14
- livekit_plugins_resemble-0.1.0/livekit_plugins_resemble.egg-info/dependency_links.txt +0 -1
- livekit_plugins_resemble-0.1.0/livekit_plugins_resemble.egg-info/requires.txt +0 -2
- livekit_plugins_resemble-0.1.0/livekit_plugins_resemble.egg-info/top_level.txt +0 -1
- livekit_plugins_resemble-0.1.0/pyproject.toml +0 -3
- livekit_plugins_resemble-0.1.0/setup.cfg +0 -4
- livekit_plugins_resemble-0.1.0/setup.py +0 -55
- {livekit_plugins_resemble-0.1.0 → livekit_plugins_resemble-0.1.0rc1}/README.md +0 -0
- {livekit_plugins_resemble-0.1.0 → livekit_plugins_resemble-0.1.0rc1}/livekit/plugins/resemble/__init__.py +0 -0
- {livekit_plugins_resemble-0.1.0 → livekit_plugins_resemble-0.1.0rc1}/livekit/plugins/resemble/log.py +0 -0
- {livekit_plugins_resemble-0.1.0 → livekit_plugins_resemble-0.1.0rc1}/livekit/plugins/resemble/py.typed +0 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
**/.vscode
|
|
2
|
+
**/.DS_Store
|
|
3
|
+
|
|
4
|
+
# Byte-compiled / optimized / DLL files
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[cod]
|
|
7
|
+
*$py.class
|
|
8
|
+
|
|
9
|
+
# C extensions
|
|
10
|
+
*.so
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py,cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
cover/
|
|
56
|
+
|
|
57
|
+
# Translations
|
|
58
|
+
*.mo
|
|
59
|
+
*.pot
|
|
60
|
+
|
|
61
|
+
# Django stuff:
|
|
62
|
+
*.log
|
|
63
|
+
local_settings.py
|
|
64
|
+
db.sqlite3
|
|
65
|
+
db.sqlite3-journal
|
|
66
|
+
|
|
67
|
+
# Flask stuff:
|
|
68
|
+
instance/
|
|
69
|
+
.webassets-cache
|
|
70
|
+
|
|
71
|
+
# Scrapy stuff:
|
|
72
|
+
.scrapy
|
|
73
|
+
|
|
74
|
+
# Sphinx documentation
|
|
75
|
+
docs/_build/
|
|
76
|
+
|
|
77
|
+
# PyBuilder
|
|
78
|
+
.pybuilder/
|
|
79
|
+
target/
|
|
80
|
+
|
|
81
|
+
# Jupyter Notebook
|
|
82
|
+
.ipynb_checkpoints
|
|
83
|
+
|
|
84
|
+
# IPython
|
|
85
|
+
profile_default/
|
|
86
|
+
ipython_config.py
|
|
87
|
+
|
|
88
|
+
# pyenv
|
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
91
|
+
# .python-version
|
|
92
|
+
|
|
93
|
+
# pipenv
|
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
97
|
+
# install all needed dependencies.
|
|
98
|
+
#Pipfile.lock
|
|
99
|
+
|
|
100
|
+
# poetry
|
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
103
|
+
# commonly ignored for libraries.
|
|
104
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
105
|
+
#poetry.lock
|
|
106
|
+
|
|
107
|
+
# pdm
|
|
108
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
109
|
+
#pdm.lock
|
|
110
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
111
|
+
# in version control.
|
|
112
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
113
|
+
.pdm.toml
|
|
114
|
+
|
|
115
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
116
|
+
__pypackages__/
|
|
117
|
+
|
|
118
|
+
# Celery stuff
|
|
119
|
+
celerybeat-schedule
|
|
120
|
+
celerybeat.pid
|
|
121
|
+
|
|
122
|
+
# SageMath parsed files
|
|
123
|
+
*.sage.py
|
|
124
|
+
|
|
125
|
+
# Environments
|
|
126
|
+
.env
|
|
127
|
+
.venv
|
|
128
|
+
env/
|
|
129
|
+
venv/
|
|
130
|
+
ENV/
|
|
131
|
+
env.bak/
|
|
132
|
+
venv.bak/
|
|
133
|
+
|
|
134
|
+
# Spyder project settings
|
|
135
|
+
.spyderproject
|
|
136
|
+
.spyproject
|
|
137
|
+
|
|
138
|
+
# Rope project settings
|
|
139
|
+
.ropeproject
|
|
140
|
+
|
|
141
|
+
# mkdocs documentation
|
|
142
|
+
/site
|
|
143
|
+
|
|
144
|
+
# mypy
|
|
145
|
+
.mypy_cache/
|
|
146
|
+
.dmypy.json
|
|
147
|
+
dmypy.json
|
|
148
|
+
|
|
149
|
+
# Pyre type checker
|
|
150
|
+
.pyre/
|
|
151
|
+
|
|
152
|
+
# pytype static type analyzer
|
|
153
|
+
.pytype/
|
|
154
|
+
|
|
155
|
+
# Cython debug symbols
|
|
156
|
+
cython_debug/
|
|
157
|
+
|
|
158
|
+
# PyCharm
|
|
159
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
160
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
161
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
162
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
163
|
+
.idea/
|
|
164
|
+
|
|
165
|
+
node_modules
|
|
166
|
+
|
|
167
|
+
credentials.json
|
|
168
|
+
pyrightconfig.json
|
|
@@ -1,34 +1,25 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: livekit-plugins-resemble
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.0rc1
|
|
4
4
|
Summary: LiveKit Agents Plugin for Resemble AI
|
|
5
|
-
Home-page: https://github.com/livekit/agents
|
|
6
|
-
License: Apache-2.0
|
|
7
5
|
Project-URL: Documentation, https://docs.livekit.io
|
|
8
6
|
Project-URL: Website, https://livekit.io/
|
|
9
7
|
Project-URL: Source, https://github.com/livekit/agents
|
|
10
|
-
|
|
8
|
+
Author-email: LiveKit <hello@livekit.io>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Keywords: audio,livekit,realtime,video,webrtc
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier:
|
|
13
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
13
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
14
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
20
|
Requires-Python: >=3.9.0
|
|
21
|
+
Requires-Dist: livekit-agents>=1.0.0.rc7
|
|
19
22
|
Description-Content-Type: text/markdown
|
|
20
|
-
Requires-Dist: livekit-agents[codecs]>=0.12.3
|
|
21
|
-
Requires-Dist: websockets==12.0
|
|
22
|
-
Dynamic: classifier
|
|
23
|
-
Dynamic: description
|
|
24
|
-
Dynamic: description-content-type
|
|
25
|
-
Dynamic: home-page
|
|
26
|
-
Dynamic: keywords
|
|
27
|
-
Dynamic: license
|
|
28
|
-
Dynamic: project-url
|
|
29
|
-
Dynamic: requires-dist
|
|
30
|
-
Dynamic: requires-python
|
|
31
|
-
Dynamic: summary
|
|
32
23
|
|
|
33
24
|
# LiveKit Plugins Resemble
|
|
34
25
|
|
|
@@ -147,4 +138,4 @@ This plugin uses two different approaches to generate speech:
|
|
|
147
138
|
1. **One-off Synthesis** - Uses Resemble's REST API for simple text-to-speech conversion
|
|
148
139
|
2. **Streaming Synthesis** - Uses Resemble's WebSocket API for real-time streaming synthesis
|
|
149
140
|
|
|
150
|
-
The WebSocket streaming API is only available for Resemble AI Business plan users.
|
|
141
|
+
The WebSocket streaming API is only available for Resemble AI Business plan users.
|
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
# Copyright 2025 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import base64
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import weakref
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
|
|
24
|
+
import aiohttp
|
|
25
|
+
|
|
26
|
+
from livekit.agents import (
|
|
27
|
+
APIConnectionError,
|
|
28
|
+
APIConnectOptions,
|
|
29
|
+
APIStatusError,
|
|
30
|
+
APITimeoutError,
|
|
31
|
+
tokenize,
|
|
32
|
+
tts,
|
|
33
|
+
utils,
|
|
34
|
+
)
|
|
35
|
+
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS
|
|
36
|
+
|
|
37
|
+
from .log import logger
|
|
38
|
+
|
|
39
|
+
# WebSocket endpoint used for streaming synthesis (opened in TTS._connect_ws).
RESEMBLE_WEBSOCKET_URL = "wss://websocket.cluster.resemble.ai/stream"
# REST endpoint used for one-shot synthesis (posted to in ChunkedStream._run).
RESEMBLE_REST_API_URL = "https://f.cluster.resemble.ai/synthesize"
# Channel count handed to the base TTS class and to every audio decoder.
NUM_CHANNELS = 1
# Voice used when the caller does not pass ``voice_uuid`` to TTS.
DEFAULT_VOICE_UUID = "55592656"
# Passed as ``min_sentence_len`` when TTS builds its default sentence tokenizer.
BUFFERED_WORDS_COUNT = 3
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class _TTSOptions:
    """Synthesis settings held by a TTS instance and passed by reference to its streams."""

    # Resemble voice identifier, sent as ``voice_uuid`` in every request payload.
    voice_uuid: str
    # Requested output sample rate in Hz; also used to configure the audio decoders.
    sample_rate: int
    # Sentence tokenizer used to split streamed input text into requests.
    tokenizer: tokenize.SentenceTokenizer
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TTS(tts.TTS):
    """Resemble AI text-to-speech engine.

    ``synthesize`` performs a single REST request per text, while ``stream`` uses
    WebSocket connections taken from an internal connection pool.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        voice_uuid: str | None = None,
        tokenizer: tokenize.SentenceTokenizer | None = None,
        sample_rate: int = 44100,
        http_session: aiohttp.ClientSession | None = None,
        use_streaming: bool = True,
    ) -> None:
        """
        Create a new instance of the Resemble TTS.

        See https://docs.app.resemble.ai/docs/text_to_speech/ for more documentation on all of these options.

        Args:
            api_key (str | None, optional): The Resemble API key. Falls back to the RESEMBLE_API_KEY environment variable.
            voice_uuid (str | None, optional): The voice UUID for the desired voice. Falls back to DEFAULT_VOICE_UUID.
            tokenizer (tokenize.SentenceTokenizer | None, optional): The sentence tokenizer for streamed text. Falls back to tokenize.basic.SentenceTokenizer(min_sentence_len=BUFFERED_WORDS_COUNT).
            sample_rate (int, optional): The audio sample rate in Hz. Defaults to 44100.
            http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If omitted, one is obtained lazily from utils.http_context.http_session().
            use_streaming (bool, optional): Value reported as the ``streaming`` capability. Defaults to True.

        Raises:
            ValueError: If no API key is given and RESEMBLE_API_KEY is unset or empty.
        """  # noqa: E501
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=use_streaming),
            sample_rate=sample_rate,
            num_channels=NUM_CHANNELS,
        )

        resolved_key = api_key or os.environ.get("RESEMBLE_API_KEY")
        if not resolved_key:
            raise ValueError(
                "Resemble API key is required, either as argument or set RESEMBLE_API_KEY"
                " environment variable"
            )
        self._api_key = resolved_key

        # Fill in defaults for anything the caller left unspecified.
        sentence_tokenizer = (
            tokenize.basic.SentenceTokenizer(min_sentence_len=BUFFERED_WORDS_COUNT)
            if tokenizer is None
            else tokenizer
        )
        chosen_voice = DEFAULT_VOICE_UUID if voice_uuid is None else voice_uuid

        self._opts = _TTSOptions(
            voice_uuid=chosen_voice,
            sample_rate=sample_rate,
            tokenizer=sentence_tokenizer,
        )

        self._session = http_session
        # Weak references: streams are tracked only so aclose() can shut them down.
        self._streams = weakref.WeakSet[SynthesizeStream]()
        self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
            connect_cb=self._connect_ws,
            close_cb=self._close_ws,
        )

    async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
        """Open an authenticated WebSocket to Resemble, bounded by the connect timeout."""
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        connecting = self._ensure_session().ws_connect(
            RESEMBLE_WEBSOCKET_URL,
            headers=auth_headers,
        )
        return await asyncio.wait_for(connecting, self._conn_options.timeout)

    async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
        """Close a pooled WebSocket (connection-pool close callback)."""
        await ws.close()

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching one from the http context on first use."""
        if self._session:
            return self._session

        self._session = utils.http_context.http_session()
        return self._session

    def prewarm(self) -> None:
        """Ask the connection pool to prewarm."""
        self._pool.prewarm()

    def update_options(
        self,
        *,
        voice_uuid: str | None = None,
        sample_rate: int | None = None,
    ) -> None:
        """
        Update the Text-to-Speech (TTS) configuration options.

        Falsy arguments (``None``, ``""``, ``0``) leave the current value untouched.

        Args:
            voice_uuid (str, optional): The voice UUID for the desired voice.
            sample_rate (int, optional): The audio sample rate in Hz.
        """  # noqa: E501
        if voice_uuid:
            self._opts.voice_uuid = voice_uuid
        if sample_rate:
            self._opts.sample_rate = sample_rate

    def synthesize(
        self,
        *,
        text: str = None,  # placeholder replaced below to keep positional compatibility
    ) -> ChunkedStream:  # pragma: no cover - overwritten immediately below
        raise NotImplementedError

    def synthesize(  # noqa: F811
        self,
        text: str,
        *,
        conn_options: APIConnectOptions | None = None,
    ) -> ChunkedStream:
        """Return a ChunkedStream that synthesizes ``text`` with one REST request."""
        effective_conn_options = conn_options or DEFAULT_API_CONNECT_OPTIONS
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=effective_conn_options,
            opts=self._opts,
            api_key=self._api_key,
            session=self._ensure_session(),
        )

    def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
        """Create a WebSocket-backed SynthesizeStream and track it for later cleanup."""
        # NOTE(review): ``conn_options`` is accepted but not forwarded to the stream.
        new_stream = SynthesizeStream(
            tts=self,
            pool=self._pool,
            opts=self._opts,
            api_key=self._api_key,
        )
        self._streams.add(new_stream)
        return new_stream

    async def aclose(self) -> None:
        """Close every tracked stream, then the connection pool, then the base TTS."""
        # Snapshot first: the weak set may change while we await.
        open_streams = list(self._streams)
        for open_stream in open_streams:
            await open_stream.aclose()
        self._streams.clear()

        await self._pool.aclose()
        await super().aclose()
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class ChunkedStream(tts.ChunkedStream):
    """Synthesize text into speech in one go using Resemble AI's REST API."""

    def __init__(
        self,
        *,
        tts: TTS,
        input_text: str,
        opts: _TTSOptions,
        conn_options: APIConnectOptions,
        api_key: str,
        session: aiohttp.ClientSession,
    ) -> None:
        """Store the request settings; the HTTP call itself happens in ``_run``.

        Args:
            tts: The owning TTS instance.
            input_text: The text to synthesize.
            opts: Voice and sample-rate settings for the request.
            conn_options: Connection options; ``timeout`` bounds the socket connect.
            api_key: Resemble API key sent as a bearer token.
            session: aiohttp session used for the POST request.
        """
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._opts, self._session, self._api_key = opts, session, api_key

    async def _run(self) -> None:
        """POST the text to Resemble, decode the returned audio and emit its frames.

        Raises:
            APIStatusError: On an HTTP error status, when the response reports
                ``success`` as false, or when it carries no ``audio_content``.
            APITimeoutError: When the request times out.
            APIConnectionError: On any other client or unexpected error.
        """
        request_id = utils.shortuuid()

        # Create request headers
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",  # Expect JSON response
        }

        # Create request payload
        payload = {
            "voice_uuid": self._opts.voice_uuid,
            "data": self._input_text,
            "sample_rate": self._opts.sample_rate,
            "precision": "PCM_16",
        }
        decoder = utils.codecs.AudioStreamDecoder(
            sample_rate=self._opts.sample_rate,
            num_channels=NUM_CHANNELS,
        )

        try:
            async with self._session.post(
                RESEMBLE_REST_API_URL,
                headers=headers,
                json=payload,
                timeout=aiohttp.ClientTimeout(
                    total=30,
                    sock_connect=self._conn_options.timeout,
                ),
            ) as response:
                response.raise_for_status()
                response_json = await response.json()

                # Check for success
                if not response_json.get("success", False):
                    # Tolerate a null, string or non-string "issues" value so that
                    # building the message can never mask the real failure.
                    issues = response_json.get("issues") or ["Unknown error"]
                    if isinstance(issues, str):
                        issues = [issues]
                    error_msg = "; ".join(str(issue) for issue in issues)
                    raise APIStatusError(
                        message=f"Resemble API returned failure: {error_msg}",
                        status_code=response.status,
                        request_id=request_id,
                        body=json.dumps(response_json),
                    )

                # Extract base64-encoded audio content
                audio_content_b64 = response_json.get("audio_content")
                if not audio_content_b64:
                    raise APIStatusError(
                        message="No audio content in response",
                        status_code=response.status,
                        request_id=request_id,
                        body=json.dumps(response_json),
                    )

                # Decode base64 to get raw audio bytes
                audio_bytes = base64.b64decode(audio_content_b64)
                decoder.push(audio_bytes)
                decoder.end_input()

                emitter = tts.SynthesizedAudioEmitter(
                    event_ch=self._event_ch,
                    request_id=request_id,
                )
                async for frame in decoder:
                    emitter.push(frame)
                emitter.flush()

        except APIStatusError:
            # Raised above for API-level failures. Propagate it unchanged so the
            # status code and body are not lost by the generic handler below.
            raise
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=request_id,
                body=f"resemble api error: {str(e)}",
            ) from e
        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except aiohttp.ClientError as e:
            raise APIConnectionError(
                message=f"Resemble API connection error: {str(e)}",
            ) from e
        except Exception as e:
            raise APIConnectionError(f"Error during synthesis: {str(e)}") from e
        finally:
            await decoder.aclose()
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
class SynthesizeStream(tts.SynthesizeStream):
    """Stream-based text-to-speech synthesis using Resemble AI WebSocket API.

    This implementation connects to Resemble's WebSocket API for real-time streaming
    synthesis. Note that this requires a Business plan subscription with Resemble AI.
    """

    def __init__(
        self,
        *,
        tts: TTS,
        opts: _TTSOptions,
        pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
        api_key: str,
    ):
        """Store the settings and the connection pool used by ``_run``.

        Args:
            tts: The owning TTS instance.
            opts: Voice, sample-rate and tokenizer settings.
            pool: Pool that supplies WebSocket connections to Resemble.
            api_key: Resemble API key.
        """
        super().__init__(tts=tts)
        self._opts, self._pool, self._api_key = opts, pool, api_key

    async def _run(self) -> None:
        """Split the input into segments and synthesize each one over a WebSocket.

        A segment is the text pushed between two flushes; segments are processed
        one at a time, in order.

        Raises:
            APIStatusError: On an HTTP error status or an unexpected socket close.
            APITimeoutError: When an operation times out.
            APIConnectionError: On any other failure.
        """
        request_id = utils.shortuuid()
        self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()

        @utils.log_exceptions(logger=logger)
        async def _tokenize_input():
            """tokenize text from the input_ch to words"""
            input_stream = None
            async for item in self._input_ch:
                if isinstance(item, str):
                    if input_stream is None:
                        # new segment (after flush for e.g)
                        input_stream = self._opts.tokenizer.stream()
                        self._segments_ch.send_nowait(input_stream)
                    input_stream.push_text(item)
                elif isinstance(item, self._FlushSentinel):
                    if input_stream is not None:
                        input_stream.end_input()
                    input_stream = None
            if input_stream is not None:
                input_stream.end_input()
            self._segments_ch.close()

        @utils.log_exceptions(logger=logger)
        async def _process_segments():
            async for input_stream in self._segments_ch:
                await self._run_ws(input_stream)

        tasks = [
            asyncio.create_task(_tokenize_input()),
            asyncio.create_task(_process_segments()),
        ]
        try:
            await asyncio.gather(*tasks)
        except (APIStatusError, APIConnectionError, APITimeoutError):
            # Already translated by _run_ws; re-wrapping would drop the details.
            raise
        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=request_id,
                body=None,
            ) from e
        except Exception as e:
            raise APIConnectionError() from e
        finally:
            await utils.aio.gracefully_cancel(*tasks)

    async def _run_ws(
        self,
        input_stream: tokenize.SentenceStream,
    ) -> None:
        """Synthesize one segment: send each sentence, receive audio, emit frames.

        Three tasks cooperate on a pooled WebSocket: the sender posts one request
        per sentence, the receiver feeds returned audio into a decoder, and the
        emitter forwards decoded frames. The segment is finished only once the
        sender has exhausted ``input_stream`` AND every request it sent has been
        acknowledged with ``audio_end``.

        Args:
            input_stream: Sentence stream for the segment being synthesized.

        Raises:
            APIStatusError: On an HTTP error status or an unexpected socket close.
            APITimeoutError: When an operation times out.
            APIConnectionError: On any other failure.
        """
        async with self._pool.connection() as ws:
            segment_id = utils.shortuuid()
            decoder = utils.codecs.AudioStreamDecoder(
                sample_rate=self._opts.sample_rate,
                num_channels=NUM_CHANNELS,
            )
            index_lock = asyncio.Lock()
            current_index = 0
            pending_requests = set()
            # True once the sender has forwarded every sentence of the segment.
            # Without it the receiver could end the segment as soon as the first
            # sentence finished, while later sentences were still to be sent.
            input_ended = False

            @utils.log_exceptions(logger=logger)
            async def _send_task(ws: aiohttp.ClientWebSocketResponse):
                nonlocal current_index, input_ended
                index = 0
                async for data in input_stream:
                    # NOTE(review): "precision" is PCM_16 while "output_format" is
                    # mp3 - confirm against the Resemble API that both are intended.
                    payload = {
                        "voice_uuid": self._opts.voice_uuid,
                        "data": data.token,
                        "request_id": index,
                        "sample_rate": self._opts.sample_rate,
                        "precision": "PCM_16",
                        "output_format": "mp3",
                    }
                    async with index_lock:
                        pending_requests.add(index)
                        index += 1
                        current_index = index
                    await ws.send_str(json.dumps(payload))

                async with index_lock:
                    input_ended = True
                    if not pending_requests:
                        # Everything sent was already acknowledged (or nothing was
                        # sent at all), so no further audio_end will arrive.
                        decoder.end_input()

            @utils.log_exceptions(logger=logger)
            async def _emit_task():
                emitter = tts.SynthesizedAudioEmitter(
                    event_ch=self._event_ch,
                    request_id=str(current_index),
                    segment_id=segment_id,
                )
                async for frame in decoder:
                    emitter.push(frame)
                emitter.flush()

            @utils.log_exceptions(logger=logger)
            async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
                while True:
                    msg = await ws.receive()
                    if msg.type in (
                        aiohttp.WSMsgType.CLOSED,
                        aiohttp.WSMsgType.CLOSE,
                        aiohttp.WSMsgType.CLOSING,
                    ):
                        raise APIStatusError(
                            "Resemble connection closed unexpectedly",
                            request_id=str(current_index),
                        )

                    if msg.type != aiohttp.WSMsgType.TEXT:
                        logger.warning("Unexpected Resemble message type %s", msg.type)
                        continue

                    data = json.loads(msg.data)

                    if data.get("type") == "audio":
                        if data.get("audio_content", None):
                            b64data = base64.b64decode(data["audio_content"])
                            decoder.push(b64data)

                    elif data.get("type") == "audio_end":
                        async with index_lock:
                            pending_requests.remove(data["request_id"])
                            segment_done = input_ended and not pending_requests
                        if segment_done:
                            decoder.end_input()
                            break  # we are not going to receive any more audio
                    else:
                        logger.error("Unexpected Resemble message %s", data)

            send_task = asyncio.create_task(_send_task(ws))
            recv_task = asyncio.create_task(_recv_task(ws))
            emit_task = asyncio.create_task(_emit_task())
            tasks = [send_task, recv_task, emit_task]

            try:
                # The segment is complete when sending and emitting are both done.
                # The receiver may still be blocked in ws.receive() at that point
                # (e.g. an empty segment), so it is watched for failures here and
                # cancelled in ``finally`` instead of being awaited to completion.
                produce = asyncio.gather(send_task, emit_task)
                done, _ = await asyncio.wait(
                    {produce, recv_task},
                    return_when=asyncio.FIRST_COMPLETED,
                )
                if recv_task in done:
                    recv_task.result()  # re-raise a receiver failure, if any
                await produce
            except (APIStatusError, APIConnectionError, APITimeoutError):
                # Keep errors raised by the tasks intact (e.g. unexpected close).
                raise
            except asyncio.TimeoutError as e:
                raise APITimeoutError() from e
            except aiohttp.ClientResponseError as e:
                raise APIStatusError(
                    message=e.message,
                    status_code=e.status,
                    request_id=str(current_index),
                    body=None,
                ) from e
            except Exception as e:
                raise APIConnectionError() from e
            finally:
                await utils.aio.gracefully_cancel(*tasks)
|