livekit-plugins-neuphonic 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/neuphonic/tts.py +50 -77
- livekit/plugins/neuphonic/version.py +1 -1
- {livekit_plugins_neuphonic-0.1.0.dist-info → livekit_plugins_neuphonic-1.0.0.dist-info}/METADATA +11 -23
- livekit_plugins_neuphonic-1.0.0.dist-info/RECORD +9 -0
- {livekit_plugins_neuphonic-0.1.0.dist-info → livekit_plugins_neuphonic-1.0.0.dist-info}/WHEEL +1 -2
- livekit_plugins_neuphonic-0.1.0.dist-info/RECORD +0 -10
- livekit_plugins_neuphonic-0.1.0.dist-info/top_level.txt +0 -1
livekit/plugins/neuphonic/tts.py
CHANGED
@@ -20,9 +20,9 @@ import json
|
|
20
20
|
import os
|
21
21
|
import weakref
|
22
22
|
from dataclasses import dataclass
|
23
|
-
from typing import Optional
|
24
23
|
|
25
24
|
import aiohttp
|
25
|
+
|
26
26
|
from livekit.agents import (
|
27
27
|
APIConnectionError,
|
28
28
|
APIConnectOptions,
|
@@ -31,6 +31,8 @@ from livekit.agents import (
|
|
31
31
|
tts,
|
32
32
|
utils,
|
33
33
|
)
|
34
|
+
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
|
35
|
+
from livekit.agents.utils import is_given
|
34
36
|
|
35
37
|
from .log import logger
|
36
38
|
from .models import TTSEncodings, TTSLangCodes, TTSModels
|
@@ -49,26 +51,20 @@ class _TTSOptions:
|
|
49
51
|
encoding: TTSEncodings | str
|
50
52
|
sampling_rate: int
|
51
53
|
speed: float
|
52
|
-
voice_id: str
|
54
|
+
voice_id: NotGivenOr[str] = NOT_GIVEN
|
53
55
|
|
54
56
|
@property
|
55
57
|
def model_params(self) -> dict:
|
56
|
-
"""Returns a
|
57
|
-
params =
|
58
|
-
"voice_id",
|
59
|
-
"model",
|
60
|
-
"lang_code",
|
61
|
-
"encoding",
|
62
|
-
"sampling_rate",
|
63
|
-
"speed",
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
for param in params:
|
68
|
-
if hasattr(self, param) and getattr(self, param) is not None:
|
69
|
-
values[param] = getattr(self, param)
|
70
|
-
|
71
|
-
return values
|
58
|
+
"""Returns a dictionary of model parameters for API requests."""
|
59
|
+
params = {
|
60
|
+
"voice_id": self.voice_id,
|
61
|
+
"model": self.model,
|
62
|
+
"lang_code": self.lang_code,
|
63
|
+
"encoding": self.encoding,
|
64
|
+
"sampling_rate": self.sampling_rate,
|
65
|
+
"speed": self.speed,
|
66
|
+
}
|
67
|
+
return {k: v for k, v in params.items() if is_given(v) and v is not None}
|
72
68
|
|
73
69
|
def get_query_param_string(self):
|
74
70
|
"""Forms the query parameter string from all model parameters."""
|
@@ -97,9 +93,7 @@ def _parse_sse_message(message: str) -> dict:
|
|
97
93
|
message = json.loads(value)
|
98
94
|
|
99
95
|
if message.get("errors") is not None:
|
100
|
-
raise Exception(
|
101
|
-
f"Status {message.status_code} error received: {message.errors}."
|
102
|
-
)
|
96
|
+
raise Exception(f"Status {message.status_code} error received: {message.errors}.")
|
103
97
|
|
104
98
|
return message
|
105
99
|
|
@@ -109,12 +103,12 @@ class TTS(tts.TTS):
|
|
109
103
|
self,
|
110
104
|
*,
|
111
105
|
model: TTSModels | str = "neu_hq",
|
112
|
-
voice_id: str
|
106
|
+
voice_id: NotGivenOr[str] = NOT_GIVEN,
|
113
107
|
lang_code: TTSLangCodes | str = "en",
|
114
108
|
encoding: TTSEncodings | str = "pcm_linear",
|
115
109
|
speed: float = 1.0,
|
116
110
|
sample_rate: int = 22050,
|
117
|
-
api_key: str
|
111
|
+
api_key: NotGivenOr[str] = NOT_GIVEN,
|
118
112
|
http_session: aiohttp.ClientSession | None = None,
|
119
113
|
base_url: str = API_BASE_URL,
|
120
114
|
) -> None:
|
@@ -133,17 +127,16 @@ class TTS(tts.TTS):
|
|
133
127
|
api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_TOKEN environment variable.
|
134
128
|
http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
|
135
129
|
base_url (str, optional): The base URL for the Neuphonic API. Defaults to "api.neuphonic.com".
|
136
|
-
"""
|
130
|
+
""" # noqa: E501
|
137
131
|
super().__init__(
|
138
132
|
capabilities=tts.TTSCapabilities(streaming=True),
|
139
133
|
sample_rate=sample_rate,
|
140
134
|
num_channels=NUM_CHANNELS,
|
141
135
|
)
|
142
136
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
raise ValueError("NEUPHONIC_API_TOKEN must be set")
|
137
|
+
neuphonic_api_key = api_key if is_given(api_key) else os.environ.get("NEUPHONIC_API_TOKEN")
|
138
|
+
if not neuphonic_api_key:
|
139
|
+
raise ValueError("API key must be provided or set in NEUPHONIC_API_TOKEN")
|
147
140
|
|
148
141
|
self._opts = _TTSOptions(
|
149
142
|
model=model,
|
@@ -152,7 +145,7 @@ class TTS(tts.TTS):
|
|
152
145
|
encoding=encoding,
|
153
146
|
speed=speed,
|
154
147
|
sampling_rate=sample_rate,
|
155
|
-
api_key=
|
148
|
+
api_key=neuphonic_api_key,
|
156
149
|
base_url=base_url,
|
157
150
|
)
|
158
151
|
|
@@ -189,12 +182,12 @@ class TTS(tts.TTS):
|
|
189
182
|
def update_options(
|
190
183
|
self,
|
191
184
|
*,
|
192
|
-
model: TTSModels
|
193
|
-
voice_id: str
|
194
|
-
lang_code: TTSLangCodes
|
195
|
-
encoding: TTSEncodings
|
196
|
-
speed: float
|
197
|
-
sample_rate: int
|
185
|
+
model: NotGivenOr[TTSModels] = NOT_GIVEN,
|
186
|
+
voice_id: NotGivenOr[str] = NOT_GIVEN,
|
187
|
+
lang_code: NotGivenOr[TTSLangCodes] = NOT_GIVEN,
|
188
|
+
encoding: NotGivenOr[TTSEncodings] = NOT_GIVEN,
|
189
|
+
speed: NotGivenOr[float] = NOT_GIVEN,
|
190
|
+
sample_rate: NotGivenOr[int] = NOT_GIVEN,
|
198
191
|
) -> None:
|
199
192
|
"""
|
200
193
|
Update the Text-to-Speech (TTS) configuration options.
|
@@ -210,20 +203,26 @@ class TTS(tts.TTS):
|
|
210
203
|
encoding (TTSEncodings | str, optional): The audio encoding format.
|
211
204
|
speed (float, optional): The audio playback speed.
|
212
205
|
sample_rate (int, optional): The audio sample rate in Hz.
|
213
|
-
"""
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
206
|
+
""" # noqa: E501
|
207
|
+
if is_given(model):
|
208
|
+
self._opts.model = model
|
209
|
+
if is_given(voice_id):
|
210
|
+
self._opts.voice_id = voice_id
|
211
|
+
if is_given(lang_code):
|
212
|
+
self._opts.lang_code = lang_code
|
213
|
+
if is_given(encoding):
|
214
|
+
self._opts.encoding = encoding
|
215
|
+
if is_given(speed):
|
216
|
+
self._opts.speed = speed
|
217
|
+
if is_given(sample_rate):
|
218
|
+
self._opts.sampling_rate = sample_rate
|
220
219
|
self._pool.invalidate()
|
221
220
|
|
222
221
|
def synthesize(
|
223
222
|
self,
|
224
223
|
text: str,
|
225
224
|
*,
|
226
|
-
conn_options:
|
225
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
227
226
|
) -> ChunkedStream:
|
228
227
|
return ChunkedStream(
|
229
228
|
tts=self,
|
@@ -234,7 +233,7 @@ class TTS(tts.TTS):
|
|
234
233
|
)
|
235
234
|
|
236
235
|
def stream(
|
237
|
-
self, *, conn_options:
|
236
|
+
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
238
237
|
) -> SynthesizeStream:
|
239
238
|
stream = SynthesizeStream(
|
240
239
|
tts=self,
|
@@ -262,7 +261,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
262
261
|
input_text: str,
|
263
262
|
opts: _TTSOptions,
|
264
263
|
session: aiohttp.ClientSession,
|
265
|
-
conn_options:
|
264
|
+
conn_options: APIConnectOptions,
|
266
265
|
) -> None:
|
267
266
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
268
267
|
self._opts, self._session = opts, session
|
@@ -310,9 +309,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
310
309
|
parsed_message is not None
|
311
310
|
and parsed_message.get("data", {}).get("audio") is not None
|
312
311
|
):
|
313
|
-
audio_bytes = base64.b64decode(
|
314
|
-
parsed_message["data"]["audio"]
|
315
|
-
)
|
312
|
+
audio_bytes = base64.b64decode(parsed_message["data"]["audio"])
|
316
313
|
|
317
314
|
for frame in bstream.write(audio_bytes):
|
318
315
|
emitter.push(frame)
|
@@ -346,26 +343,6 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
346
343
|
|
347
344
|
async def _run(self) -> None:
|
348
345
|
request_id = utils.shortuuid()
|
349
|
-
request_data = {request_id: {"sent": "", "recv": ""}}
|
350
|
-
|
351
|
-
def _is_all_audio_recv():
|
352
|
-
"""Check whether all audio has been recieved."""
|
353
|
-
recv_text = (
|
354
|
-
request_data[request_id]["recv"]
|
355
|
-
.lower()
|
356
|
-
.replace(" ", "")
|
357
|
-
.replace("\n", "")
|
358
|
-
.replace("<stop>", "")
|
359
|
-
)
|
360
|
-
sent_text = (
|
361
|
-
request_data[request_id]["sent"]
|
362
|
-
.lower()
|
363
|
-
.replace(" ", "")
|
364
|
-
.replace("\n", "")
|
365
|
-
.replace("<stop>", "")
|
366
|
-
)
|
367
|
-
|
368
|
-
return sent_text == recv_text
|
369
346
|
|
370
347
|
async def _send_task(ws: aiohttp.ClientWebSocketResponse):
|
371
348
|
"""Stream text to the websocket."""
|
@@ -376,7 +353,6 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
376
353
|
await ws.send_str(json.dumps({"text": "<STOP>"}))
|
377
354
|
continue
|
378
355
|
|
379
|
-
request_data[request_id]["sent"] += data
|
380
356
|
await ws.send_str(json.dumps({"text": data}))
|
381
357
|
|
382
358
|
async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
|
@@ -409,20 +385,17 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
409
385
|
|
410
386
|
if data.get("data"):
|
411
387
|
b64data = base64.b64decode(data["data"]["audio"])
|
412
|
-
recv_text = data["data"]["text"]
|
413
388
|
for frame in audio_bstream.write(b64data):
|
414
389
|
emitter.push(frame)
|
415
390
|
|
416
|
-
|
391
|
+
if data["data"].get("stop"): # A bool flag, is True when audio reaches "<STOP>"
|
392
|
+
for frame in audio_bstream.flush():
|
393
|
+
emitter.push(frame)
|
394
|
+
emitter.flush()
|
395
|
+
break # we are not going to receive any more audio
|
417
396
|
else:
|
418
397
|
logger.error("Unexpected Neuphonic message %s", data)
|
419
398
|
|
420
|
-
if _is_all_audio_recv():
|
421
|
-
for frame in audio_bstream.flush():
|
422
|
-
emitter.push(frame)
|
423
|
-
emitter.flush()
|
424
|
-
break # we are not going to receive any more audio
|
425
|
-
|
426
399
|
async with self._pool.connection() as ws:
|
427
400
|
tasks = [
|
428
401
|
asyncio.create_task(_send_task(ws)),
|
{livekit_plugins_neuphonic-0.1.0.dist-info → livekit_plugins_neuphonic-1.0.0.dist-info}/METADATA
RENAMED
@@ -1,35 +1,23 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-neuphonic
|
3
|
-
Version:
|
4
|
-
Summary:
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
3
|
+
Version: 1.0.0
|
4
|
+
Summary: Neuphonic inference plugin for LiveKit Agents
|
7
5
|
Project-URL: Documentation, https://docs.livekit.io
|
8
6
|
Project-URL: Website, https://livekit.io/
|
9
7
|
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
|
8
|
+
Author-email: LiveKit <hello@livekit.io>
|
9
|
+
License-Expression: Apache-2.0
|
10
|
+
Keywords: audio,livekit,neuphonic,realtime,webrtc
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
12
|
Classifier: Programming Language :: Python :: 3
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
19
13
|
Classifier: Programming Language :: Python :: 3 :: Only
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
18
|
Requires-Python: >=3.9.0
|
19
|
+
Requires-Dist: livekit-agents>=1.0.0
|
21
20
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents<1.0.0,>=0.12.16
|
23
|
-
Dynamic: classifier
|
24
|
-
Dynamic: description
|
25
|
-
Dynamic: description-content-type
|
26
|
-
Dynamic: home-page
|
27
|
-
Dynamic: keywords
|
28
|
-
Dynamic: license
|
29
|
-
Dynamic: project-url
|
30
|
-
Dynamic: requires-dist
|
31
|
-
Dynamic: requires-python
|
32
|
-
Dynamic: summary
|
33
21
|
|
34
22
|
# LiveKit Plugins Neuphonic
|
35
23
|
|
@@ -0,0 +1,9 @@
|
|
1
|
+
livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
|
2
|
+
livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
|
3
|
+
livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
|
4
|
+
livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/neuphonic/tts.py,sha256=vRFkQl9sxI4lMeEWSvyQ8YWJcxX2N7eCG7e5fzTTgbM,14240
|
6
|
+
livekit/plugins/neuphonic/version.py,sha256=nW89L_U9N4ukT3wAO3BeTqOaa87zLUOsEFz8TkiKIP8,600
|
7
|
+
livekit_plugins_neuphonic-1.0.0.dist-info/METADATA,sha256=dGjs8X3smzMOK8XCp-lxBLg2aHQ5jIYg3-3I3F_9880,1174
|
8
|
+
livekit_plugins_neuphonic-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
+
livekit_plugins_neuphonic-1.0.0.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
|
2
|
-
livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
|
3
|
-
livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
|
4
|
-
livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/neuphonic/tts.py,sha256=FFLDghnve_Mrx-0qfEYb4o_Bs8VNnZqAsQNI-M6Zxkw,14736
|
6
|
-
livekit/plugins/neuphonic/version.py,sha256=vQH9cItKAVYAmrLbOntkbLqmxrUZrPiKb1TjkZ8jRKQ,600
|
7
|
-
livekit_plugins_neuphonic-0.1.0.dist-info/METADATA,sha256=Q7Skn-28cnC318qr28oepZyaWnQ9etIO-H2c7D-M9jo,1480
|
8
|
-
livekit_plugins_neuphonic-0.1.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
9
|
-
livekit_plugins_neuphonic-0.1.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_neuphonic-0.1.0.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
livekit
|