livekit-plugins-neuphonic 0.1.1__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/neuphonic/tts.py +51 -80
- livekit/plugins/neuphonic/version.py +1 -1
- {livekit_plugins_neuphonic-0.1.1.dist-info → livekit_plugins_neuphonic-1.0.0rc2.dist-info}/METADATA +12 -24
- livekit_plugins_neuphonic-1.0.0rc2.dist-info/RECORD +9 -0
- {livekit_plugins_neuphonic-0.1.1.dist-info → livekit_plugins_neuphonic-1.0.0rc2.dist-info}/WHEEL +1 -2
- livekit_plugins_neuphonic-0.1.1.dist-info/RECORD +0 -10
- livekit_plugins_neuphonic-0.1.1.dist-info/top_level.txt +0 -1
livekit/plugins/neuphonic/tts.py
CHANGED
@@ -20,9 +20,9 @@ import json
|
|
20
20
|
import os
|
21
21
|
import weakref
|
22
22
|
from dataclasses import dataclass
|
23
|
-
from typing import Optional
|
24
23
|
|
25
24
|
import aiohttp
|
25
|
+
|
26
26
|
from livekit.agents import (
|
27
27
|
APIConnectionError,
|
28
28
|
APIConnectOptions,
|
@@ -31,6 +31,8 @@ from livekit.agents import (
|
|
31
31
|
tts,
|
32
32
|
utils,
|
33
33
|
)
|
34
|
+
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
|
35
|
+
from livekit.agents.utils import is_given
|
34
36
|
|
35
37
|
from .log import logger
|
36
38
|
from .models import TTSEncodings, TTSLangCodes, TTSModels
|
@@ -49,26 +51,20 @@ class _TTSOptions:
|
|
49
51
|
encoding: TTSEncodings | str
|
50
52
|
sampling_rate: int
|
51
53
|
speed: float
|
52
|
-
voice_id: str
|
54
|
+
voice_id: NotGivenOr[str] = NOT_GIVEN
|
53
55
|
|
54
56
|
@property
|
55
57
|
def model_params(self) -> dict:
|
56
|
-
"""Returns a
|
57
|
-
params =
|
58
|
-
"voice_id",
|
59
|
-
"model",
|
60
|
-
"lang_code",
|
61
|
-
"encoding",
|
62
|
-
"sampling_rate",
|
63
|
-
"speed",
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
for param in params:
|
68
|
-
if hasattr(self, param) and getattr(self, param) is not None:
|
69
|
-
values[param] = getattr(self, param)
|
70
|
-
|
71
|
-
return values
|
58
|
+
"""Returns a dictionary of model parameters for API requests."""
|
59
|
+
params = {
|
60
|
+
"voice_id": self.voice_id,
|
61
|
+
"model": self.model,
|
62
|
+
"lang_code": self.lang_code,
|
63
|
+
"encoding": self.encoding,
|
64
|
+
"sampling_rate": self.sampling_rate,
|
65
|
+
"speed": self.speed,
|
66
|
+
}
|
67
|
+
return {k: v for k, v in params.items() if is_given(v) and v is not None}
|
72
68
|
|
73
69
|
def get_query_param_string(self):
|
74
70
|
"""Forms the query parameter string from all model parameters."""
|
@@ -97,9 +93,7 @@ def _parse_sse_message(message: str) -> dict:
|
|
97
93
|
message = json.loads(value)
|
98
94
|
|
99
95
|
if message.get("errors") is not None:
|
100
|
-
raise Exception(
|
101
|
-
f"Status {message.status_code} error received: {message.errors}."
|
102
|
-
)
|
96
|
+
raise Exception(f"Status {message.status_code} error received: {message.errors}.")
|
103
97
|
|
104
98
|
return message
|
105
99
|
|
@@ -109,12 +103,12 @@ class TTS(tts.TTS):
|
|
109
103
|
self,
|
110
104
|
*,
|
111
105
|
model: TTSModels | str = "neu_hq",
|
112
|
-
voice_id: str
|
106
|
+
voice_id: NotGivenOr[str] = NOT_GIVEN,
|
113
107
|
lang_code: TTSLangCodes | str = "en",
|
114
108
|
encoding: TTSEncodings | str = "pcm_linear",
|
115
109
|
speed: float = 1.0,
|
116
110
|
sample_rate: int = 22050,
|
117
|
-
api_key: str
|
111
|
+
api_key: NotGivenOr[str] = NOT_GIVEN,
|
118
112
|
http_session: aiohttp.ClientSession | None = None,
|
119
113
|
base_url: str = API_BASE_URL,
|
120
114
|
) -> None:
|
@@ -130,22 +124,19 @@ class TTS(tts.TTS):
|
|
130
124
|
encoding (TTSEncodings | str, optional): The audio encoding format. Defaults to "pcm_mulaw".
|
131
125
|
speed (float, optional): The audio playback speed. Defaults to 1.0.
|
132
126
|
sample_rate (int, optional): The audio sample rate in Hz. Defaults to 22050.
|
133
|
-
api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the
|
127
|
+
api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_TOKEN environment variable.
|
134
128
|
http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
|
135
129
|
base_url (str, optional): The base URL for the Neuphonic API. Defaults to "api.neuphonic.com".
|
136
|
-
"""
|
130
|
+
""" # noqa: E501
|
137
131
|
super().__init__(
|
138
132
|
capabilities=tts.TTSCapabilities(streaming=True),
|
139
133
|
sample_rate=sample_rate,
|
140
134
|
num_channels=NUM_CHANNELS,
|
141
135
|
)
|
142
136
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
raise ValueError(
|
147
|
-
"NEUPHONIC_API_KEY must be set using the argument or by setting the NEUPHONIC_API_KEY environment variable."
|
148
|
-
)
|
137
|
+
neuphonic_api_key = api_key if is_given(api_key) else os.environ.get("NEUPHONIC_API_TOKEN")
|
138
|
+
if not neuphonic_api_key:
|
139
|
+
raise ValueError("API key must be provided or set in NEUPHONIC_API_TOKEN")
|
149
140
|
|
150
141
|
self._opts = _TTSOptions(
|
151
142
|
model=model,
|
@@ -154,7 +145,7 @@ class TTS(tts.TTS):
|
|
154
145
|
encoding=encoding,
|
155
146
|
speed=speed,
|
156
147
|
sampling_rate=sample_rate,
|
157
|
-
api_key=
|
148
|
+
api_key=neuphonic_api_key,
|
158
149
|
base_url=base_url,
|
159
150
|
)
|
160
151
|
|
@@ -191,12 +182,12 @@ class TTS(tts.TTS):
|
|
191
182
|
def update_options(
|
192
183
|
self,
|
193
184
|
*,
|
194
|
-
model: TTSModels
|
195
|
-
voice_id: str
|
196
|
-
lang_code: TTSLangCodes
|
197
|
-
encoding: TTSEncodings
|
198
|
-
speed: float
|
199
|
-
sample_rate: int
|
185
|
+
model: NotGivenOr[TTSModels] = NOT_GIVEN,
|
186
|
+
voice_id: NotGivenOr[str] = NOT_GIVEN,
|
187
|
+
lang_code: NotGivenOr[TTSLangCodes] = NOT_GIVEN,
|
188
|
+
encoding: NotGivenOr[TTSEncodings] = NOT_GIVEN,
|
189
|
+
speed: NotGivenOr[float] = NOT_GIVEN,
|
190
|
+
sample_rate: NotGivenOr[int] = NOT_GIVEN,
|
200
191
|
) -> None:
|
201
192
|
"""
|
202
193
|
Update the Text-to-Speech (TTS) configuration options.
|
@@ -212,20 +203,26 @@ class TTS(tts.TTS):
|
|
212
203
|
encoding (TTSEncodings | str, optional): The audio encoding format.
|
213
204
|
speed (float, optional): The audio playback speed.
|
214
205
|
sample_rate (int, optional): The audio sample rate in Hz.
|
215
|
-
"""
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
206
|
+
""" # noqa: E501
|
207
|
+
if is_given(model):
|
208
|
+
self._opts.model = model
|
209
|
+
if is_given(voice_id):
|
210
|
+
self._opts.voice_id = voice_id
|
211
|
+
if is_given(lang_code):
|
212
|
+
self._opts.lang_code = lang_code
|
213
|
+
if is_given(encoding):
|
214
|
+
self._opts.encoding = encoding
|
215
|
+
if is_given(speed):
|
216
|
+
self._opts.speed = speed
|
217
|
+
if is_given(sample_rate):
|
218
|
+
self._opts.sampling_rate = sample_rate
|
222
219
|
self._pool.invalidate()
|
223
220
|
|
224
221
|
def synthesize(
|
225
222
|
self,
|
226
223
|
text: str,
|
227
224
|
*,
|
228
|
-
conn_options:
|
225
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
229
226
|
) -> ChunkedStream:
|
230
227
|
return ChunkedStream(
|
231
228
|
tts=self,
|
@@ -236,7 +233,7 @@ class TTS(tts.TTS):
|
|
236
233
|
)
|
237
234
|
|
238
235
|
def stream(
|
239
|
-
self, *, conn_options:
|
236
|
+
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
240
237
|
) -> SynthesizeStream:
|
241
238
|
stream = SynthesizeStream(
|
242
239
|
tts=self,
|
@@ -264,7 +261,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
264
261
|
input_text: str,
|
265
262
|
opts: _TTSOptions,
|
266
263
|
session: aiohttp.ClientSession,
|
267
|
-
conn_options:
|
264
|
+
conn_options: APIConnectOptions,
|
268
265
|
) -> None:
|
269
266
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
270
267
|
self._opts, self._session = opts, session
|
@@ -312,9 +309,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
312
309
|
parsed_message is not None
|
313
310
|
and parsed_message.get("data", {}).get("audio") is not None
|
314
311
|
):
|
315
|
-
audio_bytes = base64.b64decode(
|
316
|
-
parsed_message["data"]["audio"]
|
317
|
-
)
|
312
|
+
audio_bytes = base64.b64decode(parsed_message["data"]["audio"])
|
318
313
|
|
319
314
|
for frame in bstream.write(audio_bytes):
|
320
315
|
emitter.push(frame)
|
@@ -348,26 +343,6 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
348
343
|
|
349
344
|
async def _run(self) -> None:
|
350
345
|
request_id = utils.shortuuid()
|
351
|
-
request_data = {request_id: {"sent": "", "recv": ""}}
|
352
|
-
|
353
|
-
def _is_all_audio_recv():
|
354
|
-
"""Check whether all audio has been recieved."""
|
355
|
-
recv_text = (
|
356
|
-
request_data[request_id]["recv"]
|
357
|
-
.lower()
|
358
|
-
.replace(" ", "")
|
359
|
-
.replace("\n", "")
|
360
|
-
.replace("<stop>", "")
|
361
|
-
)
|
362
|
-
sent_text = (
|
363
|
-
request_data[request_id]["sent"]
|
364
|
-
.lower()
|
365
|
-
.replace(" ", "")
|
366
|
-
.replace("\n", "")
|
367
|
-
.replace("<stop>", "")
|
368
|
-
)
|
369
|
-
|
370
|
-
return sent_text == recv_text
|
371
346
|
|
372
347
|
async def _send_task(ws: aiohttp.ClientWebSocketResponse):
|
373
348
|
"""Stream text to the websocket."""
|
@@ -378,7 +353,6 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
378
353
|
await ws.send_str(json.dumps({"text": "<STOP>"}))
|
379
354
|
continue
|
380
355
|
|
381
|
-
request_data[request_id]["sent"] += data
|
382
356
|
await ws.send_str(json.dumps({"text": data}))
|
383
357
|
|
384
358
|
async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
|
@@ -411,20 +385,17 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
411
385
|
|
412
386
|
if data.get("data"):
|
413
387
|
b64data = base64.b64decode(data["data"]["audio"])
|
414
|
-
recv_text = data["data"]["text"]
|
415
388
|
for frame in audio_bstream.write(b64data):
|
416
389
|
emitter.push(frame)
|
417
390
|
|
418
|
-
|
391
|
+
if data["data"].get("stop"): # A bool flag, is True when audio reaches "<STOP>"
|
392
|
+
for frame in audio_bstream.flush():
|
393
|
+
emitter.push(frame)
|
394
|
+
emitter.flush()
|
395
|
+
break # we are not going to receive any more audio
|
419
396
|
else:
|
420
397
|
logger.error("Unexpected Neuphonic message %s", data)
|
421
398
|
|
422
|
-
if _is_all_audio_recv():
|
423
|
-
for frame in audio_bstream.flush():
|
424
|
-
emitter.push(frame)
|
425
|
-
emitter.flush()
|
426
|
-
break # we are not going to receive any more audio
|
427
|
-
|
428
399
|
async with self._pool.connection() as ws:
|
429
400
|
tasks = [
|
430
401
|
asyncio.create_task(_send_task(ws)),
|
{livekit_plugins_neuphonic-0.1.1.dist-info → livekit_plugins_neuphonic-1.0.0rc2.dist-info}/METADATA
RENAMED
@@ -1,35 +1,23 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-neuphonic
|
3
|
-
Version: 0.
|
4
|
-
Summary:
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
3
|
+
Version: 1.0.0rc2
|
4
|
+
Summary: Neuphonic inference plugin for LiveKit Agents
|
7
5
|
Project-URL: Documentation, https://docs.livekit.io
|
8
6
|
Project-URL: Website, https://livekit.io/
|
9
7
|
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
|
8
|
+
Author-email: LiveKit <support@livekit.io>
|
9
|
+
License-Expression: Apache-2.0
|
10
|
+
Keywords: audio,livekit,neuphonic,realtime,webrtc
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
12
|
Classifier: Programming Language :: Python :: 3
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
19
13
|
Classifier: Programming Language :: Python :: 3 :: Only
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
18
|
Requires-Python: >=3.9.0
|
19
|
+
Requires-Dist: livekit-agents>=1.0.0.rc2
|
21
20
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: livekit-agents<1.0.0,>=0.12.16
|
23
|
-
Dynamic: classifier
|
24
|
-
Dynamic: description
|
25
|
-
Dynamic: description-content-type
|
26
|
-
Dynamic: home-page
|
27
|
-
Dynamic: keywords
|
28
|
-
Dynamic: license
|
29
|
-
Dynamic: project-url
|
30
|
-
Dynamic: requires-dist
|
31
|
-
Dynamic: requires-python
|
32
|
-
Dynamic: summary
|
33
21
|
|
34
22
|
# LiveKit Plugins Neuphonic
|
35
23
|
|
@@ -43,4 +31,4 @@ pip install livekit-plugins-neuphonic
|
|
43
31
|
|
44
32
|
## Pre-requisites
|
45
33
|
|
46
|
-
You'll need an API key from Neuphonic. It can be set as an environment variable: `
|
34
|
+
You'll need an API key from Neuphonic. It can be set as an environment variable: `NEUPHONIC_API_TOKEN`
|
@@ -0,0 +1,9 @@
|
|
1
|
+
livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
|
2
|
+
livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
|
3
|
+
livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
|
4
|
+
livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/neuphonic/tts.py,sha256=vRFkQl9sxI4lMeEWSvyQ8YWJcxX2N7eCG7e5fzTTgbM,14240
|
6
|
+
livekit/plugins/neuphonic/version.py,sha256=AHsNOknrNG9prN-fv_7X-KI5-O8ZMrUnTDyl9ObQIzY,604
|
7
|
+
livekit_plugins_neuphonic-1.0.0rc2.dist-info/METADATA,sha256=ue4oeSg-B_NjwiCq8IvhLpdki_0sgK1VY1LnOqc8tTY,1183
|
8
|
+
livekit_plugins_neuphonic-1.0.0rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
+
livekit_plugins_neuphonic-1.0.0rc2.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
|
2
|
-
livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
|
3
|
-
livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
|
4
|
-
livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/neuphonic/tts.py,sha256=VvAQ7-fja3TzfM0POyk1LyPXSZbXnyiE7_-Bsd98qEk,14837
|
6
|
-
livekit/plugins/neuphonic/version.py,sha256=3-nEcobvIJfZdV4yNIRuYpAGQ3svREnYIv2ivxoIZcQ,600
|
7
|
-
livekit_plugins_neuphonic-0.1.1.dist-info/METADATA,sha256=0_fdr8DXqhh5HJ47byiBfuB3Qrjz3jJo-i8URkpeTtY,1478
|
8
|
-
livekit_plugins_neuphonic-0.1.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
9
|
-
livekit_plugins_neuphonic-0.1.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
10
|
-
livekit_plugins_neuphonic-0.1.1.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
livekit
|