livekit-plugins-neuphonic 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,9 +20,9 @@ import json
20
20
  import os
21
21
  import weakref
22
22
  from dataclasses import dataclass
23
- from typing import Optional
24
23
 
25
24
  import aiohttp
25
+
26
26
  from livekit.agents import (
27
27
  APIConnectionError,
28
28
  APIConnectOptions,
@@ -31,6 +31,8 @@ from livekit.agents import (
31
31
  tts,
32
32
  utils,
33
33
  )
34
+ from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
35
+ from livekit.agents.utils import is_given
34
36
 
35
37
  from .log import logger
36
38
  from .models import TTSEncodings, TTSLangCodes, TTSModels
@@ -49,26 +51,20 @@ class _TTSOptions:
49
51
  encoding: TTSEncodings | str
50
52
  sampling_rate: int
51
53
  speed: float
52
- voice_id: str | None = None
54
+ voice_id: NotGivenOr[str] = NOT_GIVEN
53
55
 
54
56
  @property
55
57
  def model_params(self) -> dict:
56
- """Returns a dict of all model parameters and their values."""
57
- params = [
58
- "voice_id",
59
- "model",
60
- "lang_code",
61
- "encoding",
62
- "sampling_rate",
63
- "speed",
64
- ]
65
- values = {}
66
-
67
- for param in params:
68
- if hasattr(self, param) and getattr(self, param) is not None:
69
- values[param] = getattr(self, param)
70
-
71
- return values
58
+ """Returns a dictionary of model parameters for API requests."""
59
+ params = {
60
+ "voice_id": self.voice_id,
61
+ "model": self.model,
62
+ "lang_code": self.lang_code,
63
+ "encoding": self.encoding,
64
+ "sampling_rate": self.sampling_rate,
65
+ "speed": self.speed,
66
+ }
67
+ return {k: v for k, v in params.items() if is_given(v) and v is not None}
72
68
 
73
69
  def get_query_param_string(self):
74
70
  """Forms the query parameter string from all model parameters."""
@@ -97,9 +93,7 @@ def _parse_sse_message(message: str) -> dict:
97
93
  message = json.loads(value)
98
94
 
99
95
  if message.get("errors") is not None:
100
- raise Exception(
101
- f"Status {message.status_code} error received: {message.errors}."
102
- )
96
+ raise Exception(f"Status {message.status_code} error received: {message.errors}.")
103
97
 
104
98
  return message
105
99
 
@@ -109,12 +103,12 @@ class TTS(tts.TTS):
109
103
  self,
110
104
  *,
111
105
  model: TTSModels | str = "neu_hq",
112
- voice_id: str | None = None,
106
+ voice_id: NotGivenOr[str] = NOT_GIVEN,
113
107
  lang_code: TTSLangCodes | str = "en",
114
108
  encoding: TTSEncodings | str = "pcm_linear",
115
109
  speed: float = 1.0,
116
110
  sample_rate: int = 22050,
117
- api_key: str | None = None,
111
+ api_key: NotGivenOr[str] = NOT_GIVEN,
118
112
  http_session: aiohttp.ClientSession | None = None,
119
113
  base_url: str = API_BASE_URL,
120
114
  ) -> None:
@@ -133,17 +127,16 @@ class TTS(tts.TTS):
133
127
  api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_TOKEN environment variable.
134
128
  http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
135
129
  base_url (str, optional): The base URL for the Neuphonic API. Defaults to "api.neuphonic.com".
136
- """
130
+ """ # noqa: E501
137
131
  super().__init__(
138
132
  capabilities=tts.TTSCapabilities(streaming=True),
139
133
  sample_rate=sample_rate,
140
134
  num_channels=NUM_CHANNELS,
141
135
  )
142
136
 
143
- api_key = api_key or os.environ.get("NEUPHONIC_API_TOKEN")
144
-
145
- if not api_key:
146
- raise ValueError("NEUPHONIC_API_TOKEN must be set")
137
+ neuphonic_api_key = api_key if is_given(api_key) else os.environ.get("NEUPHONIC_API_TOKEN")
138
+ if not neuphonic_api_key:
139
+ raise ValueError("API key must be provided or set in NEUPHONIC_API_TOKEN")
147
140
 
148
141
  self._opts = _TTSOptions(
149
142
  model=model,
@@ -152,7 +145,7 @@ class TTS(tts.TTS):
152
145
  encoding=encoding,
153
146
  speed=speed,
154
147
  sampling_rate=sample_rate,
155
- api_key=api_key,
148
+ api_key=neuphonic_api_key,
156
149
  base_url=base_url,
157
150
  )
158
151
 
@@ -189,12 +182,12 @@ class TTS(tts.TTS):
189
182
  def update_options(
190
183
  self,
191
184
  *,
192
- model: TTSModels | str = None,
193
- voice_id: str | None = None,
194
- lang_code: TTSLangCodes | str | None = None,
195
- encoding: TTSEncodings | str | None = None,
196
- speed: float | None = None,
197
- sample_rate: int | None = None,
185
+ model: NotGivenOr[TTSModels] = NOT_GIVEN,
186
+ voice_id: NotGivenOr[str] = NOT_GIVEN,
187
+ lang_code: NotGivenOr[TTSLangCodes] = NOT_GIVEN,
188
+ encoding: NotGivenOr[TTSEncodings] = NOT_GIVEN,
189
+ speed: NotGivenOr[float] = NOT_GIVEN,
190
+ sample_rate: NotGivenOr[int] = NOT_GIVEN,
198
191
  ) -> None:
199
192
  """
200
193
  Update the Text-to-Speech (TTS) configuration options.
@@ -210,20 +203,26 @@ class TTS(tts.TTS):
210
203
  encoding (TTSEncodings | str, optional): The audio encoding format.
211
204
  speed (float, optional): The audio playback speed.
212
205
  sample_rate (int, optional): The audio sample rate in Hz.
213
- """
214
- self._opts.model = model or self._opts.model
215
- self._opts.voice_id = voice_id or self._opts.voice_id
216
- self._opts.lang_code = lang_code or self._opts.lang_code
217
- self._opts.encoding = encoding or self._opts.encoding
218
- self._opts.speed = speed or self._opts.speed
219
- self._opts.sampling_rate = sample_rate or self._opts.sampling_rate
206
+ """ # noqa: E501
207
+ if is_given(model):
208
+ self._opts.model = model
209
+ if is_given(voice_id):
210
+ self._opts.voice_id = voice_id
211
+ if is_given(lang_code):
212
+ self._opts.lang_code = lang_code
213
+ if is_given(encoding):
214
+ self._opts.encoding = encoding
215
+ if is_given(speed):
216
+ self._opts.speed = speed
217
+ if is_given(sample_rate):
218
+ self._opts.sampling_rate = sample_rate
220
219
  self._pool.invalidate()
221
220
 
222
221
  def synthesize(
223
222
  self,
224
223
  text: str,
225
224
  *,
226
- conn_options: Optional[APIConnectOptions] = None,
225
+ conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
227
226
  ) -> ChunkedStream:
228
227
  return ChunkedStream(
229
228
  tts=self,
@@ -234,7 +233,7 @@ class TTS(tts.TTS):
234
233
  )
235
234
 
236
235
  def stream(
237
- self, *, conn_options: Optional[APIConnectOptions] = None
236
+ self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
238
237
  ) -> SynthesizeStream:
239
238
  stream = SynthesizeStream(
240
239
  tts=self,
@@ -262,7 +261,7 @@ class ChunkedStream(tts.ChunkedStream):
262
261
  input_text: str,
263
262
  opts: _TTSOptions,
264
263
  session: aiohttp.ClientSession,
265
- conn_options: Optional[APIConnectOptions] = None,
264
+ conn_options: APIConnectOptions,
266
265
  ) -> None:
267
266
  super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
268
267
  self._opts, self._session = opts, session
@@ -310,9 +309,7 @@ class ChunkedStream(tts.ChunkedStream):
310
309
  parsed_message is not None
311
310
  and parsed_message.get("data", {}).get("audio") is not None
312
311
  ):
313
- audio_bytes = base64.b64decode(
314
- parsed_message["data"]["audio"]
315
- )
312
+ audio_bytes = base64.b64decode(parsed_message["data"]["audio"])
316
313
 
317
314
  for frame in bstream.write(audio_bytes):
318
315
  emitter.push(frame)
@@ -346,26 +343,6 @@ class SynthesizeStream(tts.SynthesizeStream):
346
343
 
347
344
  async def _run(self) -> None:
348
345
  request_id = utils.shortuuid()
349
- request_data = {request_id: {"sent": "", "recv": ""}}
350
-
351
- def _is_all_audio_recv():
352
- """Check whether all audio has been recieved."""
353
- recv_text = (
354
- request_data[request_id]["recv"]
355
- .lower()
356
- .replace(" ", "")
357
- .replace("\n", "")
358
- .replace("<stop>", "")
359
- )
360
- sent_text = (
361
- request_data[request_id]["sent"]
362
- .lower()
363
- .replace(" ", "")
364
- .replace("\n", "")
365
- .replace("<stop>", "")
366
- )
367
-
368
- return sent_text == recv_text
369
346
 
370
347
  async def _send_task(ws: aiohttp.ClientWebSocketResponse):
371
348
  """Stream text to the websocket."""
@@ -376,7 +353,6 @@ class SynthesizeStream(tts.SynthesizeStream):
376
353
  await ws.send_str(json.dumps({"text": "<STOP>"}))
377
354
  continue
378
355
 
379
- request_data[request_id]["sent"] += data
380
356
  await ws.send_str(json.dumps({"text": data}))
381
357
 
382
358
  async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
@@ -409,20 +385,17 @@ class SynthesizeStream(tts.SynthesizeStream):
409
385
 
410
386
  if data.get("data"):
411
387
  b64data = base64.b64decode(data["data"]["audio"])
412
- recv_text = data["data"]["text"]
413
388
  for frame in audio_bstream.write(b64data):
414
389
  emitter.push(frame)
415
390
 
416
- request_data[request_id]["recv"] += recv_text
391
+ if data["data"].get("stop"): # A bool flag, is True when audio reaches "<STOP>"
392
+ for frame in audio_bstream.flush():
393
+ emitter.push(frame)
394
+ emitter.flush()
395
+ break # we are not going to receive any more audio
417
396
  else:
418
397
  logger.error("Unexpected Neuphonic message %s", data)
419
398
 
420
- if _is_all_audio_recv():
421
- for frame in audio_bstream.flush():
422
- emitter.push(frame)
423
- emitter.flush()
424
- break # we are not going to receive any more audio
425
-
426
399
  async with self._pool.connection() as ws:
427
400
  tasks = [
428
401
  asyncio.create_task(_send_task(ws)),
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.1.0"
15
+ __version__ = "1.0.0"
@@ -1,35 +1,23 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: livekit-plugins-neuphonic
3
- Version: 0.1.0
4
- Summary: LiveKit Agents Plugin for Neuphonic
5
- Home-page: https://github.com/livekit/agents
6
- License: Apache-2.0
3
+ Version: 1.0.0
4
+ Summary: Neuphonic inference plugin for LiveKit Agents
7
5
  Project-URL: Documentation, https://docs.livekit.io
8
6
  Project-URL: Website, https://livekit.io/
9
7
  Project-URL: Source, https://github.com/livekit/agents
10
- Keywords: webrtc,realtime,audio,video,livekit
8
+ Author-email: LiveKit <hello@livekit.io>
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,livekit,neuphonic,realtime,webrtc
11
11
  Classifier: Intended Audience :: Developers
12
- Classifier: License :: OSI Approved :: Apache Software License
13
- Classifier: Topic :: Multimedia :: Sound/Audio
14
- Classifier: Topic :: Multimedia :: Video
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
12
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
13
  Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Multimedia :: Sound/Audio
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
18
  Requires-Python: >=3.9.0
19
+ Requires-Dist: livekit-agents>=1.0.0
21
20
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents<1.0.0,>=0.12.16
23
- Dynamic: classifier
24
- Dynamic: description
25
- Dynamic: description-content-type
26
- Dynamic: home-page
27
- Dynamic: keywords
28
- Dynamic: license
29
- Dynamic: project-url
30
- Dynamic: requires-dist
31
- Dynamic: requires-python
32
- Dynamic: summary
33
21
 
34
22
  # LiveKit Plugins Neuphonic
35
23
 
@@ -0,0 +1,9 @@
1
+ livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
2
+ livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
3
+ livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
4
+ livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/neuphonic/tts.py,sha256=vRFkQl9sxI4lMeEWSvyQ8YWJcxX2N7eCG7e5fzTTgbM,14240
6
+ livekit/plugins/neuphonic/version.py,sha256=nW89L_U9N4ukT3wAO3BeTqOaa87zLUOsEFz8TkiKIP8,600
7
+ livekit_plugins_neuphonic-1.0.0.dist-info/METADATA,sha256=dGjs8X3smzMOK8XCp-lxBLg2aHQ5jIYg3-3I3F_9880,1174
8
+ livekit_plugins_neuphonic-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_neuphonic-1.0.0.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,10 +0,0 @@
1
- livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
2
- livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
3
- livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
4
- livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/neuphonic/tts.py,sha256=FFLDghnve_Mrx-0qfEYb4o_Bs8VNnZqAsQNI-M6Zxkw,14736
6
- livekit/plugins/neuphonic/version.py,sha256=vQH9cItKAVYAmrLbOntkbLqmxrUZrPiKb1TjkZ8jRKQ,600
7
- livekit_plugins_neuphonic-0.1.0.dist-info/METADATA,sha256=Q7Skn-28cnC318qr28oepZyaWnQ9etIO-H2c7D-M9jo,1480
8
- livekit_plugins_neuphonic-0.1.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
9
- livekit_plugins_neuphonic-0.1.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_neuphonic-0.1.0.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- livekit