livekit-plugins-neuphonic 0.1.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,9 +20,9 @@ import json
20
20
  import os
21
21
  import weakref
22
22
  from dataclasses import dataclass
23
- from typing import Optional
24
23
 
25
24
  import aiohttp
25
+
26
26
  from livekit.agents import (
27
27
  APIConnectionError,
28
28
  APIConnectOptions,
@@ -31,6 +31,8 @@ from livekit.agents import (
31
31
  tts,
32
32
  utils,
33
33
  )
34
+ from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
35
+ from livekit.agents.utils import is_given
34
36
 
35
37
  from .log import logger
36
38
  from .models import TTSEncodings, TTSLangCodes, TTSModels
@@ -49,26 +51,20 @@ class _TTSOptions:
49
51
  encoding: TTSEncodings | str
50
52
  sampling_rate: int
51
53
  speed: float
52
- voice_id: str | None = None
54
+ voice_id: NotGivenOr[str] = NOT_GIVEN
53
55
 
54
56
  @property
55
57
  def model_params(self) -> dict:
56
- """Returns a dict of all model parameters and their values."""
57
- params = [
58
- "voice_id",
59
- "model",
60
- "lang_code",
61
- "encoding",
62
- "sampling_rate",
63
- "speed",
64
- ]
65
- values = {}
66
-
67
- for param in params:
68
- if hasattr(self, param) and getattr(self, param) is not None:
69
- values[param] = getattr(self, param)
70
-
71
- return values
58
+ """Returns a dictionary of model parameters for API requests."""
59
+ params = {
60
+ "voice_id": self.voice_id,
61
+ "model": self.model,
62
+ "lang_code": self.lang_code,
63
+ "encoding": self.encoding,
64
+ "sampling_rate": self.sampling_rate,
65
+ "speed": self.speed,
66
+ }
67
+ return {k: v for k, v in params.items() if is_given(v) and v is not None}
72
68
 
73
69
  def get_query_param_string(self):
74
70
  """Forms the query parameter string from all model parameters."""
@@ -97,9 +93,7 @@ def _parse_sse_message(message: str) -> dict:
97
93
  message = json.loads(value)
98
94
 
99
95
  if message.get("errors") is not None:
100
- raise Exception(
101
- f"Status {message.status_code} error received: {message.errors}."
102
- )
96
+ raise Exception(f"Status {message.status_code} error received: {message.errors}.")
103
97
 
104
98
  return message
105
99
 
@@ -109,12 +103,12 @@ class TTS(tts.TTS):
109
103
  self,
110
104
  *,
111
105
  model: TTSModels | str = "neu_hq",
112
- voice_id: str | None = None,
106
+ voice_id: NotGivenOr[str] = NOT_GIVEN,
113
107
  lang_code: TTSLangCodes | str = "en",
114
108
  encoding: TTSEncodings | str = "pcm_linear",
115
109
  speed: float = 1.0,
116
110
  sample_rate: int = 22050,
117
- api_key: str | None = None,
111
+ api_key: NotGivenOr[str] = NOT_GIVEN,
118
112
  http_session: aiohttp.ClientSession | None = None,
119
113
  base_url: str = API_BASE_URL,
120
114
  ) -> None:
@@ -130,22 +124,19 @@ class TTS(tts.TTS):
130
124
  encoding (TTSEncodings | str, optional): The audio encoding format. Defaults to "pcm_mulaw".
131
125
  speed (float, optional): The audio playback speed. Defaults to 1.0.
132
126
  sample_rate (int, optional): The audio sample rate in Hz. Defaults to 22050.
133
- api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_KEY environment variable.
127
+ api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_TOKEN environment variable.
134
128
  http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
135
129
  base_url (str, optional): The base URL for the Neuphonic API. Defaults to "api.neuphonic.com".
136
- """
130
+ """ # noqa: E501
137
131
  super().__init__(
138
132
  capabilities=tts.TTSCapabilities(streaming=True),
139
133
  sample_rate=sample_rate,
140
134
  num_channels=NUM_CHANNELS,
141
135
  )
142
136
 
143
- api_key = api_key or os.environ.get("NEUPHONIC_API_KEY")
144
-
145
- if not api_key:
146
- raise ValueError(
147
- "NEUPHONIC_API_KEY must be set using the argument or by setting the NEUPHONIC_API_KEY environment variable."
148
- )
137
+ neuphonic_api_key = api_key if is_given(api_key) else os.environ.get("NEUPHONIC_API_TOKEN")
138
+ if not neuphonic_api_key:
139
+ raise ValueError("API key must be provided or set in NEUPHONIC_API_TOKEN")
149
140
 
150
141
  self._opts = _TTSOptions(
151
142
  model=model,
@@ -154,7 +145,7 @@ class TTS(tts.TTS):
154
145
  encoding=encoding,
155
146
  speed=speed,
156
147
  sampling_rate=sample_rate,
157
- api_key=api_key,
148
+ api_key=neuphonic_api_key,
158
149
  base_url=base_url,
159
150
  )
160
151
 
@@ -191,12 +182,12 @@ class TTS(tts.TTS):
191
182
  def update_options(
192
183
  self,
193
184
  *,
194
- model: TTSModels | str = None,
195
- voice_id: str | None = None,
196
- lang_code: TTSLangCodes | str | None = None,
197
- encoding: TTSEncodings | str | None = None,
198
- speed: float | None = None,
199
- sample_rate: int | None = None,
185
+ model: NotGivenOr[TTSModels] = NOT_GIVEN,
186
+ voice_id: NotGivenOr[str] = NOT_GIVEN,
187
+ lang_code: NotGivenOr[TTSLangCodes] = NOT_GIVEN,
188
+ encoding: NotGivenOr[TTSEncodings] = NOT_GIVEN,
189
+ speed: NotGivenOr[float] = NOT_GIVEN,
190
+ sample_rate: NotGivenOr[int] = NOT_GIVEN,
200
191
  ) -> None:
201
192
  """
202
193
  Update the Text-to-Speech (TTS) configuration options.
@@ -212,20 +203,26 @@ class TTS(tts.TTS):
212
203
  encoding (TTSEncodings | str, optional): The audio encoding format.
213
204
  speed (float, optional): The audio playback speed.
214
205
  sample_rate (int, optional): The audio sample rate in Hz.
215
- """
216
- self._opts.model = model or self._opts.model
217
- self._opts.voice_id = voice_id or self._opts.voice_id
218
- self._opts.lang_code = lang_code or self._opts.lang_code
219
- self._opts.encoding = encoding or self._opts.encoding
220
- self._opts.speed = speed or self._opts.speed
221
- self._opts.sampling_rate = sample_rate or self._opts.sampling_rate
206
+ """ # noqa: E501
207
+ if is_given(model):
208
+ self._opts.model = model
209
+ if is_given(voice_id):
210
+ self._opts.voice_id = voice_id
211
+ if is_given(lang_code):
212
+ self._opts.lang_code = lang_code
213
+ if is_given(encoding):
214
+ self._opts.encoding = encoding
215
+ if is_given(speed):
216
+ self._opts.speed = speed
217
+ if is_given(sample_rate):
218
+ self._opts.sampling_rate = sample_rate
222
219
  self._pool.invalidate()
223
220
 
224
221
  def synthesize(
225
222
  self,
226
223
  text: str,
227
224
  *,
228
- conn_options: Optional[APIConnectOptions] = None,
225
+ conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
229
226
  ) -> ChunkedStream:
230
227
  return ChunkedStream(
231
228
  tts=self,
@@ -236,7 +233,7 @@ class TTS(tts.TTS):
236
233
  )
237
234
 
238
235
  def stream(
239
- self, *, conn_options: Optional[APIConnectOptions] = None
236
+ self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
240
237
  ) -> SynthesizeStream:
241
238
  stream = SynthesizeStream(
242
239
  tts=self,
@@ -264,7 +261,7 @@ class ChunkedStream(tts.ChunkedStream):
264
261
  input_text: str,
265
262
  opts: _TTSOptions,
266
263
  session: aiohttp.ClientSession,
267
- conn_options: Optional[APIConnectOptions] = None,
264
+ conn_options: APIConnectOptions,
268
265
  ) -> None:
269
266
  super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
270
267
  self._opts, self._session = opts, session
@@ -312,9 +309,7 @@ class ChunkedStream(tts.ChunkedStream):
312
309
  parsed_message is not None
313
310
  and parsed_message.get("data", {}).get("audio") is not None
314
311
  ):
315
- audio_bytes = base64.b64decode(
316
- parsed_message["data"]["audio"]
317
- )
312
+ audio_bytes = base64.b64decode(parsed_message["data"]["audio"])
318
313
 
319
314
  for frame in bstream.write(audio_bytes):
320
315
  emitter.push(frame)
@@ -348,26 +343,6 @@ class SynthesizeStream(tts.SynthesizeStream):
348
343
 
349
344
  async def _run(self) -> None:
350
345
  request_id = utils.shortuuid()
351
- request_data = {request_id: {"sent": "", "recv": ""}}
352
-
353
- def _is_all_audio_recv():
354
- """Check whether all audio has been recieved."""
355
- recv_text = (
356
- request_data[request_id]["recv"]
357
- .lower()
358
- .replace(" ", "")
359
- .replace("\n", "")
360
- .replace("<stop>", "")
361
- )
362
- sent_text = (
363
- request_data[request_id]["sent"]
364
- .lower()
365
- .replace(" ", "")
366
- .replace("\n", "")
367
- .replace("<stop>", "")
368
- )
369
-
370
- return sent_text == recv_text
371
346
 
372
347
  async def _send_task(ws: aiohttp.ClientWebSocketResponse):
373
348
  """Stream text to the websocket."""
@@ -378,7 +353,6 @@ class SynthesizeStream(tts.SynthesizeStream):
378
353
  await ws.send_str(json.dumps({"text": "<STOP>"}))
379
354
  continue
380
355
 
381
- request_data[request_id]["sent"] += data
382
356
  await ws.send_str(json.dumps({"text": data}))
383
357
 
384
358
  async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
@@ -411,20 +385,17 @@ class SynthesizeStream(tts.SynthesizeStream):
411
385
 
412
386
  if data.get("data"):
413
387
  b64data = base64.b64decode(data["data"]["audio"])
414
- recv_text = data["data"]["text"]
415
388
  for frame in audio_bstream.write(b64data):
416
389
  emitter.push(frame)
417
390
 
418
- request_data[request_id]["recv"] += recv_text
391
+ if data["data"].get("stop"): # A bool flag, is True when audio reaches "<STOP>"
392
+ for frame in audio_bstream.flush():
393
+ emitter.push(frame)
394
+ emitter.flush()
395
+ break # we are not going to receive any more audio
419
396
  else:
420
397
  logger.error("Unexpected Neuphonic message %s", data)
421
398
 
422
- if _is_all_audio_recv():
423
- for frame in audio_bstream.flush():
424
- emitter.push(frame)
425
- emitter.flush()
426
- break # we are not going to receive any more audio
427
-
428
399
  async with self._pool.connection() as ws:
429
400
  tasks = [
430
401
  asyncio.create_task(_send_task(ws)),
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.1.1"
15
+ __version__ = "1.0.0"
@@ -1,35 +1,23 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: livekit-plugins-neuphonic
3
- Version: 0.1.1
4
- Summary: LiveKit Agents Plugin for Neuphonic
5
- Home-page: https://github.com/livekit/agents
6
- License: Apache-2.0
3
+ Version: 1.0.0
4
+ Summary: Neuphonic inference plugin for LiveKit Agents
7
5
  Project-URL: Documentation, https://docs.livekit.io
8
6
  Project-URL: Website, https://livekit.io/
9
7
  Project-URL: Source, https://github.com/livekit/agents
10
- Keywords: webrtc,realtime,audio,video,livekit
8
+ Author-email: LiveKit <hello@livekit.io>
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,livekit,neuphonic,realtime,webrtc
11
11
  Classifier: Intended Audience :: Developers
12
- Classifier: License :: OSI Approved :: Apache Software License
13
- Classifier: Topic :: Multimedia :: Sound/Audio
14
- Classifier: Topic :: Multimedia :: Video
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
12
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
13
  Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Multimedia :: Sound/Audio
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
18
  Requires-Python: >=3.9.0
19
+ Requires-Dist: livekit-agents>=1.0.0
21
20
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents<1.0.0,>=0.12.16
23
- Dynamic: classifier
24
- Dynamic: description
25
- Dynamic: description-content-type
26
- Dynamic: home-page
27
- Dynamic: keywords
28
- Dynamic: license
29
- Dynamic: project-url
30
- Dynamic: requires-dist
31
- Dynamic: requires-python
32
- Dynamic: summary
33
21
 
34
22
  # LiveKit Plugins Neuphonic
35
23
 
@@ -43,4 +31,4 @@ pip install livekit-plugins-neuphonic
43
31
 
44
32
  ## Pre-requisites
45
33
 
46
- You'll need an API key from Neuphonic. It can be set as an environment variable: `NEUPHONIC_API_KEY`
34
+ You'll need an API key from Neuphonic. It can be set as an environment variable: `NEUPHONIC_API_TOKEN`
@@ -0,0 +1,9 @@
1
+ livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
2
+ livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
3
+ livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
4
+ livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/neuphonic/tts.py,sha256=vRFkQl9sxI4lMeEWSvyQ8YWJcxX2N7eCG7e5fzTTgbM,14240
6
+ livekit/plugins/neuphonic/version.py,sha256=nW89L_U9N4ukT3wAO3BeTqOaa87zLUOsEFz8TkiKIP8,600
7
+ livekit_plugins_neuphonic-1.0.0.dist-info/METADATA,sha256=dGjs8X3smzMOK8XCp-lxBLg2aHQ5jIYg3-3I3F_9880,1174
8
+ livekit_plugins_neuphonic-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_neuphonic-1.0.0.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.1.0)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,10 +0,0 @@
1
- livekit/plugins/neuphonic/__init__.py,sha256=mJnPVLsKAdUkdWuHWd16A0n2vsVBi3GjgNmB8gv9jjI,1097
2
- livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
3
- livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
4
- livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/neuphonic/tts.py,sha256=VvAQ7-fja3TzfM0POyk1LyPXSZbXnyiE7_-Bsd98qEk,14837
6
- livekit/plugins/neuphonic/version.py,sha256=3-nEcobvIJfZdV4yNIRuYpAGQ3svREnYIv2ivxoIZcQ,600
7
- livekit_plugins_neuphonic-0.1.1.dist-info/METADATA,sha256=0_fdr8DXqhh5HJ47byiBfuB3Qrjz3jJo-i8URkpeTtY,1478
8
- livekit_plugins_neuphonic-0.1.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
9
- livekit_plugins_neuphonic-0.1.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_neuphonic-0.1.1.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- livekit