livekit-plugins-neuphonic 1.2.15__py3-none-any.whl → 1.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-neuphonic might be problematic. Click here for more details.

@@ -12,89 +12,131 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from __future__ import annotations
15
+ from __future__ import annotations # noqa: I001
16
16
 
17
17
  import asyncio
18
18
  import base64
19
19
  import json
20
20
  import os
21
+ import weakref
21
22
  from dataclasses import dataclass, replace
22
23
 
23
24
  import aiohttp
24
-
25
25
  from livekit.agents import (
26
26
  APIConnectionError,
27
27
  APIConnectOptions,
28
+ APIError,
28
29
  APIStatusError,
29
30
  APITimeoutError,
31
+ tokenize,
30
32
  tts,
31
33
  utils,
32
34
  )
33
35
  from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
34
36
  from livekit.agents.utils import is_given
35
37
 
36
- from .models import TTSLangCodes
38
+ from .log import logger # noqa: I001
39
+ from .models import TTSLangCodes # noqa: I001
37
40
 
38
- API_BASE_URL = "api.neuphonic.com"
39
- AUTHORIZATION_HEADER = "X-API-KEY"
41
+ API_AUTH_HEADER = "x-api-key"
40
42
 
41
43
 
42
44
  @dataclass
43
45
  class _TTSOptions:
44
- base_url: str
45
46
  lang_code: TTSLangCodes | str
46
- api_key: str
47
+ encoding: str
47
48
  sample_rate: int
48
- speed: float
49
- voice_id: str | None
49
+ voice_id: str
50
+ speed: float | None
51
+ api_key: str
52
+ base_url: str
53
+ word_tokenizer: tokenize.WordTokenizer
54
+
55
+ def get_http_url(self, path: str) -> str:
56
+ return f"{self.base_url}{path}"
57
+
58
+ def get_ws_url(self, path: str) -> str:
59
+ return f"{self.base_url.replace('http', 'ws', 1)}{path}"
50
60
 
51
61
 
52
62
  class TTS(tts.TTS):
53
63
  def __init__(
54
64
  self,
55
65
  *,
56
- voice_id: str = "8e9c4bc8-3979-48ab-8626-df53befc2090",
57
66
  api_key: str | None = None,
58
67
  lang_code: TTSLangCodes | str = "en",
59
- speed: float = 1.0,
68
+ encoding: str = "pcm_linear",
69
+ voice_id: str = "8e9c4bc8-3979-48ab-8626-df53befc2090",
70
+ speed: float | None = 1.0,
60
71
  sample_rate: int = 22050,
61
72
  http_session: aiohttp.ClientSession | None = None,
62
- base_url: str = API_BASE_URL,
73
+ word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
74
+ tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
75
+ base_url: str = "https://api.neuphonic.com",
63
76
  ) -> None:
64
77
  """
65
- Create a new instance of the Neuphonic TTS.
78
+ Create a new instance of NeuPhonic TTS.
66
79
 
67
- See https://docs.neuphonic.com for more documentation on all of these options, or go to https://app.neuphonic.com/ to test out different options.
80
+ See https://docs.neuphonic.com for more details on the NeuPhonic API.
68
81
 
69
82
  Args:
70
- voice_id (str, optional): The voice ID for the desired voice. Defaults to None.
71
- lang_code (TTSLanguages | str, optional): The language code for synthesis. Defaults to "en".
72
- encoding (TTSEncodings | str, optional): The audio encoding format. Defaults to "pcm_mulaw".
83
+ lang_code (TTSLangCodes | str, optional): The language code for synthesis. Defaults to "en".
84
+ encoding (str, optional): The audio encoding format. Defaults to "pcm_linear".
85
+ voice_id (str, optional): The voice ID for the desired voice.
73
86
  speed (float, optional): The audio playback speed. Defaults to 1.0.
74
87
  sample_rate (int, optional): The audio sample rate in Hz. Defaults to 22050.
75
- api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_KEY environment variable.
88
+ api_key (str, optional): The NeuPhonic API key. If not provided, it will be read from the NEUPHONIC_API_KEY environment variable.
76
89
  http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
77
- base_url (str, optional): The base URL for the Neuphonic API. Defaults to "api.neuphonic.com".
90
+ word_tokenizer (tokenize.WordTokenizer, optional): The word tokenizer to use. Defaults to tokenize.basic.WordTokenizer().
91
+ tokenizer (tokenize.SentenceTokenizer, optional): The sentence tokenizer to use. Defaults to tokenize.blingfire.SentenceTokenizer().
92
+ base_url (str, optional): The base URL for the NeuPhonic API. Defaults to "https://api.neuphonic.com".
78
93
  """ # noqa: E501
94
+
79
95
  super().__init__(
80
- capabilities=tts.TTSCapabilities(streaming=False),
96
+ capabilities=tts.TTSCapabilities(streaming=True),
81
97
  sample_rate=sample_rate,
82
98
  num_channels=1,
83
99
  )
100
+ neuphonic_api_key = api_key or os.environ.get("NEUPHONIC_API_KEY")
101
+ if not neuphonic_api_key:
102
+ raise ValueError("NEUPHONIC_API_KEY must be set")
84
103
 
85
- api_key = api_key or os.environ.get("NEUPHONIC_API_KEY")
86
- if not api_key:
87
- raise ValueError("API key must be provided or set in NEUPHONIC_API_KEY")
104
+ if not is_given(word_tokenizer):
105
+ word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)
88
106
 
89
107
  self._opts = _TTSOptions(
90
- voice_id=voice_id,
91
108
  lang_code=lang_code,
92
- api_key=api_key,
93
- speed=speed,
109
+ encoding=encoding,
94
110
  sample_rate=sample_rate,
111
+ voice_id=voice_id,
112
+ speed=speed,
113
+ api_key=neuphonic_api_key,
95
114
  base_url=base_url,
115
+ word_tokenizer=word_tokenizer,
96
116
  )
97
117
  self._session = http_session
118
+ self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
119
+ connect_cb=self._connect_ws,
120
+ close_cb=self._close_ws,
121
+ max_session_duration=300,
122
+ mark_refreshed_on_get=True,
123
+ )
124
+ self._streams = weakref.WeakSet[SynthesizeStream]()
125
+ self._sentence_tokenizer = (
126
+ tokenizer if is_given(tokenizer) else tokenize.blingfire.SentenceTokenizer()
127
+ )
128
+
129
+ async def _connect_ws(self, timeout: float) -> aiohttp.ClientWebSocketResponse:
130
+ session = self._ensure_session()
131
+ url = self._opts.get_ws_url(
132
+ f"/speak/en?api_key={self._opts.api_key}&speed={self._opts.speed}&lang_code={self._opts.lang_code}&sampling_rate={self._opts.sample_rate}&voice_id={self._opts.voice_id}"
133
+ )
134
+
135
+ headers = {API_AUTH_HEADER: self._opts.api_key}
136
+ return await asyncio.wait_for(session.ws_connect(url, headers=headers), timeout)
137
+
138
+ async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
139
+ await ws.close()
98
140
 
99
141
  @property
100
142
  def model(self) -> str:
@@ -110,43 +152,56 @@ class TTS(tts.TTS):
110
152
 
111
153
  return self._session
112
154
 
155
+ def prewarm(self) -> None:
156
+ self._pool.prewarm()
157
+
113
158
  def update_options(
114
159
  self,
115
160
  *,
161
+ lang_code: NotGivenOr[TTSLangCodes | str] = NOT_GIVEN,
116
162
  voice_id: NotGivenOr[str] = NOT_GIVEN,
117
- lang_code: NotGivenOr[TTSLangCodes] = NOT_GIVEN,
118
- speed: NotGivenOr[float] = NOT_GIVEN,
119
- sample_rate: NotGivenOr[int] = NOT_GIVEN,
163
+ speed: NotGivenOr[float | None] = NOT_GIVEN,
120
164
  ) -> None:
121
165
  """
122
166
  Update the Text-to-Speech (TTS) configuration options.
123
167
 
124
- This method allows updating the TTS settings, including model type, voice_id, lang_code,
125
- encoding, speed and sample_rate. If any parameter is not provided, the existing value will be
126
- retained.
168
+ This allows updating the TTS settings, including lang_code, voice_id, and speed.
169
+ If any parameter is not provided, the existing value will be retained.
127
170
 
128
171
  Args:
129
- model (TTSModels | str, optional): The Neuphonic model to use.
172
+ lang_code (TTSLangCodes | str, optional): The language code for synthesis.
130
173
  voice_id (str, optional): The voice ID for the desired voice.
131
- lang_code (TTSLanguages | str, optional): The language code for synthesis..
132
- encoding (TTSEncodings | str, optional): The audio encoding format.
133
174
  speed (float, optional): The audio playback speed.
134
- sample_rate (int, optional): The audio sample rate in Hz.
135
- """ # noqa: E501
136
- if is_given(voice_id):
137
- self._opts.voice_id = voice_id
175
+ """
138
176
  if is_given(lang_code):
139
177
  self._opts.lang_code = lang_code
178
+ if is_given(voice_id):
179
+ self._opts.voice_id = voice_id
140
180
  if is_given(speed):
141
181
  self._opts.speed = speed
142
- if is_given(sample_rate):
143
- self._opts.sample_rate = sample_rate
144
182
 
145
183
  def synthesize(
146
- self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
184
+ self,
185
+ text: str,
186
+ *,
187
+ conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
147
188
  ) -> ChunkedStream:
148
189
  return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
149
190
 
191
+ def stream(
192
+ self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
193
+ ) -> SynthesizeStream:
194
+ stream = SynthesizeStream(tts=self, conn_options=conn_options)
195
+ self._streams.add(stream)
196
+ return stream
197
+
198
+ async def aclose(self) -> None:
199
+ for stream in list(self._streams):
200
+ await stream.aclose()
201
+
202
+ self._streams.clear()
203
+ await self._pool.aclose()
204
+
150
205
 
151
206
  class ChunkedStream(tts.ChunkedStream):
152
207
  """Synthesize chunked text using the SSE endpoint"""
@@ -165,8 +220,8 @@ class ChunkedStream(tts.ChunkedStream):
165
220
  async def _run(self, output_emitter: tts.AudioEmitter) -> None:
166
221
  try:
167
222
  async with self._tts._ensure_session().post(
168
- f"https://{self._opts.base_url}/sse/speak/{self._opts.lang_code}",
169
- headers={AUTHORIZATION_HEADER: self._opts.api_key},
223
+ f"{self._opts.base_url}/sse/speak/{self._opts.lang_code}",
224
+ headers={API_AUTH_HEADER: self._opts.api_key},
170
225
  json={
171
226
  "text": self._input_text,
172
227
  "voice_id": self._opts.voice_id,
@@ -235,7 +290,130 @@ def _parse_sse_message(message: str) -> dict | None:
235
290
 
236
291
  if message_dict.get("errors") is not None:
237
292
  raise Exception(
238
- f"received error status {message_dict['status_code']}: {message_dict['errors']}"
293
+ f"received error status {message_dict['status_code']}:{message_dict['errors']}"
239
294
  )
240
295
 
241
296
  return message_dict
297
+
298
+
299
+ class SynthesizeStream(tts.SynthesizeStream):
300
+ def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
301
+ super().__init__(tts=tts, conn_options=conn_options)
302
+ self._tts: TTS = tts
303
+ self._opts = replace(tts._opts)
304
+ self._segments_ch = utils.aio.Chan[tokenize.WordStream]()
305
+
306
+ async def _run(self, output_emitter: tts.AudioEmitter) -> None:
307
+ request_id = utils.shortuuid()
308
+ output_emitter.initialize(
309
+ request_id=request_id,
310
+ sample_rate=self._opts.sample_rate,
311
+ num_channels=1,
312
+ mime_type="audio/pcm",
313
+ stream=True,
314
+ )
315
+
316
+ async def _tokenize_input() -> None:
317
+ word_stream = None
318
+ async for input in self._input_ch:
319
+ if isinstance(input, str):
320
+ if word_stream is None:
321
+ word_stream = self._opts.word_tokenizer.stream()
322
+ self._segments_ch.send_nowait(word_stream)
323
+ word_stream.push_text(input)
324
+ elif isinstance(input, self._FlushSentinel):
325
+ if word_stream:
326
+ word_stream.end_input()
327
+ word_stream = None
328
+
329
+ self._segments_ch.close()
330
+
331
+ async def _run_segments() -> None:
332
+ async for word_stream in self._segments_ch:
333
+ await self._run_ws(word_stream, output_emitter)
334
+
335
+ tasks = [
336
+ asyncio.create_task(_tokenize_input()),
337
+ asyncio.create_task(_run_segments()),
338
+ ]
339
+ try:
340
+ await asyncio.gather(*tasks)
341
+ except asyncio.TimeoutError:
342
+ raise APITimeoutError() from None
343
+ except aiohttp.ClientResponseError as e:
344
+ raise APIStatusError(
345
+ message=e.message,
346
+ status_code=e.status,
347
+ request_id=request_id,
348
+ body=None,
349
+ ) from None
350
+ except Exception as e:
351
+ raise APIConnectionError() from e
352
+ finally:
353
+ await utils.aio.gracefully_cancel(*tasks)
354
+
355
+ async def _run_ws(
356
+ self, word_stream: tokenize.WordStream, output_emitter: tts.AudioEmitter
357
+ ) -> None:
358
+ segment_id = utils.shortuuid()
359
+ output_emitter.start_segment(segment_id=segment_id)
360
+
361
+ async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
362
+ async for word in word_stream:
363
+ text_msg = {"text": f"{word.token} "}
364
+ self._mark_started()
365
+ await ws.send_str(json.dumps(text_msg))
366
+
367
+ stop_msg = {"text": "<STOP>"}
368
+ await ws.send_str(json.dumps(stop_msg))
369
+
370
+ async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
371
+ while True:
372
+ msg = await ws.receive()
373
+
374
+ if msg.type in (
375
+ aiohttp.WSMsgType.CLOSE,
376
+ aiohttp.WSMsgType.CLOSED,
377
+ aiohttp.WSMsgType.CLOSING,
378
+ ):
379
+ raise APIStatusError("NeuPhonic websocket connection closed unexpectedly")
380
+
381
+ if msg.type == aiohttp.WSMsgType.TEXT:
382
+ try:
383
+ resp = json.loads(msg.data)
384
+ except json.JSONDecodeError:
385
+ logger.warning("Invalid JSON from NeuPhonic")
386
+ continue
387
+
388
+ if resp.get("type") == "error":
389
+ raise APIError(f"NeuPhonic returned error: {resp}")
390
+
391
+ data = resp.get("data", {})
392
+ audio_data = data.get("audio")
393
+ if audio_data and audio_data != "":
394
+ try:
395
+ b64data = base64.b64decode(audio_data)
396
+ if b64data:
397
+ output_emitter.push(b64data)
398
+ except Exception as e:
399
+ logger.warning("Failed to decode NeuPhonic audio data: %s", e)
400
+
401
+ if data.get("stop"):
402
+ output_emitter.end_segment()
403
+ break
404
+
405
+ elif msg.type == aiohttp.WSMsgType.BINARY:
406
+ pass
407
+ else:
408
+ logger.warning("Unexpected NeuPhonic message type: %s", msg.type)
409
+
410
+ async with self._tts._pool.connection(timeout=self._conn_options.timeout) as ws:
411
+ tasks = [
412
+ asyncio.create_task(send_task(ws)),
413
+ asyncio.create_task(recv_task(ws)),
414
+ ]
415
+
416
+ try:
417
+ await asyncio.gather(*tasks)
418
+ finally:
419
+ await utils.aio.gracefully_cancel(*tasks)
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.15"
15
+ __version__ = "1.2.16"
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-neuphonic
3
- Version: 1.2.15
3
+ Version: 1.2.16
4
4
  Summary: Neuphonic inference plugin for LiveKit Agents
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
7
7
  Project-URL: Source, https://github.com/livekit/agents
8
8
  Author-email: LiveKit <hello@livekit.io>
9
9
  License-Expression: Apache-2.0
10
- Keywords: audio,livekit,neuphonic,realtime,webrtc
10
+ Keywords: ai,audio,livekit,neuphonic,realtime,video,voice
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3 :: Only
@@ -16,7 +16,7 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Classifier: Topic :: Multimedia :: Sound/Audio
17
17
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
18
  Requires-Python: >=3.9.0
19
- Requires-Dist: livekit-agents>=1.2.15
19
+ Requires-Dist: livekit-agents>=1.2.16
20
20
  Description-Content-Type: text/markdown
21
21
 
22
22
  # Neuphonic plugin for LiveKit Agents
@@ -0,0 +1,9 @@
1
+ livekit/plugins/neuphonic/__init__.py,sha256=c2yzK8LhbqZooNlJaX8TKKIjknTrZaEv6CmU9KF6dc4,1235
2
+ livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
3
+ livekit/plugins/neuphonic/models.py,sha256=dn6xtU7qJOI5XvEFupyww8IbCcwt5Ki-yS7ua_v6YxM,96
4
+ livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/neuphonic/tts.py,sha256=Y89hyagvg6Q3Z8_PLSvriSr4bKWh3WwhAUiywMuS6Cc,15234
6
+ livekit/plugins/neuphonic/version.py,sha256=6RxW2Q7KoSNRlDtulIUp5F0_o0atksX-Xpp45NaSCaI,601
7
+ livekit_plugins_neuphonic-1.2.16.dist-info/METADATA,sha256=LoahARleDD10mD8DZBgfHQzicII2oFqHqyuVSrub_fc,1323
8
+ livekit_plugins_neuphonic-1.2.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_neuphonic-1.2.16.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- livekit/plugins/neuphonic/__init__.py,sha256=c2yzK8LhbqZooNlJaX8TKKIjknTrZaEv6CmU9KF6dc4,1235
2
- livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
3
- livekit/plugins/neuphonic/models.py,sha256=dn6xtU7qJOI5XvEFupyww8IbCcwt5Ki-yS7ua_v6YxM,96
4
- livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/neuphonic/tts.py,sha256=uoKZL2-VRm5UK4JX5KIJVKdAkGghzkBXZ55YAOrjvfE,8511
6
- livekit/plugins/neuphonic/version.py,sha256=R5FvTAJuFKBJlKNE37WH1vS6st7RUEFAUNaLi-rjprE,601
7
- livekit_plugins_neuphonic-1.2.15.dist-info/METADATA,sha256=7Z7Opz7pL_BOHOsJ_nFL62cErAJEgG98m0rRikJwM2s,1315
8
- livekit_plugins_neuphonic-1.2.15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- livekit_plugins_neuphonic-1.2.15.dist-info/RECORD,,