PyPI - livekit-plugins-neuphonic - Versions diffs - 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

livekit-plugins-neuphonic 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

livekit/plugins/neuphonic/__init__.py CHANGED Viewed

@@ -28,7 +28,7 @@ from .log import logger
 class NeuphonicPlugin(Plugin):
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__(__name__, __version__, __package__, logger)

livekit/plugins/neuphonic/models.py CHANGED Viewed

@@ -1,10 +1,3 @@
 from typing import Literal
-TTSEncodings = Literal[
-    "pcm_linear",
-    "pcm_mulaw",
-]
-TTSModels = Literal["neu-fast", "neu-hq"]
 TTSLangCodes = Literal["en", "nl", "es", "de", "hi", "en-hi", "ar"]

livekit/plugins/neuphonic/tts.py CHANGED Viewed

@@ -18,8 +18,7 @@ import asyncio
 import base64
 import json
 import os
-import weakref
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 import aiohttp
@@ -34,81 +33,31 @@ from livekit.agents import (
 from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
 from livekit.agents.utils import is_given
-from .log import logger
-from .models import TTSEncodings, TTSLangCodes, TTSModels
+from .models import TTSLangCodes
 API_BASE_URL = "api.neuphonic.com"
 AUTHORIZATION_HEADER = "X-API-KEY"
-NUM_CHANNELS = 1
 @dataclass
 class _TTSOptions:
     base_url: str
-    api_key: str
-    model: TTSModels | str
     lang_code: TTSLangCodes | str
-    encoding: TTSEncodings | str
-    sampling_rate: int
+    api_key: str
+    sample_rate: int
     speed: float
-    voice_id: NotGivenOr[str] = NOT_GIVEN
-    @property
-    def model_params(self) -> dict:
-        """Returns a dictionary of model parameters for API requests."""
-        params = {
-            "voice_id": self.voice_id,
-            "model": self.model,
-            "lang_code": self.lang_code,
-            "encoding": self.encoding,
-            "sampling_rate": self.sampling_rate,
-            "speed": self.speed,
-        }
-        return {k: v for k, v in params.items() if is_given(v) and v is not None}
-    def get_query_param_string(self):
-        """Forms the query parameter string from all model parameters."""
-        queries = []
-        for key, value in self.model_params.items():
-            queries.append(f"{key}={value}")
-        return "?" + "&".join(queries)
-def _parse_sse_message(message: str) -> dict:
-    """
-    Parse each response from the SSE endpoint.
-    The message will either be a string reading:
-    - `event: error`
-    - `event: message`
-    - `data: { "status_code": 200, "data": {"audio": ... } }`
-    """
-    message = message.strip()
-    if not message or "data" not in message:
-        return None
-    _, value = message.split(": ", 1)
-    message = json.loads(value)
-    if message.get("errors") is not None:
-        raise Exception(f"Status {message.status_code} error received: {message.errors}.")
-    return message
+    voice_id: str | None
 class TTS(tts.TTS):
     def __init__(
         self,
         *,
-        model: TTSModels | str = "neu_hq",
-        voice_id: NotGivenOr[str] = NOT_GIVEN,
+        voice_id: str = "8e9c4bc8-3979-48ab-8626-df53befc2090",
+        api_key: str | None = None,
         lang_code: TTSLangCodes | str = "en",
-        encoding: TTSEncodings | str = "pcm_linear",
         speed: float = 1.0,
         sample_rate: int = 22050,
-        api_key: NotGivenOr[str] = NOT_GIVEN,
         http_session: aiohttp.ClientSession | None = None,
         base_url: str = API_BASE_URL,
     ) -> None:
@@ -118,62 +67,34 @@ class TTS(tts.TTS):
         See https://docs.neuphonic.com for more documentation on all of these options, or go to https://app.neuphonic.com/ to test out different options.
         Args:
-            model (TTSModels | str, optional): The Neuphonic model to use. See Defaults to "neu_hq".
             voice_id (str, optional): The voice ID for the desired voice. Defaults to None.
             lang_code (TTSLanguages | str, optional): The language code for synthesis. Defaults to "en".
             encoding (TTSEncodings | str, optional): The audio encoding format. Defaults to "pcm_mulaw".
             speed (float, optional): The audio playback speed. Defaults to 1.0.
             sample_rate (int, optional): The audio sample rate in Hz. Defaults to 22050.
-            api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_TOKEN environment variable.
+            api_key (str | None, optional): The Neuphonic API key. If not provided, it will be read from the NEUPHONIC_API_KEY environment variable.
             http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
             base_url (str, optional): The base URL for the Neuphonic API. Defaults to "api.neuphonic.com".
         """  # noqa: E501
         super().__init__(
             capabilities=tts.TTSCapabilities(streaming=True),
             sample_rate=sample_rate,
-            num_channels=NUM_CHANNELS,
-        )
-        neuphonic_api_key = (
-            api_key
-            if is_given(api_key)
-            else os.environ.get("NEUPHONIC_API_KEY") or os.environ.get("NEUPHONIC_API_TOKEN")
+            num_channels=1,
         )
-        if not neuphonic_api_key:
+        api_key = api_key or os.environ.get("NEUPHONIC_API_KEY")
+        if not api_key:
             raise ValueError("API key must be provided or set in NEUPHONIC_API_KEY")
         self._opts = _TTSOptions(
-            model=model,
             voice_id=voice_id,
             lang_code=lang_code,
-            encoding=encoding,
+            api_key=api_key,
             speed=speed,
-            sampling_rate=sample_rate,
-            api_key=neuphonic_api_key,
+            sample_rate=sample_rate,
             base_url=base_url,
         )
         self._session = http_session
-        self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
-            connect_cb=self._connect_ws,
-            close_cb=self._close_ws,
-            max_session_duration=90,
-            mark_refreshed_on_get=True,
-        )
-        self._streams = weakref.WeakSet[SynthesizeStream]()
-    async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
-        session = self._ensure_session()
-        url = f"wss://{self._opts.base_url}/speak/{self._opts.lang_code}{self._opts.get_query_param_string()}"
-        return await asyncio.wait_for(
-            session.ws_connect(url, headers={AUTHORIZATION_HEADER: self._opts.api_key}),
-            self._conn_options.timeout,
-        )
-    async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
-        await ws.close()
     def _ensure_session(self) -> aiohttp.ClientSession:
         if not self._session:
@@ -181,16 +102,11 @@ class TTS(tts.TTS):
         return self._session
-    def prewarm(self) -> None:
-        self._pool.prewarm()
     def update_options(
         self,
         *,
-        model: NotGivenOr[TTSModels] = NOT_GIVEN,
         voice_id: NotGivenOr[str] = NOT_GIVEN,
         lang_code: NotGivenOr[TTSLangCodes] = NOT_GIVEN,
-        encoding: NotGivenOr[TTSEncodings] = NOT_GIVEN,
         speed: NotGivenOr[float] = NOT_GIVEN,
         sample_rate: NotGivenOr[int] = NOT_GIVEN,
     ) -> None:
@@ -209,51 +125,19 @@ class TTS(tts.TTS):
             speed (float, optional): The audio playback speed.
             sample_rate (int, optional): The audio sample rate in Hz.
         """  # noqa: E501
-        if is_given(model):
-            self._opts.model = model
         if is_given(voice_id):
             self._opts.voice_id = voice_id
         if is_given(lang_code):
             self._opts.lang_code = lang_code
-        if is_given(encoding):
-            self._opts.encoding = encoding
         if is_given(speed):
             self._opts.speed = speed
         if is_given(sample_rate):
-            self._opts.sampling_rate = sample_rate
-        self._pool.invalidate()
+            self._opts.sample_rate = sample_rate
     def synthesize(
-        self,
-        text: str,
-        *,
-        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
+        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
     ) -> ChunkedStream:
-        return ChunkedStream(
-            tts=self,
-            input_text=text,
-            conn_options=conn_options,
-            opts=self._opts,
-            session=self._ensure_session(),
-        )
-    def stream(
-        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
-    ) -> SynthesizeStream:
-        stream = SynthesizeStream(
-            tts=self,
-            pool=self._pool,
-            opts=self._opts,
-        )
-        self._streams.add(stream)
-        return stream
-    async def aclose(self) -> None:
-        for stream in list(self._streams):
-            await stream.aclose()
-        self._streams.clear()
-        await self._pool.aclose()
-        await super().aclose()
+        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
 class ChunkedStream(tts.ChunkedStream):
@@ -264,157 +148,86 @@ class ChunkedStream(tts.ChunkedStream):
         *,
         tts: TTS,
         input_text: str,
-        opts: _TTSOptions,
-        session: aiohttp.ClientSession,
         conn_options: APIConnectOptions,
     ) -> None:
         super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
-        self._opts, self._session = opts, session
-    async def _run(self) -> None:
-        request_id = utils.shortuuid()
-        bstream = utils.audio.AudioByteStream(
-            sample_rate=self._opts.sampling_rate, num_channels=NUM_CHANNELS
-        )
-        json_data = {
-            "text": self._input_text,
-            **self._opts.model_params,
-        }
-        headers = {
-            AUTHORIZATION_HEADER: self._opts.api_key,
-        }
+        self._tts: TTS = tts
+        self._opts = replace(tts._opts)
+    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
         try:
-            async with self._session.post(
+            async with self._tts._ensure_session().post(
                 f"https://{self._opts.base_url}/sse/speak/{self._opts.lang_code}",
-                headers=headers,
-                json=json_data,
+                headers={AUTHORIZATION_HEADER: self._opts.api_key},
+                json={
+                    "text": self._input_text,
+                    "voice_id": self._opts.voice_id,
+                    "lang_code": self._opts.lang_code,
+                    "encoding": "pcm_linear",
+                    "sampling_rate": self._opts.sample_rate,
+                    "speed": self._opts.speed,
+                },
                 timeout=aiohttp.ClientTimeout(
                     total=30,
                     sock_connect=self._conn_options.timeout,
                 ),
-                read_bufsize=10
-                * 1024
-                * 1024,  # large read_bufsize to avoid `ValueError: Chunk too big`
-            ) as response:
-                response.raise_for_status()
-                emitter = tts.SynthesizedAudioEmitter(
-                    event_ch=self._event_ch,
-                    request_id=request_id,
+                # large read_bufsize to avoid `ValueError: Chunk too big`
+                read_bufsize=10 * 1024 * 1024,
+            ) as resp:
+                resp.raise_for_status()
+                output_emitter.initialize(
+                    request_id=utils.shortuuid(),
+                    sample_rate=self._opts.sample_rate,
+                    num_channels=1,
+                    mime_type="audio/pcm",
                 )
-                async for line in response.content:
-                    message = line.decode("utf-8").strip()
-                    if message:
-                        parsed_message = _parse_sse_message(message)
-                        if (
-                            parsed_message is not None
-                            and parsed_message.get("data", {}).get("audio") is not None
-                        ):
-                            audio_bytes = base64.b64decode(parsed_message["data"]["audio"])
-                            for frame in bstream.write(audio_bytes):
-                                emitter.push(frame)
-                for frame in bstream.flush():
-                    emitter.push(frame)
-                emitter.flush()
-        except asyncio.TimeoutError as e:
-            raise APITimeoutError() from e
+                async for line in resp.content:
+                    message = line.decode("utf-8")
+                    if not message:
+                        continue
+                    parsed_message = _parse_sse_message(message)
+                    if (
+                        parsed_message is not None
+                        and parsed_message.get("data", {}).get("audio") is not None
+                    ):
+                        audio_bytes = base64.b64decode(parsed_message["data"]["audio"])
+                        output_emitter.push(audio_bytes)
+                output_emitter.flush()
+        except asyncio.TimeoutError:
+            raise APITimeoutError() from None
         except aiohttp.ClientResponseError as e:
             raise APIStatusError(
-                message=e.message,
-                status_code=e.status,
-                request_id=None,
-                body=None,
-            ) from e
+                message=e.message, status_code=e.status, request_id=None, body=None
+            ) from None
         except Exception as e:
             raise APIConnectionError() from e
-class SynthesizeStream(tts.SynthesizeStream):
-    def __init__(
-        self,
-        *,
-        tts: TTS,
-        opts: _TTSOptions,
-        pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
-    ):
-        super().__init__(tts=tts)
-        self._opts, self._pool = opts, pool
-    async def _run(self) -> None:
-        request_id = utils.shortuuid()
-        async def _send_task(ws: aiohttp.ClientWebSocketResponse):
-            """Stream text to the websocket."""
-            async for data in self._input_ch:
-                self._mark_started()
-                if isinstance(data, self._FlushSentinel):
-                    await ws.send_str(json.dumps({"text": "<STOP>"}))
-                    continue
-                await ws.send_str(json.dumps({"text": data}))
-        async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
-            audio_bstream = utils.audio.AudioByteStream(
-                sample_rate=self._opts.sampling_rate,
-                num_channels=NUM_CHANNELS,
-            )
-            emitter = tts.SynthesizedAudioEmitter(
-                event_ch=self._event_ch,
-                request_id=request_id,
-            )
-            while True:
-                try:
-                    msg = await ws.receive()
-                except Exception as e:
-                    raise APIStatusError(
-                        "Neuphonic connection closed unexpectedly",
-                        request_id=request_id,
-                    ) from e
-                if msg.type in (
-                    aiohttp.WSMsgType.CLOSED,
-                    aiohttp.WSMsgType.CLOSE,
-                    aiohttp.WSMsgType.CLOSING,
-                ):
-                    raise APIStatusError(
-                        "Neuphonic connection closed unexpectedly",
-                        request_id=request_id,
-                    )
-                if msg.type != aiohttp.WSMsgType.TEXT:
-                    logger.warning("Unexpected Neuphonic message type %s", msg.type)
-                    continue
-                data = json.loads(msg.data)
-                if data.get("data"):
-                    b64data = base64.b64decode(data["data"]["audio"])
-                    for frame in audio_bstream.write(b64data):
-                        emitter.push(frame)
-                    if data["data"].get("stop"):  # A bool flag, is True when audio reaches "<STOP>"
-                        for frame in audio_bstream.flush():
-                            emitter.push(frame)
-                        emitter.flush()
-                        break  # we are not going to receive any more audio
-                else:
-                    logger.error("Unexpected Neuphonic message %s", data)
-        async with self._pool.connection() as ws:
-            tasks = [
-                asyncio.create_task(_send_task(ws)),
-                asyncio.create_task(_recv_task(ws)),
-            ]
-            try:
-                await asyncio.gather(*tasks)
-            finally:
-                await utils.aio.gracefully_cancel(*tasks)
+def _parse_sse_message(message: str) -> dict | None:
+    """
+    Parse each response from the SSE endpoint.
+    The message will either be a string reading:
+    - `event: error`
+    - `event: message`
+    - `data: { "status_code": 200, "data": {"audio": ... } }`
+    """
+    message = message.strip()
+    if not message or "data" not in message:
+        return None
+    _, value = message.split(": ", 1)
+    message_dict: dict = json.loads(value)
+    if message_dict.get("errors") is not None:
+        raise Exception(
+            f"received error status {message_dict['status_code']}: {message_dict['errors']}"
+        )
+    return message_dict

livekit/plugins/neuphonic/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.0.23"
+__version__ = "1.1.0"

{livekit_plugins_neuphonic-1.0.23.dist-info → livekit_plugins_neuphonic-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-neuphonic
-Version: 1.0.23
+Version: 1.1.0
 Summary: Neuphonic inference plugin for LiveKit Agents
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -16,7 +16,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Multimedia :: Sound/Audio
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Requires-Dist: livekit-agents>=1.0.23
+Requires-Dist: livekit-agents>=1.1.0
 Description-Content-Type: text/markdown
 # Neuphonic plugin for LiveKit Agents

livekit_plugins_neuphonic-1.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+livekit/plugins/neuphonic/__init__.py,sha256=c2yzK8LhbqZooNlJaX8TKKIjknTrZaEv6CmU9KF6dc4,1235
+livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
+livekit/plugins/neuphonic/models.py,sha256=dn6xtU7qJOI5XvEFupyww8IbCcwt5Ki-yS7ua_v6YxM,96
+livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/neuphonic/tts.py,sha256=q-rV5SjadtFRNr19lLCyeb4tZvAdLoUy44tz2xSv1jQ,8370
+livekit/plugins/neuphonic/version.py,sha256=7SjyflIFTjH0djSotKGIRoRykPCqMpVYetIlvHMFuh0,600
+livekit_plugins_neuphonic-1.1.0.dist-info/METADATA,sha256=9GFF2OOfCGccJAjWvZ32MBd_QE0tABpzBDklbGZEx00,1313
+livekit_plugins_neuphonic-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_neuphonic-1.1.0.dist-info/RECORD,,

livekit_plugins_neuphonic-1.0.23.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-livekit/plugins/neuphonic/__init__.py,sha256=QtlNAqYqv0gIHcVddh5INfdC1bnGkXErplWzhThtN3Q,1227
-livekit/plugins/neuphonic/log.py,sha256=rAHz71IcbvPkixndXBVffPQsmWUKTLqRaYRuPIxO29w,72
-livekit/plugins/neuphonic/models.py,sha256=Svfn_sWA3Q2ZXsPBXY-K5hslq5FE62hvyXBES2C3aSc,201
-livekit/plugins/neuphonic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/neuphonic/tts.py,sha256=OIi6kqAJqqbczW1etEfy2-c244RqK2agm9kKalUdO28,14577
-livekit/plugins/neuphonic/version.py,sha256=BRUqwxRBnPVqEcIODJdaZHGAanu4zkwM4NsAQjNtUEM,601
-livekit_plugins_neuphonic-1.0.23.dist-info/METADATA,sha256=mcvg57hZGjsqiVWQJzwEQ4br0cLWUF-b0vrWB8pw-xo,1315
-livekit_plugins_neuphonic-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_neuphonic-1.0.23.dist-info/RECORD,,

{livekit_plugins_neuphonic-1.0.23.dist-info → livekit_plugins_neuphonic-1.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-neuphonic 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl

livekit-plugins-neuphonic 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl