PyPI - livekit-plugins-resemble - Versions diffs - 1.0.22__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

livekit-plugins-resemble 1.0.22py3-none-any.whl → 1.1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-resemble might be problematic. Click here for more details.

Files changed (6) hide show

livekit/plugins/resemble/tts.py CHANGED Viewed

@@ -19,13 +19,14 @@ import base64
 import json
 import os
 import weakref
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 import aiohttp
 from livekit.agents import (
     APIConnectionError,
     APIConnectOptions,
+    APIError,
     APIStatusError,
     APITimeoutError,
     tokenize,
@@ -38,7 +39,6 @@ from .log import logger
 RESEMBLE_WEBSOCKET_URL = "wss://websocket.cluster.resemble.ai/stream"
 RESEMBLE_REST_API_URL = "https://f.cluster.resemble.ai/synthesize"
-NUM_CHANNELS = 1
 DEFAULT_VOICE_UUID = "55592656"
 BUFFERED_WORDS_COUNT = 3
@@ -77,7 +77,7 @@ class TTS(tts.TTS):
         super().__init__(
             capabilities=tts.TTSCapabilities(streaming=use_streaming),
             sample_rate=sample_rate,
-            num_channels=NUM_CHANNELS,
+            num_channels=1,
         )
         api_key = api_key or os.environ.get("RESEMBLE_API_KEY")
@@ -107,18 +107,16 @@ class TTS(tts.TTS):
             close_cb=self._close_ws,
         )
-    async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
-        session = self._ensure_session()
+    async def _connect_ws(self, timeout: float) -> aiohttp.ClientWebSocketResponse:
         return await asyncio.wait_for(
-            session.ws_connect(
+            self._ensure_session().ws_connect(
                 RESEMBLE_WEBSOCKET_URL,
                 headers={"Authorization": f"Bearer {self._api_key}"},
             ),
-            self._conn_options.timeout,
+            timeout,
         )
-    async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
+    async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
         await ws.close()
     def _ensure_session(self) -> aiohttp.ClientSession:
@@ -134,153 +132,95 @@ class TTS(tts.TTS):
         self,
         *,
         voice_uuid: str | None = None,
-        sample_rate: int | None = None,
     ) -> None:
         """
         Update the Text-to-Speech (TTS) configuration options.
         Args:
             voice_uuid (str, optional): The voice UUID for the desired voice.
-            sample_rate (int, optional): The audio sample rate in Hz.
         """  # noqa: E501
         self._opts.voice_uuid = voice_uuid or self._opts.voice_uuid
-        self._opts.sample_rate = sample_rate or self._opts.sample_rate
     def synthesize(
-        self,
-        text: str,
-        *,
-        conn_options: APIConnectOptions | None = None,
+        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
     ) -> ChunkedStream:
-        return ChunkedStream(
-            tts=self,
-            input_text=text,
-            conn_options=conn_options or DEFAULT_API_CONNECT_OPTIONS,
-            opts=self._opts,
-            api_key=self._api_key,
-            session=self._ensure_session(),
-        )
+        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
-    def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
-        stream = SynthesizeStream(
-            tts=self,
-            pool=self._pool,
-            opts=self._opts,
-            api_key=self._api_key,
-        )
+    def stream(
+        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
+    ) -> SynthesizeStream:
+        stream = SynthesizeStream(tts=self, conn_options=conn_options)
         self._streams.add(stream)
         return stream
     async def aclose(self) -> None:
         for stream in list(self._streams):
             await stream.aclose()
         self._streams.clear()
         await self._pool.aclose()
-        await super().aclose()
 class ChunkedStream(tts.ChunkedStream):
     """Synthesize text into speech in one go using Resemble AI's REST API."""
-    def __init__(
-        self,
-        *,
-        tts: TTS,
-        input_text: str,
-        opts: _TTSOptions,
-        conn_options: APIConnectOptions,
-        api_key: str,
-        session: aiohttp.ClientSession,
-    ) -> None:
+    def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
         super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
-        self._opts, self._session, self._api_key = opts, session, api_key
-    async def _run(self) -> None:
-        request_id = utils.shortuuid()
-        # Create request headers
-        headers = {
-            "Authorization": f"Bearer {self._api_key}",
-            "Content-Type": "application/json",
-            "Accept": "application/json",  # Expect JSON response
-        }
-        # Create request payload
-        payload = {
-            "voice_uuid": self._opts.voice_uuid,
-            "data": self._input_text,
-            "sample_rate": self._opts.sample_rate,
-            "precision": "PCM_16",
-        }
-        decoder = utils.codecs.AudioStreamDecoder(
-            sample_rate=self._opts.sample_rate,
-            num_channels=NUM_CHANNELS,
-        )
+        self._tts: TTS = tts
+        self._opts = replace(tts._opts)
+    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
         try:
-            async with self._session.post(
+            async with self._tts._ensure_session().post(
                 RESEMBLE_REST_API_URL,
-                headers=headers,
-                json=payload,
+                headers={
+                    "Authorization": f"Bearer {self._tts._api_key}",
+                    "Content-Type": "application/json",
+                    "Accept": "application/json",
+                },
+                json={
+                    "voice_uuid": self._opts.voice_uuid,
+                    "data": self._input_text,
+                    "sample_rate": self._opts.sample_rate,
+                    "precision": "PCM_16",
+                },
                 timeout=aiohttp.ClientTimeout(
                     total=30,
                     sock_connect=self._conn_options.timeout,
                 ),
-            ) as response:
-                response.raise_for_status()
-                response_json = await response.json()
+            ) as resp:
+                resp.raise_for_status()
+                response_json = await resp.json()
-                # Check for success
                 if not response_json.get("success", False):
                     issues = response_json.get("issues", ["Unknown error"])
                     error_msg = "; ".join(issues)
-                    raise APIStatusError(
+                    raise APIError(
                         message=f"Resemble API returned failure: {error_msg}",
-                        status_code=response.status,
-                        request_id=request_id,
                         body=json.dumps(response_json),
                     )
-                # Extract base64-encoded audio content
-                audio_content_b64 = response_json.get("audio_content")
-                if not audio_content_b64:
-                    raise APIStatusError(
-                        message="No audio content in response",
-                        status_code=response.status,
-                        request_id=request_id,
-                        body=json.dumps(response_json),
-                    )
+                output_emitter.initialize(
+                    request_id=utils.shortuuid(),
+                    sample_rate=self._opts.sample_rate,
+                    num_channels=1,
+                    mime_type="audio/wav",
+                )
-                # Decode base64 to get raw audio bytes
-                audio_bytes = base64.b64decode(audio_content_b64)
-                decoder.push(audio_bytes)
-                decoder.end_input()
+                audio_b64 = response_json["audio_content"]
+                audio_bytes = base64.b64decode(audio_b64)
-                emitter = tts.SynthesizedAudioEmitter(
-                    event_ch=self._event_ch,
-                    request_id=request_id,
-                )
-                async for frame in decoder:
-                    emitter.push(frame)
-                emitter.flush()
+                output_emitter.push(audio_bytes)
+                output_emitter.flush()
+        except asyncio.TimeoutError:
+            raise APITimeoutError() from None
         except aiohttp.ClientResponseError as e:
             raise APIStatusError(
-                message=e.message,
-                status_code=e.status,
-                request_id=request_id,
-                body=f"resemble api error: {str(e)}",
-            ) from e
-        except asyncio.TimeoutError as e:
-            raise APITimeoutError() from e
-        except aiohttp.ClientError as e:
-            raise APIConnectionError(
-                message=f"Resemble API connection error: {str(e)}",
-            ) from e
+                message=e.message, status_code=e.status, request_id=None, body=None
+            ) from None
         except Exception as e:
-            raise APIConnectionError(f"Error during synthesis: {str(e)}") from e
-        finally:
-            await decoder.aclose()
+            raise APIConnectionError() from e
 class SynthesizeStream(tts.SynthesizeStream):
@@ -291,44 +231,45 @@ class SynthesizeStream(tts.SynthesizeStream):
     synthesis. Note that this requires a Business plan subscription with Resemble AI.
     """
-    def __init__(
-        self,
-        *,
-        tts: TTS,
-        opts: _TTSOptions,
-        pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
-        api_key: str,
-    ):
-        super().__init__(tts=tts)
-        self._opts, self._pool, self._api_key = opts, pool, api_key
-    async def _run(self) -> None:
-        request_id = utils.shortuuid()
+    def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
+        super().__init__(tts=tts, conn_options=conn_options)
+        self._tts: TTS = tts
+        self._opts = replace(tts._opts)
         self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()
-        @utils.log_exceptions(logger=logger)
-        async def _tokenize_input():
+    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
+        request_id = utils.shortuuid()
+        output_emitter.initialize(
+            request_id=request_id,
+            sample_rate=self._opts.sample_rate,
+            num_channels=1,
+            stream=True,
+            mime_type="audio/mp3",
+        )
+        async def _tokenize_input() -> None:
             """tokenize text from the input_ch to words"""
             input_stream = None
-            async for input in self._input_ch:
-                if isinstance(input, str):
+            async for text in self._input_ch:
+                if isinstance(text, str):
                     if input_stream is None:
                         # new segment (after flush for e.g)
                         input_stream = self._opts.tokenizer.stream()
                         self._segments_ch.send_nowait(input_stream)
-                    input_stream.push_text(input)
-                elif isinstance(input, self._FlushSentinel):
+                    input_stream.push_text(text)
+                elif isinstance(text, self._FlushSentinel):
                     if input_stream is not None:
                         input_stream.end_input()
                     input_stream = None
             if input_stream is not None:
                 input_stream.end_input()
             self._segments_ch.close()
-        @utils.log_exceptions(logger=logger)
-        async def _process_segments():
+        async def _process_segments() -> None:
             async for input_stream in self._segments_ch:
-                await self._run_ws(input_stream)
+                await self._run_ws(input_stream, output_emitter)
         tasks = [
             asyncio.create_task(_tokenize_input()),
@@ -336,117 +277,77 @@ class SynthesizeStream(tts.SynthesizeStream):
         ]
         try:
             await asyncio.gather(*tasks)
-        except asyncio.TimeoutError as e:
-            raise APITimeoutError() from e
+        except asyncio.TimeoutError:
+            raise APITimeoutError() from None
         except aiohttp.ClientResponseError as e:
             raise APIStatusError(
-                message=e.message,
-                status_code=e.status,
-                request_id=request_id,
-                body=None,
-            ) from e
+                message=e.message, status_code=e.status, request_id=request_id, body=None
+            ) from None
         except Exception as e:
             raise APIConnectionError() from e
         finally:
             await utils.aio.gracefully_cancel(*tasks)
     async def _run_ws(
-        self,
-        input_stream: tokenize.SentenceStream,
+        self, input_stream: tokenize.SentenceStream, output_emitter: tts.AudioEmitter
     ) -> None:
-        async with self._pool.connection() as ws:
-            segment_id = utils.shortuuid()
-            decoder = utils.codecs.AudioStreamDecoder(
-                sample_rate=self._opts.sample_rate,
-                num_channels=NUM_CHANNELS,
-            )
-            index_lock = asyncio.Lock()
-            current_index = 0
-            pending_requests = set()
-            @utils.log_exceptions(logger=logger)
-            async def _send_task(ws: aiohttp.ClientWebSocketResponse):
-                nonlocal current_index
-                index = 0
-                async for data in input_stream:
-                    payload = {
-                        "voice_uuid": self._opts.voice_uuid,
-                        "data": data.token,
-                        "request_id": index,
-                        "sample_rate": self._opts.sample_rate,
-                        "precision": "PCM_16",
-                        "output_format": "mp3",
-                    }
-                    async with index_lock:
-                        pending_requests.add(index)
-                    index += 1
-                    current_index = index
-                    await ws.send_str(json.dumps(payload))
-            @utils.log_exceptions(logger=logger)
-            async def _emit_task():
-                emitter = tts.SynthesizedAudioEmitter(
-                    event_ch=self._event_ch,
-                    request_id=str(current_index),
-                    segment_id=segment_id,
-                )
-                async for frame in decoder:
-                    emitter.push(frame)
-                emitter.flush()
-            @utils.log_exceptions(logger=logger)
-            async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
-                while True:
-                    msg = await ws.receive()
-                    if msg.type in (
-                        aiohttp.WSMsgType.CLOSED,
-                        aiohttp.WSMsgType.CLOSE,
-                        aiohttp.WSMsgType.CLOSING,
-                    ):
-                        raise APIStatusError(
-                            "Resemble connection closed unexpectedly",
-                            request_id=str(current_index),
-                        )
-                    if msg.type != aiohttp.WSMsgType.TEXT:
-                        logger.warning("Unexpected Resemble message type %s", msg.type)
-                        continue
-                    data = json.loads(msg.data)
-                    if data.get("type") == "audio":
-                        if data.get("audio_content", None):
-                            b64data = base64.b64decode(data["audio_content"])
-                            decoder.push(b64data)
-                    elif data.get("type") == "audio_end":
-                        async with index_lock:
-                            index = data["request_id"]
-                            pending_requests.remove(index)
-                            if not pending_requests:
-                                decoder.end_input()
-                                break  # we are not going to receive any more audio
-                    else:
-                        logger.error("Unexpected Resemble message %s", data)
+        segment_id = utils.shortuuid()
+        output_emitter.start_segment(segment_id=segment_id)
+        last_index = 0
+        input_ended = False
+        async def _send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
+            nonlocal input_ended, last_index
+            async for data in input_stream:
+                last_index += 1
+                payload = {
+                    "voice_uuid": self._opts.voice_uuid,
+                    "data": data.token,
+                    "request_id": last_index,
+                    "sample_rate": self._opts.sample_rate,
+                    "precision": "PCM_16",
+                    "output_format": "mp3",
+                }
+                self._mark_started()
+                await ws.send_str(json.dumps(payload))
+            input_ended = True
+        async def _recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
+            while True:
+                msg = await ws.receive()
+                if msg.type in (
+                    aiohttp.WSMsgType.CLOSED,
+                    aiohttp.WSMsgType.CLOSE,
+                    aiohttp.WSMsgType.CLOSING,
+                ):
+                    raise APIStatusError("Resemble connection closed unexpectedly")
+                if msg.type != aiohttp.WSMsgType.TEXT:
+                    logger.warning("Unexpected Resemble message type %s", msg.type)
+                    continue
+                data = json.loads(msg.data)
+                if data.get("type") == "audio":
+                    if data.get("audio_content", None):
+                        b64data = base64.b64decode(data["audio_content"])
+                        output_emitter.push(b64data)
+                elif data.get("type") == "audio_end":
+                    index = data["request_id"]
+                    if index == last_index and input_ended:
+                        output_emitter.end_segment()
+                        break
+                else:
+                    logger.error("Unexpected Resemble message %s", data)
+        async with self._tts._pool.connection(timeout=self._conn_options.timeout) as ws:
             tasks = [
                 asyncio.create_task(_send_task(ws)),
                 asyncio.create_task(_recv_task(ws)),
-                asyncio.create_task(_emit_task()),
             ]
             try:
                 await asyncio.gather(*tasks)
-            except asyncio.TimeoutError as e:
-                raise APITimeoutError() from e
-            except aiohttp.ClientResponseError as e:
-                raise APIStatusError(
-                    message=e.message,
-                    status_code=e.status,
-                    request_id=str(current_index),
-                    body=None,
-                ) from e
-            except Exception as e:
-                raise APIConnectionError() from e
             finally:
                 await utils.aio.gracefully_cancel(*tasks)

livekit/plugins/resemble/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.0.22"
+__version__ = "1.1.0"

{livekit_plugins_resemble-1.0.22.dist-info → livekit_plugins_resemble-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-resemble
-Version: 1.0.22
+Version: 1.1.0
 Summary: LiveKit Agents Plugin for Resemble AI
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
 Classifier: Topic :: Multimedia :: Video
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Requires-Dist: livekit-agents>=1.0.22
+Requires-Dist: livekit-agents>=1.1.0
 Description-Content-Type: text/markdown
 # Resemble plugin for LiveKit Agents

livekit_plugins_resemble-1.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+livekit/plugins/resemble/__init__.py,sha256=bu6APAMiEaeKgD-hfMgoSv3EeH8_OQE5aJ_qPRv6pZ8,1236
+livekit/plugins/resemble/log.py,sha256=Pgg3yqt4OUcjrnnF8SKfH7G-Dk7jFI0yIhDa5hjTW5k,71
+livekit/plugins/resemble/models.py,sha256=nK29wOCKkS29KjbiDaTpb7mlmUQSad9U_0bTD8yRcwk,74
+livekit/plugins/resemble/py.typed,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
+livekit/plugins/resemble/tts.py,sha256=FWxE6pPRgHVi_mDQgBA9qaoMwsX2lLK2fJqwPemdzGc,12838
+livekit/plugins/resemble/version.py,sha256=7SjyflIFTjH0djSotKGIRoRykPCqMpVYetIlvHMFuh0,600
+livekit_plugins_resemble-1.1.0.dist-info/METADATA,sha256=5kKSonWfHWA5jPc7lMo-d-a1uMeIrSpQbjCxbPuSWNo,4935
+livekit_plugins_resemble-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_resemble-1.1.0.dist-info/RECORD,,

livekit_plugins_resemble-1.0.22.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-livekit/plugins/resemble/__init__.py,sha256=bu6APAMiEaeKgD-hfMgoSv3EeH8_OQE5aJ_qPRv6pZ8,1236
-livekit/plugins/resemble/log.py,sha256=Pgg3yqt4OUcjrnnF8SKfH7G-Dk7jFI0yIhDa5hjTW5k,71
-livekit/plugins/resemble/models.py,sha256=nK29wOCKkS29KjbiDaTpb7mlmUQSad9U_0bTD8yRcwk,74
-livekit/plugins/resemble/py.typed,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
-livekit/plugins/resemble/tts.py,sha256=N8T0NrYh_nW77DN9yzI4OiNmSxzY7h9fISD2xHYfI8A,16169
-livekit/plugins/resemble/version.py,sha256=-8dkOE2vDSF9WN8VoBrSwU2sb5YBGFuwPnSQXQ-uaYM,601
-livekit_plugins_resemble-1.0.22.dist-info/METADATA,sha256=TFC2j4BIUqNZQZplxGMZF0mM4nc4fyFYYkJG2Ua0w6M,4937
-livekit_plugins_resemble-1.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_resemble-1.0.22.dist-info/RECORD,,

{livekit_plugins_resemble-1.0.22.dist-info → livekit_plugins_resemble-1.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-resemble 1.0.22__py3-none-any.whl → 1.1.0__py3-none-any.whl

Potentially problematic release.

livekit-plugins-resemble 1.0.22py3-none-any.whl → 1.1.0py3-none-any.whl