PyPI - pipecat-respeecher - Versions diffs - 0.1.0__py3-none-any.whl - Mend

pipecat-respeecher 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

pipecat_respeecher/__init__.py +7 -0
pipecat_respeecher/tts.py +343 -0
pipecat_respeecher-0.1.0.dist-info/METADATA +110 -0
pipecat_respeecher-0.1.0.dist-info/RECORD +7 -0
pipecat_respeecher-0.1.0.dist-info/WHEEL +5 -0
pipecat_respeecher-0.1.0.dist-info/licenses/LICENSE +25 -0
pipecat_respeecher-0.1.0.dist-info/top_level.txt +1 -0

pipecat_respeecher/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Respeecher real-time text-to-speech integration for Pipecat."""
+from pipecat_respeecher.tts import RespeecherTTSService
+__all__ = [
+    "RespeecherTTSService",
+]

pipecat_respeecher/tts.py ADDED Viewed

@@ -0,0 +1,343 @@
+#
+# Copyright (c) 2025, Daily
+# Copyright (c) 2025, Respeecher
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+"""Respeecher real-time text-to-speech service implementation."""
+import base64
+import json
+import uuid
+from typing import AsyncGenerator, Optional
+from loguru import logger
+from pydantic import BaseModel, TypeAdapter, ValidationError
+from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
+    ErrorFrame,
+    Frame,
+    LLMFullResponseEndFrame,
+    StartFrame,
+    StartInterruptionFrame,
+    TTSAudioRawFrame,
+    TTSStartedFrame,
+    TTSStoppedFrame,
+)
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.tts_service import AudioContextTTSService, TTSService
+from pipecat.utils.tracing.service_decorators import traced_tts
+from respeecher.tts import (
+    ContextfulGenerationRequestParams,
+    StreamingOutputFormatParams,
+)
+from respeecher.tts import Response as TTSResponse
+from respeecher.voices import (
+    SamplingParamsParams as SamplingParams,  # TypedDict instead of a Pydantic model
+)
+from websockets.asyncio.client import connect as websocket_connect
+from websockets.protocol import State
+class RespeecherTTSService(AudioContextTTSService, TTSService):
+    """Respeecher real-time TTS service with WebSocket streaming and audio contexts.
+    Provides text-to-speech using Respeecher's streaming WebSocket API.
+    Supports audio context management and voice customization via sampling parameters.
+    """
+    class InputParams(BaseModel):
+        """Input parameters for Respeecher TTS configuration.
+        Parameters:
+            sampling_params: Sampling parameters used for speech synthesis.
+        """
+        sampling_params: SamplingParams = {}
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        voice_id: str,
+        model: str = "public/tts/en-rt",
+        url: str = "wss://api.respeecher.com/v1",
+        sample_rate: Optional[int] = None,
+        params: Optional[InputParams] = None,
+        **kwargs,
+    ):
+        """Initialize the Respeecher TTS service.
+        Args:
+            api_key: Respeecher API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            model: Model path for the Respeecher TTS API.
+            url: WebSocket base URL for Respeecher TTS API.
+            sample_rate: Audio sample rate. If None, uses default.
+            params: Additional input parameters for voice customization.
+            **kwargs: Additional arguments passed to TTSService.
+        """
+        AudioContextTTSService.__init__(self, reconnect_on_error=False)
+        TTSService.__init__(
+            self,
+            pause_frame_processing=True,
+            aggregate_sentences=False,
+            sample_rate=sample_rate,
+            **kwargs,
+        )
+        params = params or RespeecherTTSService.InputParams()
+        self._api_key = api_key
+        self._url = url
+        self._output_format: StreamingOutputFormatParams = {
+            "encoding": "pcm_s16le",
+            "sample_rate": sample_rate or 0,
+        }
+        self._settings = {"sampling_params": params.sampling_params}
+        self.set_model_name(model)
+        self.set_voice(voice_id)
+        self._context_id: str | None = None
+        self._receive_task = None
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True
+        """
+        return True
+    async def set_model(self, model: str):
+        """Set the TTS model.
+        Args:
+            model: The model name to use for synthesis.
+        """
+        self._model_id = model
+        await super().set_model(model)
+        logger.info(f"Switching TTS model to: [{model}]")
+        await self._disconnect()
+        await self._connect()
+    def _build_request(self, text: Optional[str] = None):
+        assert self._context_id is not None
+        request: ContextfulGenerationRequestParams = {
+            "transcript": text or "",
+            "continue": text is not None,
+            "context_id": self._context_id,
+            "voice": {
+                "id": self._voice_id,
+                "sampling_params": self._settings["sampling_params"],
+            },
+            "output_format": self._output_format,
+        }
+        return json.dumps(request)
+    async def start(self, frame: StartFrame):
+        """Start the Respeecher TTS service.
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        self._output_format["sample_rate"] = self.sample_rate
+        await self._connect()
+    async def stop(self, frame: EndFrame):
+        """Stop the Respeecher TTS service.
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        await self._disconnect()
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the Respeecher TTS service.
+        Args:
+            frame: The cancel frame.
+        """
+        await super().cancel(frame)
+        await self._disconnect()
+    async def _connect(self):
+        await self._connect_websocket()
+        if self._websocket and not self._receive_task:
+            self._receive_task = self.create_task(
+                self._receive_task_handler(self._report_error)
+            )
+    async def _disconnect(self):
+        if self._receive_task:
+            await self.cancel_task(self._receive_task)
+            self._receive_task = None
+        await self._disconnect_websocket()
+    async def _connect_websocket(self):
+        try:
+            if self._websocket and self._websocket.state is State.OPEN:
+                return
+            logger.debug("Connecting to Respeecher")
+            url = self._url.rstrip("/")
+            model_name = self._model_name.strip("/")
+            if model_name:
+                url += f"/{model_name}"
+            url += f"/tts/websocket?api_key={self._api_key}"
+            self._websocket = await websocket_connect(url)
+            await self._call_event_handler("on_connected")
+        except Exception as e:
+            logger.error(f"{self} initialization error: {e}")
+            self._context_id = None
+            self._websocket = None
+            await self._call_event_handler("on_connection_error", f"{e}")
+    async def _disconnect_websocket(self):
+        try:
+            await self.stop_all_metrics()
+            if self._websocket:
+                logger.debug("Disconnecting from Respeecher")
+                await self._websocket.close()
+        except Exception as e:
+            logger.error(f"{self} error closing websocket: {e}")
+        finally:
+            self._context_id = None
+            self._websocket = None
+            await self._call_event_handler("on_disconnected")
+    def _get_websocket(self):
+        if self._websocket:
+            return self._websocket
+        raise Exception("Websocket not connected")
+    async def _report_error(self, error: ErrorFrame):
+        await self._call_event_handler("on_connection_error", error.error)
+        await self.push_error_frame(error)
+    async def _handle_interruption(
+        self, frame: StartInterruptionFrame, direction: FrameDirection
+    ):
+        await super()._handle_interruption(frame, direction)
+        await self.stop_all_metrics()
+        if self._context_id:
+            cancel_request = json.dumps(
+                {"context_id": self._context_id, "cancel": True}
+            )
+            await self._get_websocket().send(cancel_request)
+            self._context_id = None
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with context awareness.
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
+        await super().process_frame(frame, direction)
+        if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
+            await self.flush_audio()
+    async def flush_audio(self):
+        """Flush any pending audio and finalize the current context."""
+        if not self._context_id or not self._websocket:
+            return
+        logger.trace(f"{self}: flushing audio")
+        flush_request = self._build_request()
+        await self._websocket.send(flush_request)
+        self._context_id = None
+    async def _receive_messages_until_closed(self):
+        async for message in self._get_websocket():
+            try:
+                response = TypeAdapter(TTSResponse).validate_json(message)
+            except ValidationError as e:
+                logger.error(f"{self} cannot parse message: {e}")
+                continue
+            if response.context_id is not None and not self.audio_context_available(
+                response.context_id
+            ):
+                # We don't need to log an error, getting here is expected
+                # and is how interruptions are handled in the superclass
+                continue
+            if response.type == "error":
+                logger.error(f"{self} error: {response}")
+                await self.push_frame(TTSStoppedFrame())
+                await self.stop_all_metrics()
+                await self.push_error(ErrorFrame(f"{self} error: {response.error}"))
+                continue
+            if response.type == "done":
+                await self.push_frame(TTSStoppedFrame())
+                await self.stop_ttfb_metrics()
+                await self.remove_audio_context(response.context_id)
+            elif response.type == "chunk":
+                await self.stop_ttfb_metrics()
+                frame = TTSAudioRawFrame(
+                    audio=base64.b64decode(response.data),
+                    sample_rate=self.sample_rate,
+                    num_channels=1,
+                )
+                await self.append_to_audio_context(response.context_id, frame)
+    async def _receive_messages(self):
+        while True:
+            await self._receive_messages_until_closed()
+            logger.info(f"{self} Respeecher disconnected, reconnecting")
+            await self._connect_websocket()
+    @traced_tts
+    async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
+        """Generate speech from text using Respeecher's streaming API.
+        Args:
+            text: The text to synthesize into speech.
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
+        logger.debug(f"{self}: Generating TTS [{text}]")
+        try:
+            if not self._websocket or self._websocket.state is State.CLOSED:
+                await self._connect()
+            if not self._context_id:
+                await self.start_ttfb_metrics()
+                yield TTSStartedFrame()
+                self._context_id = str(uuid.uuid4())
+                await self.create_audio_context(self._context_id)
+            generation_request = self._build_request(text)
+            try:
+                await self._get_websocket().send(generation_request)
+                await self.start_tts_usage_metrics(text)
+            except Exception as e:
+                yield ErrorFrame(error=f"{self} error sending message: {e}")
+                yield TTSStoppedFrame()
+                await self._disconnect()
+                await self._connect()
+                return
+            yield None
+        except Exception as e:
+            yield ErrorFrame(error=f"{self} exception: {e}")

pipecat_respeecher-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,110 @@
+Metadata-Version: 2.4
+Name: pipecat-respeecher
+Version: 0.1.0
+Summary: Respeecher real-time TTS plugin for Pipecat
+Author-email: Respeecher <nv@respeecher.com>
+Maintainer-email: Respeecher <nv@respeecher.com>
+License-Expression: BSD-2-Clause
+Project-URL: homepage, https://www.respeecher.com/real-time-tts-api
+Project-URL: documentation, https://space.respeecher.com/docs
+Project-URL: source, https://github.com/respeecher/pipecat-respeecher
+Keywords: tts,pipecat-ai,pipecat
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pipecat-ai>=0.0.99
+Requires-Dist: respeecher>=1.1.9
+Dynamic: license-file
+# Pipecat Respeecher Real-Time TTS Integration
+This is an official Respeecher integration for [Pipecat](https://pipecat.ai).
+[Learn more](https://www.respeecher.com/real-time-tts-api) about our real-time TTS API
+([Україномовна/Ukrainian TTS](https://www.respeecher.com/uk/real-time-tts-api)).
+**Maintainer: [Respeecher](https://www.respeecher.com/)**
+## Installation
+To be published.
+## Running the Example
+[`example.py`](./example.py) is a complete Pipecat pipeline with Respeecher TTS.
+(See [`example-ukrainian.py`](./example-ukrainian.py) for a Ukrainian language pipeline.)
+You can use it as a starting point for your agent,
+or you can head over to [Example Snippets](#example-snippets)
+if you already have a pipeline and just want to switch TTS.
+The complete pipeline example requires a
+[Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram) API key for
+Speech-to-Text, either a [Google Gemini](https://docs.pipecat.ai/server/services/llm/gemini)
+API key or a [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras) API key for LLM,
+and a [Respeecher Space](https://space.respeecher.com/api-keys) API key.
+The Speech-to-Text and LLM services are just examples and can generally be swapped for any
+other [supported Pipecat service](https://docs.pipecat.ai/server/services/supported-services).
+1. Clone this repository.
+2. Copy `env.example` to `.env` and fill in your API keys.
+3. Assuming you have the [uv](https://docs.astral.sh/uv/getting-started/installation/)
+   Python package manager installed, run `uv run example.py`, head over to
+   http://localhost:7860, and click _Connect_.
+   (The first run of `uv run example.py` may be slow because uv installs packages
+   and Pipecat downloads local models.)
+   The agent should greet you (both in text and in speech),
+   and you can converse with it through the chat interface or with your microphone.
+   (Make sure you have granted microphone access to the web page and that the microphone button
+   is not in the muted state.)
+## Example Snippets
+### Minimal Example
+```python
+import os
+from pipecat_respeecher import RespeecherTTSService
+tts = RespeecherTTSService(
+    api_key=os.getenv("RESPEECHER_API_KEY"),
+    voice_id="samantha",
+)
+```
+### Overriding Sampling Parameters
+See the [Sampling Parameters Guide](https://space.respeecher.com/docs/api/tts/sampling-params-guide).
+```python
+import os
+from pipecat_respeecher import RespeecherTTSService
+tts = RespeecherTTSService(
+    api_key=os.getenv("RESPEECHER_API_KEY"),
+    voice_id="samantha",
+    params=RespeecherTTSService.InputParams(
+        sampling_params={
+            "min_p": 0.01,
+        },
+    ),
+)
+```
+### Ukrainian Language Model
+See [Models & Languages](https://space.respeecher.com/docs/models-and-languages).
+```python
+import os
+from pipecat_respeecher import RespeecherTTSService
+tts = RespeecherTTSService(
+    api_key=os.getenv("RESPEECHER_API_KEY"),
+    model="public/tts/ua-rt",
+    voice_id="olesia-conversation",
+)
+```

pipecat_respeecher-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+pipecat_respeecher/__init__.py,sha256=loRRWbx3pIHDPwsDEiGpPHbaTq8afK8PdvKjadGrBgM,167
+pipecat_respeecher/tts.py,sha256=3r7RvIJXe-fY9zy6aOKftCFO26_a-Hr6V_NPfeVh0J8,11506
+pipecat_respeecher-0.1.0.dist-info/licenses/LICENSE,sha256=VYxgaSz1HG1JnJ3NfZybU6fxk5LyvqlpZwzsuzJrThg,1332
+pipecat_respeecher-0.1.0.dist-info/METADATA,sha256=wfMs4JIPuAw2FLuBRunrn8RP0_o6tcsjJCRk7LhsoZQ,4000
+pipecat_respeecher-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pipecat_respeecher-0.1.0.dist-info/top_level.txt,sha256=agL5w1Tg8kRhZcU6wIOz5wjy1L6X2nE8nI5U1XBwYeU,19
+pipecat_respeecher-0.1.0.dist-info/RECORD,,

pipecat_respeecher-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

pipecat_respeecher-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,25 @@
+BSD 2-Clause License
+Copyright (c) 2024–2025, Daily
+Copyright (c) 2025, Respeecher
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pipecat_respeecher-0.1.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ pipecat_respeecher