pipecat-respeecher 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ """Respeecher real-time text-to-speech integration for Pipecat."""
2
+
3
+ from pipecat_respeecher.tts import RespeecherTTSService
4
+
5
# Public names re-exported at package level.
__all__ = ["RespeecherTTSService"]
@@ -0,0 +1,343 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ # Copyright (c) 2025, Respeecher
4
+ #
5
+ # SPDX-License-Identifier: BSD-2-Clause
6
+ #
7
+
8
+ """Respeecher real-time text-to-speech service implementation."""
9
+
10
+ import base64
11
+ import json
12
+ import uuid
13
+ from typing import AsyncGenerator, Optional
14
+
15
+ from loguru import logger
16
+ from pydantic import BaseModel, TypeAdapter, ValidationError
17
+
18
+ from pipecat.frames.frames import (
19
+ CancelFrame,
20
+ EndFrame,
21
+ ErrorFrame,
22
+ Frame,
23
+ LLMFullResponseEndFrame,
24
+ StartFrame,
25
+ StartInterruptionFrame,
26
+ TTSAudioRawFrame,
27
+ TTSStartedFrame,
28
+ TTSStoppedFrame,
29
+ )
30
+ from pipecat.processors.frame_processor import FrameDirection
31
+ from pipecat.services.tts_service import AudioContextTTSService, TTSService
32
+ from pipecat.utils.tracing.service_decorators import traced_tts
33
+
34
+ from respeecher.tts import (
35
+ ContextfulGenerationRequestParams,
36
+ StreamingOutputFormatParams,
37
+ )
38
+ from respeecher.tts import Response as TTSResponse
39
+ from respeecher.voices import (
40
+ SamplingParamsParams as SamplingParams, # TypedDict instead of a Pydantic model
41
+ )
42
+ from websockets.asyncio.client import connect as websocket_connect
43
+ from websockets.protocol import State
44
+
45
+
46
+ class RespeecherTTSService(AudioContextTTSService, TTSService):
47
+ """Respeecher real-time TTS service with WebSocket streaming and audio contexts.
48
+
49
+ Provides text-to-speech using Respeecher's streaming WebSocket API.
50
+ Supports audio context management and voice customization via sampling parameters.
51
+ """
52
+
53
class InputParams(BaseModel):
    """Optional tuning knobs accepted by the Respeecher TTS service.

    Parameters:
        sampling_params: Sampling parameters used for speech synthesis.
            Defaults to an empty mapping.
    """

    sampling_params: SamplingParams = {}
61
+
62
def __init__(
    self,
    *,
    api_key: str,
    voice_id: str,
    model: str = "public/tts/en-rt",
    url: str = "wss://api.respeecher.com/v1",
    sample_rate: Optional[int] = None,
    params: Optional[InputParams] = None,
    **kwargs,
):
    """Create a Respeecher TTS service instance.

    Args:
        api_key: Respeecher API key used to authenticate the websocket.
        voice_id: Identifier of the voice used for synthesis.
        model: Model path appended to the base URL.
        url: Base websocket URL of the Respeecher TTS API.
        sample_rate: Requested audio sample rate; None defers to the default.
        params: Extra input parameters (sampling parameters); defaults to a
            fresh ``InputParams`` when omitted.
        **kwargs: Forwarded unchanged to ``TTSService.__init__``.
    """
    # Both base initializers are invoked explicitly, in this order.
    AudioContextTTSService.__init__(self, reconnect_on_error=False)
    TTSService.__init__(
        self,
        pause_frame_processing=True,
        aggregate_sentences=False,
        sample_rate=sample_rate,
        **kwargs,
    )

    effective_params = params or RespeecherTTSService.InputParams()
    # 0 is only a placeholder; start() overwrites it with self.sample_rate.
    initial_rate = sample_rate or 0

    self._api_key = api_key
    self._url = url
    self._output_format: StreamingOutputFormatParams = {
        "encoding": "pcm_s16le",
        "sample_rate": initial_rate,
    }
    self._settings = {"sampling_params": effective_params.sampling_params}
    self.set_model_name(model)
    self.set_voice(voice_id)

    # No generation context and no receive task exist until we connect.
    self._context_id: str | None = None
    self._receive_task = None
107
+
108
def can_generate_metrics(self) -> bool:
    """Report whether this service can produce processing metrics.

    Returns:
        Always True.
    """
    return True
115
+
116
async def set_model(self, model: str):
    """Switch to another TTS model and reconnect the websocket.

    Args:
        model: The model name to use for synthesis.
    """
    self._model_id = model
    await super().set_model(model)
    logger.info(f"Switching TTS model to: [{model}]")
    # The model is part of the websocket URL, so the connection is cycled.
    await self._disconnect()
    await self._connect()
127
+
128
+ def _build_request(self, text: Optional[str] = None):
129
+ assert self._context_id is not None
130
+
131
+ request: ContextfulGenerationRequestParams = {
132
+ "transcript": text or "",
133
+ "continue": text is not None,
134
+ "context_id": self._context_id,
135
+ "voice": {
136
+ "id": self._voice_id,
137
+ "sampling_params": self._settings["sampling_params"],
138
+ },
139
+ "output_format": self._output_format,
140
+ }
141
+
142
+ return json.dumps(request)
143
+
144
async def start(self, frame: StartFrame):
    """Start the service and open the Respeecher connection.

    Args:
        frame: The start frame containing initialization parameters.
    """
    await super().start(frame)
    # Replace the constructor's placeholder with the service's sample rate.
    self._output_format["sample_rate"] = self.sample_rate
    await self._connect()
153
+
154
async def stop(self, frame: EndFrame):
    """Stop the service and close the Respeecher connection.

    Args:
        frame: The end frame.
    """
    await super().stop(frame)
    await self._disconnect()
162
+
163
async def cancel(self, frame: CancelFrame):
    """Cancel the service and close the Respeecher connection.

    Args:
        frame: The cancel frame.
    """
    await super().cancel(frame)
    await self._disconnect()
171
+
172
+ async def _connect(self):
173
+ await self._connect_websocket()
174
+
175
+ if self._websocket and not self._receive_task:
176
+ self._receive_task = self.create_task(
177
+ self._receive_task_handler(self._report_error)
178
+ )
179
+
180
+ async def _disconnect(self):
181
+ if self._receive_task:
182
+ await self.cancel_task(self._receive_task)
183
+ self._receive_task = None
184
+
185
+ await self._disconnect_websocket()
186
+
187
async def _connect_websocket(self):
    """Open the Respeecher websocket unless one is already open.

    On success the ``on_connected`` event handler is called. Any failure is
    logged, the context id and websocket are reset to None, and the
    ``on_connection_error`` event handler is called with the error text.
    """
    try:
        if self._websocket and self._websocket.state is State.OPEN:
            return
        logger.debug("Connecting to Respeecher")

        # Endpoint layout: <base>[/<model>]/tts/websocket?api_key=<key>
        segments = [self._url.rstrip("/")]
        model_path = self._model_name.strip("/")
        if model_path:
            segments.append(model_path)
        segments.append(f"tts/websocket?api_key={self._api_key}")
        endpoint = "/".join(segments)

        self._websocket = await websocket_connect(endpoint)
        await self._call_event_handler("on_connected")
    except Exception as e:
        logger.error(f"{self} initialization error: {e}")
        self._context_id = None
        self._websocket = None
        await self._call_event_handler("on_connection_error", f"{e}")
208
+
209
+ async def _disconnect_websocket(self):
210
+ try:
211
+ await self.stop_all_metrics()
212
+
213
+ if self._websocket:
214
+ logger.debug("Disconnecting from Respeecher")
215
+ await self._websocket.close()
216
+ except Exception as e:
217
+ logger.error(f"{self} error closing websocket: {e}")
218
+ finally:
219
+ self._context_id = None
220
+ self._websocket = None
221
+ await self._call_event_handler("on_disconnected")
222
+
223
+ def _get_websocket(self):
224
+ if self._websocket:
225
+ return self._websocket
226
+ raise Exception("Websocket not connected")
227
+
228
async def _report_error(self, error: ErrorFrame):
    """Notify ``on_connection_error`` handlers, then push the error frame.

    Args:
        error: The error frame to report.
    """
    await self._call_event_handler("on_connection_error", error.error)
    await self.push_error_frame(error)
231
+
232
async def _handle_interruption(
    self, frame: StartInterruptionFrame, direction: FrameDirection
):
    """Handle an interruption by cancelling the active generation context.

    After the base-class handling and stopping all metrics, a cancel request
    is sent for the current context (if any) and the context id is cleared.

    Args:
        frame: The interruption frame.
        direction: The direction of frame processing.

    Raises:
        Exception: Propagated unchanged if the websocket is not connected or
            the cancel request cannot be sent.
    """
    await super()._handle_interruption(frame, direction)
    await self.stop_all_metrics()

    if not self._context_id:
        return

    cancel_request = json.dumps({"context_id": self._context_id, "cancel": True})
    try:
        await self._get_websocket().send(cancel_request)
    finally:
        # Forget the context even when the cancel could not be delivered;
        # otherwise the next run_tts() would keep sending text to the
        # interrupted context instead of creating a fresh one.
        self._context_id = None
244
+
245
async def process_frame(self, frame: Frame, direction: FrameDirection):
    """Process a frame, flushing pending audio at the end of a response.

    Args:
        frame: The frame to process.
        direction: The direction of frame processing.
    """
    await super().process_frame(frame, direction)

    # End of an LLM response or end of the pipeline: finalize the context.
    flush_triggers = (LLMFullResponseEndFrame, EndFrame)
    if isinstance(frame, flush_triggers):
        await self.flush_audio()
256
+
257
async def flush_audio(self):
    """Send the final request for the current context and forget it.

    Does nothing when there is no active context or no websocket.
    """
    if not (self._context_id and self._websocket):
        return

    logger.trace(f"{self}: flushing audio")
    # A request built without text has an empty transcript and
    # "continue": false.
    final_request = self._build_request()
    await self._websocket.send(final_request)
    self._context_id = None
265
+
266
async def _receive_messages_until_closed(self):
    """Consume websocket messages until the connection stops yielding them.

    Each message is validated as a ``TTSResponse``. Messages that fail
    validation are logged and skipped. Messages for a context that is no
    longer available are skipped silently. Otherwise:

    - ``error``: log it, push ``TTSStoppedFrame``, stop all metrics and push
      an ``ErrorFrame``.
    - ``done``: push ``TTSStoppedFrame``, stop TTFB metrics and remove the
      audio context.
    - ``chunk``: stop TTFB metrics and append the base64-decoded audio to
      the audio context as a mono ``TTSAudioRawFrame``.

    Raises:
        Exception: If the websocket is not connected when called.
    """
    # Build the validator once; it does not depend on the message.
    response_adapter = TypeAdapter(TTSResponse)

    async for message in self._get_websocket():
        try:
            response = response_adapter.validate_json(message)
        except ValidationError as e:
            logger.error(f"{self} cannot parse message: {e}")
            continue

        if response.context_id is not None and not self.audio_context_available(
            response.context_id
        ):
            # We don't need to log an error, getting here is expected
            # and is how interruptions are handled in the superclass
            continue

        if response.type == "error":
            logger.error(f"{self} error: {response}")
            await self.push_frame(TTSStoppedFrame())
            await self.stop_all_metrics()
            await self.push_error(ErrorFrame(f"{self} error: {response.error}"))
            continue

        if response.type == "done":
            await self.push_frame(TTSStoppedFrame())
            await self.stop_ttfb_metrics()
            await self.remove_audio_context(response.context_id)
        elif response.type == "chunk":
            await self.stop_ttfb_metrics()
            audio_frame = TTSAudioRawFrame(
                audio=base64.b64decode(response.data),
                sample_rate=self.sample_rate,
                num_channels=1,
            )
            await self.append_to_audio_context(response.context_id, audio_frame)
300
+
301
async def _receive_messages(self):
    """Receive messages forever, reconnecting whenever the socket closes."""
    while True:
        await self._receive_messages_until_closed()
        # The message loop ended, so the connection is gone: open a new one.
        logger.info(f"{self} Respeecher disconnected, reconnecting")
        await self._connect_websocket()
306
+
307
@traced_tts
async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
    """Send text to Respeecher for synthesis over the websocket.

    Audio is delivered separately through the audio context; this generator
    only yields control frames.

    Args:
        text: The text to synthesize into speech.

    Yields:
        Frame: ``TTSStartedFrame`` when a new context is opened, an
        ``ErrorFrame`` (plus ``TTSStoppedFrame`` for send failures) on
        error, and None once the request has been sent.
    """
    logger.debug(f"{self}: Generating TTS [{text}]")

    try:
        if not self._websocket or self._websocket.state is State.CLOSED:
            await self._connect()

        # First text of an utterance: open a new generation context.
        if not self._context_id:
            await self.start_ttfb_metrics()
            yield TTSStartedFrame()
            self._context_id = str(uuid.uuid4())
            await self.create_audio_context(self._context_id)

        payload = self._build_request(text)

        try:
            await self._get_websocket().send(payload)
            await self.start_tts_usage_metrics(text)
        except Exception as e:
            yield ErrorFrame(error=f"{self} error sending message: {e}")
            yield TTSStoppedFrame()
            # Cycle the connection so the next request starts clean.
            await self._disconnect()
            await self._connect()
            return

        yield None
    except Exception as e:
        yield ErrorFrame(error=f"{self} exception: {e}")
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: pipecat-respeecher
3
+ Version: 0.1.0
4
+ Summary: Respeecher real-time TTS plugin for Pipecat
5
+ Author-email: Respeecher <nv@respeecher.com>
6
+ Maintainer-email: Respeecher <nv@respeecher.com>
7
+ License-Expression: BSD-2-Clause
8
+ Project-URL: homepage, https://www.respeecher.com/real-time-tts-api
9
+ Project-URL: documentation, https://space.respeecher.com/docs
10
+ Project-URL: source, https://github.com/respeecher/pipecat-respeecher
11
+ Keywords: tts,pipecat-ai,pipecat
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: pipecat-ai>=0.0.99
23
+ Requires-Dist: respeecher>=1.1.9
24
+ Dynamic: license-file
25
+
26
+ # Pipecat Respeecher Real-Time TTS Integration
27
+
28
+ This is an official Respeecher integration for [Pipecat](https://pipecat.ai).
29
+
30
+ [Learn more](https://www.respeecher.com/real-time-tts-api) about our real-time TTS API
31
+ ([Україномовна/Ukrainian TTS](https://www.respeecher.com/uk/real-time-tts-api)).
32
+
33
+ **Maintainer: [Respeecher](https://www.respeecher.com/)**
34
+
35
+ ## Installation
36
+
37
+ To be published.
38
+
39
+ ## Running the Example
40
+
41
+ [`example.py`](./example.py) is a complete Pipecat pipeline with Respeecher TTS.
42
+ (See [`example-ukrainian.py`](./example-ukrainian.py) for a Ukrainian language pipeline.)
43
+ You can use it as a starting point for your agent,
44
+ or you can head over to [Example Snippets](#example-snippets)
45
+ if you already have a pipeline and just want to switch TTS.
46
+
47
+ The complete pipeline example requires a
48
+ [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram) API key for
49
+ Speech-to-Text, either a [Google Gemini](https://docs.pipecat.ai/server/services/llm/gemini)
50
+ API key or a [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras) API key for LLM,
51
+ and a [Respeecher Space](https://space.respeecher.com/api-keys) API key.
52
+ The Speech-to-Text and LLM services are just an example and can generally be swapped for any
53
+ other [supported Pipecat service](https://docs.pipecat.ai/server/services/supported-services).
54
+
55
+ 1. Clone this repository.
56
+ 2. Copy `env.example` to `.env` and fill in your API keys.
57
+ 3. Assuming you have the [uv](https://docs.astral.sh/uv/getting-started/installation/)
58
+ Python package manager installed, run `uv run example.py`, head over to
59
+ http://localhost:7860, and click _Connect_.
60
+ (The first run of `uv run example.py` may be slow because uv installs packages
61
+ and Pipecat downloads local models.)
62
+ The agent should greet you (both in text and in speech),
63
+ and you can converse with it through the chat interface or with your microphone.
64
+ (Make sure you have granted microphone access to the web page and that the microphone button
65
+ is not in the muted state.)
66
+
67
+ ## Example Snippets
68
+
69
+ ### Minimal Example
70
+
71
+ ```python
72
+ import os
+
+ from pipecat_respeecher import RespeecherTTSService
73
+
74
+ tts = RespeecherTTSService(
75
+ api_key=os.getenv("RESPEECHER_API_KEY"),
76
+ voice_id="samantha",
77
+ )
78
+ ```
79
+
80
+ ### Overriding Sampling Parameters
81
+
82
+ See the [Sampling Parameters Guide](https://space.respeecher.com/docs/api/tts/sampling-params-guide).
83
+
84
+ ```python
85
+ import os
+
+ from pipecat_respeecher import RespeecherTTSService
86
+
87
+ tts = RespeecherTTSService(
88
+ api_key=os.getenv("RESPEECHER_API_KEY"),
89
+ voice_id="samantha",
90
+ params=RespeecherTTSService.InputParams(
91
+ sampling_params={
92
+ "min_p": 0.01,
93
+ },
94
+ ),
95
+ )
96
+ ```
97
+
98
+ ### Ukrainian Language Model
99
+
100
+ See [Models & Languages](https://space.respeecher.com/docs/models-and-languages).
101
+
102
+ ```python
103
+ import os
+
+ from pipecat_respeecher import RespeecherTTSService
104
+
105
+ tts = RespeecherTTSService(
106
+ api_key=os.getenv("RESPEECHER_API_KEY"),
107
+ model="public/tts/ua-rt",
108
+ voice_id="olesia-conversation",
109
+ )
110
+ ```
@@ -0,0 +1,7 @@
1
+ pipecat_respeecher/__init__.py,sha256=loRRWbx3pIHDPwsDEiGpPHbaTq8afK8PdvKjadGrBgM,167
2
+ pipecat_respeecher/tts.py,sha256=3r7RvIJXe-fY9zy6aOKftCFO26_a-Hr6V_NPfeVh0J8,11506
3
+ pipecat_respeecher-0.1.0.dist-info/licenses/LICENSE,sha256=VYxgaSz1HG1JnJ3NfZybU6fxk5LyvqlpZwzsuzJrThg,1332
4
+ pipecat_respeecher-0.1.0.dist-info/METADATA,sha256=wfMs4JIPuAw2FLuBRunrn8RP0_o6tcsjJCRk7LhsoZQ,4000
5
+ pipecat_respeecher-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ pipecat_respeecher-0.1.0.dist-info/top_level.txt,sha256=agL5w1Tg8kRhZcU6wIOz5wjy1L6X2nE8nI5U1XBwYeU,19
7
+ pipecat_respeecher-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,25 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2024–2025, Daily
4
+ Copyright (c) 2025, Respeecher
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1 @@
1
+ pipecat_respeecher