dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/playht/tts.py
CHANGED
|
@@ -4,14 +4,20 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""PlayHT text-to-speech service implementations.
|
|
8
|
+
|
|
9
|
+
This module provides integration with PlayHT's text-to-speech API
|
|
10
|
+
supporting both WebSocket streaming and HTTP-based synthesis.
|
|
11
|
+
"""
|
|
12
|
+
|
|
7
13
|
import io
|
|
8
14
|
import json
|
|
9
15
|
import struct
|
|
10
16
|
import uuid
|
|
17
|
+
import warnings
|
|
11
18
|
from typing import AsyncGenerator, Optional
|
|
12
19
|
|
|
13
20
|
import aiohttp
|
|
14
|
-
import websockets
|
|
15
21
|
from loguru import logger
|
|
16
22
|
from pydantic import BaseModel
|
|
17
23
|
|
|
@@ -32,16 +38,23 @@ from pipecat.transcriptions.language import Language
|
|
|
32
38
|
from pipecat.utils.tracing.service_decorators import traced_tts
|
|
33
39
|
|
|
34
40
|
try:
|
|
35
|
-
from
|
|
36
|
-
from
|
|
37
|
-
from pyht.client import Language as PlayHTLanguage
|
|
41
|
+
from websockets.asyncio.client import connect as websocket_connect
|
|
42
|
+
from websockets.protocol import State
|
|
38
43
|
except ModuleNotFoundError as e:
|
|
39
44
|
logger.error(f"Exception: {e}")
|
|
40
|
-
logger.error("In order to use
|
|
45
|
+
logger.error("In order to use PlayHTTTSService, you need to `pip install pipecat-ai[playht]`.")
|
|
41
46
|
raise Exception(f"Missing module: {e}")
|
|
42
47
|
|
|
43
48
|
|
|
44
49
|
def language_to_playht_language(language: Language) -> Optional[str]:
|
|
50
|
+
"""Convert a Language enum to PlayHT language code.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
language: The Language enum value to convert.
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
The corresponding PlayHT language code, or None if not supported.
|
|
57
|
+
"""
|
|
45
58
|
BASE_LANGUAGES = {
|
|
46
59
|
Language.AF: "afrikans",
|
|
47
60
|
Language.AM: "amharic",
|
|
@@ -96,7 +109,22 @@ def language_to_playht_language(language: Language) -> Optional[str]:
|
|
|
96
109
|
|
|
97
110
|
|
|
98
111
|
class PlayHTTTSService(InterruptibleTTSService):
|
|
112
|
+
"""PlayHT WebSocket-based text-to-speech service.
|
|
113
|
+
|
|
114
|
+
Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
|
|
115
|
+
Supports streaming audio generation with configurable voice engines and
|
|
116
|
+
language settings.
|
|
117
|
+
"""
|
|
118
|
+
|
|
99
119
|
class InputParams(BaseModel):
|
|
120
|
+
"""Input parameters for PlayHT TTS configuration.
|
|
121
|
+
|
|
122
|
+
Parameters:
|
|
123
|
+
language: Language for synthesis. Defaults to English.
|
|
124
|
+
speed: Speech speed multiplier. Defaults to 1.0.
|
|
125
|
+
seed: Random seed for voice consistency.
|
|
126
|
+
"""
|
|
127
|
+
|
|
100
128
|
language: Optional[Language] = Language.EN
|
|
101
129
|
speed: Optional[float] = 1.0
|
|
102
130
|
seed: Optional[int] = None
|
|
@@ -113,6 +141,18 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
113
141
|
params: Optional[InputParams] = None,
|
|
114
142
|
**kwargs,
|
|
115
143
|
):
|
|
144
|
+
"""Initialize the PlayHT WebSocket TTS service.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
api_key: PlayHT API key for authentication.
|
|
148
|
+
user_id: PlayHT user ID for authentication.
|
|
149
|
+
voice_url: URL of the voice to use for synthesis.
|
|
150
|
+
voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
|
|
151
|
+
sample_rate: Audio sample rate. If None, uses default.
|
|
152
|
+
output_format: Audio output format. Defaults to "wav".
|
|
153
|
+
params: Additional input parameters for voice customization.
|
|
154
|
+
**kwargs: Additional arguments passed to parent InterruptibleTTSService.
|
|
155
|
+
"""
|
|
116
156
|
super().__init__(
|
|
117
157
|
pause_frame_processing=True,
|
|
118
158
|
sample_rate=sample_rate,
|
|
@@ -140,30 +180,60 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
140
180
|
self.set_voice(voice_url)
|
|
141
181
|
|
|
142
182
|
def can_generate_metrics(self) -> bool:
|
|
183
|
+
"""Check if this service can generate processing metrics.
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
True, as PlayHT service supports metrics generation.
|
|
187
|
+
"""
|
|
143
188
|
return True
|
|
144
189
|
|
|
145
190
|
def language_to_service_language(self, language: Language) -> Optional[str]:
|
|
191
|
+
"""Convert a Language enum to PlayHT service language format.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
language: The language to convert.
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
The PlayHT-specific language code, or None if not supported.
|
|
198
|
+
"""
|
|
146
199
|
return language_to_playht_language(language)
|
|
147
200
|
|
|
148
201
|
async def start(self, frame: StartFrame):
|
|
202
|
+
"""Start the PlayHT TTS service.
|
|
203
|
+
|
|
204
|
+
Args:
|
|
205
|
+
frame: The start frame containing initialization parameters.
|
|
206
|
+
"""
|
|
149
207
|
await super().start(frame)
|
|
150
208
|
await self._connect()
|
|
151
209
|
|
|
152
210
|
async def stop(self, frame: EndFrame):
|
|
211
|
+
"""Stop the PlayHT TTS service.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
frame: The end frame.
|
|
215
|
+
"""
|
|
153
216
|
await super().stop(frame)
|
|
154
217
|
await self._disconnect()
|
|
155
218
|
|
|
156
219
|
async def cancel(self, frame: CancelFrame):
|
|
220
|
+
"""Cancel the PlayHT TTS service.
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
frame: The cancel frame.
|
|
224
|
+
"""
|
|
157
225
|
await super().cancel(frame)
|
|
158
226
|
await self._disconnect()
|
|
159
227
|
|
|
160
228
|
async def _connect(self):
|
|
229
|
+
"""Connect to PlayHT WebSocket and start receive task."""
|
|
161
230
|
await self._connect_websocket()
|
|
162
231
|
|
|
163
232
|
if self._websocket and not self._receive_task:
|
|
164
233
|
self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
|
|
165
234
|
|
|
166
235
|
async def _disconnect(self):
|
|
236
|
+
"""Disconnect from PlayHT WebSocket and clean up tasks."""
|
|
167
237
|
if self._receive_task:
|
|
168
238
|
await self.cancel_task(self._receive_task)
|
|
169
239
|
self._receive_task = None
|
|
@@ -171,8 +241,9 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
171
241
|
await self._disconnect_websocket()
|
|
172
242
|
|
|
173
243
|
async def _connect_websocket(self):
|
|
244
|
+
"""Connect to PlayHT websocket."""
|
|
174
245
|
try:
|
|
175
|
-
if self._websocket and self._websocket.
|
|
246
|
+
if self._websocket and self._websocket.state is State.OPEN:
|
|
176
247
|
return
|
|
177
248
|
|
|
178
249
|
logger.debug("Connecting to PlayHT")
|
|
@@ -183,7 +254,7 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
183
254
|
if not isinstance(self._websocket_url, str):
|
|
184
255
|
raise ValueError("WebSocket URL is not a string")
|
|
185
256
|
|
|
186
|
-
self._websocket = await
|
|
257
|
+
self._websocket = await websocket_connect(self._websocket_url)
|
|
187
258
|
except ValueError as e:
|
|
188
259
|
logger.error(f"{self} initialization error: {e}")
|
|
189
260
|
self._websocket = None
|
|
@@ -194,6 +265,7 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
194
265
|
await self._call_event_handler("on_connection_error", f"{e}")
|
|
195
266
|
|
|
196
267
|
async def _disconnect_websocket(self):
|
|
268
|
+
"""Disconnect from PlayHT websocket."""
|
|
197
269
|
try:
|
|
198
270
|
await self.stop_all_metrics()
|
|
199
271
|
|
|
@@ -207,6 +279,7 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
207
279
|
self._websocket = None
|
|
208
280
|
|
|
209
281
|
async def _get_websocket_url(self):
|
|
282
|
+
"""Retrieve WebSocket URL from PlayHT API."""
|
|
210
283
|
async with aiohttp.ClientSession() as session:
|
|
211
284
|
async with session.post(
|
|
212
285
|
"https://api.play.ht/api/v4/websocket-auth",
|
|
@@ -235,16 +308,19 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
235
308
|
raise Exception(f"Failed to get WebSocket URL: {response.status}")
|
|
236
309
|
|
|
237
310
|
def _get_websocket(self):
|
|
311
|
+
"""Get the WebSocket connection if available."""
|
|
238
312
|
if self._websocket:
|
|
239
313
|
return self._websocket
|
|
240
314
|
raise Exception("Websocket not connected")
|
|
241
315
|
|
|
242
316
|
async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
|
|
317
|
+
"""Handle interruption by stopping metrics and clearing request ID."""
|
|
243
318
|
await super()._handle_interruption(frame, direction)
|
|
244
319
|
await self.stop_all_metrics()
|
|
245
320
|
self._request_id = None
|
|
246
321
|
|
|
247
322
|
async def _receive_messages(self):
|
|
323
|
+
"""Receive messages from PlayHT websocket."""
|
|
248
324
|
async for message in self._get_websocket():
|
|
249
325
|
if isinstance(message, bytes):
|
|
250
326
|
# Skip the WAV header message
|
|
@@ -273,11 +349,19 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
273
349
|
|
|
274
350
|
@traced_tts
|
|
275
351
|
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
|
352
|
+
"""Generate TTS audio from text using PlayHT's WebSocket API.
|
|
353
|
+
|
|
354
|
+
Args:
|
|
355
|
+
text: The text to synthesize into speech.
|
|
356
|
+
|
|
357
|
+
Yields:
|
|
358
|
+
Frame: Audio frames containing the synthesized speech.
|
|
359
|
+
"""
|
|
276
360
|
logger.debug(f"{self}: Generating TTS [{text}]")
|
|
277
361
|
|
|
278
362
|
try:
|
|
279
363
|
# Reconnect if the websocket is closed
|
|
280
|
-
if not self._websocket or self._websocket.
|
|
364
|
+
if not self._websocket or self._websocket.state is State.CLOSED:
|
|
281
365
|
await self._connect()
|
|
282
366
|
|
|
283
367
|
if not self._request_id:
|
|
@@ -316,7 +400,22 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
316
400
|
|
|
317
401
|
|
|
318
402
|
class PlayHTHttpTTSService(TTSService):
|
|
403
|
+
"""PlayHT HTTP-based text-to-speech service.
|
|
404
|
+
|
|
405
|
+
Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
|
|
406
|
+
non-streaming synthesis. Suitable for use cases where streaming is not
|
|
407
|
+
required and simpler integration is preferred.
|
|
408
|
+
"""
|
|
409
|
+
|
|
319
410
|
class InputParams(BaseModel):
|
|
411
|
+
"""Input parameters for PlayHT HTTP TTS configuration.
|
|
412
|
+
|
|
413
|
+
Parameters:
|
|
414
|
+
language: Language for synthesis. Defaults to English.
|
|
415
|
+
speed: Speech speed multiplier. Defaults to 1.0.
|
|
416
|
+
seed: Random seed for voice consistency.
|
|
417
|
+
"""
|
|
418
|
+
|
|
320
419
|
language: Optional[Language] = Language.EN
|
|
321
420
|
speed: Optional[float] = 1.0
|
|
322
421
|
seed: Optional[int] = None
|
|
@@ -328,40 +427,59 @@ class PlayHTHttpTTSService(TTSService):
|
|
|
328
427
|
user_id: str,
|
|
329
428
|
voice_url: str,
|
|
330
429
|
voice_engine: str = "Play3.0-mini",
|
|
331
|
-
protocol: str =
|
|
430
|
+
protocol: Optional[str] = None,
|
|
431
|
+
output_format: str = "wav",
|
|
332
432
|
sample_rate: Optional[int] = None,
|
|
333
433
|
params: Optional[InputParams] = None,
|
|
334
434
|
**kwargs,
|
|
335
435
|
):
|
|
436
|
+
"""Initialize the PlayHT HTTP TTS service.
|
|
437
|
+
|
|
438
|
+
Args:
|
|
439
|
+
api_key: PlayHT API key for authentication.
|
|
440
|
+
user_id: PlayHT user ID for authentication.
|
|
441
|
+
voice_url: URL of the voice to use for synthesis.
|
|
442
|
+
voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
|
|
443
|
+
protocol: Protocol to use ("http" or "ws").
|
|
444
|
+
|
|
445
|
+
.. deprecated:: 0.0.80
|
|
446
|
+
This parameter no longer has any effect and will be removed in a future version.
|
|
447
|
+
Use PlayHTTTSService for WebSocket or PlayHTHttpTTSService for HTTP.
|
|
448
|
+
|
|
449
|
+
output_format: Audio output format. Defaults to "wav".
|
|
450
|
+
sample_rate: Audio sample rate. If None, uses default.
|
|
451
|
+
params: Additional input parameters for voice customization.
|
|
452
|
+
**kwargs: Additional arguments passed to parent TTSService.
|
|
453
|
+
"""
|
|
336
454
|
super().__init__(sample_rate=sample_rate, **kwargs)
|
|
337
455
|
|
|
456
|
+
# Warn about deprecated protocol parameter if explicitly provided
|
|
457
|
+
if protocol:
|
|
458
|
+
warnings.warn(
|
|
459
|
+
"The 'protocol' parameter is deprecated and will be removed in a future version.",
|
|
460
|
+
DeprecationWarning,
|
|
461
|
+
stacklevel=2,
|
|
462
|
+
)
|
|
463
|
+
|
|
338
464
|
params = params or PlayHTHttpTTSService.InputParams()
|
|
339
465
|
|
|
340
466
|
self._user_id = user_id
|
|
341
467
|
self._api_key = api_key
|
|
342
468
|
|
|
343
|
-
self._client = AsyncClient(
|
|
344
|
-
user_id=self._user_id,
|
|
345
|
-
api_key=self._api_key,
|
|
346
|
-
)
|
|
347
|
-
|
|
348
469
|
# Check if voice_engine contains protocol information (backward compatibility)
|
|
349
470
|
if "-http" in voice_engine:
|
|
350
471
|
# Extract the base engine name
|
|
351
472
|
voice_engine = voice_engine.replace("-http", "")
|
|
352
|
-
protocol = "http"
|
|
353
473
|
elif "-ws" in voice_engine:
|
|
354
474
|
# Extract the base engine name
|
|
355
475
|
voice_engine = voice_engine.replace("-ws", "")
|
|
356
|
-
protocol = "ws"
|
|
357
476
|
|
|
358
477
|
self._settings = {
|
|
359
478
|
"language": self.language_to_service_language(params.language)
|
|
360
479
|
if params.language
|
|
361
480
|
else "english",
|
|
362
|
-
"
|
|
481
|
+
"output_format": output_format,
|
|
363
482
|
"voice_engine": voice_engine,
|
|
364
|
-
"protocol": protocol,
|
|
365
483
|
"speed": params.speed,
|
|
366
484
|
"seed": params.seed,
|
|
367
485
|
}
|
|
@@ -369,74 +487,118 @@ class PlayHTHttpTTSService(TTSService):
|
|
|
369
487
|
self.set_voice(voice_url)
|
|
370
488
|
|
|
371
489
|
async def start(self, frame: StartFrame):
|
|
490
|
+
"""Start the PlayHT HTTP TTS service.
|
|
491
|
+
|
|
492
|
+
Args:
|
|
493
|
+
frame: The start frame containing initialization parameters.
|
|
494
|
+
"""
|
|
372
495
|
await super().start(frame)
|
|
373
496
|
self._settings["sample_rate"] = self.sample_rate
|
|
374
497
|
|
|
375
|
-
def _create_options(self) -> TTSOptions:
|
|
376
|
-
language_str = self._settings["language"]
|
|
377
|
-
playht_language = None
|
|
378
|
-
if language_str:
|
|
379
|
-
# Convert string to PlayHT Language enum
|
|
380
|
-
for lang in PlayHTLanguage:
|
|
381
|
-
if lang.value == language_str:
|
|
382
|
-
playht_language = lang
|
|
383
|
-
break
|
|
384
|
-
|
|
385
|
-
return TTSOptions(
|
|
386
|
-
voice=self._voice_id,
|
|
387
|
-
language=playht_language,
|
|
388
|
-
sample_rate=self.sample_rate,
|
|
389
|
-
format=self._settings["format"],
|
|
390
|
-
speed=self._settings["speed"],
|
|
391
|
-
seed=self._settings["seed"],
|
|
392
|
-
)
|
|
393
|
-
|
|
394
498
|
def can_generate_metrics(self) -> bool:
|
|
499
|
+
"""Check if this service can generate processing metrics.
|
|
500
|
+
|
|
501
|
+
Returns:
|
|
502
|
+
True, as PlayHT HTTP service supports metrics generation.
|
|
503
|
+
"""
|
|
395
504
|
return True
|
|
396
505
|
|
|
397
506
|
def language_to_service_language(self, language: Language) -> Optional[str]:
|
|
507
|
+
"""Convert a Language enum to PlayHT service language format.
|
|
508
|
+
|
|
509
|
+
Args:
|
|
510
|
+
language: The language to convert.
|
|
511
|
+
|
|
512
|
+
Returns:
|
|
513
|
+
The PlayHT-specific language code, or None if not supported.
|
|
514
|
+
"""
|
|
398
515
|
return language_to_playht_language(language)
|
|
399
516
|
|
|
400
517
|
@traced_tts
|
|
401
518
|
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
|
519
|
+
"""Generate TTS audio from text using PlayHT's HTTP API.
|
|
520
|
+
|
|
521
|
+
Args:
|
|
522
|
+
text: The text to synthesize into speech.
|
|
523
|
+
|
|
524
|
+
Yields:
|
|
525
|
+
Frame: Audio frames containing the synthesized speech.
|
|
526
|
+
"""
|
|
402
527
|
logger.debug(f"{self}: Generating TTS [{text}]")
|
|
403
528
|
|
|
404
529
|
try:
|
|
405
|
-
options = self._create_options()
|
|
406
|
-
|
|
407
530
|
await self.start_ttfb_metrics()
|
|
408
531
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
532
|
+
# Prepare the request payload
|
|
533
|
+
payload = {
|
|
534
|
+
"text": text,
|
|
535
|
+
"voice": self._voice_id,
|
|
536
|
+
"voice_engine": self._settings["voice_engine"],
|
|
537
|
+
"output_format": self._settings["output_format"],
|
|
538
|
+
"sample_rate": self.sample_rate,
|
|
539
|
+
"language": self._settings["language"],
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
# Add optional parameters if they exist
|
|
543
|
+
if self._settings["speed"] is not None:
|
|
544
|
+
payload["speed"] = self._settings["speed"]
|
|
545
|
+
if self._settings["seed"] is not None:
|
|
546
|
+
payload["seed"] = self._settings["seed"]
|
|
547
|
+
|
|
548
|
+
headers = {
|
|
549
|
+
"Authorization": f"Bearer {self._api_key}",
|
|
550
|
+
"X-User-Id": self._user_id,
|
|
551
|
+
"Content-Type": "application/json",
|
|
552
|
+
"Accept": "*/*",
|
|
553
|
+
}
|
|
415
554
|
|
|
416
555
|
await self.start_tts_usage_metrics(text)
|
|
417
556
|
|
|
418
557
|
yield TTSStartedFrame()
|
|
419
558
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
if
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
559
|
+
async with aiohttp.ClientSession() as session:
|
|
560
|
+
async with session.post(
|
|
561
|
+
"https://api.play.ht/api/v2/tts/stream",
|
|
562
|
+
headers=headers,
|
|
563
|
+
json=payload,
|
|
564
|
+
) as response:
|
|
565
|
+
if response.status not in (200, 201):
|
|
566
|
+
error_text = await response.text()
|
|
567
|
+
raise Exception(f"PlayHT API error {response.status}: {error_text}")
|
|
568
|
+
|
|
569
|
+
in_header = True
|
|
570
|
+
buffer = b""
|
|
571
|
+
|
|
572
|
+
CHUNK_SIZE = self.chunk_size
|
|
573
|
+
|
|
574
|
+
async for chunk in response.content.iter_chunked(CHUNK_SIZE):
|
|
575
|
+
if len(chunk) == 0:
|
|
576
|
+
continue
|
|
577
|
+
|
|
578
|
+
# Skip the RIFF header
|
|
579
|
+
if in_header:
|
|
580
|
+
buffer += chunk
|
|
581
|
+
if len(buffer) <= 36:
|
|
582
|
+
continue
|
|
583
|
+
else:
|
|
584
|
+
fh = io.BytesIO(buffer)
|
|
585
|
+
fh.seek(36)
|
|
586
|
+
(data, size) = struct.unpack("<4sI", fh.read(8))
|
|
587
|
+
while data != b"data":
|
|
588
|
+
fh.read(size)
|
|
589
|
+
(data, size) = struct.unpack("<4sI", fh.read(8))
|
|
590
|
+
# Extract audio data after header
|
|
591
|
+
audio_data = buffer[fh.tell() :]
|
|
592
|
+
if len(audio_data) > 0:
|
|
593
|
+
await self.stop_ttfb_metrics()
|
|
594
|
+
frame = TTSAudioRawFrame(audio_data, self.sample_rate, 1)
|
|
595
|
+
yield frame
|
|
596
|
+
in_header = False
|
|
597
|
+
elif len(chunk) > 0:
|
|
598
|
+
await self.stop_ttfb_metrics()
|
|
599
|
+
frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
|
|
600
|
+
yield frame
|
|
601
|
+
|
|
440
602
|
except Exception as e:
|
|
441
603
|
logger.error(f"{self} error generating TTS: {e}")
|
|
442
604
|
finally:
|
pipecat/services/qwen/llm.py
CHANGED
|
@@ -16,12 +16,6 @@ class QwenLLMService(OpenAILLMService):
|
|
|
16
16
|
|
|
17
17
|
This service extends OpenAILLMService to connect to Qwen's API endpoint while
|
|
18
18
|
maintaining full compatibility with OpenAI's interface and functionality.
|
|
19
|
-
|
|
20
|
-
Args:
|
|
21
|
-
api_key: The API key for accessing Qwen's API (DashScope API key).
|
|
22
|
-
base_url: Base URL for Qwen API. Defaults to "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".
|
|
23
|
-
model: The model identifier to use. Defaults to "qwen-plus".
|
|
24
|
-
**kwargs: Additional keyword arguments passed to OpenAILLMService.
|
|
25
19
|
"""
|
|
26
20
|
|
|
27
21
|
def __init__(
|
|
@@ -32,6 +26,14 @@ class QwenLLMService(OpenAILLMService):
|
|
|
32
26
|
model: str = "qwen-plus",
|
|
33
27
|
**kwargs,
|
|
34
28
|
):
|
|
29
|
+
"""Initialize the Qwen LLM service.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
api_key: The API key for accessing Qwen's API (DashScope API key).
|
|
33
|
+
base_url: Base URL for Qwen API. Defaults to "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".
|
|
34
|
+
model: The model identifier to use. Defaults to "qwen-plus".
|
|
35
|
+
**kwargs: Additional keyword arguments passed to OpenAILLMService.
|
|
36
|
+
"""
|
|
35
37
|
super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
|
|
36
38
|
logger.info(f"Initialized Qwen LLM service with model: {model}")
|
|
37
39
|
|