dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/neuphonic/tts.py
CHANGED

@@ -4,11 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Neuphonic text-to-speech service implementations.
+
+This module provides WebSocket and HTTP-based integrations with Neuphonic's
+text-to-speech API for real-time audio synthesis.
+"""
+
 import asyncio
 import base64
 import json
 from typing import Any, AsyncGenerator, Mapping, Optional

+import aiohttp
 from loguru import logger
 from pydantic import BaseModel

@@ -29,12 +36,11 @@ from pipecat.frames.frames import (
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.tts_service import InterruptibleTTSService, TTSService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_tts

 try:
-    import
-    from
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Neuphonic, you need to `pip install pipecat-ai[neuphonic]`.")
@@ -42,6 +48,14 @@ except ModuleNotFoundError as e:


 def language_to_neuphonic_lang_code(language: Language) -> Optional[str]:
+    """Convert a Language enum to Neuphonic language code.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding Neuphonic language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.DE: "de",
         Language.EN: "en",
@@ -69,7 +83,21 @@ def language_to_neuphonic_lang_code(language: Language) -> Optional[str]:


 class NeuphonicTTSService(InterruptibleTTSService):
+    """Neuphonic real-time text-to-speech service using WebSocket streaming.
+
+    Provides real-time text-to-speech synthesis using Neuphonic's WebSocket API.
+    Supports interruption handling, keepalive connections, and configurable voice
+    parameters for high-quality speech generation.
+    """
+
     class InputParams(BaseModel):
+        """Input parameters for Neuphonic TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+        """
+
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0

@@ -82,10 +110,23 @@ class NeuphonicTTSService(InterruptibleTTSService):
         sample_rate: Optional[int] = 22050,
         encoding: str = "pcm_linear",
         params: Optional[InputParams] = None,
+        aggregate_sentences: Optional[bool] = True,
         **kwargs,
     ):
+        """Initialize the Neuphonic TTS service.
+
+        Args:
+            api_key: Neuphonic API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            url: WebSocket URL for the Neuphonic API.
+            sample_rate: Audio sample rate in Hz. Defaults to 22050.
+            encoding: Audio encoding format. Defaults to "pcm_linear".
+            params: Additional input parameters for TTS configuration.
+            aggregate_sentences: Whether to aggregate sentences within the TTSService.
+            **kwargs: Additional arguments passed to parent InterruptibleTTSService.
+        """
         super().__init__(
-            aggregate_sentences=
+            aggregate_sentences=aggregate_sentences,
             push_text_frames=False,
             push_stop_frames=True,
             stop_frame_timeout_s=2.0,
@@ -114,12 +155,26 @@ class NeuphonicTTSService(InterruptibleTTSService):
         self._keepalive_task = None

     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Neuphonic service supports metrics generation.
+        """
         return True

     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Neuphonic service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Neuphonic-specific language code, or None if not supported.
+        """
         return language_to_neuphonic_lang_code(language)

     async def _update_settings(self, settings: Mapping[str, Any]):
+        """Update service settings and reconnect with new configuration."""
         if "voice_id" in settings:
             self.set_voice(settings["voice_id"])

@@ -129,28 +184,56 @@ class NeuphonicTTSService(InterruptibleTTSService):
         logger.info(f"Switching TTS to settings: [{self._settings}]")

     async def start(self, frame: StartFrame):
+        """Start the Neuphonic TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         await self._connect()

     async def stop(self, frame: EndFrame):
+        """Stop the Neuphonic TTS service.
+
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._disconnect()

     async def cancel(self, frame: CancelFrame):
+        """Cancel the Neuphonic TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._disconnect()

     async def flush_audio(self):
+        """Flush any pending audio synthesis by sending stop command."""
         if self._websocket:
             msg = {"text": "<STOP>"}
             await self._websocket.send(json.dumps(msg))

     async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Push a frame downstream with special handling for stop conditions.
+
+        Args:
+            frame: The frame to push.
+            direction: The direction to push the frame.
+        """
         await super().push_frame(frame, direction)
         if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
             self._started = False

     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with special handling for speech control.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
         await super().process_frame(frame, direction)

         # If we received a TTSSpeakFrame and the LLM response included text (it
@@ -164,6 +247,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
             await self.resume_processing_frames()

     async def _connect(self):
+        """Connect to Neuphonic WebSocket and start background tasks."""
         await self._connect_websocket()

         if self._websocket and not self._receive_task:
@@ -173,6 +257,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
             self._keepalive_task = self.create_task(self._keepalive_task_handler())

     async def _disconnect(self):
+        """Disconnect from Neuphonic WebSocket and clean up tasks."""
         if self._receive_task:
             await self.cancel_task(self._receive_task)
             self._receive_task = None
@@ -184,8 +269,9 @@ class NeuphonicTTSService(InterruptibleTTSService):
         await self._disconnect_websocket()

     async def _connect_websocket(self):
+        """Establish WebSocket connection to Neuphonic API."""
         try:
-            if self._websocket and self._websocket.
+            if self._websocket and self._websocket.state is State.OPEN:
                 return

             logger.debug("Connecting to Neuphonic")
@@ -195,20 +281,25 @@ class NeuphonicTTSService(InterruptibleTTSService):
                 "voice_id": self._voice_id,
             }

-            query_params = [
+            query_params = []
             for key, value in tts_config.items():
                 if value is not None:
                     query_params.append(f"{key}={value}")

-            url = f"{self._url}/speak/{self._settings['lang_code']}
+            url = f"{self._url}/speak/{self._settings['lang_code']}"
+            if query_params:
+                url += f"?{'&'.join(query_params)}"

-
+            headers = {"x-api-key": self._api_key}
+
+            self._websocket = await websocket_connect(url, additional_headers=headers)
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
             await self._call_event_handler("on_connection_error", f"{e}")

     async def _disconnect_websocket(self):
+        """Close WebSocket connection and clean up state."""
         try:
             await self.stop_all_metrics()

@@ -222,10 +313,11 @@ class NeuphonicTTSService(InterruptibleTTSService):
             self._websocket = None

     async def _receive_messages(self):
-
+        """Receive and process messages from Neuphonic WebSocket."""
+        async for message in self._websocket:
             if isinstance(message, str):
                 msg = json.loads(message)
-                if msg.get("data"
+                if msg.get("data") and msg["data"].get("audio"):
                     await self.stop_ttfb_metrics()

                     audio = base64.b64decode(msg["data"]["audio"])
@@ -233,24 +325,40 @@ class NeuphonicTTSService(InterruptibleTTSService):
                     await self.push_frame(frame)

     async def _keepalive_task_handler(self):
-
+        """Handle keepalive messages to maintain WebSocket connection."""
+        KEEPALIVE_SLEEP = 10
         while True:
-            self.reset_watchdog()
             await asyncio.sleep(KEEPALIVE_SLEEP)
-            await self.
+            await self._send_keepalive()
+
+    async def _send_keepalive(self):
+        """Send keepalive message to maintain connection."""
+        if self._websocket:
+            # Send empty text for keepalive
+            msg = {"text": ""}
+            await self._websocket.send(json.dumps(msg))

     async def _send_text(self, text: str):
+        """Send text to Neuphonic WebSocket for synthesis."""
         if self._websocket:
-            msg = {"text": text}
+            msg = {"text": f"{text} <STOP>"}
             logger.debug(f"Sending text to websocket: {msg}")
             await self._websocket.send(json.dumps(msg))

     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Neuphonic's streaming API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"Generating TTS: [{text}]")

         try:
-            if not self._websocket or self._websocket.
+            if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()

             try:
@@ -274,19 +382,21 @@ class NeuphonicTTSService(InterruptibleTTSService):


 class NeuphonicHttpTTSService(TTSService):
-    """Neuphonic
+    """Neuphonic text-to-speech service using HTTP streaming.

-
-
-
-        url: Base URL for the Neuphonic API (default: "https://api.neuphonic.com")
-        sample_rate: Sample rate for audio output (default: 22050Hz)
-        encoding: Audio encoding format (default: "pcm_linear")
-        params: Additional parameters for TTS generation including language and speed
-        **kwargs: Additional keyword arguments passed to the parent class
+    Provides text-to-speech synthesis using Neuphonic's HTTP API with server-sent
+    events for streaming audio delivery. Suitable for applications that prefer
+    HTTP-based communication over WebSocket connections.
     """

     class InputParams(BaseModel):
+        """Input parameters for Neuphonic HTTP TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+        """
+
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0

@@ -295,66 +405,183 @@ class NeuphonicHttpTTSService(TTSService):
         *,
         api_key: str,
         voice_id: Optional[str] = None,
+        aiohttp_session: aiohttp.ClientSession,
         url: str = "https://api.neuphonic.com",
         sample_rate: Optional[int] = 22050,
-        encoding: str = "pcm_linear",
+        encoding: Optional[str] = "pcm_linear",
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Neuphonic HTTP TTS service.
+
+        Args:
+            api_key: Neuphonic API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            aiohttp_session: Shared aiohttp session for HTTP requests.
+            url: Base URL for the Neuphonic HTTP API.
+            sample_rate: Audio sample rate in Hz. Defaults to 22050.
+            encoding: Audio encoding format. Defaults to "pcm_linear".
+            params: Additional input parameters for TTS configuration.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)

         params = params or NeuphonicHttpTTSService.InputParams()

         self._api_key = api_key
-        self.
-        self.
-
-
-
-            "sampling_rate": sample_rate,
-        }
+        self._session = aiohttp_session
+        self._base_url = url.rstrip("/")
+        self._lang_code = self.language_to_service_language(params.language) or "en"
+        self._speed = params.speed
+        self._encoding = encoding
         self.set_voice(voice_id)

     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Neuphonic HTTP service supports metrics generation.
+        """
         return True

+    def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Neuphonic service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Neuphonic-specific language code, or None if not supported.
+        """
+        return language_to_neuphonic_lang_code(language)
+
     async def start(self, frame: StartFrame):
+        """Start the Neuphonic HTTP TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)

     async def flush_audio(self):
+        """Flush any pending audio synthesis.
+
+        Note:
+            HTTP-based service doesn't require explicit flushing.
+        """
         pass

+    def _parse_sse_message(self, message: str) -> dict | None:
+        """Parse a Server-Sent Event message.
+
+        Args:
+            message: The SSE message to parse.
+
+        Returns:
+            Parsed message dictionary or None if not a data message.
+        """
+        message = message.strip()
+
+        if not message or "data" not in message:
+            return None
+
+        try:
+            # Split on ": " and take the part after "data: "
+            _, data_content = message.split(": ", 1)
+
+            if not data_content or data_content == "[DONE]":
+                return None
+
+            message_dict = json.loads(data_content)
+
+            # Check for errors in the response
+            if message_dict.get("errors") is not None:
+                raise Exception(
+                    f"Neuphonic API error {message_dict.get('status_code', 'unknown')}: {message_dict['errors']}"
+                )
+
+            return message_dict
+        except (ValueError, json.JSONDecodeError) as e:
+            logger.warning(f"Failed to parse SSE message: {e}")
+            return None
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Neuphonic streaming API.

         Args:
-            text: The text to convert to speech
+            text: The text to convert to speech.
+
         Yields:
-
+            Frame: Audio frames containing the synthesized speech and status information.
         """
         logger.debug(f"Generating TTS: [{text}]")

-
+        url = f"{self._base_url}/sse/speak/{self._lang_code}"

-
+        headers = {
+            "X-API-KEY": self._api_key,
+            "Content-Type": "application/json",
+        }
+
+        payload = {
+            "text": text,
+            "lang_code": self._lang_code,
+            "encoding": self._encoding,
+            "sampling_rate": self.sample_rate,
+            "speed": self._speed,
+        }
+
+        if self._voice_id:
+            payload["voice_id"] = self._voice_id

         try:
             await self.start_ttfb_metrics()
-            response = sse.send(text, TTSConfig(**self._settings, voice_id=self._voice_id))

-
-
+            async with self._session.post(url, json=payload, headers=headers) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    error_message = f"Neuphonic API error: HTTP {response.status} - {error_text}"
+                    logger.error(error_message)
+                    yield ErrorFrame(error=error_message)
+                    return

-
-
-
-
-
-
-
+                await self.start_tts_usage_metrics(text)
+                yield TTSStartedFrame()
+
+                # Process SSE stream line by line
+                async for line in response.content:
+                    if not line:
+                        continue
+
+                    message = line.decode("utf-8", errors="ignore")
+                    if not message.strip():
+                        continue
+
+                    try:
+                        parsed_message = self._parse_sse_message(message)
+
+                        if (
+                            parsed_message is not None
+                            and parsed_message.get("data", {}).get("audio") is not None
+                        ):
+                            audio_b64 = parsed_message["data"]["audio"]
+                            audio_bytes = base64.b64decode(audio_b64)
+
+                            await self.stop_ttfb_metrics()
+                            yield TTSAudioRawFrame(audio_bytes, self.sample_rate, 1)
+
+                    except Exception as e:
+                        logger.error(f"Error processing SSE message: {e}")
+                        # Don't yield error frame for individual message failures
+                        continue
+
+        except asyncio.CancelledError:
+            logger.debug("TTS generation cancelled")
+            raise
         except Exception as e:
-            logger.
-            yield ErrorFrame(error=str(e))
+            logger.exception(f"Error in run_tts: {e}")
+            yield ErrorFrame(error=f"Neuphonic TTS error: {str(e)}")
         finally:
+            await self.stop_ttfb_metrics()
             yield TTSStoppedFrame()
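For context on the constructor hunk above: `aiohttp_session` is now a required argument of `NeuphonicHttpTTSService`. A minimal usage sketch (not part of the diff), based only on the signature shown in the hunk; the helper name `make_neuphonic_http_tts`, the module path taken from the file list, and the inline session creation are illustrative assumptions:

import aiohttp

from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService


async def make_neuphonic_http_tts(api_key: str) -> NeuphonicHttpTTSService:
    # The caller now owns the HTTP session and passes it in via the new
    # required `aiohttp_session` parameter introduced in this diff.
    session = aiohttp.ClientSession()
    return NeuphonicHttpTTSService(
        api_key=api_key,
        aiohttp_session=session,
        sample_rate=22050,       # defaults shown in the diff
        encoding="pcm_linear",
    )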
pipecat/services/nim/llm.py
CHANGED

@@ -21,12 +21,6 @@ class NimLLMService(OpenAILLMService):
     This service extends OpenAILLMService to work with NVIDIA's NIM API while maintaining
     compatibility with the OpenAI-style interface. It specifically handles the difference
     in token usage reporting between NIM (incremental) and OpenAI (final summary).
-
-    Args:
-        api_key: The API key for accessing NVIDIA's NIM API.
-        base_url: The base URL for NIM API. Defaults to "https://integrate.api.nvidia.com/v1".
-        model: The model identifier to use. Defaults to "nvidia/llama-3.1-nemotron-70b-instruct".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """

     def __init__(
@@ -37,6 +31,14 @@ class NimLLMService(OpenAILLMService):
         model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
         **kwargs,
     ):
+        """Initialize the NimLLMService.
+
+        Args:
+            api_key: The API key for accessing NVIDIA's NIM API.
+            base_url: The base URL for NIM API. Defaults to "https://integrate.api.nvidia.com/v1".
+            model: The model identifier to use. Defaults to "nvidia/llama-3.1-nemotron-70b-instruct".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
         # Counters for accumulating token usage metrics
         self._prompt_tokens = 0

pipecat/services/ollama/llm.py
CHANGED

@@ -6,6 +6,8 @@

 """OLLama LLM service implementation for Pipecat AI framework."""

+from loguru import logger
+
 from pipecat.services.openai.llm import OpenAILLMService


@@ -14,12 +16,30 @@ class OLLamaLLMService(OpenAILLMService):

     This service extends OpenAILLMService to work with locally hosted OLLama models,
     providing a compatible interface for running large language models locally.
-
-    Args:
-        model: The OLLama model to use. Defaults to "llama2".
-        base_url: The base URL for the OLLama API endpoint.
-            Defaults to "http://localhost:11434/v1".
     """

-    def __init__(
-
+    def __init__(
+        self, *, model: str = "llama2", base_url: str = "http://localhost:11434/v1", **kwargs
+    ):
+        """Initialize OLLama LLM service.
+
+        Args:
+            model: The OLLama model to use. Defaults to "llama2".
+            base_url: The base URL for the OLLama API endpoint.
+                Defaults to "http://localhost:11434/v1".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
+        super().__init__(model=model, base_url=base_url, api_key="ollama", **kwargs)
+
+    def create_client(self, base_url=None, **kwargs):
+        """Create OpenAI-compatible client for Ollama.
+
+        Args:
+            base_url: The base URL for the API. If None, uses instance base_url.
+            **kwargs: Additional keyword arguments passed to the parent create_client method.
+
+        Returns:
+            An OpenAI-compatible client configured for Ollama.
+        """
+        logger.debug(f"Creating Ollama client with api {base_url}")
+        return super().create_client(base_url=base_url, **kwargs)