dv-pipecat-ai 0.0.74.dev770-py3-none-any.whl → 0.0.82.dev776-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py (new file)
@@ -0,0 +1,707 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Voicemail detection module for Pipecat.

This module provides voicemail detection capabilities using parallel pipeline
processing to classify incoming calls as either voicemail messages or live
conversations. It's specifically designed for outbound calling scenarios where
a bot needs to determine if a human answered or if the call went to voicemail.

Note:
    The voicemail module is optimized for text LLMs only.
"""

import asyncio
from typing import List, Optional

from loguru import logger

from pipecat.frames.frames import (
    BotInterruptionFrame,
    EndFrame,
    Frame,
    LLMFullResponseEndFrame,
    LLMFullResponseStartFrame,
    LLMTextFrame,
    StopFrame,
    SystemFrame,
    TTSAudioRawFrame,
    TTSStartedFrame,
    TTSStoppedFrame,
    TTSTextFrame,
    UserStartedSpeakingFrame,
    UserStoppedSpeakingFrame,
)
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup
from pipecat.services.llm_service import LLMService
from pipecat.sync.base_notifier import BaseNotifier
from pipecat.sync.event_notifier import EventNotifier


class NotifierGate(FrameProcessor):
    """Base gate processor that controls frame flow based on notifier signals.

    This base class provides common gate functionality for processors that need to
    start open and close permanently when a notifier signals. Subclasses define
    which frames are allowed through when the gate is closed.

    The gate starts open to allow initial processing and closes permanently once
    the notifier signals. This ensures controlled frame flow based on external
    decisions or events.
    """

    def __init__(self, notifier: BaseNotifier, task_name: str = "gate"):
        """Initialize the notifier gate.

        Args:
            notifier: Notifier that signals when the gate should close.
            task_name: Name for the notification waiting task (for debugging).
        """
        super().__init__()
        self._notifier = notifier
        self._task_name = task_name
        self._gate_opened = True
        self._gate_task: Optional[asyncio.Task] = None

    async def setup(self, setup: FrameProcessorSetup):
        """Set up the processor with required components.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)
        self._gate_task = self.create_task(self._wait_for_notification())

    async def cleanup(self):
        """Clean up the processor resources."""
        await super().cleanup()
        if self._gate_task:
            await self.cancel_task(self._gate_task)
            self._gate_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and control gate state based on notifier signals.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        # Gate logic: open gate allows all frames, closed gate filters frames
        if self._gate_opened:
            await self.push_frame(frame, direction)
        elif isinstance(frame, (SystemFrame, EndFrame, StopFrame)):
            await self.push_frame(frame, direction)

    async def _wait_for_notification(self):
        """Wait for the notifier signal and close the gate.

        This method blocks until the notifier signals, then closes the gate
        permanently to change frame filtering behavior.
        """
        await self._notifier.wait()

        if self._gate_opened:
            self._gate_opened = False


class ClassifierGate(NotifierGate):
    """Gate processor that controls frame flow based on classification decisions.

    Inherits from NotifierGate and starts open to allow initial classification
    processing. Closes permanently once a classification decision is made
    (CONVERSATION or VOICEMAIL). This ensures the classifier only runs until a
    definitive decision is reached, preventing unnecessary LLM calls and maintaining
    system efficiency.

    When closed, only allows system frames and user speaking frames to continue.
    Speaking frames are needed for voicemail timing control, but not for conversation.
    """

    def __init__(self, gate_notifier: BaseNotifier, conversation_notifier: BaseNotifier):
        """Initialize the classifier gate.

        Args:
            gate_notifier: Notifier that signals when a classification decision has
                been made and the gate should close.
            conversation_notifier: Notifier that signals when conversation is detected.
        """
        super().__init__(gate_notifier, task_name="classifier_gate")
        self._conversation_notifier = conversation_notifier
        self._conversation_detected = False
        self._conversation_task: Optional[asyncio.Task] = None

    async def setup(self, setup: FrameProcessorSetup):
        """Set up the processor with required components.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)
        self._conversation_task = self.create_task(self._wait_for_conversation())

    async def cleanup(self):
        """Clean up the processor resources."""
        await super().cleanup()
        if self._conversation_task:
            await self.cancel_task(self._conversation_task)
            self._conversation_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and control gate state based on notifier signals.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await FrameProcessor.process_frame(self, frame, direction)

        # Gate logic: open gate allows all frames, closed gate filters frames
        if self._gate_opened:
            await self.push_frame(frame, direction)
        elif isinstance(frame, (UserStartedSpeakingFrame, UserStoppedSpeakingFrame)):
            # Only allow speaking frames if conversation was NOT detected (i.e., the
            # voicemail case, where they drive voicemail timing). Suppressing them in
            # the conversation case prevents the UserContextAggregator from issuing a
            # warning about having no aggregation to push.
            if not self._conversation_detected:
                await self.push_frame(frame, direction)
        elif isinstance(frame, (SystemFrame, EndFrame, StopFrame)):
            # Always allow other system frames through
            await self.push_frame(frame, direction)

    async def _wait_for_conversation(self):
        """Wait for the conversation detection notification and mark conversation detected."""
        await self._conversation_notifier.wait()
        self._conversation_detected = True


class ConversationGate(NotifierGate):
    """Gate processor that blocks conversation flow when voicemail is detected.

    Inherits from NotifierGate and starts open to allow normal conversation
    processing. Closes permanently when voicemail is detected to prevent the
    main conversation LLM from processing additional input after voicemail
    classification.

    When closed, only allows system frames and user speaking frames to continue.
    """

    def __init__(self, voicemail_notifier: BaseNotifier):
        """Initialize the conversation gate.

        Args:
            voicemail_notifier: Notifier that signals when voicemail has been
                detected and the conversation should be blocked.
        """
        super().__init__(voicemail_notifier, task_name="conversation_gate")


class ClassificationProcessor(FrameProcessor):
    """Processor that handles LLM classification responses and triggers events.

    This processor aggregates LLM text tokens into complete responses and analyzes
    them to determine if the call reached a voicemail system or a live person.
    It uses the LLM response frame delimiters (LLMFullResponseStartFrame and
    LLMFullResponseEndFrame) to ensure complete token aggregation regardless
    of how the LLM tokenizes the response words.

    The processor expects responses containing either "CONVERSATION" (indicating
    a human answered) or "VOICEMAIL" (indicating an automated system). Once a
    decision is made, it triggers the appropriate notifications and event handlers.

    For voicemail detection, the event handler timer starts immediately and is
    cancelled and restarted based on user speech patterns to ensure proper timing.
    """

    def __init__(
        self,
        *,
        gate_notifier: BaseNotifier,
        conversation_notifier: BaseNotifier,
        voicemail_notifier: BaseNotifier,
        voicemail_response_delay: float,
    ):
        """Initialize the classification processor.

        Args:
            gate_notifier: Notifier to signal the ClassifierGate about classification
                decisions so it can close and stop processing.
            conversation_notifier: Notifier to signal the TTSGate to release
                all gated TTS frames for normal conversation flow.
            voicemail_notifier: Notifier to signal the TTSGate to clear
                gated TTS frames since voicemail was detected.
            voicemail_response_delay: Delay in seconds after the user stops speaking
                before triggering the voicemail event handler. This ensures the voicemail
                greeting or user message is complete before responding.
        """
        super().__init__()
        self._gate_notifier = gate_notifier
        self._conversation_notifier = conversation_notifier
        self._voicemail_notifier = voicemail_notifier
        self._voicemail_response_delay = voicemail_response_delay

        # Register the voicemail detected event
        self._register_event_handler("on_voicemail_detected")

        # Aggregation state for collecting complete LLM responses
        self._processing_response = False
        self._response_buffer = ""
        self._decision_made = False

        # Voicemail timing state
        self._voicemail_detected = False
        self._voicemail_task: Optional[asyncio.Task] = None
        self._voicemail_event = asyncio.Event()
        self._voicemail_event.set()

    async def setup(self, setup: FrameProcessorSetup):
        """Set up the processor with required components.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)
        self._voicemail_task = self.create_task(self._delayed_voicemail_handler())

    async def cleanup(self):
        """Clean up the processor resources."""
        await super().cleanup()
        if self._voicemail_task:
            await self.cancel_task(self._voicemail_task)
            self._voicemail_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and handle LLM classification responses.

        This method implements a state machine for aggregating LLM responses:

        1. LLMFullResponseStartFrame: Begin collecting tokens
        2. LLMTextFrame: Accumulate text tokens into buffer
        3. LLMFullResponseEndFrame: Process complete response and make decision
        4. UserStartedSpeakingFrame/UserStoppedSpeakingFrame: Manage voicemail timing

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMFullResponseStartFrame):
            # Begin aggregating a new LLM response
            self._processing_response = True
            self._response_buffer = ""

        elif isinstance(frame, LLMFullResponseEndFrame):
            # Complete response received - make classification decision
            if self._processing_response and not self._decision_made:
                await self._process_classification(self._response_buffer.strip())
            self._processing_response = False
            self._response_buffer = ""

        elif isinstance(frame, LLMTextFrame) and self._processing_response:
            # Accumulate text tokens from the streaming LLM response
            self._response_buffer += frame.text

        elif isinstance(frame, UserStartedSpeakingFrame):
            # User started speaking - set the voicemail event to pause the delay timer
            if self._voicemail_detected:
                self._voicemail_event.set()

        elif isinstance(frame, UserStoppedSpeakingFrame):
            # User stopped speaking - clear the voicemail event to restart the delay timer
            if self._voicemail_detected:
                self._voicemail_event.clear()

        else:
            # Pass all non-LLM frames through
            # Blocking LLM frames prevents interference with the downstream LLM
            await self.push_frame(frame, direction)

    async def _process_classification(self, full_response: str):
        """Process the complete LLM classification response and trigger actions.

        Analyzes the aggregated response text to determine if it contains
        "CONVERSATION" or "VOICEMAIL" and triggers the appropriate notifications
        and callbacks based on the classification result.

        Args:
            full_response: The complete aggregated response text from the LLM.
        """
        if self._decision_made:
            return

        response = full_response.upper()
        logger.debug(f"{self}: Classifying response: '{full_response}'")

        if "CONVERSATION" in response:
            # Human answered - continue normal conversation flow
            self._decision_made = True
            logger.info(f"{self}: CONVERSATION detected")
            await self._gate_notifier.notify()  # Close the classifier gate
            await self._conversation_notifier.notify()  # Release buffered TTS frames

        elif "VOICEMAIL" in response:
            # Voicemail detected - trigger voicemail handling
            self._decision_made = True
            self._voicemail_detected = True
            logger.info(f"{self}: VOICEMAIL detected")
            await self._gate_notifier.notify()  # Close the classifier gate
            await self._voicemail_notifier.notify()  # Clear buffered TTS frames

            # Interrupt the current pipeline to stop any ongoing processing
            await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)

            # Clear the voicemail event so the delay timer runs and eventually
            # triggers the voicemail handler
            self._voicemail_event.clear()

        else:
            # This can happen if the LLM is interrupted before completing the response
            logger.debug(f"{self}: No classification found: '{full_response}'")

    async def _delayed_voicemail_handler(self):
        """Execute the voicemail event handler after the configured delay.

        This method waits for the specified delay period, then triggers the
        developer's voicemail event handler. The timer is effectively restarted
        whenever the user speaks again, so the response lands after the voicemail
        greeting has finished.
        """
        while True:
            try:
                await asyncio.wait_for(
                    self._voicemail_event.wait(), timeout=self._voicemail_response_delay
                )
                await asyncio.sleep(0.1)
            except asyncio.TimeoutError:
                await self._call_event_handler("on_voicemail_detected")
                break


class TTSGate(FrameProcessor):
    """Gates TTS frames until the voicemail classification decision is made.

    This processor holds TTS output frames in a gate while the voicemail
    classification is in progress. This prevents audio from being played
    to the caller before determining if they're human or a voicemail system.

    The gate operates in two modes based on the classification result:

    - CONVERSATION: Opens the gate to release all held frames for normal dialogue
    - VOICEMAIL: Clears held frames since they're not needed for voicemail

    The gating only applies to TTS-related frames (TTSStartedFrame, TTSStoppedFrame,
    TTSTextFrame, TTSAudioRawFrame). All other frames pass through immediately to
    maintain proper pipeline flow.
    """

    def __init__(self, conversation_notifier: BaseNotifier, voicemail_notifier: BaseNotifier):
        """Initialize the TTS gate.

        Args:
            conversation_notifier: Notifier that signals when a conversation is
                detected and gated frames should be released for playback.
            voicemail_notifier: Notifier that signals when voicemail is detected
                and gated frames should be cleared (not played).
        """
        super().__init__()
        self._conversation_notifier = conversation_notifier
        self._voicemail_notifier = voicemail_notifier
        self._frame_buffer: List[tuple[Frame, FrameDirection]] = []
        self._gating_active = True
        self._conversation_task: Optional[asyncio.Task] = None
        self._voicemail_task: Optional[asyncio.Task] = None

    async def setup(self, setup: FrameProcessorSetup):
        """Set up the processor with required components.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)

        self._conversation_task = self.create_task(self._wait_for_conversation())
        self._voicemail_task = self.create_task(self._wait_for_voicemail())

    async def cleanup(self):
        """Clean up the processor resources."""
        await super().cleanup()
        if self._conversation_task:
            await self.cancel_task(self._conversation_task)
            self._conversation_task = None
        if self._voicemail_task:
            await self.cancel_task(self._voicemail_task)
            self._voicemail_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and handle gating logic based on frame type.

        TTS frames are gated while classification is active. All other frames
        pass through immediately. The gating state is controlled by the
        classification notifications.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        # Core gating logic: hold TTS frames, pass everything else through
        if self._gating_active and isinstance(
            frame, (TTSStartedFrame, TTSStoppedFrame, TTSTextFrame, TTSAudioRawFrame)
        ):
            # Gate TTS frames while waiting for the classification decision
            self._frame_buffer.append((frame, direction))
        else:
            # Pass through all non-TTS frames immediately
            await self.push_frame(frame, direction)

    async def _wait_for_conversation(self):
        """Wait for the conversation detection notification and release gated frames.

        When a conversation is detected, all gated TTS frames are released
        in order to continue normal dialogue flow. This allows the bot to
        respond naturally to the human caller.
        """
        await self._conversation_notifier.wait()

        # Release all gated frames in original order
        self._gating_active = False
        for frame, direction in self._frame_buffer:
            await self.push_frame(frame, direction)
        self._frame_buffer.clear()

    async def _wait_for_voicemail(self):
        """Wait for the voicemail detection notification and clear gated frames.

        When voicemail is detected, all gated TTS frames are discarded
        since they were intended for human conversation and are not appropriate
        for voicemail systems. The developer event handlers will handle
        voicemail-specific audio output.
        """
        await self._voicemail_notifier.wait()

        # Clear gated frames without playing them
        self._gating_active = False
        self._frame_buffer.clear()


class VoicemailDetector(ParallelPipeline):
    """Parallel pipeline for detecting voicemail vs. live conversation in outbound calls.

    This detector uses a parallel pipeline architecture to perform real-time
    classification of outbound phone calls without interrupting the conversation
    flow. It determines whether a human answered the phone or if the call went
    to a voicemail system.

    Architecture:

    - Conversation branch: Pass-through gate that blocks frames once voicemail is detected
    - Classification branch: Contains the LLM classifier and decision logic

    The system uses a gate mechanism to control when classification runs and
    a gating system to prevent TTS output until classification is complete.
    Once a decision is made, the appropriate action is taken:

    - CONVERSATION: Continue normal bot dialogue
    - VOICEMAIL: Trigger developer event handler for custom voicemail handling

    Example::

        classification_llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
        detector = VoicemailDetector(llm=classification_llm)

        @detector.event_handler("on_voicemail_detected")
        async def handle_voicemail(processor):
            await processor.push_frame(TTSSpeakFrame("Please leave a message."))

        pipeline = Pipeline([
            transport.input(),
            stt,
            detector.detector(),  # Classification
            context_aggregator.user(),
            llm,
            tts,
            detector.gate(),  # TTS gating
            transport.output(),
            context_aggregator.assistant(),
        ])

        # For custom prompts, append the required response instruction:
        custom_prompt = "Your custom classification logic here. " + VoicemailDetector.CLASSIFIER_RESPONSE_INSTRUCTION

    Events:
        on_voicemail_detected: Triggered when voicemail is detected after the configured
            delay. The event handler receives one argument: the ClassificationProcessor
            instance, which can be used to push frames.

    Constants:
        CLASSIFIER_RESPONSE_INSTRUCTION: The exact text that must be included in custom
            system prompts to ensure proper classification functionality.
    """

    CLASSIFIER_RESPONSE_INSTRUCTION = 'Respond with ONLY "CONVERSATION" if a person answered, or "VOICEMAIL" if it\'s voicemail/recording.'

    DEFAULT_SYSTEM_PROMPT = (
        """You are a voicemail detection classifier for an OUTBOUND calling system. A bot has called a phone number and you need to determine if a human answered or if the call went to voicemail based on the provided text.

HUMAN ANSWERED - LIVE CONVERSATION (respond "CONVERSATION"):
- Personal greetings: "Hello?", "Hi", "Yeah?", "John speaking"
- Interactive responses: "Who is this?", "What do you want?", "Can I help you?"
- Conversational tone expecting back-and-forth dialogue
- Questions directed at the caller: "Hello? Anyone there?"
- Informal responses: "Yep", "What's up?", "Speaking"
- Natural, spontaneous speech patterns
- Immediate acknowledgment of the call

VOICEMAIL SYSTEM (respond "VOICEMAIL"):
- Automated voicemail greetings: "Hi, you've reached [name], please leave a message"
- Phone carrier messages: "The number you have dialed is not in service", "Please leave a message", "All circuits are busy"
- Professional voicemail: "This is [name], I'm not available right now"
- Instructions about leaving messages: "leave a message", "leave your name and number"
- References to callback or messaging: "call me back", "I'll get back to you"
- Carrier system messages: "mailbox is full", "has not been set up"
- Business hours messages: "our office is currently closed"

"""
        + CLASSIFIER_RESPONSE_INSTRUCTION
    )

    def __init__(
        self,
        *,
        llm: LLMService,
        voicemail_response_delay: float = 2.0,
        custom_system_prompt: Optional[str] = None,
    ):
        """Initialize the voicemail detector with classification and buffering components.

        Args:
            llm: LLM service used for voicemail vs. conversation classification.
                Should be fast and reliable for real-time classification.
            voicemail_response_delay: Delay in seconds after the user stops speaking
                before triggering the voicemail event handler. This allows voicemail
                responses to be played back after a short delay to ensure the response
                occurs during the voicemail recording. Default is 2.0 seconds.
            custom_system_prompt: Optional custom system prompt for classification. If None,
                uses the default prompt optimized for outbound calling scenarios.
                Custom prompts should instruct the LLM to respond with exactly
                "CONVERSATION" or "VOICEMAIL" for proper detection functionality.
        """
        self._classifier_llm = llm
        self._prompt = (
            custom_system_prompt if custom_system_prompt is not None else self.DEFAULT_SYSTEM_PROMPT
        )
        self._voicemail_response_delay = voicemail_response_delay

        # Validate custom prompts to ensure they work with the detection logic
        if custom_system_prompt is not None:
            self._validate_prompt(custom_system_prompt)

        # Set up the LLM context with the classification prompt
        self._messages = [
            {
                "role": "system",
                "content": self._prompt,
            },
        ]

        # Create the LLM context and aggregators for conversation management
        self._context = OpenAILLMContext(self._messages)
        self._context_aggregator = llm.create_context_aggregator(self._context)

        # Create notification system for coordinating between components
        self._gate_notifier = EventNotifier()  # Signals classification completion
        self._conversation_notifier = EventNotifier()  # Signals conversation detected
        self._voicemail_notifier = EventNotifier()  # Signals voicemail detected

        # Create the processor components
        self._classifier_gate = ClassifierGate(self._gate_notifier, self._conversation_notifier)
        self._conversation_gate = ConversationGate(self._voicemail_notifier)
        self._classification_processor = ClassificationProcessor(
            gate_notifier=self._gate_notifier,
            conversation_notifier=self._conversation_notifier,
            voicemail_notifier=self._voicemail_notifier,
            voicemail_response_delay=voicemail_response_delay,
        )
        self._voicemail_gate = TTSGate(self._conversation_notifier, self._voicemail_notifier)

        # Initialize the parallel pipeline with conversation and classifier branches
        super().__init__(
            # Conversation branch: gate that blocks frames after voicemail detection
            [self._conversation_gate],
            # Classification branch: gate -> context -> LLM -> processor -> context
            [
                self._classifier_gate,
                self._context_aggregator.user(),
                self._classifier_llm,
                self._classification_processor,
                self._context_aggregator.assistant(),
            ],
        )

        # Register the voicemail detected event after super().__init__()
        self._register_event_handler("on_voicemail_detected")

    def _validate_prompt(self, prompt: str) -> None:
        """Validate that a custom prompt contains the required response format instructions.

        Custom prompts must instruct the LLM to respond with exactly "CONVERSATION"
        or "VOICEMAIL" for the detection logic to work properly. This method
        checks for the presence of these keywords and warns if they're missing.

        Args:
            prompt: The custom system prompt to validate.
        """
        has_conversation = "CONVERSATION" in prompt
        has_voicemail = "VOICEMAIL" in prompt

        if not has_conversation or not has_voicemail:
            logger.warning(
                "Custom system prompt should instruct the LLM to respond with exactly "
                '"CONVERSATION" or "VOICEMAIL" for proper detection functionality. '
                "Consider appending VoicemailDetector.CLASSIFIER_RESPONSE_INSTRUCTION to your prompt: "
                f'"{self.CLASSIFIER_RESPONSE_INSTRUCTION}"'
            )

    def detector(self) -> "VoicemailDetector":
        """Get the detector pipeline for placement after STT in the main pipeline.

        This should be placed after the STT service and before the context
        aggregator in your main pipeline to enable voicemail classification.

        Returns:
            The VoicemailDetector instance itself (which is a ParallelPipeline).
        """
        return self

    def gate(self) -> TTSGate:
        """Get the gate processor for placement after TTS in the main pipeline.

        This should be placed after the TTS service and before the transport
        output to enable TTS frame gating during classification.

        Returns:
            The TTSGate processor instance.
        """
        return self._voicemail_gate

    def add_event_handler(self, event_name: str, handler):
        """Add an event handler for voicemail detection events.

        Args:
            event_name: The name of the event to handle.
            handler: The function to call when the event occurs.
        """
        if event_name == "on_voicemail_detected":
            self._classification_processor.add_event_handler(event_name, handler)
        else:
            super().add_event_handler(event_name, handler)