dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic; see the registry advisory for details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -0,0 +1,707 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Voicemail detection module for Pipecat.
8
+
9
+ This module provides voicemail detection capabilities using parallel pipeline
10
+ processing to classify incoming calls as either voicemail messages or live
11
+ conversations. It's specifically designed for outbound calling scenarios where
12
+ a bot needs to determine if a human answered or if the call went to voicemail.
13
+
14
+ Note:
15
+ The voicemail module is optimized for text LLMs only.
16
+ """
17
+
18
+ import asyncio
19
+ from typing import List, Optional
20
+
21
+ from loguru import logger
22
+
23
+ from pipecat.frames.frames import (
24
+ BotInterruptionFrame,
25
+ EndFrame,
26
+ Frame,
27
+ LLMFullResponseEndFrame,
28
+ LLMFullResponseStartFrame,
29
+ LLMTextFrame,
30
+ StopFrame,
31
+ SystemFrame,
32
+ TTSAudioRawFrame,
33
+ TTSStartedFrame,
34
+ TTSStoppedFrame,
35
+ TTSTextFrame,
36
+ UserStartedSpeakingFrame,
37
+ UserStoppedSpeakingFrame,
38
+ )
39
+ from pipecat.pipeline.parallel_pipeline import ParallelPipeline
40
+ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
41
+ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup
42
+ from pipecat.services.llm_service import LLMService
43
+ from pipecat.sync.base_notifier import BaseNotifier
44
+ from pipecat.sync.event_notifier import EventNotifier
45
+
46
+
47
class NotifierGate(FrameProcessor):
    """Frame gate driven by an external notifier.

    The gate begins open and forwards every frame it receives. A background
    task waits on the supplied notifier; once the notifier fires, the gate
    latches shut permanently and only lifecycle frames (system/end/stop)
    are allowed through. Subclasses refine the closed-gate filtering.
    """

    def __init__(self, notifier: BaseNotifier, task_name: str = "gate"):
        """Create the gate.

        Args:
            notifier: External signal source; one notification closes the gate.
            task_name: Debug-friendly label for the background waiter task.
        """
        super().__init__()
        self._notifier = notifier
        self._task_name = task_name
        self._gate_opened = True
        self._gate_task: Optional[asyncio.Task] = None

    async def setup(self, setup: FrameProcessorSetup):
        """Start the background task that waits for the close signal.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)
        self._gate_task = self.create_task(self._wait_for_notification())

    async def cleanup(self):
        """Cancel the background waiter task and release resources."""
        await super().cleanup()
        if self._gate_task:
            await self.cancel_task(self._gate_task)
            self._gate_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Forward frames according to the current gate state.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        # Open gate: everything passes. Closed gate: only lifecycle frames.
        allowed = self._gate_opened or isinstance(frame, (SystemFrame, EndFrame, StopFrame))
        if allowed:
            await self.push_frame(frame, direction)

    async def _wait_for_notification(self):
        """Block on the notifier, then latch the gate shut for good."""
        await self._notifier.wait()
        # The gate never reopens; assigning False unconditionally is idempotent.
        self._gate_opened = False
118
class ClassifierGate(NotifierGate):
    """Gate that feeds the classification branch until a verdict is reached.

    Starts open so the classifier LLM can observe the start of the call.
    Once a classification verdict (CONVERSATION or VOICEMAIL) is signalled
    through the gate notifier, the gate latches shut so no further
    classification LLM calls are made.

    While shut, system/end/stop frames always pass. User speaking frames
    pass only when conversation was NOT detected (the voicemail case, where
    they drive response timing); otherwise they are dropped so the idle
    classifier-side context aggregator does not warn about empty aggregation.
    """

    def __init__(self, gate_notifier: BaseNotifier, conversation_notifier: BaseNotifier):
        """Create the classifier gate.

        Args:
            gate_notifier: Fires once a classification verdict exists; closes
                the gate.
            conversation_notifier: Fires when the verdict is CONVERSATION.
        """
        super().__init__(gate_notifier, task_name="classifier_gate")
        self._conversation_notifier = conversation_notifier
        self._conversation_detected = False
        self._conversation_task: Optional[asyncio.Task] = None

    async def setup(self, setup: FrameProcessorSetup):
        """Start the background task tracking conversation detection.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)
        self._conversation_task = self.create_task(self._wait_for_conversation())

    async def cleanup(self):
        """Cancel the conversation-waiter task and release resources."""
        await super().cleanup()
        if self._conversation_task:
            await self.cancel_task(self._conversation_task)
            self._conversation_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Forward frames according to gate state and classification result.

        Deliberately invokes FrameProcessor.process_frame (bypassing the
        NotifierGate implementation) so this class fully owns the
        closed-gate filtering rules.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await FrameProcessor.process_frame(self, frame, direction)

        if self._gate_opened:
            await self.push_frame(frame, direction)
            return

        if isinstance(frame, (UserStartedSpeakingFrame, UserStoppedSpeakingFrame)):
            # Speaking frames are needed for voicemail timing only; dropping
            # them in the conversation case avoids downstream aggregator
            # warnings about having nothing to aggregate.
            if not self._conversation_detected:
                await self.push_frame(frame, direction)
        elif isinstance(frame, (SystemFrame, EndFrame, StopFrame)):
            # Lifecycle/system frames always flow through.
            await self.push_frame(frame, direction)

    async def _wait_for_conversation(self):
        """Latch the conversation flag when the conversation notifier fires."""
        await self._conversation_notifier.wait()
        self._conversation_detected = True
190
class ConversationGate(NotifierGate):
    """Gate that shuts off the conversation branch once voicemail is detected.

    Begins open so the main conversation pipeline operates normally. When the
    voicemail notifier fires, the gate closes permanently, preventing the
    conversation LLM from processing any further input. Closed-gate filtering
    (system/end/stop frames only) is inherited from NotifierGate.
    """

    def __init__(self, voicemail_notifier: BaseNotifier):
        """Create the conversation gate.

        Args:
            voicemail_notifier: Fires when voicemail is detected, closing the
                gate and blocking the conversation branch.
        """
        super().__init__(voicemail_notifier, task_name="conversation_gate")
211
class ClassificationProcessor(FrameProcessor):
    """Processor that handles LLM classification responses and triggers events.

    Aggregates streamed LLM text tokens (between LLMFullResponseStartFrame and
    LLMFullResponseEndFrame) into one complete response, then inspects it for
    the keywords "CONVERSATION" (a human answered) or "VOICEMAIL" (an automated
    system answered). On a verdict it notifies the surrounding gates; for
    voicemail it additionally schedules the ``on_voicemail_detected`` event
    handler to fire after a quiet period following user speech.

    The quiet-period timer is implemented with an asyncio.Event whose polarity
    is: event SET = timer disarmed, event CLEARED = countdown running (see
    ``_delayed_voicemail_handler``).
    """

    def __init__(
        self,
        *,
        gate_notifier: BaseNotifier,
        conversation_notifier: BaseNotifier,
        voicemail_notifier: BaseNotifier,
        voicemail_response_delay: float,
    ):
        """Initialize the voicemail processor.

        Args:
            gate_notifier: Notifier to signal the ClassifierGate about
                classification decisions so it can close and stop processing.
            conversation_notifier: Notifier to signal the TTSGate to release
                all gated TTS frames for normal conversation flow.
            voicemail_notifier: Notifier to signal the TTSGate to clear gated
                TTS frames since voicemail was detected.
            voicemail_response_delay: Delay in seconds after the user stops
                speaking before triggering the voicemail event handler, so the
                voicemail greeting finishes before the bot responds.
        """
        super().__init__()
        self._gate_notifier = gate_notifier
        self._conversation_notifier = conversation_notifier
        self._voicemail_notifier = voicemail_notifier
        self._voicemail_response_delay = voicemail_response_delay

        # Register the voicemail detected event
        self._register_event_handler("on_voicemail_detected")

        # Aggregation state for collecting complete LLM responses
        self._processing_response = False
        self._response_buffer = ""
        self._decision_made = False

        # Voicemail timing state. The event starts SET so the delay countdown
        # is disarmed until voicemail is detected (detection clears it).
        self._voicemail_detected = False
        self._voicemail_task: Optional[asyncio.Task] = None
        self._voicemail_event = asyncio.Event()
        self._voicemail_event.set()

    async def setup(self, setup: FrameProcessorSetup):
        """Set up the processor and start the delayed-voicemail timer task.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)
        self._voicemail_task = self.create_task(self._delayed_voicemail_handler())

    async def cleanup(self):
        """Clean up the processor resources, cancelling the timer task."""
        await super().cleanup()
        if self._voicemail_task:
            await self.cancel_task(self._voicemail_task)
            self._voicemail_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and handle LLM classification responses.

        State machine:
        1. LLMFullResponseStartFrame: begin collecting tokens
        2. LLMTextFrame: accumulate text tokens into the buffer
        3. LLMFullResponseEndFrame: process the complete response and decide
        4. UserStartedSpeakingFrame/UserStoppedSpeakingFrame: arm/disarm the
           voicemail response timer

        Note: the LLM response frames and the speaking frames are consumed
        here (not forwarded downstream); everything else passes through.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMFullResponseStartFrame):
            # Begin aggregating a new LLM response
            self._processing_response = True
            self._response_buffer = ""

        elif isinstance(frame, LLMFullResponseEndFrame):
            # Complete response received - make classification decision
            if self._processing_response and not self._decision_made:
                await self._process_classification(self._response_buffer.strip())
            self._processing_response = False
            self._response_buffer = ""

        elif isinstance(frame, LLMTextFrame) and self._processing_response:
            # Accumulate text tokens from the streaming LLM response.
            # NOTE(review): an LLMTextFrame arriving outside a start/end pair
            # falls through to the push below — presumed not to occur; confirm.
            self._response_buffer += frame.text

        elif isinstance(frame, UserStartedSpeakingFrame):
            # User resumed speaking - set the event, disarming the countdown
            if self._voicemail_detected:
                self._voicemail_event.set()

        elif isinstance(frame, UserStoppedSpeakingFrame):
            # User went quiet - clear the event, (re)starting the countdown
            if self._voicemail_detected:
                self._voicemail_event.clear()

        else:
            # Pass all non-LLM frames through
            # Blocking LLM frames prevents interference with the downstream LLM
            await self.push_frame(frame, direction)

    async def _process_classification(self, full_response: str):
        """Process the complete LLM classification response and trigger actions.

        Case-insensitive substring match for "CONVERSATION" or "VOICEMAIL";
        the first verdict wins and ``_decision_made`` latches permanently.

        Args:
            full_response: The complete aggregated response text from the LLM.
        """
        if self._decision_made:
            return

        response = full_response.upper()
        logger.debug(f"{self}: Classifying response: '{full_response}'")

        if "CONVERSATION" in response:
            # Human answered - continue normal conversation flow
            self._decision_made = True
            logger.info(f"{self}: CONVERSATION detected")
            await self._gate_notifier.notify()  # Close the classifier gate
            await self._conversation_notifier.notify()  # Release buffered TTS frames

        elif "VOICEMAIL" in response:
            # Voicemail detected - trigger voicemail handling
            self._decision_made = True
            self._voicemail_detected = True
            logger.info(f"{self}: VOICEMAIL detected")
            await self._gate_notifier.notify()  # Close the classifier gate
            await self._voicemail_notifier.notify()  # Clear buffered TTS frames

            # Interrupt the current pipeline to stop any ongoing processing
            await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)

            # Clearing the event arms the delayed voicemail countdown
            self._voicemail_event.clear()

        else:
            # This can happen if the LLM is interrupted before completing the response
            logger.debug(f"{self}: No classification found: '{full_response}'")

    async def _delayed_voicemail_handler(self):
        """Execute the voicemail event handler after the configured delay.

        While the event is SET the loop polls every 0.1s (timer disarmed).
        Once the event is CLEARED, ``wait_for`` blocks; if it stays cleared
        for the full ``voicemail_response_delay`` the wait times out and the
        ``on_voicemail_detected`` handler fires exactly once. The user
        speaking again re-sets the event, restarting the countdown.
        """
        while True:
            try:
                await asyncio.wait_for(
                    self._voicemail_event.wait(), timeout=self._voicemail_response_delay
                )
                await asyncio.sleep(0.1)
            except asyncio.TimeoutError:
                await self._call_event_handler("on_voicemail_detected")
                break
390
class TTSGate(FrameProcessor):
    """Buffers TTS output until the voicemail/conversation verdict arrives.

    While classification is pending, TTS-related frames (TTSStartedFrame,
    TTSStoppedFrame, TTSTextFrame, TTSAudioRawFrame) are held so no audio
    reaches the caller prematurely. All other frames pass through untouched.

    Resolution depends on the verdict:

    - CONVERSATION: every held frame is released, in order, for playback.
    - VOICEMAIL: held frames are discarded; voicemail handling produces its
      own output.
    """

    def __init__(self, conversation_notifier: BaseNotifier, voicemail_notifier: BaseNotifier):
        """Create the TTS gate.

        Args:
            conversation_notifier: Fires when a conversation is detected and
                held frames should be released for playback.
            voicemail_notifier: Fires when voicemail is detected and held
                frames should be dropped without playing.
        """
        super().__init__()
        self._conversation_notifier = conversation_notifier
        self._voicemail_notifier = voicemail_notifier
        self._frame_buffer: List[tuple[Frame, FrameDirection]] = []
        self._gating_active = True
        self._conversation_task: Optional[asyncio.Task] = None
        self._voicemail_task: Optional[asyncio.Task] = None

    async def setup(self, setup: FrameProcessorSetup):
        """Start the background tasks awaiting each possible verdict.

        Args:
            setup: Configuration object containing setup parameters.
        """
        await super().setup(setup)

        self._conversation_task = self.create_task(self._wait_for_conversation())
        self._voicemail_task = self.create_task(self._wait_for_voicemail())

    async def cleanup(self):
        """Cancel both verdict-waiter tasks and release resources."""
        await super().cleanup()
        if self._conversation_task:
            await self.cancel_task(self._conversation_task)
            self._conversation_task = None
        if self._voicemail_task:
            await self.cancel_task(self._voicemail_task)
            self._voicemail_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Hold TTS frames while gating is active; pass everything else.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        is_tts = isinstance(
            frame, (TTSStartedFrame, TTSStoppedFrame, TTSTextFrame, TTSAudioRawFrame)
        )
        if is_tts and self._gating_active:
            # Park TTS output until the classification verdict arrives.
            self._frame_buffer.append((frame, direction))
            return

        # Everything else flows through immediately.
        await self.push_frame(frame, direction)

    async def _wait_for_conversation(self):
        """Release all held frames, in order, once conversation is detected.

        Lets the bot respond naturally to the human caller with the audio
        that was generated while classification was running.
        """
        await self._conversation_notifier.wait()

        # Deactivate gating first so newly arriving frames flow directly.
        self._gating_active = False
        pending, self._frame_buffer = self._frame_buffer, []
        for held_frame, held_direction in pending:
            await self.push_frame(held_frame, held_direction)

    async def _wait_for_voicemail(self):
        """Drop all held frames once voicemail is detected.

        The buffered audio was meant for a human and is inappropriate for a
        voicemail system; the developer's event handlers supply the
        voicemail-specific output instead.
        """
        await self._voicemail_notifier.wait()

        # Discard held frames without ever playing them.
        self._gating_active = False
        self._frame_buffer.clear()
497
class VoicemailDetector(ParallelPipeline):
    """Parallel pipeline for detecting voicemail vs. live conversation in outbound calls.

    This detector uses a parallel pipeline architecture to perform real-time
    classification of outbound phone calls without interrupting the conversation
    flow. It determines whether a human answered the phone or if the call went
    to a voicemail system.

    Architecture:

    - Conversation branch: Empty pipeline that allows normal frame flow
    - Classification branch: Contains the LLM classifier and decision logic

    The system uses a gate mechanism to control when classification runs and
    a gating system to prevent TTS output until classification is complete.
    Once a decision is made, the appropriate action is taken:

    - CONVERSATION: Continue normal bot dialogue
    - VOICEMAIL: Trigger developer event handler for custom voicemail handling

    Example::

        classification_llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
        detector = VoicemailDetector(llm=classification_llm)

        @detector.event_handler("on_voicemail_detected")
        async def handle_voicemail(processor):
            await processor.push_frame(TTSSpeakFrame("Please leave a message."))

        pipeline = Pipeline([
            transport.input(),
            stt,
            detector.detector(),  # Classification
            context_aggregator.user(),
            llm,
            tts,
            detector.gate(),  # TTS gating
            transport.output(),
            context_aggregator.assistant(),
        ])

        # For custom prompts, append the required response instruction:
        custom_prompt = "Your custom classification logic here. " + VoicemailDetector.CLASSIFIER_RESPONSE_INSTRUCTION

    Events:
        on_voicemail_detected: Triggered when voicemail is detected after the configured
            delay. The event handler receives one argument: the ClassificationProcessor
            instance which can be used to push frames.

    Constants:
        CLASSIFIER_RESPONSE_INSTRUCTION: The exact text that must be included in custom
            system prompts to ensure proper classification functionality.
    """

    # Exact response-format instruction. Custom prompts should include this text;
    # _validate_prompt() warns when the "CONVERSATION"/"VOICEMAIL" keywords are absent.
    CLASSIFIER_RESPONSE_INSTRUCTION = 'Respond with ONLY "CONVERSATION" if a person answered, or "VOICEMAIL" if it\'s voicemail/recording.'

    # Default classifier prompt used when no custom_system_prompt is supplied to
    # __init__. The response instruction above is appended so the LLM's answer
    # matches what the classification processor expects.
    DEFAULT_SYSTEM_PROMPT = (
        """You are a voicemail detection classifier for an OUTBOUND calling system. A bot has called a phone number and you need to determine if a human answered or if the call went to voicemail based on the provided text.

HUMAN ANSWERED - LIVE CONVERSATION (respond "CONVERSATION"):
- Personal greetings: "Hello?", "Hi", "Yeah?", "John speaking"
- Interactive responses: "Who is this?", "What do you want?", "Can I help you?"
- Conversational tone expecting back-and-forth dialogue
- Questions directed at the caller: "Hello? Anyone there?"
- Informal responses: "Yep", "What's up?", "Speaking"
- Natural, spontaneous speech patterns
- Immediate acknowledgment of the call

VOICEMAIL SYSTEM (respond "VOICEMAIL"):
- Automated voicemail greetings: "Hi, you've reached [name], please leave a message"
- Phone carrier messages: "The number you have dialed is not in service", "Please leave a message", "All circuits are busy"
- Professional voicemail: "This is [name], I'm not available right now"
- Instructions about leaving messages: "leave a message", "leave your name and number"
- References to callback or messaging: "call me back", "I'll get back to you"
- Carrier system messages: "mailbox is full", "has not been set up"
- Business hours messages: "our office is currently closed"

"""
        + CLASSIFIER_RESPONSE_INSTRUCTION
    )
    def __init__(
        self,
        *,
        llm: LLMService,
        voicemail_response_delay: float = 2.0,
        custom_system_prompt: Optional[str] = None,
    ):
        """Initialize the voicemail detector with classification and buffering components.

        Args:
            llm: LLM service used for voicemail vs conversation classification.
                Should be fast and reliable for real-time classification.
            voicemail_response_delay: Delay in seconds after user stops speaking
                before triggering the voicemail event handler. This allows voicemail
                responses to be played back after a short delay to ensure the response
                occurs during the voicemail recording. Default is 2.0 seconds.
            custom_system_prompt: Optional custom system prompt for classification. If None,
                uses the default prompt optimized for outbound calling scenarios.
                Custom prompts should instruct the LLM to respond with exactly
                "CONVERSATION" or "VOICEMAIL" for proper detection functionality.
        """
        self._classifier_llm = llm
        self._prompt = (
            custom_system_prompt if custom_system_prompt is not None else self.DEFAULT_SYSTEM_PROMPT
        )
        self._voicemail_response_delay = voicemail_response_delay

        # Validate custom prompts to ensure they work with the detection logic.
        # Note: validation only logs a warning; an invalid prompt is still used.
        if custom_system_prompt is not None:
            self._validate_prompt(custom_system_prompt)

        # Set up the LLM context with the classification prompt
        self._messages = [
            {
                "role": "system",
                "content": self._prompt,
            },
        ]

        # Create the LLM context and aggregators for conversation management
        self._context = OpenAILLMContext(self._messages)
        self._context_aggregator = llm.create_context_aggregator(self._context)

        # Create notification system for coordinating between components
        self._gate_notifier = EventNotifier()  # Signals classification completion
        self._conversation_notifier = EventNotifier()  # Signals conversation detected
        self._voicemail_notifier = EventNotifier()  # Signals voicemail detected

        # Create the processor components
        self._classifier_gate = ClassifierGate(self._gate_notifier, self._conversation_notifier)
        self._conversation_gate = ConversationGate(self._voicemail_notifier)
        self._classification_processor = ClassificationProcessor(
            gate_notifier=self._gate_notifier,
            conversation_notifier=self._conversation_notifier,
            voicemail_notifier=self._voicemail_notifier,
            voicemail_response_delay=voicemail_response_delay,
        )
        self._voicemail_gate = TTSGate(self._conversation_notifier, self._voicemail_notifier)

        # Initialize the parallel pipeline with conversation and classifier branches
        super().__init__(
            # Conversation branch: gate that blocks frames after voicemail detection
            [self._conversation_gate],
            # Classification branch: gate -> context -> LLM -> processor -> context
            [
                self._classifier_gate,
                self._context_aggregator.user(),
                self._classifier_llm,
                self._classification_processor,
                self._context_aggregator.assistant(),
            ],
        )

        # Register the voicemail detected event after super().__init__()
        # NOTE(review): registration presumably depends on state set up by the
        # parent constructor — keep this call after super().__init__().
        self._register_event_handler("on_voicemail_detected")
+ def _validate_prompt(self, prompt: str) -> None:
655
+ """Validate custom prompt contains required response format instructions.
656
+
657
+ Custom prompts must instruct the LLM to respond with exactly "CONVERSATION"
658
+ or "VOICEMAIL" for the detection logic to work properly. This method
659
+ checks for the presence of these keywords and warns if they're missing.
660
+
661
+ Args:
662
+ prompt: The custom system prompt to validate.
663
+ """
664
+ has_conversation = "CONVERSATION" in prompt
665
+ has_voicemail = "VOICEMAIL" in prompt
666
+
667
+ if not has_conversation or not has_voicemail:
668
+ logger.warning(
669
+ "Custom system prompt should instruct the LLM to respond with exactly "
670
+ '"CONVERSATION" or "VOICEMAIL" for proper detection functionality. '
671
+ f"Consider appending VoicemailDetector.CLASSIFIER_RESPONSE_INSTRUCTION to your prompt: "
672
+ f'"{self.CLASSIFIER_RESPONSE_INSTRUCTION}"'
673
+ )
674
+
675
+ def detector(self) -> "VoicemailDetector":
676
+ """Get the detector pipeline for placement after STT in the main pipeline.
677
+
678
+ This should be placed after the STT service and before the context
679
+ aggregator in your main pipeline to enable voicemail classification.
680
+
681
+ Returns:
682
+ The VoicemailDetector instance itself (which is a ParallelPipeline).
683
+ """
684
+ return self
685
+
686
+ def gate(self) -> TTSGate:
687
+ """Get the gate processor for placement after TTS in the main pipeline.
688
+
689
+ This should be placed after the TTS service and before the transport
690
+ output to enable TTS frame gating during classification.
691
+
692
+ Returns:
693
+ The TTSGate processor instance.
694
+ """
695
+ return self._voicemail_gate
696
+
697
+ def add_event_handler(self, event_name: str, handler):
698
+ """Add an event handler for voicemail detection events.
699
+
700
+ Args:
701
+ event_name: The name of the event to handle.
702
+ handler: The function to call when the event occurs.
703
+ """
704
+ if event_name == "on_voicemail_detected":
705
+ self._classification_processor.add_event_handler(event_name, handler)
706
+ else:
707
+ super().add_event_handler(event_name, handler)