dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (106) hide show
  1. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/types.py +47 -0
  12. pipecat/audio/dtmf/utils.py +70 -0
  13. pipecat/audio/filters/aic_filter.py +199 -0
  14. pipecat/audio/utils.py +9 -7
  15. pipecat/extensions/ivr/__init__.py +0 -0
  16. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  17. pipecat/frames/frames.py +156 -43
  18. pipecat/pipeline/llm_switcher.py +76 -0
  19. pipecat/pipeline/parallel_pipeline.py +3 -3
  20. pipecat/pipeline/service_switcher.py +144 -0
  21. pipecat/pipeline/task.py +68 -28
  22. pipecat/pipeline/task_observer.py +10 -0
  23. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  24. pipecat/processors/aggregators/llm_context.py +277 -0
  25. pipecat/processors/aggregators/llm_response.py +48 -15
  26. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  27. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  28. pipecat/processors/dtmf_aggregator.py +0 -2
  29. pipecat/processors/filters/stt_mute_filter.py +0 -2
  30. pipecat/processors/frame_processor.py +18 -11
  31. pipecat/processors/frameworks/rtvi.py +17 -10
  32. pipecat/processors/metrics/sentry.py +2 -0
  33. pipecat/runner/daily.py +137 -36
  34. pipecat/runner/run.py +1 -1
  35. pipecat/runner/utils.py +7 -7
  36. pipecat/serializers/asterisk.py +20 -4
  37. pipecat/serializers/exotel.py +1 -1
  38. pipecat/serializers/plivo.py +1 -1
  39. pipecat/serializers/telnyx.py +1 -1
  40. pipecat/serializers/twilio.py +1 -1
  41. pipecat/services/__init__.py +2 -2
  42. pipecat/services/anthropic/llm.py +113 -28
  43. pipecat/services/asyncai/tts.py +4 -0
  44. pipecat/services/aws/llm.py +82 -8
  45. pipecat/services/aws/tts.py +0 -10
  46. pipecat/services/aws_nova_sonic/aws.py +5 -0
  47. pipecat/services/cartesia/tts.py +28 -16
  48. pipecat/services/cerebras/llm.py +15 -10
  49. pipecat/services/deepgram/stt.py +8 -0
  50. pipecat/services/deepseek/llm.py +13 -8
  51. pipecat/services/fireworks/llm.py +13 -8
  52. pipecat/services/fish/tts.py +8 -6
  53. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  54. pipecat/services/gladia/config.py +7 -1
  55. pipecat/services/gladia/stt.py +23 -15
  56. pipecat/services/google/llm.py +159 -59
  57. pipecat/services/google/llm_openai.py +18 -3
  58. pipecat/services/grok/llm.py +2 -1
  59. pipecat/services/llm_service.py +38 -3
  60. pipecat/services/mem0/memory.py +2 -1
  61. pipecat/services/mistral/llm.py +5 -6
  62. pipecat/services/nim/llm.py +2 -1
  63. pipecat/services/openai/base_llm.py +88 -26
  64. pipecat/services/openai/image.py +6 -1
  65. pipecat/services/openai_realtime_beta/openai.py +5 -2
  66. pipecat/services/openpipe/llm.py +6 -8
  67. pipecat/services/perplexity/llm.py +13 -8
  68. pipecat/services/playht/tts.py +9 -6
  69. pipecat/services/rime/tts.py +1 -1
  70. pipecat/services/sambanova/llm.py +18 -13
  71. pipecat/services/sarvam/tts.py +415 -10
  72. pipecat/services/speechmatics/stt.py +2 -2
  73. pipecat/services/tavus/video.py +1 -1
  74. pipecat/services/tts_service.py +15 -5
  75. pipecat/services/vistaar/llm.py +2 -5
  76. pipecat/transports/base_input.py +32 -19
  77. pipecat/transports/base_output.py +39 -5
  78. pipecat/transports/daily/__init__.py +0 -0
  79. pipecat/transports/daily/transport.py +2371 -0
  80. pipecat/transports/daily/utils.py +410 -0
  81. pipecat/transports/livekit/__init__.py +0 -0
  82. pipecat/transports/livekit/transport.py +1042 -0
  83. pipecat/transports/network/fastapi_websocket.py +12 -546
  84. pipecat/transports/network/small_webrtc.py +12 -922
  85. pipecat/transports/network/webrtc_connection.py +9 -595
  86. pipecat/transports/network/websocket_client.py +12 -481
  87. pipecat/transports/network/websocket_server.py +12 -487
  88. pipecat/transports/services/daily.py +9 -2334
  89. pipecat/transports/services/helpers/daily_rest.py +12 -396
  90. pipecat/transports/services/livekit.py +12 -975
  91. pipecat/transports/services/tavus.py +12 -757
  92. pipecat/transports/smallwebrtc/__init__.py +0 -0
  93. pipecat/transports/smallwebrtc/connection.py +612 -0
  94. pipecat/transports/smallwebrtc/transport.py +936 -0
  95. pipecat/transports/tavus/__init__.py +0 -0
  96. pipecat/transports/tavus/transport.py +770 -0
  97. pipecat/transports/websocket/__init__.py +0 -0
  98. pipecat/transports/websocket/client.py +494 -0
  99. pipecat/transports/websocket/fastapi.py +559 -0
  100. pipecat/transports/websocket/server.py +500 -0
  101. pipecat/transports/whatsapp/__init__.py +0 -0
  102. pipecat/transports/whatsapp/api.py +345 -0
  103. pipecat/transports/whatsapp/client.py +364 -0
  104. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
  105. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
  106. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,199 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """ai-coustics AIC SDK audio filter for Pipecat.
8
+
9
+ This module provides an audio filter implementation using ai-coustics' AIC SDK to
10
+ enhance audio streams in real time. It mirrors the structure of other filters like
11
+ the Koala filter and integrates with Pipecat's input transport pipeline.
12
+ """
13
+
14
+ from typing import List, Optional
15
+
16
+ import numpy as np
17
+ from loguru import logger
18
+
19
+ from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
20
+ from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame
21
+
22
+ try:
23
+ # AIC SDK (https://ai-coustics.github.io/aic-sdk-py/api/)
24
+ from aic import AICModelType, AICParameter, Model
25
+ except ModuleNotFoundError as e:
26
+ logger.error(f"Exception: {e}")
27
+ logger.error("In order to use the AIC filter, you need to `pip install pipecat-ai[aic]`.")
28
+ raise Exception(f"Missing module: {e}")
29
+
30
+
31
class AICFilter(BaseAudioFilter):
    """Audio filter using ai-coustics' AIC SDK for real-time enhancement.

    Buffers incoming audio to the model's preferred block size and processes
    planar frames in-place using float32 samples in the linear -1..+1 range.
    Until the model is successfully created in `start()`, `filter()` is a
    pass-through.
    """

    def __init__(
        self,
        *,
        license_key: str = "",
        model_type: AICModelType = AICModelType.QUAIL_L,
        enhancement_level: Optional[float] = 1.0,
        voice_gain: Optional[float] = 1.0,
        noise_gate_enable: Optional[bool] = True,
    ) -> None:
        """Initialize the AIC filter.

        Args:
            license_key: ai-coustics license key for authentication.
            model_type: Model variant to load.
            enhancement_level: Optional overall enhancement strength (0.0..1.0).
                When None, the SDK's default is left untouched.
            voice_gain: Optional linear gain applied to detected speech (0.0..4.0).
                When None, the SDK's default is left untouched.
            noise_gate_enable: Optional enable/disable noise gate (default: True).
                When None, the SDK's default is left untouched.
        """
        self._license_key = license_key
        self._model_type = model_type

        self._enhancement_level = enhancement_level
        self._voice_gain = voice_gain
        self._noise_gate_enable = noise_gate_enable

        self._enabled = True
        self._sample_rate = 0
        self._aic_ready = False
        self._frames_per_block = 0
        self._audio_buffer = bytearray()
        # Model will be created in start() since the API now requires sample_rate
        self._aic = None

    async def start(self, sample_rate: int):
        """Initialize the filter with the transport's sample rate.

        Creates the AIC model and applies any configured optional parameters.
        On any SDK error the filter stays disabled (pass-through) rather than
        breaking the input pipeline.

        Args:
            sample_rate: The sample rate of the input transport in Hz.
        """
        self._sample_rate = sample_rate

        try:
            # Create model with required runtime parameters
            self._aic = Model(
                model_type=self._model_type,
                license_key=self._license_key or None,
                sample_rate=self._sample_rate,
                channels=1,
            )
            self._frames_per_block = self._aic.optimal_num_frames()

            # Optional parameter configuration; None means "keep SDK default"
            if self._enhancement_level is not None:
                self._aic.set_parameter(
                    AICParameter.ENHANCEMENT_LEVEL,
                    float(self._enhancement_level if self._enabled else 0.0),
                )
            if self._voice_gain is not None:
                self._aic.set_parameter(AICParameter.VOICE_GAIN, float(self._voice_gain))
            if self._noise_gate_enable is not None:
                self._aic.set_parameter(
                    AICParameter.NOISE_GATE_ENABLE, 1.0 if bool(self._noise_gate_enable) else 0.0
                )

            self._aic_ready = True

            # Log processor information
            logger.debug("ai-coustics filter started:")
            logger.debug(f"  Sample rate: {self._sample_rate} Hz")
            logger.debug(f"  Frames per chunk: {self._frames_per_block}")
            if self._enhancement_level is not None:
                # Guarded: enhancement_level may legitimately be None, and
                # int(None * 100) would raise here, be swallowed by the broad
                # except below, and silently disable the filter.
                logger.debug(f"  Enhancement strength: {int(self._enhancement_level * 100)}%")
            logger.debug(f"  Optimal input buffer size: {self._aic.optimal_num_frames()} samples")
            logger.debug(f"  Optimal sample rate: {self._aic.optimal_sample_rate()} Hz")
            logger.debug(
                f"  Current algorithmic latency: {self._aic.processing_latency() / self._sample_rate * 1000:.2f}ms"
            )
        except Exception as e:  # noqa: BLE001 - surfacing SDK initialization errors
            logger.error(f"AIC model initialization failed: {e}")
            self._aic_ready = False

    async def stop(self):
        """Clean up the AIC model when stopping.

        Releases the SDK model and clears any partially-buffered audio so a
        subsequent start() begins from a clean state.
        """
        try:
            if self._aic is not None:
                self._aic.close()
        finally:
            self._aic = None
            self._aic_ready = False
            self._audio_buffer.clear()

    async def process_frame(self, frame: FilterControlFrame):
        """Process control frames to enable/disable filtering.

        Args:
            frame: The control frame containing filter commands.
        """
        if isinstance(frame, FilterEnableFrame):
            self._enabled = frame.enable
            if self._aic is not None:
                try:
                    # When disabled, drop enhancement to 0 instead of tearing
                    # the model down so re-enabling is instantaneous. Skip the
                    # update entirely when no level is configured (None), since
                    # float(None) would raise.
                    level = self._enhancement_level if self._enabled else 0.0
                    if level is not None:
                        self._aic.set_parameter(AICParameter.ENHANCEMENT_LEVEL, float(level))
                except Exception as e:  # noqa: BLE001
                    logger.error(f"AIC set_parameter failed: {e}")

    async def filter(self, audio: bytes) -> bytes:
        """Apply AIC enhancement to audio data.

        Buffers incoming audio and processes it in chunks that match the AIC
        model's required block length. Incomplete trailing frames stay
        buffered for the next call.

        Args:
            audio: Raw audio data as bytes to be filtered (int16 PCM, planar).

        Returns:
            Enhanced audio data as bytes (int16 PCM, planar).
        """
        if not self._aic_ready or self._aic is None:
            return audio

        self._audio_buffer.extend(audio)

        filtered_chunks: List[bytes] = []

        # Number of int16 samples currently buffered (2 bytes/sample, mono)
        available_frames = len(self._audio_buffer) // 2

        while available_frames >= self._frames_per_block:
            # Consume exactly one block worth of frames (mono: 1 sample/frame)
            bytes_to_consume = self._frames_per_block * 2
            block_bytes = bytes(self._audio_buffer[:bytes_to_consume])

            # Convert to float32 in -1..+1 range and reshape to planar (channels, frames)
            block_i16 = np.frombuffer(block_bytes, dtype=np.int16)
            block_f32 = (block_i16.astype(np.float32) / 32768.0).reshape(
                (1, self._frames_per_block)
            )

            # Process planar in-place; returns ndarray (same shape)
            out_f32 = self._aic.process(block_f32)

            # Convert back to int16 bytes, planar layout; clip to avoid wraparound
            out_i16 = np.clip(out_f32 * 32768.0, -32768, 32767).astype(np.int16)
            filtered_chunks.append(out_i16.reshape(-1).tobytes())

            # Slide buffer in place (avoids copying the whole remainder)
            del self._audio_buffer[:bytes_to_consume]
            available_frames = len(self._audio_buffer) // 2

        # Do not flush incomplete frames; keep them buffered for the next call
        return b"".join(filtered_chunks)
pipecat/audio/utils.py CHANGED
@@ -41,13 +41,15 @@ def create_default_resampler(**kwargs) -> BaseAudioResampler:
41
41
  """
42
42
  import warnings
43
43
 
44
- warnings.warn(
45
- "`create_default_resampler` is deprecated. "
46
- "Use `create_stream_resampler` for real-time processing scenarios or "
47
- "`create_file_resampler` for batch processing of complete audio files.",
48
- DeprecationWarning,
49
- stacklevel=2,
50
- )
44
+ with warnings.catch_warnings():
45
+ warnings.simplefilter("always")
46
+ warnings.warn(
47
+ "`create_default_resampler` is deprecated. "
48
+ "Use `create_stream_resampler` for real-time processing scenarios or "
49
+ "`create_file_resampler` for batch processing of complete audio files.",
50
+ DeprecationWarning,
51
+ stacklevel=2,
52
+ )
51
53
  return SOXRAudioResampler(**kwargs)
52
54
 
53
55
 
File without changes
@@ -0,0 +1,452 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Interactive Voice Response (IVR) navigation components.
8
+
9
+ This module provides classes for automated navigation of IVR phone systems
10
+ using LLM-based decision making and DTMF tone generation.
11
+ """
12
+
13
+ from enum import Enum
14
+ from typing import List, Optional
15
+
16
+ from loguru import logger
17
+
18
+ from pipecat.audio.dtmf.types import KeypadEntry
19
+ from pipecat.audio.vad.vad_analyzer import VADParams
20
+ from pipecat.frames.frames import (
21
+ Frame,
22
+ LLMContextFrame,
23
+ LLMMessagesUpdateFrame,
24
+ LLMTextFrame,
25
+ OutputDTMFUrgentFrame,
26
+ StartFrame,
27
+ TextFrame,
28
+ VADParamsUpdateFrame,
29
+ )
30
+ from pipecat.pipeline.pipeline import Pipeline
31
+ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
32
+ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
33
+ from pipecat.services.llm_service import LLMService
34
+ from pipecat.utils.text.pattern_pair_aggregator import PatternMatch, PatternPairAggregator
35
+
36
+
37
class IVRStatus(Enum):
    """Possible states reported during IVR navigation.

    The LLM signals these states via `<ivr>...</ivr>` tags, and the IVR
    processing system uses them to drive mode switches and event emission.
    """

    DETECTED = "detected"  # An automated IVR system was recognized
    COMPLETED = "completed"  # Navigation reached its goal (e.g. transfer)
    STUCK = "stuck"  # Navigation cannot make further progress
    WAIT = "wait"  # More transcription is needed before deciding
48
+
49
+
50
class IVRProcessor(FrameProcessor):
    """Processes LLM responses for IVR navigation commands.

    Aggregates XML-tagged commands from LLM text streams and executes
    corresponding actions like DTMF tone generation and mode switching.

    Supported features:

    - DTMF command processing (`<dtmf>1</dtmf>`)
    - IVR state management (see IVRStatus enum: `<ivr>detected</ivr>`, `<ivr>completed</ivr>`, `<ivr>stuck</ivr>`, `<ivr>wait</ivr>`)
    - Automatic prompt and VAD parameter switching
    - Event emission via on_ivr_status_changed for detected, completed, and stuck states
    """

    def __init__(
        self,
        *,
        classifier_prompt: str,
        ivr_prompt: str,
        ivr_vad_params: Optional[VADParams] = None,
    ):
        """Initialize the IVR processor.

        Args:
            classifier_prompt: System prompt for classifying IVR or conversation.
            ivr_prompt: System prompt for IVR navigation mode.
            ivr_vad_params: VAD parameters for IVR navigation mode. If None, defaults to VADParams(stop_secs=2.0).
        """
        super().__init__()

        self._ivr_prompt = ivr_prompt
        self._ivr_vad_params = ivr_vad_params or VADParams(stop_secs=2.0)
        self._classifier_prompt = classifier_prompt

        # Context messages captured from upstream context frames; used to
        # carry conversation history across mode switches.
        self._saved_messages: List[dict] = []

        # XML pattern aggregation over streamed LLM text
        self._aggregator = PatternPairAggregator()
        self._setup_xml_patterns()

        # Register IVR events
        self._register_event_handler("on_conversation_detected")
        self._register_event_handler("on_ivr_status_changed")

    def update_saved_messages(self, messages: List[dict]) -> None:
        """Update the saved context messages.

        Sets the messages that are saved when switching between
        conversation and IVR navigation modes.

        Args:
            messages: List of message dictionaries to save.
        """
        self._saved_messages = messages

    def _get_conversation_history(self) -> List[dict]:
        """Get saved context messages without the system message.

        Returns:
            List of message dictionaries excluding the first system message.
            (Assumes the first saved message is the system prompt.)
        """
        return self._saved_messages[1:] if self._saved_messages else []

    def _setup_xml_patterns(self):
        """Set up XML pattern detection and handlers.

        Each registered pattern pair strips the matched tags from the
        aggregated text (remove_match=True) and dispatches to its handler.
        """
        # Register DTMF pattern
        self._aggregator.add_pattern_pair("dtmf", "<dtmf>", "</dtmf>", remove_match=True)
        self._aggregator.on_pattern_match("dtmf", self._handle_dtmf_action)

        # Register mode pattern
        self._aggregator.add_pattern_pair("mode", "<mode>", "</mode>", remove_match=True)
        self._aggregator.on_pattern_match("mode", self._handle_mode_action)

        # Register IVR pattern
        self._aggregator.add_pattern_pair("ivr", "<ivr>", "</ivr>", remove_match=True)
        self._aggregator.on_pattern_match("ivr", self._handle_ivr_action)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and aggregate XML tag content.

        StartFrame triggers installation of the classifier prompt upstream;
        LLMTextFrame content is routed through the pattern aggregator; all
        other frames pass through unchanged.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, StartFrame):
            # Push the StartFrame right away
            await self.push_frame(frame, direction)

            # Set the classifier prompt and push it upstream
            messages = [{"role": "system", "content": self._classifier_prompt}]
            llm_update_frame = LLMMessagesUpdateFrame(messages=messages)
            await self.push_frame(llm_update_frame, FrameDirection.UPSTREAM)

        elif isinstance(frame, LLMTextFrame):
            # Process text through the pattern aggregator
            result = await self._aggregator.aggregate(frame.text)
            if result:
                # Push aggregated text that doesn't contain XML patterns
                await self.push_frame(LLMTextFrame(result), direction)

        else:
            await self.push_frame(frame, direction)

    async def _handle_dtmf_action(self, match: PatternMatch):
        """Handle DTMF action by creating and pushing DTMF frame.

        Args:
            match: The pattern match containing DTMF content.
        """
        value = match.content
        logger.debug(f"DTMF detected: {value}")

        try:
            # Convert the value to a KeypadEntry; raises ValueError for
            # anything outside the keypad alphabet.
            keypad_entry = KeypadEntry(value)
            dtmf_frame = OutputDTMFUrgentFrame(button=keypad_entry)
            await self.push_frame(dtmf_frame)
            # Push a TextFrame to add DTMF message to the context; skip_tts
            # keeps the tag out of the spoken output.
            text_frame = TextFrame(text=f"<dtmf>{value}</dtmf>")
            text_frame.skip_tts = True
            await self.push_frame(text_frame)
        except ValueError:
            logger.warning(f"Invalid DTMF value: {value}. Must be 0-9, *, or #")

    async def _handle_ivr_action(self, match: PatternMatch):
        """Handle IVR status action.

        Args:
            match: The pattern match containing IVR status content.
        """
        status = match.content
        logger.trace(f"IVR status detected: {status}")

        # Convert string to enum, with validation
        try:
            ivr_status = IVRStatus(status)
        except ValueError:
            logger.warning(f"Unknown IVR status: {status}")
            return

        match ivr_status:
            case IVRStatus.DETECTED:
                await self._handle_ivr_detected()
            case IVRStatus.COMPLETED:
                await self._handle_ivr_completed()
            case IVRStatus.STUCK:
                await self._handle_ivr_stuck()
            case IVRStatus.WAIT:
                await self._handle_ivr_wait()

        # Push a TextFrame to add the IVR detected signal to the context
        ivr_text_frame = TextFrame(text=f"<ivr>{status}</ivr>")
        ivr_text_frame.skip_tts = True
        await self.push_frame(ivr_text_frame)

    async def _handle_mode_action(self, match: PatternMatch):
        """Handle mode action by switching to the appropriate mode.

        Args:
            match: The pattern match containing mode content ("conversation" or "ivr").
        """
        mode = match.content
        logger.debug(f"Mode detected: {mode}")
        if mode == "conversation":
            await self._handle_conversation()
        elif mode == "ivr":
            await self._handle_ivr_detected()

        # No TextFrame is pushed for the mode selection, as the mode
        # selection conversation is ephemeral and the system message
        # is removed after the mode is detected.

    async def _handle_conversation(self):
        """Handle conversation mode by switching to conversation mode.

        Emit an on_conversation_detected event with saved conversation history.
        """
        logger.debug("Conversation detected - emitting on_conversation_detected event")

        # Extract conversation history for the event handler
        conversation_history = self._get_conversation_history()

        await self._call_event_handler("on_conversation_detected", conversation_history)

    async def _handle_ivr_detected(self):
        """Handle IVR detection by switching to IVR mode.

        Allows bidirectional switching for error recovery and complex IVR flows.
        Saves previous messages from the conversation context when available.
        """
        logger.debug("IVR detected - switching to IVR navigation mode")

        # Create new context with IVR system prompt and saved messages
        messages = [{"role": "system", "content": self._ivr_prompt}]

        # Add saved conversation history if available
        conversation_history = self._get_conversation_history()
        if conversation_history:
            messages.extend(conversation_history)

        # Push the messages upstream and run the LLM with the new context
        llm_update_frame = LLMMessagesUpdateFrame(messages=messages, run_llm=True)
        await self.push_frame(llm_update_frame, FrameDirection.UPSTREAM)

        # Update VAD parameters for IVR response timing (IVR menus pause
        # longer than humans, so a larger stop_secs is used by default)
        vad_update_frame = VADParamsUpdateFrame(params=self._ivr_vad_params)
        await self.push_frame(vad_update_frame, FrameDirection.UPSTREAM)

        # Emit status changed event
        await self._call_event_handler("on_ivr_status_changed", IVRStatus.DETECTED)

    async def _handle_ivr_completed(self):
        """Handle IVR completion by triggering the status changed event.

        Emits on_ivr_status_changed with IVRStatus.COMPLETED.
        """
        logger.debug("IVR navigation completed - triggering status change event")

        await self._call_event_handler("on_ivr_status_changed", IVRStatus.COMPLETED)

    async def _handle_ivr_stuck(self):
        """Handle IVR stuck state by triggering the status changed event.

        Emits on_ivr_status_changed with IVRStatus.STUCK for external handling of stuck scenarios.
        """
        logger.debug("IVR navigation stuck - triggering status change event")

        await self._call_event_handler("on_ivr_status_changed", IVRStatus.STUCK)

    async def _handle_ivr_wait(self):
        """Handle IVR wait state when transcription is incomplete.

        The LLM is indicating it needs more information to make a decision.
        This is a no-op since the system will continue to provide more transcription.
        """
        logger.debug("IVR waiting for more complete transcription")
289
+
290
+
291
class IVRNavigator(Pipeline):
    """Pipeline for automated IVR system navigation.

    Orchestrates LLM-based IVR navigation by combining an LLM service with
    IVR processing capabilities. Starts with mode classification to classify input
    as conversation or IVR system.

    Navigation behavior:

    - Detects conversation vs IVR systems automatically
    - Navigates IVR menus using DTMF tones and verbal responses
    - Provides event hooks for mode classification and status changes (on_conversation_detected, on_ivr_status_changed)
    - Developers control conversation handling via on_conversation_detected event
    """

    # System prompt used on startup to decide whether the far end is an
    # automated IVR or a live human. The LLM must answer with a <mode> tag.
    CLASSIFIER_PROMPT = """You are an IVR detection classifier. Analyze the transcribed text to determine if it's an automated IVR system or a live human conversation.

IVR SYSTEM (respond `<mode>ivr</mode>`):
- Menu options: "Press 1 for billing", "Press 2 for technical support", "Press 0 to speak to an agent"
- Automated instructions: "Please enter your account number", "Say or press your selection", "Enter your phone number followed by the pound key"
- System prompts: "Thank you for calling [company]", "Your call is important to us", "Please hold while we connect you"
- Scripted introductions: "Welcome to [company] customer service", "For faster service, have your account number ready"
- Navigation phrases: "To return to the main menu", "Press star to repeat", "Say 'agent' or press 0"
- Hold messages: "Please continue to hold", "Your estimated wait time is", "Thank you for your patience"
- Carrier messages: "All circuits are busy", "Due to high call volume"

HUMAN CONVERSATION (respond `<mode>conversation</mode>`):
- Personal greetings: "Hello, this is Sarah", "Good morning, how can I help you?", "Customer service, this is Mike"
- Interactive responses: "Who am I speaking with?", "What can I do for you today?", "How are you calling about?"
- Natural speech patterns: hesitations, informal language, conversational flow
- Direct engagement: "I see you're calling about...", "Let me look that up for you", "Can you spell that for me?"
- Spontaneous responses: "Oh, I can help with that", "Sure, no problem", "Hmm, let me check"

RESPOND ONLY with either:
- `<mode>ivr</mode>` for IVR system
- `<mode>conversation</mode>` for human conversation"""

    # Template for the IVR-navigation system prompt; {goal} is filled in with
    # the caller-supplied ivr_prompt in __init__.
    IVR_NAVIGATION_BASE = """You are navigating an Interactive Voice Response (IVR) system to accomplish a specific goal. You receive text transcriptions of the IVR system's audio prompts and menu options.

YOUR NAVIGATION GOAL:
{goal}

NAVIGATION RULES:
1. When you see menu options with keypress instructions (e.g., "Press 1 for...", "Press 2 for..."), ONLY respond with a keypress if one of the options aligns with your navigation goal
2. If an option closely matches your goal, respond with: `<dtmf>NUMBER</dtmf>` (e.g., `<dtmf>1</dtmf>`)
3. For sequences of numbers (dates, account numbers, phone numbers), enter each digit separately: `<dtmf>1</dtmf><dtmf>2</dtmf><dtmf>3</dtmf>` for "123"
4. When the system asks for verbal responses (e.g., "Say Yes or No", "Please state your name", "What department?"), respond with natural language text ending with punctuation
5. If multiple options seem relevant, choose the most specific or direct path
6. If NO options are relevant to your goal, respond with `<ivr>wait</ivr>` - the system may present more options
7. If the transcription is incomplete or unclear, respond with `<ivr>wait</ivr>` to indicate you need more information

COMPLETION CRITERIA - Respond with `<ivr>completed</ivr>` when:
- You see "Please hold while I transfer you" or similar transfer language
- You see "You're being connected to..." or "Connecting you to..."
- The system says "One moment please" after selecting your final option
- The system indicates you've reached the target department/service
- You've successfully navigated to your goal and are being transferred to a human

WAIT CRITERIA - Respond with `<ivr>wait</ivr>` when:
- NONE of the presented options are relevant to your navigation goal
- The transcription appears to be cut off mid-sentence
- You can see partial menu options but the list seems incomplete
- The transcription is unclear or garbled
- You suspect there are more options that weren't captured in the transcription
- The system presents options for specific user types that don't apply to your goal

IMPORTANT: Do NOT feel pressured to select an option if none match your goal. Waiting is often the correct response when the IVR system is presenting partial menus or options intended for different user types.

STUCK CRITERIA - Respond with `<ivr>stuck</ivr>` when:
- You've been through the same menu options 3+ times without progress
- No available options relate to your goal after careful consideration
- You encounter an error message or "invalid selection" repeatedly
- The system asks for information you don't have (account numbers, PINs, etc.)
- You reach a dead end with no relevant options and no way back

STRATEGY TIPS:
- Look for keywords in menu options that match your goal
- Try general options like "Customer Service" or "Other Services" if specific options aren't available
- Pay attention to sub-menus. Sometimes the path requires multiple steps through different menu layers
- If you see "For all other inquiries, press..." that's often a good fallback option
- Remember that reaching your goal may require navigating through several menu levels
- Be patient - IVR systems often present options in waves, and waiting for the right option is better than selecting the wrong one

SEQUENCE INPUT EXAMPLES:
- For date of birth "01/15/1990": `<dtmf>0</dtmf><dtmf>1</dtmf><dtmf>1</dtmf><dtmf>5</dtmf><dtmf>1</dtmf><dtmf>9</dtmf><dtmf>9</dtmf><dtmf>0</dtmf>`
- For account number "12345": `<dtmf>1</dtmf><dtmf>2</dtmf><dtmf>3</dtmf><dtmf>4</dtmf><dtmf>5</dtmf>`
- For phone number last 4 digits "6789": `<dtmf>6</dtmf><dtmf>7</dtmf><dtmf>8</dtmf><dtmf>9</dtmf>`

VERBAL RESPONSE EXAMPLES:
- "Is your date of birth 01/15/1990? Say Yes or No" → "Yes."
- "Please state your first and last name" → "John Smith."
- "What department are you trying to reach?" → "Billing."
- "Are you calling about an existing order? Please say Yes or No" → "No."
- "Did I hear that correctly? Please say Yes or No" → "Yes."

Remember: Respond with `<dtmf>NUMBER</dtmf>` (single or multiple for sequences), `<ivr>completed</ivr>`, `<ivr>stuck</ivr>`, `<ivr>wait</ivr>`, OR natural language text when verbal responses are requested. No other response types."""

    def __init__(
        self,
        *,
        llm: LLMService,
        ivr_prompt: str,
        ivr_vad_params: Optional[VADParams] = None,
    ):
        """Initialize the IVR navigator.

        Args:
            llm: LLM service for text generation and decision making.
            ivr_prompt: Navigation goal prompt integrated with IVR navigation instructions.
            ivr_vad_params: VAD parameters for IVR navigation mode. If None, defaults to VADParams(stop_secs=2.0).
        """
        self._llm = llm
        # Embed the caller's goal into the navigation prompt template
        self._ivr_prompt = self.IVR_NAVIGATION_BASE.format(goal=ivr_prompt)
        self._ivr_vad_params = ivr_vad_params or VADParams(stop_secs=2.0)
        self._classifier_prompt = self.CLASSIFIER_PROMPT

        self._ivr_processor = IVRProcessor(
            classifier_prompt=self._classifier_prompt,
            ivr_prompt=self._ivr_prompt,
            ivr_vad_params=self._ivr_vad_params,
        )

        # Add the IVR processor to the pipeline, downstream of the LLM
        super().__init__([self._llm, self._ivr_processor])

        # Register IVR events
        self._register_event_handler("on_conversation_detected")
        self._register_event_handler("on_ivr_status_changed")

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames at the pipeline level to intercept context frames.

        Context frames are observed (not consumed) so the IVR processor always
        has the latest messages available when a mode switch occurs.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        if isinstance(frame, (OpenAILLMContextFrame, LLMContextFrame)):
            # Extract messages and pass to IVR processor
            all_messages = frame.context.get_messages()

            # Store messages in the IVR processor for mode switching
            self._ivr_processor.update_saved_messages(all_messages)

        # Let the pipeline handle normal frame processing
        await super().process_frame(frame, direction)

    def add_event_handler(self, event_name: str, handler):
        """Add event handler for IVR navigation events.

        IVR-specific events are delegated to the internal IVRProcessor, which
        is where they are actually emitted; everything else goes to Pipeline.

        Args:
            event_name: Event name ("on_conversation_detected", "on_ivr_status_changed").
            handler: Async function called when event occurs.
                - on_conversation_detected: Receives IVRProcessor instance and conversation_history list
                - on_ivr_status_changed: Receives IVRProcessor instance and IVRStatus enum value
        """
        if event_name in (
            "on_conversation_detected",
            "on_ivr_status_changed",
        ):
            self._ivr_processor.add_event_handler(event_name, handler)
        else:
            super().add_event_handler(event_name, handler)