dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/processors/frameworks/strands_agents.py
ADDED
@@ -0,0 +1,170 @@
+"""Strands Agent integration for Pipecat.
+
+This module provides integration with Strands Agents for handling conversational AI
+interactions. It supports both single agent and multi-agent graphs.
+"""
+
+from typing import Optional
+
+from loguru import logger
+
+from pipecat.frames.frames import (
+    Frame,
+    LLMContextFrame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    LLMTextFrame,
+)
+from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+
+try:
+    from strands import Agent
+    from strands.multiagent.graph import Graph
+except ModuleNotFoundError as e:
+    logger.exception("In order to use Strands Agents, you need to `pip install strands-agents`.")
+    raise Exception(f"Missing module: {e}")
+
+
+class StrandsAgentsProcessor(FrameProcessor):
+    """Processor that integrates Strands Agents with Pipecat's frame pipeline.
+
+    This processor takes LLM message frames, extracts the latest user message,
+    and processes it through either a single Strands Agent or a multi-agent Graph.
+    The response is streamed back as text frames with appropriate response markers.
+
+    Supports both single agent streaming and graph-based multi-agent workflows.
+    """
+
+    def __init__(
+        self,
+        agent: Optional[Agent] = None,
+        graph: Optional[Graph] = None,
+        graph_exit_node: Optional[str] = None,
+    ):
+        """Initialize the Strands Agents processor.
+
+        Args:
+            agent: The Strands Agent to use for single-agent processing.
+            graph: The Strands multi-agent Graph to use for graph-based processing.
+            graph_exit_node: The exit node name when using graph-based processing.
+
+        Raises:
+            AssertionError: If neither agent nor graph is provided, or if graph is
+                provided without a graph_exit_node.
+        """
+        super().__init__()
+        self.agent = agent
+        self.graph = graph
+        self.graph_exit_node = graph_exit_node
+
+        assert self.agent or self.graph, "Either agent or graph must be provided"
+
+        if self.graph:
+            assert self.graph_exit_node, "graph_exit_node must be provided if graph is provided"
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle LLM message frames.
+
+        Args:
+            frame: The incoming frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
+        await super().process_frame(frame, direction)
+        if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
+            messages = frame.context.get_messages()
+            if messages:
+                last_message = messages[-1]
+                await self._ainvoke(str(last_message["content"]).strip())
+        else:
+            await self.push_frame(frame, direction)
+
+    async def _ainvoke(self, text: str):
+        """Invoke the Strands agent with the provided text and stream results as Pipecat frames.
+
+        Args:
+            text: The user input text to process through the agent or graph.
+        """
+        logger.debug(f"Invoking Strands agent with: {text}")
+        ttfb_tracking = True
+        try:
+            await self.push_frame(LLMFullResponseStartFrame())
+            await self.start_processing_metrics()
+            await self.start_ttfb_metrics()
+
+            if self.graph:
+                # Graph does not stream; await full result then emit assistant text
+                graph_result = await self.graph.invoke_async(text)
+                if ttfb_tracking:
+                    await self.stop_ttfb_metrics()
+                    ttfb_tracking = False
+                try:
+                    node_result = graph_result.results[self.graph_exit_node]
+                    logger.debug(f"Node result: {node_result}")
+                    for agent_result in node_result.get_agent_results():
+                        # Push to TTS service
+                        message = getattr(agent_result, "message", None)
+                        if isinstance(message, dict) and "content" in message:
+                            for block in message["content"]:
+                                if isinstance(block, dict) and "text" in block:
+                                    await self.push_frame(LLMTextFrame(str(block["text"])))
+                        # Update usage metrics
+                        await self._report_usage_metrics(
+                            agent_result.metrics.accumulated_usage.get("inputTokens", 0),
+                            agent_result.metrics.accumulated_usage.get("outputTokens", 0),
+                            agent_result.metrics.accumulated_usage.get("totalTokens", 0),
+                        )
+                except Exception as parse_err:
+                    logger.warning(f"Failed to extract messages from GraphResult: {parse_err}")
+            else:
+                # Agent supports streaming events via async iterator
+                async for event in self.agent.stream_async(text):
+                    # Push to TTS service
+                    if isinstance(event, dict) and "data" in event:
+                        await self.push_frame(LLMTextFrame(str(event["data"])))
+                        if ttfb_tracking:
+                            await self.stop_ttfb_metrics()
+                            ttfb_tracking = False
+
+                    # Update usage metrics
+                    if (
+                        isinstance(event, dict)
+                        and "event" in event
+                        and "metadata" in event["event"]
+                    ):
+                        if "usage" in event["event"]["metadata"]:
+                            usage = event["event"]["metadata"]["usage"]
+                            await self._report_usage_metrics(
+                                usage.get("inputTokens", 0),
+                                usage.get("outputTokens", 0),
+                                usage.get("totalTokens", 0),
+                            )
+        except GeneratorExit:
+            logger.warning(f"{self} generator was closed prematurely")
+        except Exception as e:
+            logger.exception(f"{self} an unknown error occurred: {e}")
+        finally:
+            if ttfb_tracking:
+                await self.stop_ttfb_metrics()
+                ttfb_tracking = False
+            await self.stop_processing_metrics()
+            await self.push_frame(LLMFullResponseEndFrame())
+
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate performance metrics.
+
+        Returns:
+            True as this service supports metrics generation.
+        """
+        return True
+
+    async def _report_usage_metrics(
+        self, prompt_tokens: int, completion_tokens: int, total_tokens: int
+    ):
+        tokens = LLMTokenUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+        )
+        await self.start_llm_usage_metrics(tokens)
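For orientation, a minimal usage sketch of the new processor. It uses only names that appear in the diff above; the no-argument Agent() construction follows the Strands quickstart defaults and is an assumption, not something this release pins down.

# Hedged sketch: constructing the StrandsAgentsProcessor added above.
# Assumes `pip install strands-agents` and default Agent() model settings.
from strands import Agent

from pipecat.processors.frameworks.strands_agents import StrandsAgentsProcessor

# Single-agent mode: user messages arriving as LLMContextFrame or
# OpenAILLMContextFrame are run through Agent.stream_async() and emitted
# downstream as LLMTextFrame chunks.
processor = StrandsAgentsProcessor(agent=Agent())

# Graph mode instead requires naming the exit node whose results are emitted
# (my_graph and "answer" are hypothetical placeholders):
# processor = StrandsAgentsProcessor(graph=my_graph, graph_exit_node="answer")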
pipecat/processors/logger.py
CHANGED
@@ -15,7 +15,7 @@ from pipecat.frames.frames import (
     Frame,
     InputAudioRawFrame,
     OutputAudioRawFrame,
-    TransportMessageFrame,
+    UserSpeakingFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
@@ -36,9 +36,9 @@ class FrameLogger(FrameProcessor):
         color: Optional[str] = None,
         ignored_frame_types: Tuple[Type[Frame], ...] = (
             BotSpeakingFrame,
+            UserSpeakingFrame,
             InputAudioRawFrame,
             OutputAudioRawFrame,
-            TransportMessageFrame,
         ),
     ):
         """Initialize the frame logger.
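In practice this change only swaps which frame types FrameLogger skips by default. A small sketch, using only names visible in the hunks above:

from pipecat.frames.frames import (
    BotSpeakingFrame,
    InputAudioRawFrame,
    OutputAudioRawFrame,
    TransportMessageFrame,
    UserSpeakingFrame,
)
from pipecat.processors.logger import FrameLogger

# New defaults: per-chunk speaking frames and raw audio are suppressed,
# while TransportMessageFrame is now logged again.
frame_logger = FrameLogger(color="cyan")

# To keep transport messages out of the logs as before, pass the tuple explicitly:
quiet_logger = FrameLogger(
    ignored_frame_types=(
        BotSpeakingFrame,
        UserSpeakingFrame,
        InputAudioRawFrame,
        OutputAudioRawFrame,
        TransportMessageFrame,
    )
)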
pipecat/processors/transcript_processor.py
CHANGED
@@ -19,7 +19,8 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
-    StartInterruptionFrame,
+    InterruptionFrame,
+    TranscriptDropFrame,
     TranscriptionFrame,
     TranscriptionMessage,
     TranscriptionUpdateFrame,
@@ -44,6 +45,7 @@ class BaseTranscriptProcessor(FrameProcessor):
         super().__init__(**kwargs)
         self._processed_messages: List[TranscriptionMessage] = []
         self._register_event_handler("on_transcript_update")
+        self._register_event_handler("on_transcript_drop")
 
     async def _emit_update(self, messages: List[TranscriptionMessage]):
         """Emit transcript updates for new messages.
@@ -57,6 +59,19 @@ class BaseTranscriptProcessor(FrameProcessor):
         await self._call_event_handler("on_transcript_update", update_frame)
         await self.push_frame(update_frame)
 
+    async def _handle_transcript_drop(self, frame: TranscriptDropFrame):
+        """Handle transcript drop notifications by removing stored messages."""
+        if not frame.transcript_ids:
+            return
+
+        await self._call_event_handler("on_transcript_drop", frame)
+
+        drop_ids = set(frame.transcript_ids)
+        if drop_ids:
+            self._processed_messages = [
+                msg for msg in self._processed_messages if msg.message_id not in drop_ids
+            ]
+
 
 class UserTranscriptProcessor(BaseTranscriptProcessor):
     """Processes user transcription frames into timestamped conversation messages."""
@@ -72,9 +87,15 @@ class UserTranscriptProcessor(BaseTranscriptProcessor):
 
         if isinstance(frame, TranscriptionFrame):
             message = TranscriptionMessage(
-                role="user", content=frame.text, timestamp=frame.timestamp
+                role="user",
+                user_id=frame.user_id,
+                content=frame.text,
+                timestamp=frame.timestamp,
+                message_id=frame.id,
             )
             await self._emit_update([message])
+        elif isinstance(frame, TranscriptDropFrame):
+            await self._handle_transcript_drop(frame)
 
         await self.push_frame(frame, direction)
 
@@ -86,7 +107,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
     transcript messages. Utterances are completed when:
 
     - The bot stops speaking (BotStoppedSpeakingFrame)
-    - The bot is interrupted (StartInterruptionFrame)
+    - The bot is interrupted (InterruptionFrame)
     - The pipeline ends (EndFrame)
     """
 
@@ -185,7 +206,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
 
         - TTSTextFrame: Aggregates text for current utterance
         - BotStoppedSpeakingFrame: Completes current utterance
-        - StartInterruptionFrame: Completes current utterance due to interruption
+        - InterruptionFrame: Completes current utterance due to interruption
         - EndFrame: Completes current utterance at pipeline end
         - CancelFrame: Completes current utterance due to cancellation
 
@@ -195,7 +216,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
         """
         await super().process_frame(frame, direction)
 
-        if isinstance(frame, (StartInterruptionFrame, CancelFrame)):
+        if isinstance(frame, (InterruptionFrame, CancelFrame)):
             # Push frame first otherwise our emitted transcription update frame
             # might get cleaned up.
             await self.push_frame(frame, direction)
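A hedged sketch of consuming the new transcript-drop event. The TranscriptProcessor factory and the on_transcript_update pattern come from existing Pipecat usage; the assumption is that the factory forwards the new on_transcript_drop handler the same way it forwards on_transcript_update.

from pipecat.processors.transcript_processor import TranscriptProcessor

transcript = TranscriptProcessor()

@transcript.event_handler("on_transcript_update")
async def on_update(processor, frame):
    # TranscriptionUpdateFrame carries newly completed messages.
    for msg in frame.messages:
        print(f"{msg.role}: {msg.content}")

@transcript.event_handler("on_transcript_drop")
async def on_drop(processor, frame):
    # New in this release: dropped transcripts are announced before the
    # matching messages are removed from the processor's stored history.
    print(f"Dropped transcript ids: {frame.transcript_ids}")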
pipecat/processors/user_idle_processor.py
CHANGED
@@ -15,17 +15,18 @@ from loguru import logger
 from pipecat.frames.frames import (
     BotSpeakingFrame,
     CancelFrame,
+    EndDTMFCaptureFrame,
     EndFrame,
     Frame,
-    FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
     InputDTMFFrame,
+    LLMFullResponseStartFrame,
+    LLMFullResponseEndFrame,
+    StartDTMFCaptureFrame,
+    StartFrame,
     StartUserIdleProcessorFrame,
     StopUserIdleProcessorFrame,
-    FunctionCallInProgressFrame,
-    FunctionCallResultFrame,
-    StartFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
 )
@@ -83,9 +84,12 @@ class UserIdleProcessor(FrameProcessor):
         self._timeout = timeout
         self._retry_count = 0
         self._interrupted = False
+        self._function_call_active = False
+        self._dtmf_capture_active = False
         self._conversation_started = False
         self._idle_task = None
         self._idle_event = asyncio.Event()
+        self._llm_in_progress = False
 
     def _wrap_callback(
         self,
@@ -180,10 +184,20 @@ class UserIdleProcessor(FrameProcessor):
             self._idle_event.set()
         elif isinstance(frame, FunctionCallInProgressFrame):
             # Function calls can take longer than the timeout, so we want to prevent idle callbacks
+            self._function_call_active = True
             self._interrupted = True
             self._idle_event.set()
         elif isinstance(frame, FunctionCallResultFrame):
-            self._interrupted = False
+            self._function_call_active = False
+            self._interrupted = self._dtmf_capture_active
+            self._idle_event.set()
+        elif isinstance(frame, StartDTMFCaptureFrame):
+            self._dtmf_capture_active = True
+            self._interrupted = True
+            self._idle_event.set()
+        elif isinstance(frame, EndDTMFCaptureFrame):
+            self._dtmf_capture_active = False
+            self._interrupted = self._function_call_active
             self._idle_event.set()
         elif isinstance(frame, StartUserIdleProcessorFrame):
             if not self._idle_task:
@@ -192,6 +206,18 @@ class UserIdleProcessor(FrameProcessor):
         elif isinstance(frame, StopUserIdleProcessorFrame):
             self.logger.debug("Stopping Idle Processor")
             await self._stop()
+        elif isinstance(frame, LLMFullResponseStartFrame):
+            self.logger.debug(
+                "LLM FullResponseStartFrame received, making llm_in_progress to True"
+            )
+            self._idle_event.set()
+            self._llm_in_progress = True
+        elif isinstance(frame, LLMFullResponseEndFrame):
+            self.logger.debug(
+                "LLM FullResponseEndFrame received, making llm_in_progress to False"
+            )
+            self._idle_event.set()
+            self._llm_in_progress = False
 
     async def cleanup(self) -> None:
         """Cleans up resources when processor is shutting down."""
@@ -206,15 +232,13 @@ class UserIdleProcessor(FrameProcessor):
 
         Runs in a loop until cancelled or callback indicates completion.
         """
-        while True:
+        running = True
+        while running:
             try:
                 await asyncio.wait_for(self._idle_event.wait(), timeout=self._timeout)
             except asyncio.TimeoutError:
-                if not self._interrupted:
+                if not self._interrupted and not self._llm_in_progress:
                     self._retry_count += 1
-                    should_continue = await self._callback(self, self._retry_count)
-                    if not should_continue:
-                        await self._stop()
-                        break
+                    running = await self._callback(self, self._retry_count)
             finally:
                 self._idle_event.clear()
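The retry-callback contract is unchanged (return True to keep monitoring, False to stop), but with this release the idle timeout no longer fires while a function call, a DTMF capture, or an LLM response is still in flight. A minimal sketch; TTSSpeakFrame is standard Pipecat and not part of this diff:

from pipecat.frames.frames import TTSSpeakFrame
from pipecat.processors.user_idle_processor import UserIdleProcessor

async def handle_idle(processor: UserIdleProcessor, retry_count: int) -> bool:
    """Nudge the user twice, then stop monitoring by returning False."""
    if retry_count <= 2:
        await processor.push_frame(TTSSpeakFrame("Are you still there?"))
        return True
    return False

idle = UserIdleProcessor(callback=handle_idle, timeout=8.0)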
pipecat/runner/daily.py
CHANGED
@@ -76,12 +76,14 @@ class DailyRoomConfig(BaseModel):
 async def configure(
     aiohttp_session: aiohttp.ClientSession,
     *,
+    api_key: Optional[str] = None,
     room_exp_duration: Optional[float] = 2.0,
     token_exp_duration: Optional[float] = 2.0,
     sip_caller_phone: Optional[str] = None,
     sip_enable_video: Optional[bool] = False,
     sip_num_endpoints: Optional[int] = 1,
     sip_codecs: Optional[Dict[str, List[str]]] = None,
+    room_properties: Optional[DailyRoomProperties] = None,
 ) -> DailyRoomConfig:
     """Configure Daily room URL and token with optional SIP capabilities.
 
@@ -91,6 +93,7 @@ async def configure(
 
     Args:
         aiohttp_session: HTTP session for making API requests.
+        api_key: Daily API key.
         room_exp_duration: Room expiration time in hours.
         token_exp_duration: Token expiration time in hours.
         sip_caller_phone: Phone number or identifier for SIP display name.
@@ -99,6 +102,10 @@ async def configure(
         sip_num_endpoints: Number of allowed SIP endpoints.
         sip_codecs: Codecs to support for audio and video. If None, uses Daily defaults.
             Example: {"audio": ["OPUS"], "video": ["H264"]}
+        room_properties: Optional DailyRoomProperties to use instead of building from
+            individual parameters. When provided, this overrides room_exp_duration and
+            SIP-related parameters. If not provided, properties are built from the
+            individual parameters as before.
 
     Returns:
         DailyRoomConfig: Object with room_url, token, and optional sip_endpoint.
@@ -115,18 +122,48 @@ async def configure(
         # SIP-enabled room
         sip_config = await configure(session, sip_caller_phone="+15551234567")
         print(f"SIP endpoint: {sip_config.sip_endpoint}")
+
+        # Custom room properties with recording enabled
+        custom_props = DailyRoomProperties(
+            enable_recording="cloud",
+            max_participants=2,
+        )
+        config = await configure(session, room_properties=custom_props)
     """
     # Check for required API key
-    api_key = os.getenv("DAILY_API_KEY")
+    api_key = api_key or os.getenv("DAILY_API_KEY")
     if not api_key:
         raise Exception(
             "DAILY_API_KEY environment variable is required. "
             "Get your API key from https://dashboard.daily.co/developers"
         )
 
+    # Warn if both room_properties and individual parameters are provided
+    if room_properties is not None:
+        individual_params_provided = any(
+            [
+                room_exp_duration != 2.0,
+                token_exp_duration != 2.0,
+                sip_caller_phone is not None,
+                sip_enable_video is not False,
+                sip_num_endpoints != 1,
+                sip_codecs is not None,
+            ]
+        )
+        if individual_params_provided:
+            logger.warning(
+                "Both room_properties and individual parameters (room_exp_duration, token_exp_duration, "
+                "sip_*) were provided. The room_properties will be used and individual parameters "
+                "will be ignored."
+            )
+
     # Determine if SIP mode is enabled
     sip_enabled = sip_caller_phone is not None
 
+    # If room_properties is provided, check if it has SIP configuration
+    if room_properties and room_properties.sip:
+        sip_enabled = True
+
     daily_rest_helper = DailyRESTHelper(
         daily_api_key=api_key,
         daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
@@ -150,27 +187,29 @@ async def configure(
     room_name = f"{room_prefix}-{uuid.uuid4().hex[:8]}"
     logger.info(f"Creating new Daily room: {room_name}")
 
-    # Calculate expiration time
-    expiration_time = time.time() + (room_exp_duration * 60 * 60)
+    # Use provided room_properties or build from parameters
+    if room_properties is None:
+        # Calculate expiration time
+        expiration_time = time.time() + (room_exp_duration * 60 * 60)
 
-    # Create room properties
-    room_properties = DailyRoomProperties(
-        exp=expiration_time,
-        eject_at_room_exp=True,
-    )
-
-    # Add SIP configuration if enabled
-    if sip_enabled:
-        sip_params = DailyRoomSipParams(
-            display_name=sip_caller_phone,
-            video=sip_enable_video,
-            sip_mode="dial-in",
-            num_endpoints=sip_num_endpoints,
-            codecs=sip_codecs,
+        # Create room properties
+        room_properties = DailyRoomProperties(
+            exp=expiration_time,
+            eject_at_room_exp=True,
        )
-        room_properties.sip = sip_params
-        room_properties.enable_dialout = True  # Enable outbound calls if needed
-        room_properties.start_video_off = not sip_enable_video  # Voice-only by default
+
+        # Add SIP configuration if enabled
+        if sip_enabled:
+            sip_params = DailyRoomSipParams(
+                display_name=sip_caller_phone,
+                video=sip_enable_video,
+                sip_mode="dial-in",
+                num_endpoints=sip_num_endpoints,
+                codecs=sip_codecs,
+            )
+            room_properties.sip = sip_params
+            room_properties.enable_dialout = True  # Enable outbound calls if needed
+            room_properties.start_video_off = not sip_enable_video  # Voice-only by default
 
     # Create room parameters
     room_params = DailyRoomParams(name=room_name, properties=room_properties)
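A hedged sketch of the widened configure() signature; the key value is a placeholder, and the behavior follows the docstring changes above (an explicit api_key now takes precedence over the DAILY_API_KEY environment variable).

import asyncio

import aiohttp

from pipecat.runner.daily import configure

async def main():
    async with aiohttp.ClientSession() as session:
        # New in this release: pass the Daily API key directly instead of
        # relying solely on the DAILY_API_KEY environment variable.
        config = await configure(session, api_key="YOUR_DAILY_API_KEY")
        print(config.room_url, config.token)

asyncio.run(main())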
|