dv-pipecat-ai 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/METADATA +78 -117
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/RECORD +157 -123
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +120 -87
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +12 -4
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +85 -24
- pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_response.py +6 -7
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/filters/stt_mute_filter.py +2 -0
- pipecat/processors/frame_processor.py +103 -17
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +209 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +4 -4
- pipecat/processors/user_idle_processor.py +3 -6
- pipecat/runner/run.py +270 -50
- pipecat/runner/types.py +2 -0
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +6 -9
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/asyncai/tts.py +2 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +122 -97
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +367 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1155 -0
- pipecat/services/aws/stt.py +1 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +13 -355
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/tts.py +2 -2
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +636 -0
- pipecat/services/elevenlabs/__init__.py +2 -1
- pipecat/services/elevenlabs/stt.py +254 -276
- pipecat/services/elevenlabs/tts.py +5 -5
- pipecat/services/fish/tts.py +2 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +2 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +2 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +16 -8
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/playht/tts.py +31 -4
- pipecat/services/rime/tts.py +3 -4
- pipecat/services/sarvam/tts.py +2 -6
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +1 -7
- pipecat/services/stt_service.py +34 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +9 -9
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +4 -0
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +17 -42
- pipecat/transports/base_output.py +42 -26
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +98 -19
- pipecat/transports/smallwebrtc/request_handler.py +204 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/string.py +12 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Strands Agent integration for Pipecat.
|
|
2
|
+
|
|
3
|
+
This module provides integration with Strands Agents for handling conversational AI
|
|
4
|
+
interactions. It supports both single agent and multi-agent graphs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from loguru import logger
|
|
10
|
+
|
|
11
|
+
from pipecat.frames.frames import (
|
|
12
|
+
Frame,
|
|
13
|
+
LLMContextFrame,
|
|
14
|
+
LLMFullResponseEndFrame,
|
|
15
|
+
LLMFullResponseStartFrame,
|
|
16
|
+
LLMTextFrame,
|
|
17
|
+
)
|
|
18
|
+
from pipecat.metrics.metrics import LLMTokenUsage
|
|
19
|
+
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
|
|
20
|
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from strands import Agent
|
|
24
|
+
from strands.multiagent.graph import Graph
|
|
25
|
+
except ModuleNotFoundError as e:
|
|
26
|
+
logger.exception("In order to use Strands Agents, you need to `pip install strands-agents`.")
|
|
27
|
+
raise Exception(f"Missing module: {e}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class StrandsAgentsProcessor(FrameProcessor):
    """Processor that integrates Strands Agents with Pipecat's frame pipeline.

    This processor consumes LLM context frames, extracts the latest message's
    content, and runs it through either a single Strands ``Agent`` (streamed)
    or a multi-agent ``Graph`` (non-streaming). The response is pushed
    downstream as ``LLMTextFrame``s bracketed by
    ``LLMFullResponseStartFrame``/``LLMFullResponseEndFrame``, with TTFB,
    processing, and token-usage metrics reported along the way.

    Supports both single agent streaming and graph-based multi-agent workflows.
    """

    def __init__(
        self,
        agent: Optional[Agent] = None,
        graph: Optional[Graph] = None,
        graph_exit_node: Optional[str] = None,
    ):
        """Initialize the Strands Agents processor.

        Args:
            agent: The Strands Agent to use for single-agent processing.
            graph: The Strands multi-agent Graph to use for graph-based processing.
            graph_exit_node: The exit node name when using graph-based processing.

        Raises:
            ValueError: If neither agent nor graph is provided, or if graph is
                provided without a graph_exit_node.
        """
        super().__init__()
        self.agent = agent
        self.graph = graph
        self.graph_exit_node = graph_exit_node

        # Explicit raises instead of `assert`: asserts are stripped when Python
        # runs with -O, which would silently disable this validation.
        if not (self.agent or self.graph):
            raise ValueError("Either agent or graph must be provided")

        if self.graph and not self.graph_exit_node:
            raise ValueError("graph_exit_node must be provided if graph is provided")

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames and handle LLM context frames.

        Context frames are consumed (not propagated downstream) and their
        latest message is forwarded to the agent/graph; every other frame is
        passed through unchanged.

        Args:
            frame: The incoming frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)
        if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
            messages = frame.context.get_messages()
            if messages:
                last_message = messages[-1]
                # NOTE(review): `content` may be a list of content blocks for
                # some contexts, in which case str() yields a Python repr —
                # confirm the upstream context always provides plain strings.
                await self._ainvoke(str(last_message["content"]).strip())
        else:
            await self.push_frame(frame, direction)

    async def _ainvoke(self, text: str):
        """Invoke the Strands agent with the provided text and stream results as Pipecat frames.

        Args:
            text: The user input text to process through the agent or graph.
        """
        logger.debug(f"Invoking Strands agent with: {text}")
        # Guard so TTFB metrics are stopped exactly once: on first output in
        # the happy path, or in `finally` when no output was produced.
        ttfb_tracking = True
        try:
            await self.push_frame(LLMFullResponseStartFrame())
            await self.start_processing_metrics()
            await self.start_ttfb_metrics()

            if self.graph:
                # Graph does not stream; await full result then emit assistant text
                graph_result = await self.graph.invoke_async(text)
                if ttfb_tracking:
                    await self.stop_ttfb_metrics()
                    ttfb_tracking = False
                try:
                    node_result = graph_result.results[self.graph_exit_node]
                    logger.debug(f"Node result: {node_result}")
                    for agent_result in node_result.get_agent_results():
                        # Push to TTS service
                        message = getattr(agent_result, "message", None)
                        if isinstance(message, dict) and "content" in message:
                            for block in message["content"]:
                                if isinstance(block, dict) and "text" in block:
                                    await self.push_frame(LLMTextFrame(str(block["text"])))
                        # Update usage metrics
                        await self._report_usage_metrics(
                            agent_result.metrics.accumulated_usage.get("inputTokens", 0),
                            agent_result.metrics.accumulated_usage.get("outputTokens", 0),
                            agent_result.metrics.accumulated_usage.get("totalTokens", 0),
                        )
                except Exception as parse_err:
                    # Best-effort extraction: a malformed GraphResult should not
                    # crash the pipeline — we only skip the response text/metrics.
                    logger.warning(f"Failed to extract messages from GraphResult: {parse_err}")
            else:
                # Agent supports streaming events via async iterator
                async for event in self.agent.stream_async(text):
                    # Push to TTS service
                    if isinstance(event, dict) and "data" in event:
                        await self.push_frame(LLMTextFrame(str(event["data"])))
                        if ttfb_tracking:
                            await self.stop_ttfb_metrics()
                            ttfb_tracking = False

                    # Update usage metrics
                    if (
                        isinstance(event, dict)
                        and "event" in event
                        and "metadata" in event["event"]
                    ):
                        if "usage" in event["event"]["metadata"]:
                            usage = event["event"]["metadata"]["usage"]
                            await self._report_usage_metrics(
                                usage.get("inputTokens", 0),
                                usage.get("outputTokens", 0),
                                usage.get("totalTokens", 0),
                            )
        except GeneratorExit:
            # NOTE(review): swallowing GeneratorExit in a coroutine and then
            # awaiting in `finally` can raise RuntimeError when the coroutine
            # is being closed — confirm this is intended.
            logger.warning(f"{self} generator was closed prematurely")
        except Exception as e:
            logger.exception(f"{self} an unknown error occurred: {e}")
        finally:
            if ttfb_tracking:
                await self.stop_ttfb_metrics()
                ttfb_tracking = False
            await self.stop_processing_metrics()
            await self.push_frame(LLMFullResponseEndFrame())

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate performance metrics.

        Returns:
            True as this service supports metrics generation.
        """
        return True

    async def _report_usage_metrics(
        self, prompt_tokens: int, completion_tokens: int, total_tokens: int
    ):
        """Report LLM token usage to the metrics subsystem.

        Args:
            prompt_tokens: Tokens consumed by the prompt/input.
            completion_tokens: Tokens produced by the completion/output.
            total_tokens: Total tokens reported for the call.
        """
        tokens = LLMTokenUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
        )
        await self.start_llm_usage_metrics(tokens)
|
pipecat/processors/logger.py
CHANGED
|
@@ -15,7 +15,7 @@ from pipecat.frames.frames import (
|
|
|
15
15
|
Frame,
|
|
16
16
|
InputAudioRawFrame,
|
|
17
17
|
OutputAudioRawFrame,
|
|
18
|
-
|
|
18
|
+
UserSpeakingFrame,
|
|
19
19
|
)
|
|
20
20
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|
21
21
|
|
|
@@ -36,9 +36,9 @@ class FrameLogger(FrameProcessor):
|
|
|
36
36
|
color: Optional[str] = None,
|
|
37
37
|
ignored_frame_types: Tuple[Type[Frame], ...] = (
|
|
38
38
|
BotSpeakingFrame,
|
|
39
|
+
UserSpeakingFrame,
|
|
39
40
|
InputAudioRawFrame,
|
|
40
41
|
OutputAudioRawFrame,
|
|
41
|
-
TransportMessageFrame,
|
|
42
42
|
),
|
|
43
43
|
):
|
|
44
44
|
"""Initialize the frame logger.
|
|
@@ -19,7 +19,7 @@ from pipecat.frames.frames import (
|
|
|
19
19
|
CancelFrame,
|
|
20
20
|
EndFrame,
|
|
21
21
|
Frame,
|
|
22
|
-
|
|
22
|
+
InterruptionFrame,
|
|
23
23
|
TranscriptionFrame,
|
|
24
24
|
TranscriptionMessage,
|
|
25
25
|
TranscriptionUpdateFrame,
|
|
@@ -86,7 +86,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
|
|
|
86
86
|
transcript messages. Utterances are completed when:
|
|
87
87
|
|
|
88
88
|
- The bot stops speaking (BotStoppedSpeakingFrame)
|
|
89
|
-
- The bot is interrupted (
|
|
89
|
+
- The bot is interrupted (InterruptionFrame)
|
|
90
90
|
- The pipeline ends (EndFrame)
|
|
91
91
|
"""
|
|
92
92
|
|
|
@@ -185,7 +185,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
|
|
|
185
185
|
|
|
186
186
|
- TTSTextFrame: Aggregates text for current utterance
|
|
187
187
|
- BotStoppedSpeakingFrame: Completes current utterance
|
|
188
|
-
-
|
|
188
|
+
- InterruptionFrame: Completes current utterance due to interruption
|
|
189
189
|
- EndFrame: Completes current utterance at pipeline end
|
|
190
190
|
- CancelFrame: Completes current utterance due to cancellation
|
|
191
191
|
|
|
@@ -195,7 +195,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
|
|
|
195
195
|
"""
|
|
196
196
|
await super().process_frame(frame, direction)
|
|
197
197
|
|
|
198
|
-
if isinstance(frame, (
|
|
198
|
+
if isinstance(frame, (InterruptionFrame, CancelFrame)):
|
|
199
199
|
# Push frame first otherwise our emitted transcription update frame
|
|
200
200
|
# might get cleaned up.
|
|
201
201
|
await self.push_frame(frame, direction)
|
|
@@ -18,7 +18,6 @@ from pipecat.frames.frames import (
|
|
|
18
18
|
EndDTMFCaptureFrame,
|
|
19
19
|
EndFrame,
|
|
20
20
|
Frame,
|
|
21
|
-
FunctionCallCancelFrame,
|
|
22
21
|
FunctionCallInProgressFrame,
|
|
23
22
|
FunctionCallResultFrame,
|
|
24
23
|
InputDTMFFrame,
|
|
@@ -233,15 +232,13 @@ class UserIdleProcessor(FrameProcessor):
|
|
|
233
232
|
|
|
234
233
|
Runs in a loop until cancelled or callback indicates completion.
|
|
235
234
|
"""
|
|
236
|
-
|
|
235
|
+
running = True
|
|
236
|
+
while running:
|
|
237
237
|
try:
|
|
238
238
|
await asyncio.wait_for(self._idle_event.wait(), timeout=self._timeout)
|
|
239
239
|
except asyncio.TimeoutError:
|
|
240
240
|
if not self._interrupted and not self._llm_in_progress:
|
|
241
241
|
self._retry_count += 1
|
|
242
|
-
|
|
243
|
-
if not should_continue:
|
|
244
|
-
await self._stop()
|
|
245
|
-
break
|
|
242
|
+
running = await self._callback(self, self._retry_count)
|
|
246
243
|
finally:
|
|
247
244
|
self._idle_event.clear()
|