dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/processors/aggregators/llm_response.py

```diff
@@ -4,8 +4,16 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""LLM response aggregators for handling conversation context and message aggregation.
+
+This module provides aggregators that process and accumulate LLM responses, user inputs,
+and conversation context. These aggregators handle the flow between speech-to-text,
+LLM processing, and text-to-speech components in conversational AI pipelines.
+"""
+
 import asyncio
 import time
+import warnings
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Literal, Optional, Set
@@ -13,6 +21,8 @@ from typing import Dict, List, Literal, Optional, Set
 from loguru import logger
 
 from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
     BotInterruptionFrame,
     BotStartedSpeakingFrame,
@@ -37,6 +47,7 @@ from pipecat.frames.frames import (
     LLMSetToolsFrame,
     LLMTextFrame,
     OpenAILLMContextAssistantTimestampFrame,
+    SpeechControlParamsFrame,
     StartFrame,
     StartInterruptionFrame,
     TextFrame,
@@ -55,30 +66,63 @@ from pipecat.utils.time import time_now_iso8601
 
 @dataclass
 class LLMUserAggregatorParams:
+    """Parameters for configuring LLM user aggregation behavior.
+
+    Parameters:
+        aggregation_timeout: Maximum time in seconds to wait for additional
+            transcription content before pushing aggregated result. This
+            timeout is used only when the transcription is slow to arrive.
+        turn_emulated_vad_timeout: Maximum time in seconds to wait for emulated
+            VAD when using turn-based analysis. Applied when transcription is
+            received but VAD didn't detect speech (e.g., whispered utterances).
+        enable_emulated_vad_interruptions: When True, allows emulated VAD events
+            to interrupt the bot when it's speaking. When False, emulated speech
+            is ignored while the bot is speaking.
+    """
+
     aggregation_timeout: float = 0.5
+    turn_emulated_vad_timeout: float = 0.8
+    enable_emulated_vad_interruptions: bool = False
 
 
 @dataclass
 class LLMAssistantAggregatorParams:
+    """Parameters for configuring LLM assistant aggregation behavior.
+
+    Parameters:
+        expect_stripped_words: Whether to expect and handle stripped words
+            in text frames by adding spaces between tokens.
+    """
+
     expect_stripped_words: bool = True
 
 
 class LLMFullResponseAggregator(FrameProcessor):
-    """
-
-
-
-
-
-
-
-
-
-
-
+    """Aggregates complete LLM responses between start and end frames.
+
+    This aggregator collects LLM text frames (tokens) received between
+    `LLMFullResponseStartFrame` and `LLMFullResponseEndFrame` and provides
+    the complete response via an event handler.
+
+    The aggregator provides an "on_completion" event that fires when a full
+    completion is available::
+
+        @aggregator.event_handler("on_completion")
+        async def on_completion(
+            aggregator: LLMFullResponseAggregator,
+            completion: str,
+            completed: bool,
+        ):
+            # Handle the completion
+            pass
     """
 
     def __init__(self, **kwargs):
+        """Initialize the LLM full response aggregator.
+
+        Args:
+            **kwargs: Additional arguments passed to parent FrameProcessor.
+        """
         super().__init__(**kwargs)
 
         self._aggregation = ""
```
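Both new fields ship with defaults, so existing `LLMUserAggregatorParams` call sites keep working. A minimal sketch of opting into the new behavior — the constructor signatures are taken from the hunks in this diff, while the seed messages are purely illustrative:

```python
from pipecat.processors.aggregators.llm_response import (
    LLMUserAggregatorParams,
    LLMUserContextAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

# Illustrative seed context; real pipelines build this from their own prompt.
context = OpenAILLMContext([{"role": "system", "content": "You are a helpful assistant."}])

params = LLMUserAggregatorParams(
    aggregation_timeout=0.5,  # buffer for late transcriptions after a real VAD stop
    turn_emulated_vad_timeout=0.8,  # emulated-VAD wait when a turn analyzer is configured
    enable_emulated_vad_interruptions=True,  # let emulated speech interrupt the bot
)

user_aggregator = LLMUserContextAggregator(context=context, params=params)
```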
```diff
@@ -87,6 +131,12 @@ class LLMFullResponseAggregator(FrameProcessor):
         self._register_event_handler("on_completion")
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and aggregate LLM text content.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         if isinstance(frame, StartInterruptionFrame):
@@ -117,83 +167,123 @@
 
 
 class BaseLLMResponseAggregator(FrameProcessor):
-    """
-    aggregators process incoming frames and aggregate content until they are
-    ready to push the aggregation. In the case of a user, an aggregation might
-    be a full transcription received from the STT service.
+    """Base class for all LLM response aggregators.
 
-
-
-
+    These aggregators process incoming frames and aggregate content until they are
+    ready to push the aggregation downstream. They maintain conversation state
+    and handle message flow between different components in the pipeline.
 
+    The aggregators keep a store (e.g. message list or LLM context) of the current
+    conversation, storing messages from both users and the bot.
     """
 
     def __init__(self, **kwargs):
+        """Initialize the base LLM response aggregator.
+
+        Args:
+            **kwargs: Additional arguments passed to parent FrameProcessor.
+        """
         super().__init__(**kwargs)
 
     @property
     @abstractmethod
     def messages(self) -> List[dict]:
-        """
+        """Get the messages from the current conversation.
+
+        Returns:
+            List of message dictionaries representing the conversation history.
+        """
         pass
 
     @property
     @abstractmethod
     def role(self) -> str:
-        """
+        """Get the role for this aggregator.
+
+        Returns:
+            The role string (e.g. "user", "assistant") for this aggregator.
+        """
         pass
 
     @abstractmethod
     def add_messages(self, messages):
-        """Add the given messages to the conversation.
+        """Add the given messages to the conversation.
+
+        Args:
+            messages: Messages to append to the conversation history.
+        """
         pass
 
     @abstractmethod
     def set_messages(self, messages):
-        """Reset the conversation with the given messages.
+        """Reset the conversation with the given messages.
+
+        Args:
+            messages: Messages to replace the current conversation history.
+        """
         pass
 
     @abstractmethod
     def set_tools(self, tools):
-        """Set LLM tools to be used in the current conversation.
+        """Set LLM tools to be used in the current conversation.
+
+        Args:
+            tools: List of tool definitions for the LLM to use.
+        """
         pass
 
     @abstractmethod
     def set_tool_choice(self, tool_choice):
-        """Set the tool choice
+        """Set the tool choice for the LLM.
+
+        Args:
+            tool_choice: Tool choice configuration for the LLM context.
+        """
         pass
 
     @abstractmethod
    async def reset(self):
-        """Reset the
-
+        """Reset the internal state of this aggregator.
+
+        This should clear aggregation state but not modify the conversation messages.
         """
         pass
 
     @abstractmethod
     async def handle_aggregation(self, aggregation: str):
-        """
-        a simple list of message or a context. It doesn't not push any frames.
+        """Add the given aggregation to the conversation store.
 
+        Args:
+            aggregation: The aggregated text content to add to the conversation.
         """
         pass
 
     @abstractmethod
     async def push_aggregation(self):
-        """
-        aggregation this might push a new context frame.
+        """Push the current aggregation downstream.
 
+        The specific frame type pushed depends on the aggregator implementation
+        (e.g. context frame, messages frame).
         """
         pass
 
 
 class LLMContextResponseAggregator(BaseLLMResponseAggregator):
-    """
-    conversation. It pushes `OpenAILLMContextFrame` as an aggregation frame.
+    """Base LLM aggregator that uses an OpenAI LLM context for conversation storage.
 
+    This aggregator maintains conversation state using an OpenAILLMContext and
+    pushes OpenAILLMContextFrame objects as aggregation frames. It provides
+    common functionality for context-based conversation management.
     """
 
     def __init__(self, *, context: OpenAILLMContext, role: str, **kwargs):
+        """Initialize the context response aggregator.
+
+        Args:
+            context: The OpenAI LLM context to use for conversation storage.
+            role: The role this aggregator represents (e.g. "user", "assistant").
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
         self._context = context
         self._role = role
@@ -202,46 +292,99 @@ class LLMContextResponseAggregator(BaseLLMResponseAggregator):
 
     @property
     def messages(self) -> List[dict]:
+        """Get messages from the LLM context.
+
+        Returns:
+            List of message dictionaries from the context.
+        """
         return self._context.get_messages()
 
     @property
     def role(self) -> str:
+        """Get the role for this aggregator.
+
+        Returns:
+            The role string for this aggregator.
+        """
         return self._role
 
     @property
     def context(self):
+        """Get the OpenAI LLM context.
+
+        Returns:
+            The OpenAILLMContext instance used by this aggregator.
+        """
         return self._context
 
     def get_context_frame(self) -> OpenAILLMContextFrame:
+        """Create a context frame with the current context.
+
+        Returns:
+            OpenAILLMContextFrame containing the current context.
+        """
         return OpenAILLMContextFrame(context=self._context)
 
     async def push_context_frame(self, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Push a context frame in the specified direction.
+
+        Args:
+            direction: The direction to push the frame (upstream or downstream).
+        """
         frame = self.get_context_frame()
         await self.push_frame(frame, direction)
 
     def add_messages(self, messages):
+        """Add messages to the context.
+
+        Args:
+            messages: Messages to add to the conversation context.
+        """
         self._context.add_messages(messages)
 
     def set_messages(self, messages):
+        """Set the context messages.
+
+        Args:
+            messages: Messages to replace the current context messages.
+        """
         self._context.set_messages(messages)
 
     def set_tools(self, tools: List):
+        """Set tools in the context.
+
+        Args:
+            tools: List of tool definitions to set in the context.
+        """
         self._context.set_tools(tools)
 
     def set_tool_choice(self, tool_choice: Literal["none", "auto", "required"] | dict):
+        """Set tool choice in the context.
+
+        Args:
+            tool_choice: Tool choice configuration for the context.
+        """
         self._context.set_tool_choice(tool_choice)
 
     async def reset(self):
+        """Reset the aggregation state."""
         self._aggregation = ""
 
 
 class LLMUserContextAggregator(LLMContextResponseAggregator):
-    """
-
-
-
-
+    """User LLM aggregator that processes speech-to-text transcriptions.
+
+    This aggregator handles the complex logic of aggregating user speech transcriptions
+    from STT services. It manages multiple scenarios including:
+
+    - Transcriptions received between VAD events
+    - Transcriptions received outside VAD events
+    - Interim vs final transcriptions
+    - User interruptions during bot speech
+    - Emulated VAD for whispered or short utterances
 
+    The aggregator uses timeouts to handle cases where transcriptions arrive
+    after VAD events or when no VAD is available.
     """
 
     def __init__(
@@ -251,8 +394,18 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         params: Optional[LLMUserAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the user context aggregator.
+
+        Args:
+            context: The OpenAI LLM context for conversation storage.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments. Supports deprecated 'aggregation_timeout'.
+        """
         super().__init__(context=context, role="user", **kwargs)
         self._params = params or LLMUserAggregatorParams()
+        self._vad_params: Optional[VADParams] = None
+        self._turn_params: Optional[SmartTurnParams] = None
+
         if "aggregation_timeout" in kwargs:
             import warnings
 
@@ -280,6 +433,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         self._last_aggregation_push_time = 0
 
     async def reset(self):
+        """Reset the aggregation state and interruption strategies."""
         await super().reset()
         self._was_bot_speaking = False
         self._seen_interim_results = False
@@ -287,12 +441,22 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         [await s.reset() for s in self._interruption_strategies]
 
     async def handle_aggregation(self, aggregation: str):
+        """Add the aggregated user text to the context.
+
+        Args:
+            aggregation: The aggregated user text to add as a user message.
+        """
         self._context.add_message({"role": self.role, "content": aggregation})
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames for user speech aggregation and context management.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         if isinstance(frame, StartInterruptionFrame):
             self.logger.debug("Received StartInterruptionFrame")
-
         await super().process_frame(frame, direction)
 
         if isinstance(frame, StartFrame):
@@ -328,9 +492,9 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         elif isinstance(frame, InterimTranscriptionFrame):
             await self._handle_interim_transcription(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
-            self.
+            await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):
-            self.
+            await self._handle_llm_messages_update(frame)
         elif isinstance(frame, LLMSetToolsFrame):
             self.set_tools(frame.tools)
         elif isinstance(frame, LLMSetToolChoiceFrame):
@@ -338,6 +502,10 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         elif isinstance(frame, LLMFullResponseStartFrame):
             self._last_llm_response_start_time = time.time()
             self._latest_final_transcript = ""
+        elif isinstance(frame, SpeechControlParamsFrame):
+            self._vad_params = frame.vad_params
+            self._turn_params = frame.turn_params
+            await self.push_frame(frame, direction)
         else:
             await self.push_frame(frame, direction)
 
```
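The new `SpeechControlParamsFrame` branch lets the aggregator pick up the pipeline's VAD and smart-turn settings at runtime and forward the frame unchanged; the cached values feed the timeout selection later in this diff. A hedged sketch of such a frame — the field names come from the handler above, and it is an assumption that the frame accepts them as constructor keywords (in practice the input transport, not application code, would emit it):

```python
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import SpeechControlParamsFrame

# stop_secs is the VAD silence window the aggregator falls back to when it
# emulates VAD without a turn analyzer (see the timeout selection below).
frame = SpeechControlParamsFrame(
    vad_params=VADParams(stop_secs=0.2),
    turn_params=None,  # no smart-turn analyzer configured in this sketch
)
```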
```diff
@@ -353,7 +521,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         self._last_aggregation_push_time = time.time()
 
     async def push_aggregation(self, trigger_interruption: bool = False):
-        """
+        """Push the current aggregation based on interruption strategies and conditions."""
         if len(self._aggregation) > 0:
             if self.interruption_strategies and self._bot_speaking:
                 should_interrupt = await self._should_interrupt_based_on_strategies()
@@ -373,7 +541,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
                 self.logger.debug(
                     "Triggering interruption - pushing BotInterruptionFrame and aggregation"
                 )
-                await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+                # await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+                await self.push_frame(StartInterruptionFrame(), FrameDirection.DOWNSTREAM)
                 self.logger.debug("Pushed BotInterruptionFrame")
             # No interruption config - normal behavior (always push aggregation)
             await self._process_aggregation()
@@ -393,7 +562,11 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
             # await self.push_frame(OpenAILLMContextFrame(self._context))
 
     async def _should_interrupt_based_on_strategies(self) -> bool:
-        """Check if interruption should occur based on configured strategies.
+        """Check if interruption should occur based on configured strategies.
+
+        Returns:
+            True if any interruption strategy indicates interruption should occur.
+        """
 
         async def should_interrupt(strategy: BaseInterruptionStrategy):
             await strategy.append_text(self._aggregation)
@@ -410,6 +583,16 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
     async def _cancel(self, frame: CancelFrame):
         await self._cancel_aggregation_task()
 
+    async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
+        self.add_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame()
+
+    async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame):
+        self.set_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame()
+
     async def _handle_input_audio(self, frame: InputAudioRawFrame):
         for s in self.interruption_strategies:
             await s.append_audio(frame.audio, frame.sample_rate)
```
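The previously truncated `LLMMessagesAppendFrame`/`LLMMessagesUpdateFrame` branches now resolve to these handlers: mutate the context and, when the frame's `run_llm` flag is set, push a fresh context frame so the LLM responds immediately. A short sketch of driving that from application code, assuming `task` is an existing pipecat `PipelineTask` and that the frame exposes `messages` and `run_llm` as constructor fields:

```python
from pipecat.frames.frames import LLMMessagesAppendFrame

async def notify_verified(task):
    # Append a message mid-conversation; run_llm=True asks the user-side
    # aggregator to push an updated context frame so the LLM runs again.
    await task.queue_frame(
        LLMMessagesAppendFrame(
            messages=[{"role": "system", "content": "The caller has been verified."}],
            run_llm=True,
        )
    )
```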
```diff
@@ -524,9 +707,40 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
     async def _aggregation_task_handler(self):
         while True:
             try:
-
-
-
+                # The _aggregation_task_handler handles two distinct timeout scenarios:
+                #
+                # 1. When emulating_vad=True: Wait for emulated VAD timeout before
+                #    pushing aggregation (simulating VAD behavior when no actual VAD
+                #    detection occurred).
+                #
+                # 2. When emulating_vad=False: Use aggregation_timeout as a buffer
+                #    to wait for potential late-arriving transcription frames after
+                #    a real VAD event.
+                #
+                # For emulated VAD scenarios, the timeout strategy depends on whether
+                # a turn analyzer is configured:
+                #
+                # - WITH turn analyzer: Use turn_emulated_vad_timeout parameter because
+                #   the VAD's stop_secs is set very low (e.g. 0.2s) for rapid speech
+                #   chunking to feed the turn analyzer. This low value is too fast
+                #   for emulated VAD scenarios where we need to allow users time to
+                #   finish speaking (e.g. 0.8s).
+                #
+                # - WITHOUT turn analyzer: Use VAD's stop_secs directly to maintain
+                #   consistent user experience between real VAD detection and
+                #   emulated VAD scenarios.
+                if not self._emulating_vad:
+                    timeout = self._params.aggregation_timeout
+                elif self._turn_params:
+                    timeout = self._params.turn_emulated_vad_timeout
+                else:
+                    # Use VAD stop_secs when no turn analyzer is present, fallback if no VAD params
+                    timeout = (
+                        self._vad_params.stop_secs
+                        if self._vad_params
+                        else self._params.turn_emulated_vad_timeout
+                    )
+                await asyncio.wait_for(self._aggregation_event.wait(), timeout=timeout)
                 await self._maybe_emulate_user_speaking()
             except asyncio.TimeoutError:
                 if not self._user_speaking:
```
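The comment block boils down to a three-way choice. Restated as a standalone helper for readability — a sketch only; the shipped logic stays inline in `_aggregation_task_handler`:

```python
from typing import Optional

def select_timeout(
    emulating_vad: bool,
    has_turn_analyzer: bool,
    aggregation_timeout: float,
    turn_emulated_vad_timeout: float,
    vad_stop_secs: Optional[float],
) -> float:
    if not emulating_vad:
        # Real VAD event: just buffer briefly for late transcriptions.
        return aggregation_timeout
    if has_turn_analyzer:
        # VAD stop_secs is tuned too low for emulated VAD; use the dedicated timeout.
        return turn_emulated_vad_timeout
    # Match the real-VAD feel; fall back if no VAD params were ever received.
    return vad_stop_secs if vad_stop_secs is not None else turn_emulated_vad_timeout
```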
```diff
@@ -540,43 +754,47 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
                 )
                 self._emulating_vad = False
             finally:
-                self.reset_watchdog()
                 self._aggregation_event.clear()
 
     async def _maybe_emulate_user_speaking(self):
-        """
-        detected by VAD. Only do that if the bot is not speaking.
+        """Maybe emulate user speaking based on transcription.
 
+        Emulate user speaking if we got a transcription but it was not
+        detected by VAD. Behavior when bot is speaking depends on the
+        enable_emulated_vad_interruptions parameter.
         """
-        if not self._user_speaking:
-            diff_time = time.time() - self._last_user_speaking_time
-            if diff_time < self._aggregation_timeout:
-                self.logger.debug("Pushing aggregation")
-                await self.push_aggregation()
-        # Commenting the original pipecat code.
         # Check if we received a transcription but VAD was not able to detect
         # voice (e.g. when you whisper a short utterance). In that case, we need
-        # to emulate VAD (i.e. user start/stopped speaking)
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # to emulate VAD (i.e. user start/stopped speaking).
+        if (
+            not self._user_speaking
+            and not self._waiting_for_aggregation
+            and len(self._aggregation) > 0
+        ):
+            if self._bot_speaking and not self._params.enable_emulated_vad_interruptions:
+                # If emulated VAD interruptions are disabled and bot is speaking, ignore
+                logger.debug("Ignoring user speaking emulation, bot is speaking.")
+                await self.reset()
+            else:
+                # Either bot is not speaking, or emulated VAD interruptions are enabled
+                # - trigger user speaking emulation.
+                await self.push_frame(EmulateUserStartedSpeakingFrame(), FrameDirection.UPSTREAM)
+                self._emulating_vad = True
 
 
 class LLMAssistantContextAggregator(LLMContextResponseAggregator):
-    """
-
-
+    """Assistant LLM aggregator that processes bot responses and function calls.
+
+    This aggregator handles the complex logic of processing assistant responses including:
+
+    - Text frame aggregation between response start/end markers
+    - Function call lifecycle management
+    - Context updates with timestamps
+    - Tool execution and result handling
+    - Interruption handling during responses
 
+    The aggregator manages function calls in progress and coordinates between
+    text generation and tool execution phases of LLM responses.
     """
 
     def __init__(
@@ -586,6 +804,13 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         params: Optional[LLMAssistantAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the assistant context aggregator.
+
+        Args:
+            context: The OpenAI LLM context for conversation storage.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments. Supports deprecated 'expect_stripped_words'.
+        """
         super().__init__(context=context, role="assistant", **kwargs)
         self._params = params or LLMAssistantAggregatorParams()
 
@@ -610,26 +835,57 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         """Check if there are any function calls currently in progress.
 
         Returns:
-
+            True if function calls are in progress, False otherwise.
         """
         return bool(self._function_calls_in_progress)
 
     async def handle_aggregation(self, aggregation: str):
+        """Add the aggregated assistant text to the context.
+
+        Args:
+            aggregation: The aggregated assistant text to add as an assistant message.
+        """
         self._context.add_message({"role": "assistant", "content": aggregation})
 
     async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame):
+        """Handle a function call that is in progress.
+
+        Args:
+            frame: The function call in progress frame to handle.
+        """
         pass
 
     async def handle_function_call_result(self, frame: FunctionCallResultFrame):
+        """Handle the result of a completed function call.
+
+        Args:
+            frame: The function call result frame to handle.
+        """
         pass
 
     async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame):
+        """Handle cancellation of a function call.
+
+        Args:
+            frame: The function call cancel frame to handle.
+        """
         pass
 
     async def handle_user_image_frame(self, frame: UserImageRawFrame):
+        """Handle a user image frame associated with a function call.
+
+        Args:
+            frame: The user image frame to handle.
+        """
         pass
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames for assistant response aggregation and function call management.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         if isinstance(frame, StartInterruptionFrame):
@@ -642,9 +898,9 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         elif isinstance(frame, TextFrame):
             await self._handle_text(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
-            self.
+            await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):
-            self.
+            await self._handle_llm_messages_update(frame)
         elif isinstance(frame, LLMSetToolsFrame):
             self.set_tools(frame.tools)
         elif isinstance(frame, LLMSetToolChoiceFrame):
@@ -666,6 +922,7 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
             await self.push_frame(frame, direction)
 
     async def push_aggregation(self):
+        """Push the current assistant aggregation with timestamp."""
         if not self._aggregation:
             return
 
@@ -682,6 +939,16 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         timestamp_frame = OpenAILLMContextAssistantTimestampFrame(timestamp=time_now_iso8601())
         await self.push_frame(timestamp_frame)
 
+    async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
+        self.add_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame(FrameDirection.UPSTREAM)
+
+    async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame):
+        self.set_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame(FrameDirection.UPSTREAM)
+
     async def _handle_interruptions(self, frame: StartInterruptionFrame):
         await self.push_aggregation()
         self._started = 0
@@ -788,13 +1055,20 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
 
     def _context_updated_task_finished(self, task: asyncio.Task):
         self._context_updated_tasks.discard(task)
-        # The task is finished so this should exit immediately. We need to do
-        # this because otherwise the task manager would report a dangling task
-        # if we don't remove it.
-        asyncio.run_coroutine_threadsafe(self.wait_for_task(task), self.get_event_loop())
 
 
 class LLMUserResponseAggregator(LLMUserContextAggregator):
+    """User response aggregator that outputs LLMMessagesFrame instead of context frames.
+
+    .. deprecated:: 0.0.79
+        This class is deprecated and will be removed in a future version.
+        Use `LLMUserContextAggregator` or another LLM-specific subclass instead.
+
+    This aggregator extends LLMUserContextAggregator but pushes LLMMessagesFrame
+    objects downstream instead of OpenAILLMContextFrame objects. This is useful
+    when you need message-based output rather than context-based output.
+    """
+
     def __init__(
         self,
         messages: Optional[List[dict]] = None,
@@ -802,21 +1076,42 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
         params: Optional[LLMUserAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the user response aggregator.
+
+        Args:
+            messages: Initial messages for the conversation context.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments passed to parent class.
+        """
+        warnings.warn(
+            "LLMUserResponseAggregator is deprecated and will be removed in a future version. "
+            "Use LLMUserContextAggregator or another LLM-specific subclass instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
-    async def
-
-
+    async def _process_aggregation(self):
+        """Process the current aggregation and push it downstream."""
+        aggregation = self._aggregation
+        await self.reset()
+        await self.handle_aggregation(aggregation)
+        frame = LLMMessagesFrame(self._context.messages)
+        await self.push_frame(frame)
 
-        # Reset the aggregation. Reset it before pushing it down, otherwise
-        # if the tasks gets cancelled we won't be able to clear things up.
-        await self.reset()
 
-
-
+class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
+    """Assistant response aggregator that outputs LLMMessagesFrame instead of context frames.
 
+    .. deprecated:: 0.0.79
+        This class is deprecated and will be removed in a future version.
+        Use `LLMAssistantContextAggregator` or another LLM-specific subclass instead.
+
+    This aggregator extends LLMAssistantContextAggregator but pushes LLMMessagesFrame
+    objects downstream instead of OpenAILLMContextFrame objects. This is useful
+    when you need message-based output rather than context-based output.
+    """
 
-class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
     def __init__(
         self,
         messages: Optional[List[dict]] = None,
@@ -824,9 +1119,23 @@ class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
         params: Optional[LLMAssistantAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the assistant response aggregator.
+
+        Args:
+            messages: Initial messages for the conversation context.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments passed to parent class.
+        """
+        warnings.warn(
+            "LLMAssistantResponseAggregator is deprecated and will be removed in a future version. "
+            "Use LLMAssistantContextAggregator or another LLM-specific subclass instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
     async def push_aggregation(self):
+        """Push the aggregated assistant response as an LLMMessagesFrame."""
         if len(self._aggregation) > 0:
             await self.handle_aggregation(self._aggregation)
 
```
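Both response aggregators now emit `DeprecationWarning` at construction, so pipelines built on them should move to the context-based pair, which shares one `OpenAILLMContext` instead of each aggregator owning its own messages. A hedged migration sketch (direct construction shown; real pipelines may instead obtain these from their LLM service):

```python
from pipecat.processors.aggregators.llm_response import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

messages = [{"role": "system", "content": "You are a helpful assistant."}]

# Before (deprecated since 0.0.79): each aggregator owned its own context and
# pushed LLMMessagesFrame objects downstream.
#   user = LLMUserResponseAggregator(messages)
#   assistant = LLMAssistantResponseAggregator(messages)

# After: one shared context; context frames instead of LLMMessagesFrame output.
context = OpenAILLMContext(messages)
user = LLMUserContextAggregator(context=context)
assistant = LLMAssistantContextAggregator(context=context)
```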