dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/processors/aggregators/openai_llm_context.py

```diff
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""OpenAI LLM context management for Pipecat.
+
+This module provides classes for managing OpenAI-specific conversation contexts,
+including message handling, tool management, and image/audio processing capabilities.
+"""
+
 import base64
 import copy
 import io
```
```diff
@@ -29,7 +35,21 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 
 class CustomEncoder(json.JSONEncoder):
+    """Custom JSON encoder for handling special data types in logging.
+
+    Provides specialized encoding for io.BytesIO objects to display
+    readable representations in log output instead of raw binary data.
+    """
+
     def default(self, obj):
+        """Encode special objects for JSON serialization.
+
+        Args:
+            obj: The object to encode.
+
+        Returns:
+            Encoded representation of the object.
+        """
         if isinstance(obj, io.BytesIO):
             # Convert the first 8 bytes to an ASCII hex string
             return f"{obj.getbuffer()[0:8].hex()}..."
```
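This encoder is wired into `get_messages_json()` further down via `json.dumps(..., cls=CustomEncoder)`. As a quick illustration of the pattern (a standalone sketch, not pipecat code), a `json.JSONEncoder` subclass gets a chance to handle any object the default encoder cannot serialize:

```python
import io
import json


class BytesIOEncoder(json.JSONEncoder):
    """Hypothetical stand-in for pipecat's CustomEncoder: summarize BytesIO buffers."""

    def default(self, obj):
        if isinstance(obj, io.BytesIO):
            # Show only the first 8 bytes as hex instead of dumping raw binary.
            return f"{obj.getbuffer()[0:8].hex()}..."
        # Fall back to the base class, which raises TypeError for unknown types.
        return super().default(obj)


payload = {"role": "user", "audio": io.BytesIO(b"RIFF....WAVEfmt ")}
print(json.dumps(payload, cls=BytesIOEncoder, indent=2))
# -> {"role": "user", "audio": "524946462e2e2e2e..."}
```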
```diff
@@ -37,63 +57,140 @@ class CustomEncoder(json.JSONEncoder):
 
 
 class OpenAILLMContext:
+    """Manages conversation context for OpenAI LLM interactions.
+
+    Handles message history, tool definitions, tool choices, and multimedia content
+    for OpenAI API conversations. Provides methods for message manipulation,
+    content formatting, and integration with various LLM adapters.
+    """
+
     def __init__(
         self,
         messages: Optional[List[ChatCompletionMessageParam]] = None,
         tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
     ):
+        """Initialize the OpenAI LLM context.
+
+        Args:
+            messages: Initial list of conversation messages.
+            tools: Available tools for the LLM to use.
+            tool_choice: Tool selection strategy for the LLM.
+        """
         self._messages: List[ChatCompletionMessageParam] = messages if messages else []
         self._tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice
         self._tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = tools
         self._llm_adapter: Optional[BaseLLMAdapter] = None
 
     def get_llm_adapter(self) -> Optional[BaseLLMAdapter]:
+        """Get the current LLM adapter.
+
+        Returns:
+            The currently set LLM adapter, or None if not set.
+        """
         return self._llm_adapter
 
     def set_llm_adapter(self, llm_adapter: BaseLLMAdapter):
+        """Set the LLM adapter for context processing.
+
+        Args:
+            llm_adapter: The LLM adapter to use for tool conversion.
+        """
         self._llm_adapter = llm_adapter
 
     @staticmethod
    def from_messages(messages: List[dict]) -> "OpenAILLMContext":
+        """Create a context from a list of message dictionaries.
+
+        Args:
+            messages: List of message dictionaries to convert to context.
+
+        Returns:
+            New OpenAILLMContext instance with the provided messages.
+        """
         context = OpenAILLMContext()
 
         for message in messages:
-            if "name" not in message:
-                message["name"] = message["role"]
             context.add_message(message)
         return context
 
     @property
     def messages(self) -> List[ChatCompletionMessageParam]:
+        """Get the current messages list.
+
+        Returns:
+            List of conversation messages.
+        """
         return self._messages
 
     @property
     def tools(self) -> List[ChatCompletionToolParam] | NotGiven | List[Any]:
+        """Get the tools list, converting through adapter if available.
+
+        Returns:
+            Tools list, potentially converted by the LLM adapter.
+        """
         if self._llm_adapter:
             return self._llm_adapter.from_standard_tools(self._tools)
         return self._tools
 
     @property
     def tool_choice(self) -> ChatCompletionToolChoiceOptionParam | NotGiven:
+        """Get the current tool choice setting.
+
+        Returns:
+            The tool choice configuration.
+        """
         return self._tool_choice
 
     def add_message(self, message: ChatCompletionMessageParam):
+        """Add a single message to the context.
+
+        Args:
+            message: The message to add to the conversation history.
+        """
         self._messages.append(message)
 
     def add_messages(self, messages: List[ChatCompletionMessageParam]):
+        """Add multiple messages to the context.
+
+        Args:
+            messages: List of messages to add to the conversation history.
+        """
         self._messages.extend(messages)
 
     def set_messages(self, messages: List[ChatCompletionMessageParam]):
+        """Replace all messages in the context.
+
+        Args:
+            messages: New list of messages to replace the current history.
+        """
         self._messages[:] = messages
 
     def get_messages(self) -> List[ChatCompletionMessageParam]:
+        """Get a copy of the current messages list.
+
+        Returns:
+            List of all messages in the conversation history.
+        """
         return self._messages
 
     def get_messages_json(self) -> str:
+        """Get messages as a formatted JSON string.
+
+        Returns:
+            JSON string representation of all messages with custom encoding.
+        """
         return json.dumps(self._messages, cls=CustomEncoder, ensure_ascii=False, indent=2)
 
     def get_messages_for_logging(self) -> str:
+        """Get sanitized messages suitable for logging.
+
+        Removes or truncates sensitive data like image content for safe logging.
+
+        Returns:
+            JSON string with sanitized message content for logging.
+        """
         msgs = []
         for message in self.messages:
             msg = copy.deepcopy(message)
```
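Taken together, these methods give the context a compact message-management API. A minimal usage sketch (assuming pipecat is installed; this example is illustrative, not taken from the package):

```python
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

# Build a context from plain OpenAI-style message dicts.
context = OpenAILLMContext.from_messages(
    [{"role": "system", "content": "You are a helpful assistant."}]
)

# Append a user turn, then inspect the history as JSON.
context.add_message({"role": "user", "content": "Hello!"})
print(context.get_messages_json())  # serialized with CustomEncoder, indent=2
```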
```diff
@@ -116,17 +213,18 @@
     def from_standard_message(self, message):
         """Convert from OpenAI message format to OpenAI message format (passthrough).
 
-        OpenAI's format allows both simple string content and structured content
-
-
+        OpenAI's format allows both simple string content and structured content::
+
+            Simple: {"role": "user", "content": "Hello"}
+            Structured: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
 
         Since OpenAI is our standard format, this is a passthrough function.
 
         Args:
-            message
+            message: Message in OpenAI format.
 
         Returns:
-
+            Same message, unchanged.
         """
         return message
 
```
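Because both content shapes are valid, downstream code that needs plain text has to flatten the structured form. A small hypothetical helper (not part of pipecat) illustrating the two shapes named in the docstring:

```python
def content_to_text(message: dict) -> str:
    """Flatten OpenAI-style message content to plain text."""
    content = message.get("content", "")
    if isinstance(content, str):  # simple form
        return content
    # Structured form: a list of typed parts; keep only the text parts.
    return " ".join(part["text"] for part in content if part.get("type") == "text")


assert content_to_text({"role": "user", "content": "Hello"}) == "Hello"
assert content_to_text(
    {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
) == "Hello"
```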
```diff
@@ -138,20 +236,28 @@
         other LLM services that may need to return multiple messages.
 
         Args:
-            obj
-
-            - List content: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
+            obj: Message in OpenAI format with either simple string content
+                or structured list content.
 
         Returns:
-
-            the content was in simple string or structured list format
+            List containing the original messages, preserving the content format.
         """
         return [obj]
 
     def get_messages_for_initializing_history(self):
+        """Get messages for initializing conversation history.
+
+        Returns:
+            List of messages suitable for history initialization.
+        """
         return self._messages
 
     def get_messages_for_persistent_storage(self):
+        """Get messages formatted for persistent storage.
+
+        Returns:
+            List of messages converted to standard format for storage.
+        """
         messages = []
         for m in self._messages:
             standard_messages = self.to_standard_messages(m)
```
```diff
@@ -159,9 +265,19 @@
         return messages
 
     def set_tool_choice(self, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven):
+        """Set the tool choice configuration.
+
+        Args:
+            tool_choice: Tool selection strategy for the LLM.
+        """
         self._tool_choice = tool_choice
 
     def set_tools(self, tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = NOT_GIVEN):
+        """Set the available tools for the LLM.
+
+        Args:
+            tools: List of tools available to the LLM, or NOT_GIVEN to disable tools.
+        """
         if tools != NOT_GIVEN and isinstance(tools, list) and len(tools) == 0:
             tools = NOT_GIVEN
         self._tools = tools
```
```diff
@@ -169,6 +285,14 @@
     def add_image_frame_message(
         self, *, format: str, size: tuple[int, int], image: bytes, text: str = None
     ):
+        """Add a message containing an image frame.
+
+        Args:
+            format: Image format (e.g., 'RGB', 'RGBA').
+            size: Image dimensions as (width, height) tuple.
+            image: Raw image bytes.
+            text: Optional text to include with the image.
+        """
         buffer = io.BytesIO()
         Image.frombytes(format, size, image).save(buffer, format="JPEG")
         encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
```
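The method body visible above follows a common recipe: wrap the raw pixels with PIL, re-encode to JPEG in memory, and base64 the result for an `image_url` content part. A standalone sketch of the same recipe (requires Pillow; the data-URL shape shown is the standard OpenAI vision format, assumed here rather than taken from this diff):

```python
import base64
import io

from PIL import Image

# A 2x2 RGB image from raw bytes (3 bytes per pixel, all red).
raw = bytes([255, 0, 0] * 4)
buffer = io.BytesIO()
Image.frombytes("RGB", (2, 2), raw).save(buffer, format="JPEG")
encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")

content_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
}
```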
```diff
@@ -182,10 +306,30 @@
         self.add_message({"role": "user", "content": content})
 
     def add_audio_frames_message(self, *, audio_frames: list[AudioRawFrame], text: str = None):
+        """Add a message containing audio frames.
+
+        Args:
+            audio_frames: List of audio frame objects to include.
+            text: Optional text to include with the audio.
+
+        Note:
+            This method is currently a placeholder for future implementation.
+        """
         # todo: implement for OpenAI models and others
         pass
 
     def create_wav_header(self, sample_rate, num_channels, bits_per_sample, data_size):
+        """Create a WAV file header for audio data.
+
+        Args:
+            sample_rate: Audio sample rate in Hz.
+            num_channels: Number of audio channels.
+            bits_per_sample: Bits per audio sample.
+            data_size: Size of audio data in bytes.
+
+        Returns:
+            WAV header as a bytearray.
+        """
         # RIFF chunk descriptor
         header = bytearray()
         header.extend(b"RIFF")  # ChunkID
```
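For reference, the canonical 44-byte RIFF/WAVE header such a method produces can be built with `struct` alone. A minimal sketch for PCM audio (standard WAV layout; not pipecat's exact implementation, which continues past the `b"RIFF"` line shown above):

```python
import struct


def wav_header(sample_rate: int, num_channels: int, bits_per_sample: int, data_size: int) -> bytes:
    """Build a canonical 44-byte RIFF/WAVE header for PCM audio."""
    byte_rate = sample_rate * num_channels * bits_per_sample // 8
    block_align = num_channels * bits_per_sample // 8
    return struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF", 36 + data_size, b"WAVE",  # RIFF chunk descriptor
        b"fmt ", 16, 1, num_channels,      # fmt subchunk: size 16, PCM, channels
        sample_rate, byte_rate, block_align, bits_per_sample,
        b"data", data_size,                # data subchunk header
    )


assert len(wav_header(16000, 1, 16, 0)) == 44
```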
```diff
@@ -211,10 +355,14 @@
 
 @dataclass
 class OpenAILLMContextFrame(Frame):
-    """
+    """Frame containing OpenAI-specific LLM context.
+
+    Like an LLMMessagesFrame, but with extra context specific to the OpenAI
     API. The context in this message is also mutable, and will be changed by the
     OpenAIContextAggregator frame processor.
 
+    Parameters:
+        context: The OpenAI LLM context containing messages, tools, and configuration.
     """
 
     context: OpenAILLMContext
```
pipecat/processors/aggregators/sentence.py

```diff
@@ -4,35 +4,46 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Text sentence aggregation processor for Pipecat.
+
+This module provides a frame processor that accumulates text frames into
+complete sentences, only outputting when a sentence-ending pattern is detected.
+"""
+
 from pipecat.frames.frames import EndFrame, Frame, InterimTranscriptionFrame, TextFrame
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.utils.string import match_endofsentence
 
 
 class SentenceAggregator(FrameProcessor):
-    """
+    """Aggregates text frames into complete sentences.
+
+    This processor accumulates incoming text frames until a sentence-ending
+    pattern is detected, then outputs the complete sentence as a single frame.
+    Useful for ensuring downstream processors receive coherent, complete sentences
+    rather than fragmented text.
+
+    Frame input/output::
 
-    Frame input/output:
         TextFrame("Hello,") -> None
-        TextFrame(" world.") -> TextFrame("Hello world.")
-
-    Doctest: FIXME to work with asyncio
-    >>> import asyncio
-    >>> async def print_frames(aggregator, frame):
-    ...     async for frame in aggregator.process_frame(frame):
-    ...         print(frame.text)
-
-    >>> aggregator = SentenceAggregator()
-    >>> asyncio.run(print_frames(aggregator, TextFrame("Hello,")))
-    >>> asyncio.run(print_frames(aggregator, TextFrame(" world.")))
-    Hello, world.
+        TextFrame(" world.") -> TextFrame("Hello, world.")
     """
 
     def __init__(self):
+        """Initialize the sentence aggregator.
+
+        Sets up internal state for accumulating text frames into complete sentences.
+        """
         super().__init__()
         self._aggregation = ""
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and aggregate text into complete sentences.
+
+        Args:
+            frame: The incoming frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         # We ignore interim description at this point.
```
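Stripped of the frame plumbing, the aggregation logic reduces to buffering text until an end-of-sentence match succeeds. A pipeline-free sketch of that core pattern (the regex is a crude stand-in for pipecat's `match_endofsentence`, which is more robust):

```python
import re


class SentenceBuffer:
    """Accumulate text chunks; emit only when a sentence ending is seen."""

    def __init__(self):
        self._aggregation = ""

    def push(self, text: str) -> str | None:
        self._aggregation += text
        # Crude stand-in for pipecat's match_endofsentence().
        if re.search(r"[.!?]\s*$", self._aggregation):
            sentence, self._aggregation = self._aggregation, ""
            return sentence
        return None


buf = SentenceBuffer()
assert buf.push("Hello,") is None
assert buf.push(" world.") == "Hello, world."
```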
pipecat/processors/aggregators/user_response.py

```diff
@@ -4,15 +4,40 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""User response aggregation for text frames.
+
+This module provides an aggregator that collects user responses and outputs
+them as TextFrame objects, useful for capturing and processing user input
+in conversational pipelines.
+"""
+
 from pipecat.frames.frames import TextFrame
-from pipecat.processors.aggregators.llm_response import LLMUserResponseAggregator
+from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+
 
+class UserResponseAggregator(LLMUserContextAggregator):
+    """Aggregates user responses into TextFrame objects.
+
+    This aggregator extends LLMUserContextAggregator to specifically handle
+    user input by collecting text responses and outputting them as TextFrame
+    objects when the aggregation is complete.
+    """
 
-class UserResponseAggregator(LLMUserResponseAggregator):
     def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+        """Initialize the user response aggregator.
+
+        Args:
+            **kwargs: Additional arguments passed to parent LLMUserContextAggregator.
+        """
+        super().__init__(context=OpenAILLMContext(), **kwargs)
 
     async def push_aggregation(self):
+        """Push the aggregated user response as a TextFrame.
+
+        Creates a TextFrame from the current aggregation if it contains content,
+        resets the aggregation state, and pushes the frame downstream.
+        """
         if len(self._aggregation) > 0:
             frame = TextFrame(self._aggregation.strip())
 
```
pipecat/processors/aggregators/vision_image_frame.py

```diff
@@ -4,33 +4,43 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Vision image frame aggregation for Pipecat.
+
+This module provides frame aggregation functionality to combine text and image
+frames into vision frames for multimodal processing.
+"""
+
 from pipecat.frames.frames import Frame, InputImageRawFrame, TextFrame, VisionImageRawFrame
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 
 class VisionImageFrameAggregator(FrameProcessor):
-    """
-    InputImageRawFrame. After the InputImageRawFrame arrives it will output a
-    VisionImageRawFrame.
-
-    >>> from pipecat.frames.frames import ImageFrame
-
-    >>> async def print_frames(aggregator, frame):
-    ...     async for frame in aggregator.process_frame(frame):
-    ...         print(frame)
-
-    >>> aggregator = VisionImageFrameAggregator()
-    >>> asyncio.run(print_frames(aggregator, TextFrame("What do you see?")))
-    >>> asyncio.run(print_frames(aggregator, ImageFrame(image=bytes([]), size=(0, 0))))
-    VisionImageFrame, text: What do you see?, image size: 0x0, buffer size: 0 B
+    """Aggregates consecutive text and image frames into vision frames.
 
+    This aggregator waits for a consecutive TextFrame and an InputImageRawFrame.
+    After the InputImageRawFrame arrives it will output a VisionImageRawFrame
+    combining both the text and image data for multimodal processing.
     """
 
     def __init__(self):
+        """Initialize the vision image frame aggregator.
+
+        The aggregator starts with no cached text, waiting for the first
+        TextFrame to arrive before it can create vision frames.
+        """
         super().__init__()
         self._describe_text = None
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and aggregate text with images.
+
+        Caches TextFrames and combines them with subsequent InputImageRawFrames
+        to create VisionImageRawFrames. Other frames are passed through unchanged.
+
+        Args:
+            frame: The incoming frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         if isinstance(frame, TextFrame):
```
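The pairing logic amounts to a one-slot cache: remember the most recent text, attach it to the next image. A pipeline-free sketch of that pattern (illustrative classes, not pipecat's):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class VisionRequest:
    text: Optional[str]
    image: bytes


class TextImagePairer:
    """One-slot cache: remember the last text, attach it to the next image."""

    def __init__(self):
        self._describe_text: Optional[str] = None

    def push_text(self, text: str) -> None:
        self._describe_text = text

    def push_image(self, image: bytes) -> VisionRequest:
        request = VisionRequest(text=self._describe_text, image=image)
        self._describe_text = None  # consume the cached text
        return request


pairer = TextImagePairer()
pairer.push_text("What do you see?")
print(pairer.push_image(b"\x00" * 16).text)  # What do you see?
```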
pipecat/processors/async_generator.py

```diff
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Async generator processor for frame serialization and streaming."""
+
 import asyncio
 from typing import Any, AsyncGenerator
 
@@ -17,12 +19,32 @@ from pipecat.serializers.base_serializer import FrameSerializer
 
 
 class AsyncGeneratorProcessor(FrameProcessor):
+    """A frame processor that serializes frames and provides them via async generator.
+
+    This processor passes frames through unchanged while simultaneously serializing
+    them and making the serialized data available through an async generator interface.
+    Useful for streaming frame data to external consumers while maintaining the
+    normal frame processing pipeline.
+    """
+
     def __init__(self, *, serializer: FrameSerializer, **kwargs):
+        """Initialize the async generator processor.
+
+        Args:
+            serializer: The frame serializer to use for converting frames to data.
+            **kwargs: Additional arguments passed to the parent FrameProcessor.
+        """
         super().__init__(**kwargs)
         self._serializer = serializer
         self._data_queue = asyncio.Queue()
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames by passing them through and queuing serialized data.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         await self.push_frame(frame, direction)
@@ -35,6 +57,12 @@ class AsyncGeneratorProcessor(FrameProcessor):
         await self._data_queue.put(data)
 
     async def generator(self) -> AsyncGenerator[Any, None]:
+        """Generate serialized frame data asynchronously.
+
+        Yields:
+            Serialized frame data from the internal queue until a termination
+            signal (None) is received.
+        """
         running = True
         while running:
             data = await self._data_queue.get()
```