dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/frames/frames.py
CHANGED
|
@@ -4,6 +4,13 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Core frame definitions for the Pipecat AI framework.
|
|
8
|
+
|
|
9
|
+
This module contains all frame types used throughout the Pipecat pipeline system,
|
|
10
|
+
including data frames, system frames, and control frames for audio, video, text,
|
|
11
|
+
and LLM processing.
|
|
12
|
+
"""
|
|
13
|
+
|
|
7
14
|
from dataclasses import dataclass, field
|
|
8
15
|
from enum import Enum
|
|
9
16
|
from typing import (
|
|
@@ -21,6 +28,7 @@ from typing import (
|
|
|
21
28
|
)
|
|
22
29
|
|
|
23
30
|
from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
|
|
31
|
+
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
24
32
|
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
25
33
|
from pipecat.metrics.metrics import MetricsData
|
|
26
34
|
from pipecat.transcriptions.language import Language
|
|
@@ -32,7 +40,22 @@ if TYPE_CHECKING:
|
|
|
32
40
|
|
|
33
41
|
|
|
34
42
|
class KeypadEntry(str, Enum):
|
|
35
|
-
"""DTMF entries.
|
|
43
|
+
"""DTMF keypad entries for phone system integration.
|
|
44
|
+
|
|
45
|
+
Parameters:
|
|
46
|
+
ONE: Number key 1.
|
|
47
|
+
TWO: Number key 2.
|
|
48
|
+
THREE: Number key 3.
|
|
49
|
+
FOUR: Number key 4.
|
|
50
|
+
FIVE: Number key 5.
|
|
51
|
+
SIX: Number key 6.
|
|
52
|
+
SEVEN: Number key 7.
|
|
53
|
+
EIGHT: Number key 8.
|
|
54
|
+
NINE: Number key 9.
|
|
55
|
+
ZERO: Number key 0.
|
|
56
|
+
POUND: Pound/hash key (#).
|
|
57
|
+
STAR: Star/asterisk key (*).
|
|
58
|
+
"""
|
|
36
59
|
|
|
37
60
|
ONE = "1"
|
|
38
61
|
TWO = "2"
|
|
@@ -49,12 +72,31 @@ class KeypadEntry(str, Enum):
|
|
|
49
72
|
|
|
50
73
|
|
|
51
74
|
def format_pts(pts: Optional[int]):
|
|
75
|
+
"""Format presentation timestamp (PTS) in nanoseconds to a human-readable string.
|
|
76
|
+
|
|
77
|
+
Converts a PTS value in nanoseconds to a string representation.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
pts: Presentation timestamp in nanoseconds, or None if not set.
|
|
81
|
+
"""
|
|
52
82
|
return nanoseconds_to_str(pts) if pts else None
|
|
53
83
|
|
|
54
84
|
|
|
55
85
|
@dataclass
|
|
56
86
|
class Frame:
|
|
57
|
-
"""Base frame class.
|
|
87
|
+
"""Base frame class for all frames in the Pipecat pipeline.
|
|
88
|
+
|
|
89
|
+
All frames inherit from this base class and automatically receive
|
|
90
|
+
unique identifiers, names, and metadata support.
|
|
91
|
+
|
|
92
|
+
Parameters:
|
|
93
|
+
id: Unique identifier for the frame instance.
|
|
94
|
+
name: Human-readable name combining class name and instance count.
|
|
95
|
+
pts: Presentation timestamp in nanoseconds.
|
|
96
|
+
metadata: Dictionary for arbitrary frame metadata.
|
|
97
|
+
transport_source: Name of the transport source that created this frame.
|
|
98
|
+
transport_destination: Name of the transport destination for this frame.
|
|
99
|
+
"""
|
|
58
100
|
|
|
59
101
|
id: int = field(init=False)
|
|
60
102
|
name: str = field(init=False)
|
|
@@ -77,9 +119,10 @@ class Frame:
|
|
|
77
119
|
|
|
78
120
|
@dataclass
|
|
79
121
|
class SystemFrame(Frame):
|
|
80
|
-
"""System
|
|
81
|
-
frame processors and should be processed immediately.
|
|
122
|
+
"""System frame class for immediate processing.
|
|
82
123
|
|
|
124
|
+
A frame that takes higher priority than other frames. System frames are
|
|
125
|
+
handled in order and are not affected by user interruptions.
|
|
83
126
|
"""
|
|
84
127
|
|
|
85
128
|
pass
|
|
@@ -87,9 +130,11 @@ class SystemFrame(Frame):
|
|
|
87
130
|
|
|
88
131
|
@dataclass
|
|
89
132
|
class DataFrame(Frame):
|
|
90
|
-
"""Data
|
|
91
|
-
contain data such as LLM context, text, audio or images.
|
|
133
|
+
"""Data frame class for processing data in order.
|
|
92
134
|
|
|
135
|
+
A frame that is processed in order and usually contains data such as LLM
|
|
136
|
+
context, text, audio or images. Data frames are cancelled by user
|
|
137
|
+
interruptions.
|
|
93
138
|
"""
|
|
94
139
|
|
|
95
140
|
pass
|
|
@@ -97,9 +142,12 @@ class DataFrame(Frame):
|
|
|
97
142
|
|
|
98
143
|
@dataclass
|
|
99
144
|
class ControlFrame(Frame):
|
|
100
|
-
"""Control
|
|
101
|
-
|
|
102
|
-
|
|
145
|
+
"""Control frame class for processing control information in order.
|
|
146
|
+
|
|
147
|
+
A frame that, similar to data frames, is processed in order and usually
|
|
148
|
+
contains control information, such as a settings update or a request to end the pipeline
|
|
149
|
+
after everything is flushed. Control frames are cancelled by user
|
|
150
|
+
interruptions.
|
|
103
151
|
|
|
104
152
|
"""
|
|
105
153
|
|
|
@@ -113,7 +161,14 @@ class ControlFrame(Frame):
|
|
|
113
161
|
|
|
114
162
|
@dataclass
|
|
115
163
|
class AudioRawFrame:
|
|
116
|
-
"""A chunk of audio.
|
|
164
|
+
"""A frame containing a chunk of raw audio.
|
|
165
|
+
|
|
166
|
+
Parameters:
|
|
167
|
+
audio: Raw audio bytes in PCM format.
|
|
168
|
+
sample_rate: Audio sample rate in Hz.
|
|
169
|
+
num_channels: Number of audio channels.
|
|
170
|
+
num_frames: Number of audio frames (calculated automatically).
|
|
171
|
+
"""
|
|
117
172
|
|
|
118
173
|
audio: bytes
|
|
119
174
|
sample_rate: int
|
|
@@ -126,7 +181,13 @@ class AudioRawFrame:
|
|
|
126
181
|
|
|
127
182
|
@dataclass
|
|
128
183
|
class ImageRawFrame:
|
|
129
|
-
"""A raw image.
|
|
184
|
+
"""A frame containing a raw image.
|
|
185
|
+
|
|
186
|
+
Parameters:
|
|
187
|
+
image: Raw image bytes.
|
|
188
|
+
size: Image dimensions as (width, height) tuple.
|
|
189
|
+
format: Image format (e.g., 'JPEG', 'PNG').
|
|
190
|
+
"""
|
|
130
191
|
|
|
131
192
|
image: bytes
|
|
132
193
|
size: Tuple[int, int]
|
|
@@ -140,10 +201,11 @@ class ImageRawFrame:
|
|
|
140
201
|
|
|
141
202
|
@dataclass
|
|
142
203
|
class OutputAudioRawFrame(DataFrame, AudioRawFrame):
|
|
143
|
-
"""
|
|
144
|
-
transport supports multiple audio destinations (e.g. multiple audio tracks) the
|
|
145
|
-
destination name can be specified.
|
|
204
|
+
"""Audio data frame for output to transport.
|
|
146
205
|
|
|
206
|
+
A chunk of raw audio that will be played by the output transport. If the
|
|
207
|
+
transport supports multiple audio destinations (e.g. multiple audio tracks)
|
|
208
|
+
the destination name can be specified in transport_destination.
|
|
147
209
|
"""
|
|
148
210
|
|
|
149
211
|
def __post_init__(self):
|
|
@@ -157,29 +219,49 @@ class OutputAudioRawFrame(DataFrame, AudioRawFrame):
|
|
|
157
219
|
|
|
158
220
|
@dataclass
|
|
159
221
|
class OutputImageRawFrame(DataFrame, ImageRawFrame):
|
|
160
|
-
"""
|
|
161
|
-
multiple video destinations (e.g. multiple video tracks) the destination
|
|
162
|
-
name can be specified.
|
|
222
|
+
"""Image data frame for output to transport.
|
|
163
223
|
|
|
224
|
+
An image that will be shown by the transport. If the transport supports
|
|
225
|
+
multiple video destinations (e.g. multiple video tracks) the destination
|
|
226
|
+
name can be specified in transport_destination.
|
|
164
227
|
"""
|
|
165
228
|
|
|
166
229
|
def __str__(self):
|
|
167
230
|
pts = format_pts(self.pts)
|
|
168
|
-
return f"{self.name}(pts: {pts}, size: {self.size}, format: {self.format})"
|
|
231
|
+
return f"{self.name}(pts: {pts}, destination: {self.transport_destination}, size: {self.size}, format: {self.format})"
|
|
169
232
|
|
|
170
233
|
|
|
171
234
|
@dataclass
|
|
172
235
|
class TTSAudioRawFrame(OutputAudioRawFrame):
|
|
173
|
-
"""
|
|
236
|
+
"""Audio data frame generated by Text-to-Speech services.
|
|
237
|
+
|
|
238
|
+
A chunk of output audio generated by a TTS service, ready for playback.
|
|
239
|
+
"""
|
|
240
|
+
|
|
241
|
+
pass
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@dataclass
|
|
245
|
+
class SpeechOutputAudioRawFrame(OutputAudioRawFrame):
|
|
246
|
+
"""An audio frame that is part of a speech audio stream.
|
|
247
|
+
|
|
248
|
+
This frame is part of a continuous stream of audio frames containing speech.
|
|
249
|
+
The audio stream might also contain silence frames, so a process to distinguish
|
|
250
|
+
between speech and silence might be needed.
|
|
251
|
+
"""
|
|
174
252
|
|
|
175
253
|
pass
|
|
176
254
|
|
|
177
255
|
|
|
178
256
|
@dataclass
|
|
179
257
|
class URLImageRawFrame(OutputImageRawFrame):
|
|
180
|
-
"""
|
|
258
|
+
"""Image frame with an associated URL.
|
|
259
|
+
|
|
260
|
+
An output image with an associated URL. These images are usually
|
|
181
261
|
generated by third-party services that provide a URL to download the image.
|
|
182
262
|
|
|
263
|
+
Parameters:
|
|
264
|
+
url: URL where the image can be downloaded from.
|
|
183
265
|
"""
|
|
184
266
|
|
|
185
267
|
url: Optional[str] = None
|
|
@@ -191,10 +273,14 @@ class URLImageRawFrame(OutputImageRawFrame):
|
|
|
191
273
|
|
|
192
274
|
@dataclass
|
|
193
275
|
class SpriteFrame(DataFrame):
|
|
194
|
-
"""
|
|
276
|
+
"""Animated sprite frame containing multiple images.
|
|
277
|
+
|
|
278
|
+
An animated sprite that will be shown by the transport if the transport's
|
|
195
279
|
camera is enabled. Will play at the framerate specified in the transport's
|
|
196
280
|
`camera_out_framerate` constructor parameter.
|
|
197
281
|
|
|
282
|
+
Parameters:
|
|
283
|
+
images: List of image frames that make up the sprite animation.
|
|
198
284
|
"""
|
|
199
285
|
|
|
200
286
|
images: List[OutputImageRawFrame]
|
|
@@ -206,9 +292,14 @@ class SpriteFrame(DataFrame):
|
|
|
206
292
|
|
|
207
293
|
@dataclass
|
|
208
294
|
class TextFrame(DataFrame):
|
|
209
|
-
"""
|
|
210
|
-
be used to send text through processors.
|
|
295
|
+
"""Text data frame for passing text through the pipeline.
|
|
211
296
|
|
|
297
|
+
A chunk of text. Emitted by LLM services, consumed by context
|
|
298
|
+
aggregators, TTS services and more. Can be used to send text
|
|
299
|
+
through processors.
|
|
300
|
+
|
|
301
|
+
Parameters:
|
|
302
|
+
text: The text content.
|
|
212
303
|
"""
|
|
213
304
|
|
|
214
305
|
text: str
|
|
@@ -220,23 +311,30 @@ class TextFrame(DataFrame):
|
|
|
220
311
|
|
|
221
312
|
@dataclass
|
|
222
313
|
class LLMTextFrame(TextFrame):
|
|
223
|
-
"""
|
|
314
|
+
"""Text frame generated by LLM services."""
|
|
224
315
|
|
|
225
316
|
pass
|
|
226
317
|
|
|
227
318
|
|
|
228
319
|
@dataclass
|
|
229
320
|
class TTSTextFrame(TextFrame):
|
|
230
|
-
"""
|
|
321
|
+
"""Text frame generated by Text-to-Speech services."""
|
|
231
322
|
|
|
232
323
|
pass
|
|
233
324
|
|
|
234
325
|
|
|
235
326
|
@dataclass
|
|
236
327
|
class TranscriptionFrame(TextFrame):
|
|
237
|
-
"""
|
|
328
|
+
"""Text frame containing speech transcription data.
|
|
329
|
+
|
|
330
|
+
A text frame with transcription-specific data. The `result` field
|
|
238
331
|
contains the result from the STT service if available.
|
|
239
332
|
|
|
333
|
+
Parameters:
|
|
334
|
+
user_id: Identifier for the user who spoke.
|
|
335
|
+
timestamp: When the transcription occurred.
|
|
336
|
+
language: Detected or specified language of the speech.
|
|
337
|
+
result: Raw result from the STT service.
|
|
240
338
|
"""
|
|
241
339
|
|
|
242
340
|
user_id: str
|
|
@@ -250,9 +348,17 @@ class TranscriptionFrame(TextFrame):
|
|
|
250
348
|
|
|
251
349
|
@dataclass
|
|
252
350
|
class InterimTranscriptionFrame(TextFrame):
|
|
253
|
-
"""
|
|
351
|
+
"""Text frame containing partial/interim transcription data.
|
|
352
|
+
|
|
353
|
+
A text frame with interim transcription-specific data that represents
|
|
354
|
+
partial results before final transcription. The `result` field
|
|
254
355
|
contains the result from the STT service if available.
|
|
255
356
|
|
|
357
|
+
Parameters:
|
|
358
|
+
user_id: Identifier for the user who spoke.
|
|
359
|
+
timestamp: When the interim transcription occurred.
|
|
360
|
+
language: Detected or specified language of the speech.
|
|
361
|
+
result: Raw result from the STT service.
|
|
256
362
|
"""
|
|
257
363
|
|
|
258
364
|
text: str
|
|
@@ -267,10 +373,15 @@ class InterimTranscriptionFrame(TextFrame):
|
|
|
267
373
|
|
|
268
374
|
@dataclass
|
|
269
375
|
class TranslationFrame(TextFrame):
|
|
270
|
-
"""
|
|
376
|
+
"""Text frame containing translated transcription data.
|
|
271
377
|
|
|
272
|
-
|
|
378
|
+
A text frame with translated transcription data that will be placed
|
|
379
|
+
in the transport's receive queue when a participant speaks.
|
|
273
380
|
|
|
381
|
+
Parameters:
|
|
382
|
+
user_id: Identifier for the user who spoke.
|
|
383
|
+
timestamp: When the translation occurred.
|
|
384
|
+
language: Target language of the translation.
|
|
274
385
|
"""
|
|
275
386
|
|
|
276
387
|
user_id: str
|
|
@@ -283,16 +394,27 @@ class TranslationFrame(TextFrame):
|
|
|
283
394
|
|
|
284
395
|
@dataclass
|
|
285
396
|
class OpenAILLMContextAssistantTimestampFrame(DataFrame):
|
|
286
|
-
"""Timestamp information for assistant
|
|
397
|
+
"""Timestamp information for assistant messages in LLM context.
|
|
398
|
+
|
|
399
|
+
Parameters:
|
|
400
|
+
timestamp: Timestamp when the assistant message was created.
|
|
401
|
+
"""
|
|
287
402
|
|
|
288
403
|
timestamp: str
|
|
289
404
|
|
|
290
405
|
|
|
291
406
|
@dataclass
|
|
292
407
|
class TranscriptionMessage:
|
|
293
|
-
"""A message in a conversation transcript
|
|
408
|
+
"""A message in a conversation transcript.
|
|
294
409
|
|
|
410
|
+
A message in a conversation transcript containing the role and content.
|
|
295
411
|
Messages are in standard format with roles normalized to user/assistant.
|
|
412
|
+
|
|
413
|
+
Parameters:
|
|
414
|
+
role: The role of the message sender (user or assistant).
|
|
415
|
+
content: The message content/text.
|
|
416
|
+
user_id: Optional identifier for the user.
|
|
417
|
+
timestamp: Optional timestamp when the message was created.
|
|
296
418
|
"""
|
|
297
419
|
|
|
298
420
|
role: Literal["user", "assistant"]
|
|
@@ -303,39 +425,46 @@ class TranscriptionMessage:
|
|
|
303
425
|
|
|
304
426
|
@dataclass
|
|
305
427
|
class TranscriptionUpdateFrame(DataFrame):
|
|
306
|
-
"""
|
|
428
|
+
"""Frame containing new messages added to conversation transcript.
|
|
307
429
|
|
|
430
|
+
A frame containing new messages added to the conversation transcript.
|
|
308
431
|
This frame is emitted when new messages are added to the conversation history,
|
|
309
432
|
containing only the newly added messages rather than the full transcript.
|
|
310
433
|
Messages have normalized roles (user/assistant) regardless of the LLM service used.
|
|
311
434
|
Messages are always in the OpenAI standard message format, which supports both:
|
|
312
435
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
436
|
+
Examples:
|
|
437
|
+
Simple format::
|
|
438
|
+
|
|
439
|
+
[
|
|
440
|
+
{
|
|
441
|
+
"role": "user",
|
|
442
|
+
"content": "Hi, how are you?"
|
|
443
|
+
},
|
|
444
|
+
{
|
|
445
|
+
"role": "assistant",
|
|
446
|
+
"content": "Great! And you?"
|
|
447
|
+
}
|
|
448
|
+
]
|
|
449
|
+
|
|
450
|
+
Content list format::
|
|
451
|
+
|
|
452
|
+
[
|
|
453
|
+
{
|
|
454
|
+
"role": "user",
|
|
455
|
+
"content": [{"type": "text", "text": "Hi, how are you?"}]
|
|
456
|
+
},
|
|
457
|
+
{
|
|
458
|
+
"role": "assistant",
|
|
459
|
+
"content": [{"type": "text", "text": "Great! And you?"}]
|
|
460
|
+
}
|
|
461
|
+
]
|
|
336
462
|
|
|
337
463
|
OpenAI supports both formats. Anthropic and Google messages are converted to the
|
|
338
464
|
content list format.
|
|
465
|
+
|
|
466
|
+
Parameters:
|
|
467
|
+
messages: List of new transcript messages that were added.
|
|
339
468
|
"""
|
|
340
469
|
|
|
341
470
|
messages: List[TranscriptionMessage]
|
|
@@ -347,43 +476,84 @@ class TranscriptionUpdateFrame(DataFrame):
|
|
|
347
476
|
|
|
348
477
|
@dataclass
|
|
349
478
|
class LLMMessagesFrame(DataFrame):
|
|
350
|
-
"""
|
|
479
|
+
"""Frame containing LLM messages for chat completion.
|
|
480
|
+
|
|
481
|
+
.. deprecated:: 0.0.79
|
|
482
|
+
This class is deprecated and will be removed in a future version.
|
|
483
|
+
Instead, use either:
|
|
484
|
+
- `LLMMessagesUpdateFrame` with `run_llm=True`
|
|
485
|
+
- `OpenAILLMContextFrame` with desired messages in a new context
|
|
486
|
+
|
|
487
|
+
A frame containing a list of LLM messages. Used to signal that an LLM
|
|
351
488
|
service should run a chat completion and emit an LLMFullResponseStartFrame,
|
|
352
489
|
TextFrames and an LLMFullResponseEndFrame. Note that the `messages`
|
|
353
|
-
property in this class is mutable, and will be
|
|
490
|
+
property in this class is mutable, and will be updated by various
|
|
354
491
|
aggregators.
|
|
355
492
|
|
|
493
|
+
Parameters:
|
|
494
|
+
messages: List of message dictionaries in LLM format.
|
|
356
495
|
"""
|
|
357
496
|
|
|
358
497
|
messages: List[dict]
|
|
359
498
|
|
|
499
|
+
def __post_init__(self):
|
|
500
|
+
super().__post_init__()
|
|
501
|
+
import warnings
|
|
502
|
+
|
|
503
|
+
warnings.simplefilter("always")
|
|
504
|
+
warnings.warn(
|
|
505
|
+
"LLMMessagesFrame is deprecated and will be removed in a future version. "
|
|
506
|
+
"Instead, use either "
|
|
507
|
+
"`LLMMessagesUpdateFrame` with `run_llm=True`, or "
|
|
508
|
+
"`OpenAILLMContextFrame` with desired messages in a new context",
|
|
509
|
+
DeprecationWarning,
|
|
510
|
+
stacklevel=2,
|
|
511
|
+
)
|
|
512
|
+
|
|
360
513
|
|
|
361
514
|
@dataclass
|
|
362
515
|
class LLMMessagesAppendFrame(DataFrame):
|
|
363
|
-
"""
|
|
516
|
+
"""Frame containing LLM messages to append to current context.
|
|
517
|
+
|
|
518
|
+
A frame containing a list of LLM messages that need to be added to the
|
|
364
519
|
current context.
|
|
365
520
|
|
|
521
|
+
Parameters:
|
|
522
|
+
messages: List of message dictionaries to append.
|
|
523
|
+
run_llm: Whether the context update should be sent to the LLM.
|
|
366
524
|
"""
|
|
367
525
|
|
|
368
526
|
messages: List[dict]
|
|
527
|
+
run_llm: Optional[bool] = None
|
|
369
528
|
|
|
370
529
|
|
|
371
530
|
@dataclass
|
|
372
531
|
class LLMMessagesUpdateFrame(DataFrame):
|
|
373
|
-
"""
|
|
532
|
+
"""Frame containing LLM messages to replace current context.
|
|
533
|
+
|
|
534
|
+
A frame containing a list of new LLM messages. These messages will
|
|
374
535
|
replace the current context LLM messages and should generate a new
|
|
375
536
|
LLMMessagesFrame.
|
|
376
537
|
|
|
538
|
+
Parameters:
|
|
539
|
+
messages: List of message dictionaries to replace current context.
|
|
540
|
+
run_llm: Whether the context update should be sent to the LLM.
|
|
377
541
|
"""
|
|
378
542
|
|
|
379
543
|
messages: List[dict]
|
|
544
|
+
run_llm: Optional[bool] = None
|
|
380
545
|
|
|
381
546
|
|
|
382
547
|
@dataclass
|
|
383
548
|
class LLMSetToolsFrame(DataFrame):
|
|
384
|
-
"""
|
|
549
|
+
"""Frame containing tools for LLM function calling.
|
|
550
|
+
|
|
551
|
+
A frame containing a list of tools for an LLM to use for function calling.
|
|
385
552
|
The specific format depends on the LLM being used, but it should typically
|
|
386
553
|
contain JSON Schema objects.
|
|
554
|
+
|
|
555
|
+
Parameters:
|
|
556
|
+
tools: List of tool/function definitions for the LLM.
|
|
387
557
|
"""
|
|
388
558
|
|
|
389
559
|
tools: List[dict]
|
|
@@ -391,23 +561,35 @@ class LLMSetToolsFrame(DataFrame):
|
|
|
391
561
|
|
|
392
562
|
@dataclass
|
|
393
563
|
class LLMSetToolChoiceFrame(DataFrame):
|
|
394
|
-
"""
|
|
564
|
+
"""Frame containing tool choice configuration for LLM function calling.
|
|
565
|
+
|
|
566
|
+
Parameters:
|
|
567
|
+
tool_choice: Tool choice setting - 'none', 'auto', 'required', or specific tool dict.
|
|
568
|
+
"""
|
|
395
569
|
|
|
396
570
|
tool_choice: Literal["none", "auto", "required"] | dict
|
|
397
571
|
|
|
398
572
|
|
|
399
573
|
@dataclass
|
|
400
574
|
class LLMEnablePromptCachingFrame(DataFrame):
|
|
401
|
-
"""
|
|
575
|
+
"""Frame to enable/disable prompt caching in LLMs.
|
|
576
|
+
|
|
577
|
+
Parameters:
|
|
578
|
+
enable: Whether to enable prompt caching.
|
|
579
|
+
"""
|
|
402
580
|
|
|
403
581
|
enable: bool
|
|
404
582
|
|
|
405
583
|
|
|
406
584
|
@dataclass
|
|
407
585
|
class TTSSpeakFrame(DataFrame):
|
|
408
|
-
"""
|
|
409
|
-
pipeline (if any).
|
|
586
|
+
"""Frame containing text that should be spoken by TTS.
|
|
410
587
|
|
|
588
|
+
A frame that contains text that should be spoken by the TTS service
|
|
589
|
+
in the pipeline (if any).
|
|
590
|
+
|
|
591
|
+
Parameters:
|
|
592
|
+
text: The text to be spoken.
|
|
411
593
|
"""
|
|
412
594
|
|
|
413
595
|
text: str
|
|
@@ -415,6 +597,12 @@ class TTSSpeakFrame(DataFrame):
|
|
|
415
597
|
|
|
416
598
|
@dataclass
|
|
417
599
|
class TransportMessageFrame(DataFrame):
|
|
600
|
+
"""Frame containing transport-specific message data.
|
|
601
|
+
|
|
602
|
+
Parameters:
|
|
603
|
+
message: The transport message payload.
|
|
604
|
+
"""
|
|
605
|
+
|
|
418
606
|
message: Any
|
|
419
607
|
|
|
420
608
|
def __str__(self):
|
|
@@ -423,17 +611,22 @@ class TransportMessageFrame(DataFrame):
|
|
|
423
611
|
|
|
424
612
|
@dataclass
|
|
425
613
|
class DTMFFrame:
|
|
426
|
-
"""
|
|
614
|
+
"""Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
|
|
615
|
+
|
|
616
|
+
Parameters:
|
|
617
|
+
button: The DTMF keypad entry that was pressed.
|
|
618
|
+
"""
|
|
427
619
|
|
|
428
620
|
button: KeypadEntry
|
|
429
621
|
|
|
430
622
|
|
|
431
623
|
@dataclass
|
|
432
624
|
class OutputDTMFFrame(DTMFFrame, DataFrame):
|
|
433
|
-
"""
|
|
625
|
+
"""DTMF keypress output frame for transport queuing.
|
|
626
|
+
|
|
627
|
+
A DTMF keypress output that will be queued. If your transport supports
|
|
434
628
|
multiple dial-out destinations, use the `transport_destination` field to
|
|
435
629
|
specify where the DTMF keypress should be sent.
|
|
436
|
-
|
|
437
630
|
"""
|
|
438
631
|
|
|
439
632
|
pass
|
|
@@ -446,12 +639,27 @@ class OutputDTMFFrame(DTMFFrame, DataFrame):
|
|
|
446
639
|
|
|
447
640
|
@dataclass
|
|
448
641
|
class StartFrame(SystemFrame):
|
|
449
|
-
"""
|
|
642
|
+
"""Initial frame to start pipeline processing.
|
|
643
|
+
|
|
644
|
+
This is the first frame that should be pushed down a pipeline to
|
|
645
|
+
initialize all processors with their configuration parameters.
|
|
646
|
+
|
|
647
|
+
Parameters:
|
|
648
|
+
audio_in_sample_rate: Input audio sample rate in Hz.
|
|
649
|
+
audio_out_sample_rate: Output audio sample rate in Hz.
|
|
650
|
+
allow_interruptions: Whether to allow user interruptions.
|
|
651
|
+
enable_metrics: Whether to enable performance metrics collection.
|
|
652
|
+
enable_tracing: Whether to enable OpenTelemetry tracing.
|
|
653
|
+
enable_usage_metrics: Whether to enable usage metrics collection.
|
|
654
|
+
interruption_strategies: List of interruption handling strategies.
|
|
655
|
+
report_only_initial_ttfb: Whether to report only initial time-to-first-byte.
|
|
656
|
+
"""
|
|
450
657
|
|
|
451
658
|
audio_in_sample_rate: int = 16000
|
|
452
659
|
audio_out_sample_rate: int = 24000
|
|
453
660
|
allow_interruptions: bool = False
|
|
454
661
|
enable_metrics: bool = False
|
|
662
|
+
enable_tracing: bool = False
|
|
455
663
|
enable_usage_metrics: bool = False
|
|
456
664
|
interruption_strategies: List[BaseInterruptionStrategy] = field(default_factory=list)
|
|
457
665
|
report_only_initial_ttfb: bool = False
|
|
@@ -459,21 +667,32 @@ class StartFrame(SystemFrame):
|
|
|
459
667
|
|
|
460
668
|
@dataclass
|
|
461
669
|
class CancelFrame(SystemFrame):
|
|
462
|
-
"""
|
|
670
|
+
"""Frame indicating pipeline should stop immediately.
|
|
671
|
+
|
|
672
|
+
Indicates that a pipeline needs to stop right away without
|
|
673
|
+
processing remaining queued frames.
|
|
674
|
+
"""
|
|
463
675
|
|
|
464
676
|
pass
|
|
465
677
|
|
|
466
678
|
|
|
467
679
|
@dataclass
|
|
468
680
|
class ErrorFrame(SystemFrame):
|
|
469
|
-
"""
|
|
470
|
-
|
|
681
|
+
"""Frame notifying of errors in the pipeline.
|
|
682
|
+
|
|
683
|
+
This is used to notify upstream that an error has occurred downstream in
|
|
684
|
+
the pipeline. A fatal error indicates the error is unrecoverable and that the
|
|
471
685
|
bot should exit.
|
|
472
686
|
|
|
687
|
+
Parameters:
|
|
688
|
+
error: Description of the error that occurred.
|
|
689
|
+
fatal: Whether the error is fatal and requires bot shutdown.
|
|
690
|
+
processor: The frame processor that generated the error.
|
|
473
691
|
"""
|
|
474
692
|
|
|
475
693
|
error: str
|
|
476
694
|
fatal: bool = False
|
|
695
|
+
processor: Optional["FrameProcessor"] = None
|
|
477
696
|
|
|
478
697
|
def __str__(self):
|
|
479
698
|
return f"{self.name}(error: {self.error}, fatal: {self.fatal})"
|
|
@@ -481,9 +700,13 @@ class ErrorFrame(SystemFrame):
|
|
|
481
700
|
|
|
482
701
|
@dataclass
|
|
483
702
|
class FatalErrorFrame(ErrorFrame):
|
|
484
|
-
"""
|
|
485
|
-
that the bot should exit.
|
|
703
|
+
"""Frame notifying of unrecoverable errors requiring bot shutdown.
|
|
486
704
|
|
|
705
|
+
This is used to notify upstream that an unrecoverable error has occurred and
|
|
706
|
+
that the bot should exit immediately.
|
|
707
|
+
|
|
708
|
+
Parameters:
|
|
709
|
+
fatal: Always True for fatal errors.
|
|
487
710
|
"""
|
|
488
711
|
|
|
489
712
|
fatal: bool = field(default=True, init=False)
|
|
@@ -491,10 +714,11 @@ class FatalErrorFrame(ErrorFrame):
|
|
|
491
714
|
|
|
492
715
|
@dataclass
|
|
493
716
|
class EndTaskFrame(SystemFrame):
|
|
494
|
-
"""
|
|
495
|
-
closed nicely (flushing all the queued frames) by pushing an EndFrame
|
|
496
|
-
downstream.
|
|
717
|
+
"""Frame to request graceful pipeline task closure.
|
|
497
718
|
|
|
719
|
+
This is used to notify the pipeline task that the pipeline should be
|
|
720
|
+
closed nicely (flushing all the queued frames) by pushing an EndFrame
|
|
721
|
+
downstream. This frame should be pushed upstream.
|
|
498
722
|
"""
|
|
499
723
|
|
|
500
724
|
pass
|
|
@@ -502,9 +726,11 @@ class EndTaskFrame(SystemFrame):
|
|
|
502
726
|
|
|
503
727
|
@dataclass
|
|
504
728
|
class CancelTaskFrame(SystemFrame):
|
|
505
|
-
"""
|
|
506
|
-
stopped immediately by pushing a CancelFrame downstream.
|
|
729
|
+
"""Frame to request immediate pipeline task cancellation.
|
|
507
730
|
|
|
731
|
+
This is used to notify the pipeline task that the pipeline should be
|
|
732
|
+
stopped immediately by pushing a CancelFrame downstream. This frame
|
|
733
|
+
should be pushed upstream.
|
|
508
734
|
"""
|
|
509
735
|
|
|
510
736
|
pass
|
|
@@ -512,10 +738,12 @@ class CancelTaskFrame(SystemFrame):
|
|
|
512
738
|
|
|
513
739
|
@dataclass
|
|
514
740
|
class StopTaskFrame(SystemFrame):
|
|
515
|
-
"""
|
|
516
|
-
soon as possible (flushing all the queued frames) but that the pipeline
|
|
517
|
-
processors should be kept in a running state.
|
|
741
|
+
"""Frame to request pipeline task stop while keeping processors running.
|
|
518
742
|
|
|
743
|
+
This is used to notify the pipeline task that it should be stopped as
|
|
744
|
+
soon as possible (flushing all the queued frames) but that the pipeline
|
|
745
|
+
processors should be kept in a running state. This frame should be pushed
|
|
746
|
+
upstream.
|
|
519
747
|
"""
|
|
520
748
|
|
|
521
749
|
pass
|
|
@@ -523,11 +751,15 @@ class StopTaskFrame(SystemFrame):
|
|
|
523
751
|
|
|
524
752
|
@dataclass
|
|
525
753
|
class FrameProcessorPauseUrgentFrame(SystemFrame):
|
|
526
|
-
"""
|
|
754
|
+
"""Frame to pause frame processing immediately.
|
|
755
|
+
|
|
756
|
+
This frame is used to pause frame processing for the given processor as
|
|
527
757
|
fast as possible. Pausing frame processing will keep frames in the internal
|
|
528
758
|
queue which will then be processed when frame processing is resumed with
|
|
529
759
|
`FrameProcessorResumeFrame`.
|
|
530
760
|
|
|
761
|
+
Parameters:
|
|
762
|
+
processor: The frame processor to pause.
|
|
531
763
|
"""
|
|
532
764
|
|
|
533
765
|
processor: "FrameProcessor"
|
|
@@ -535,10 +767,14 @@ class FrameProcessorPauseUrgentFrame(SystemFrame):
|
|
|
535
767
|
|
|
536
768
|
@dataclass
|
|
537
769
|
class FrameProcessorResumeUrgentFrame(SystemFrame):
|
|
538
|
-
"""
|
|
770
|
+
"""Frame to resume frame processing immediately.
|
|
771
|
+
|
|
772
|
+
This frame is used to resume frame processing for the given processor
|
|
539
773
|
if it was previously paused as fast as possible. After resuming frame
|
|
540
774
|
processing all queued frames will be processed in the order received.
|
|
541
775
|
|
|
776
|
+
Parameters:
|
|
777
|
+
processor: The frame processor to resume.
|
|
542
778
|
"""
|
|
543
779
|
|
|
544
780
|
processor: "FrameProcessor"
|
|
@@ -546,11 +782,12 @@ class FrameProcessorResumeUrgentFrame(SystemFrame):
|
|
|
546
782
|
|
|
547
783
|
@dataclass
|
|
548
784
|
class StartInterruptionFrame(SystemFrame):
|
|
549
|
-
"""
|
|
550
|
-
interruption). This is similar to UserStartedSpeakingFrame except that it
|
|
551
|
-
should be pushed concurrently with other frames (so the order is not
|
|
552
|
-
guaranteed).
|
|
785
|
+
"""Frame indicating user started speaking (interruption detected).
|
|
553
786
|
|
|
787
|
+
Emitted by the BaseInputTransport to indicate that a user has started
|
|
788
|
+
speaking (i.e. is interrupting). This is similar to
|
|
789
|
+
UserStartedSpeakingFrame except that it should be pushed concurrently
|
|
790
|
+
with other frames (so the order is not guaranteed).
|
|
554
791
|
"""
|
|
555
792
|
|
|
556
793
|
pass
|
|
@@ -558,11 +795,12 @@ class StartInterruptionFrame(SystemFrame):
|
|
|
558
795
|
|
|
559
796
|
@dataclass
|
|
560
797
|
class StopInterruptionFrame(SystemFrame):
|
|
561
|
-
"""
|
|
562
|
-
interruptions). This is similar to UserStoppedSpeakingFrame except that it
|
|
563
|
-
should be pushed concurrently with other frames (so the order is not
|
|
564
|
-
guaranteed).
|
|
798
|
+
"""Frame indicating user stopped speaking (interruption ended).
|
|
565
799
|
|
|
800
|
+
Emitted by the BaseInputTransport to indicate that a user has stopped
|
|
801
|
+
speaking (i.e. no more interruptions). This is similar to
|
|
802
|
+
UserStoppedSpeakingFrame except that it should be pushed concurrently
|
|
803
|
+
with other frames (so the order is not guaranteed).
|
|
566
804
|
"""
|
|
567
805
|
|
|
568
806
|
pass
|
|
@@ -570,11 +808,15 @@ class StopInterruptionFrame(SystemFrame):
|
|
|
570
808
|
|
|
571
809
|
@dataclass
|
|
572
810
|
class UserStartedSpeakingFrame(SystemFrame):
|
|
573
|
-
"""
|
|
811
|
+
"""Frame indicating user has started speaking.
|
|
812
|
+
|
|
813
|
+
Emitted by VAD to indicate that a user has started speaking. This can be
|
|
574
814
|
used for interruptions or other times when detecting that someone is
|
|
575
815
|
speaking is more important than knowing what they're saying (as you will
|
|
576
|
-
with a TranscriptionFrame)
|
|
816
|
+
get with a TranscriptionFrame).
|
|
577
817
|
|
|
818
|
+
Parameters:
|
|
819
|
+
emulated: Whether this event was emulated rather than detected by VAD.
|
|
578
820
|
"""
|
|
579
821
|
|
|
580
822
|
emulated: bool = False
|
|
@@ -582,14 +824,22 @@ class UserStartedSpeakingFrame(SystemFrame):
|
|
|
582
824
|
|
|
583
825
|
@dataclass
|
|
584
826
|
class UserStoppedSpeakingFrame(SystemFrame):
|
|
585
|
-
"""
|
|
827
|
+
"""Frame indicating user has stopped speaking.
|
|
828
|
+
|
|
829
|
+
Emitted by the VAD to indicate that a user stopped speaking.
|
|
830
|
+
|
|
831
|
+
Parameters:
|
|
832
|
+
emulated: Whether this event was emulated rather than detected by VAD.
|
|
833
|
+
"""
|
|
586
834
|
|
|
587
835
|
emulated: bool = False
|
|
588
836
|
|
|
589
837
|
|
|
590
838
|
@dataclass
|
|
591
839
|
class EmulateUserStartedSpeakingFrame(SystemFrame):
|
|
592
|
-
"""
|
|
840
|
+
"""Frame to emulate user started speaking behavior.
|
|
841
|
+
|
|
842
|
+
Emitted by internal processors upstream to emulate VAD behavior when a
|
|
593
843
|
user starts speaking.
|
|
594
844
|
"""
|
|
595
845
|
|
|
@@ -598,7 +848,9 @@ class EmulateUserStartedSpeakingFrame(SystemFrame):
|
|
|
598
848
|
|
|
599
849
|
@dataclass
|
|
600
850
|
class EmulateUserStoppedSpeakingFrame(SystemFrame):
|
|
601
|
-
"""
|
|
851
|
+
"""Frame to emulate user stopped speaking behavior.
|
|
852
|
+
|
|
853
|
+
Emitted by internal processors upstream to emulate VAD behavior when a
|
|
602
854
|
user stops speaking.
|
|
603
855
|
"""
|
|
604
856
|
|
|
@@ -607,24 +859,27 @@ class EmulateUserStoppedSpeakingFrame(SystemFrame):
|
|
|
607
859
|
|
|
608
860
|
@dataclass
|
|
609
861
|
class VADUserStartedSpeakingFrame(SystemFrame):
|
|
610
|
-
"""Frame emitted when VAD detects
|
|
862
|
+
"""Frame emitted when VAD definitively detects user started speaking."""
|
|
611
863
|
|
|
612
864
|
pass
|
|
613
865
|
|
|
614
866
|
|
|
615
867
|
@dataclass
|
|
616
868
|
class VADUserStoppedSpeakingFrame(SystemFrame):
|
|
617
|
-
"""Frame emitted when VAD detects
|
|
869
|
+
"""Frame emitted when VAD definitively detects user stopped speaking."""
|
|
618
870
|
|
|
619
871
|
pass
|
|
620
872
|
|
|
621
873
|
|
|
622
874
|
@dataclass
|
|
623
875
|
class BotInterruptionFrame(SystemFrame):
|
|
624
|
-
"""
|
|
876
|
+
"""Frame indicating the bot should be interrupted.
|
|
877
|
+
|
|
878
|
+
Emitted when the bot should be interrupted. This will mainly cause the
|
|
625
879
|
same actions as if the user interrupted except that the
|
|
626
880
|
UserStartedSpeakingFrame and UserStoppedSpeakingFrame won't be generated.
|
|
627
|
-
|
|
881
|
+
This frame should be pushed upstream. It results in the BaseInputTransport
|
|
882
|
+
starting an interruption by pushing a StartInterruptionFrame downstream.
|
|
628
883
|
"""
|
|
629
884
|
|
|
630
885
|
pass
|
|
@@ -632,25 +887,34 @@ class BotInterruptionFrame(SystemFrame):
|
|
|
632
887
|
|
|
633
888
|
@dataclass
|
|
634
889
|
class BotStartedSpeakingFrame(SystemFrame):
|
|
635
|
-
"""
|
|
890
|
+
"""Frame indicating the bot started speaking.
|
|
891
|
+
|
|
892
|
+
Emitted upstream and downstream by the BaseOutputTransport to indicate the
|
|
893
|
+
bot started speaking.
|
|
894
|
+
"""
|
|
636
895
|
|
|
637
896
|
pass
|
|
638
897
|
|
|
639
898
|
|
|
640
899
|
@dataclass
|
|
641
900
|
class BotStoppedSpeakingFrame(SystemFrame):
|
|
642
|
-
"""
|
|
901
|
+
"""Frame indicating the bot stopped speaking.
|
|
902
|
+
|
|
903
|
+
Emitted upstream and downstream by the BaseOutputTransport to indicate the
|
|
904
|
+
bot stopped speaking.
|
|
905
|
+
"""
|
|
643
906
|
|
|
644
907
|
pass
|
|
645
908
|
|
|
646
909
|
|
|
647
910
|
@dataclass
|
|
648
911
|
class BotSpeakingFrame(SystemFrame):
|
|
649
|
-
"""
|
|
650
|
-
speaking. This can be used, for example, to detect when a user is idle. That
|
|
651
|
-
is, while the bot is speaking we don't want to trigger any user idle timeout
|
|
652
|
-
since the user might be listening.
|
|
912
|
+
"""Frame indicating the bot is currently speaking.
|
|
653
913
|
|
|
914
|
+
Emitted upstream and downstream by the BaseOutputTransport while the bot is
|
|
915
|
+
still speaking. This can be used, for example, to detect when a user is
|
|
916
|
+
idle. That is, while the bot is speaking we don't want to trigger any user
|
|
917
|
+
idle timeout since the user might be listening.
|
|
654
918
|
"""
|
|
655
919
|
|
|
656
920
|
pass
|
|
@@ -658,21 +922,28 @@ class BotSpeakingFrame(SystemFrame):
|
|
|
658
922
|
|
|
659
923
|
@dataclass
|
|
660
924
|
class MetricsFrame(SystemFrame):
|
|
661
|
-
"""
|
|
925
|
+
"""Frame containing performance metrics data.
|
|
926
|
+
|
|
927
|
+
Emitted by processors that can compute metrics like latencies.
|
|
928
|
+
|
|
929
|
+
Parameters:
|
|
930
|
+
data: List of metrics data collected by the processor.
|
|
931
|
+
"""
|
|
662
932
|
|
|
663
933
|
data: List[MetricsData]
|
|
664
934
|
|
|
665
935
|
|
|
666
936
|
@dataclass
|
|
667
937
|
class FunctionCallFromLLM:
|
|
668
|
-
"""Represents a function call returned by the LLM
|
|
938
|
+
"""Represents a function call returned by the LLM.
|
|
669
939
|
|
|
670
|
-
|
|
671
|
-
function_name (str): The name of the function.
|
|
672
|
-
tool_call_id (str): A unique identifier for the function call.
|
|
673
|
-
arguments (Mapping[str, Any]): The arguments for the function.
|
|
674
|
-
context (OpenAILLMContext): The LLM context.
|
|
940
|
+
Represents a function call returned by the LLM to be registered for execution.
|
|
675
941
|
|
|
942
|
+
Parameters:
|
|
943
|
+
function_name: The name of the function to call.
|
|
944
|
+
tool_call_id: A unique identifier for the function call.
|
|
945
|
+
arguments: The arguments to pass to the function.
|
|
946
|
+
context: The LLM context when the function call was made.
|
|
676
947
|
"""
|
|
677
948
|
|
|
678
949
|
function_name: str
|
|
@@ -683,15 +954,28 @@ class FunctionCallFromLLM:
|
|
|
683
954
|
|
|
684
955
|
@dataclass
|
|
685
956
|
class FunctionCallsStartedFrame(SystemFrame):
|
|
686
|
-
"""
|
|
687
|
-
|
|
957
|
+
"""Frame signaling that function call execution is starting.
|
|
958
|
+
|
|
959
|
+
A frame signaling that the execution of one or more function calls is going to
|
|
960
|
+
start.
|
|
961
|
+
|
|
962
|
+
Parameters:
|
|
963
|
+
function_calls: Sequence of function calls that will be executed.
|
|
964
|
+
"""
|
|
688
965
|
|
|
689
966
|
function_calls: Sequence[FunctionCallFromLLM]
|
|
690
967
|
|
|
691
968
|
|
|
692
969
|
@dataclass
|
|
693
970
|
class FunctionCallInProgressFrame(SystemFrame):
|
|
694
|
-
"""
|
|
971
|
+
"""Frame signaling that a function call is currently executing.
|
|
972
|
+
|
|
973
|
+
Parameters:
|
|
974
|
+
function_name: Name of the function being executed.
|
|
975
|
+
tool_call_id: Unique identifier for this function call.
|
|
976
|
+
arguments: Arguments passed to the function.
|
|
977
|
+
cancel_on_interruption: Whether to cancel this call if interrupted.
|
|
978
|
+
"""
|
|
695
979
|
|
|
696
980
|
function_name: str
|
|
697
981
|
tool_call_id: str
|
|
@@ -701,7 +985,12 @@ class FunctionCallInProgressFrame(SystemFrame):
|
|
|
701
985
|
|
|
702
986
|
@dataclass
|
|
703
987
|
class FunctionCallCancelFrame(SystemFrame):
|
|
704
|
-
"""
|
|
988
|
+
"""Frame signaling that a function call has been cancelled.
|
|
989
|
+
|
|
990
|
+
Parameters:
|
|
991
|
+
function_name: Name of the function that was cancelled.
|
|
992
|
+
tool_call_id: Unique identifier for the cancelled function call.
|
|
993
|
+
"""
|
|
705
994
|
|
|
706
995
|
function_name: str
|
|
707
996
|
tool_call_id: str
|
|
@@ -709,7 +998,12 @@ class FunctionCallCancelFrame(SystemFrame):
|
|
|
709
998
|
|
|
710
999
|
@dataclass
|
|
711
1000
|
class FunctionCallResultProperties:
|
|
712
|
-
"""Properties for
|
|
1001
|
+
"""Properties for configuring function call result behavior.
|
|
1002
|
+
|
|
1003
|
+
Parameters:
|
|
1004
|
+
run_llm: Whether to run the LLM after receiving this result.
|
|
1005
|
+
on_context_updated: Callback to execute when context is updated.
|
|
1006
|
+
"""
|
|
713
1007
|
|
|
714
1008
|
run_llm: Optional[bool] = None
|
|
715
1009
|
on_context_updated: Optional[Callable[[], Awaitable[None]]] = None
|
|
@@ -717,7 +1011,16 @@ class FunctionCallResultProperties:
|
|
|
717
1011
|
|
|
718
1012
|
@dataclass
|
|
719
1013
|
class FunctionCallResultFrame(SystemFrame):
|
|
720
|
-
"""
|
|
1014
|
+
"""Frame containing the result of an LLM function call.
|
|
1015
|
+
|
|
1016
|
+
Parameters:
|
|
1017
|
+
function_name: Name of the function that was executed.
|
|
1018
|
+
tool_call_id: Unique identifier for the function call.
|
|
1019
|
+
arguments: Arguments that were passed to the function.
|
|
1020
|
+
result: The result returned by the function.
|
|
1021
|
+
run_llm: Whether to run the LLM after this result.
|
|
1022
|
+
properties: Additional properties for result handling.
|
|
1023
|
+
"""
|
|
721
1024
|
|
|
722
1025
|
function_name: str
|
|
723
1026
|
tool_call_id: str
|
|
@@ -729,13 +1032,23 @@ class FunctionCallResultFrame(SystemFrame):
|
|
|
729
1032
|
|
|
730
1033
|
@dataclass
|
|
731
1034
|
class STTMuteFrame(SystemFrame):
|
|
732
|
-
"""
|
|
1035
|
+
"""Frame to mute/unmute the Speech-to-Text service.
|
|
1036
|
+
|
|
1037
|
+
Parameters:
|
|
1038
|
+
mute: Whether to mute (True) or unmute (False) the STT service.
|
|
1039
|
+
"""
|
|
733
1040
|
|
|
734
1041
|
mute: bool
|
|
735
1042
|
|
|
736
1043
|
|
|
737
1044
|
@dataclass
|
|
738
1045
|
class TransportMessageUrgentFrame(SystemFrame):
|
|
1046
|
+
"""Frame for urgent transport messages that need immediate processing.
|
|
1047
|
+
|
|
1048
|
+
Parameters:
|
|
1049
|
+
message: The urgent transport message payload.
|
|
1050
|
+
"""
|
|
1051
|
+
|
|
739
1052
|
message: Any
|
|
740
1053
|
|
|
741
1054
|
def __str__(self):
|
|
@@ -744,10 +1057,18 @@ class TransportMessageUrgentFrame(SystemFrame):
|
|
|
744
1057
|
|
|
745
1058
|
@dataclass
|
|
746
1059
|
class UserImageRequestFrame(SystemFrame):
|
|
747
|
-
"""
|
|
1060
|
+
"""Frame requesting an image from a specific user.
|
|
1061
|
+
|
|
1062
|
+
A frame to request an image from the given user. The frame might be
|
|
748
1063
|
generated by a function call in which case the corresponding fields will be
|
|
749
1064
|
properly set.
|
|
750
1065
|
|
|
1066
|
+
Parameters:
|
|
1067
|
+
user_id: Identifier of the user to request image from.
|
|
1068
|
+
context: Optional context for the image request.
|
|
1069
|
+
function_name: Name of function that generated this request (if any).
|
|
1070
|
+
tool_call_id: Tool call ID if generated by function call.
|
|
1071
|
+
video_source: Specific video source to capture from.
|
|
751
1072
|
"""
|
|
752
1073
|
|
|
753
1074
|
user_id: str
|
|
@@ -762,10 +1083,11 @@ class UserImageRequestFrame(SystemFrame):
|
|
|
762
1083
|
|
|
763
1084
|
@dataclass
|
|
764
1085
|
class InputAudioRawFrame(SystemFrame, AudioRawFrame):
|
|
765
|
-
"""
|
|
766
|
-
supports multiple audio sources (e.g. multiple audio tracks) the source name
|
|
767
|
-
will be specified.
|
|
1086
|
+
"""Raw audio input frame from transport.
|
|
768
1087
|
|
|
1088
|
+
A chunk of audio usually coming from an input transport. If the transport
|
|
1089
|
+
supports multiple audio sources (e.g. multiple audio tracks) the source name
|
|
1090
|
+
will be specified in transport_source.
|
|
769
1091
|
"""
|
|
770
1092
|
|
|
771
1093
|
def __post_init__(self):
|
|
@@ -779,10 +1101,11 @@ class InputAudioRawFrame(SystemFrame, AudioRawFrame):
|
|
|
779
1101
|
|
|
780
1102
|
@dataclass
|
|
781
1103
|
class InputImageRawFrame(SystemFrame, ImageRawFrame):
|
|
782
|
-
"""
|
|
783
|
-
supports multiple video sources (e.g. multiple video tracks) the source name
|
|
784
|
-
will be specified.
|
|
1104
|
+
"""Raw image input frame from transport.
|
|
785
1105
|
|
|
1106
|
+
An image usually coming from an input transport. If the transport
|
|
1107
|
+
supports multiple video sources (e.g. multiple video tracks) the source name
|
|
1108
|
+
will be specified in transport_source.
|
|
786
1109
|
"""
|
|
787
1110
|
|
|
788
1111
|
def __str__(self):
|
|
@@ -790,9 +1113,29 @@ class InputImageRawFrame(SystemFrame, ImageRawFrame):
|
|
|
790
1113
|
return f"{self.name}(pts: {pts}, source: {self.transport_source}, size: {self.size}, format: {self.format})"
|
|
791
1114
|
|
|
792
1115
|
|
|
1116
|
+
@dataclass
|
|
1117
|
+
class InputTextRawFrame(SystemFrame, TextFrame):
|
|
1118
|
+
"""Raw text input frame from transport.
|
|
1119
|
+
|
|
1120
|
+
Text input usually coming from user typing or programmatic text injection
|
|
1121
|
+
that should be sent to LLM services as input, similar to how InputAudioRawFrame
|
|
1122
|
+
and InputImageRawFrame represent user audio and video input.
|
|
1123
|
+
"""
|
|
1124
|
+
|
|
1125
|
+
def __str__(self):
|
|
1126
|
+
pts = format_pts(self.pts)
|
|
1127
|
+
return f"{self.name}(pts: {pts}, source: {self.transport_source}, text: [{self.text}])"
|
|
1128
|
+
|
|
1129
|
+
|
|
793
1130
|
@dataclass
|
|
794
1131
|
class UserAudioRawFrame(InputAudioRawFrame):
|
|
795
|
-
"""
|
|
1132
|
+
"""Raw audio input frame associated with a specific user.
|
|
1133
|
+
|
|
1134
|
+
A chunk of audio, usually coming from an input transport, associated with a user.
|
|
1135
|
+
|
|
1136
|
+
Parameters:
|
|
1137
|
+
user_id: Identifier of the user who provided this audio.
|
|
1138
|
+
"""
|
|
796
1139
|
|
|
797
1140
|
user_id: str = ""
|
|
798
1141
|
|
|
@@ -803,7 +1146,14 @@ class UserAudioRawFrame(InputAudioRawFrame):
|
|
|
803
1146
|
|
|
804
1147
|
@dataclass
|
|
805
1148
|
class UserImageRawFrame(InputImageRawFrame):
|
|
806
|
-
"""
|
|
1149
|
+
"""Raw image input frame associated with a specific user.
|
|
1150
|
+
|
|
1151
|
+
An image associated with a user, potentially in response to an image request.
|
|
1152
|
+
|
|
1153
|
+
Parameters:
|
|
1154
|
+
user_id: Identifier of the user who provided this image.
|
|
1155
|
+
request: The original image request frame if this is a response.
|
|
1156
|
+
"""
|
|
807
1157
|
|
|
808
1158
|
user_id: str = ""
|
|
809
1159
|
request: Optional[UserImageRequestFrame] = None
|
|
@@ -815,7 +1165,13 @@ class UserImageRawFrame(InputImageRawFrame):
|
|
|
815
1165
|
|
|
816
1166
|
@dataclass
|
|
817
1167
|
class VisionImageRawFrame(InputImageRawFrame):
|
|
818
|
-
"""
|
|
1168
|
+
"""Image frame for vision/image analysis with associated text prompt.
|
|
1169
|
+
|
|
1170
|
+
An image with an associated text to ask for a description of it.
|
|
1171
|
+
|
|
1172
|
+
Parameters:
|
|
1173
|
+
text: Optional text prompt describing what to analyze in the image.
|
|
1174
|
+
"""
|
|
819
1175
|
|
|
820
1176
|
text: Optional[str] = None
|
|
821
1177
|
|
|
@@ -826,22 +1182,40 @@ class VisionImageRawFrame(InputImageRawFrame):
|
|
|
826
1182
|
|
|
827
1183
|
@dataclass
|
|
828
1184
|
class InputDTMFFrame(DTMFFrame, SystemFrame):
|
|
829
|
-
"""
|
|
1185
|
+
"""DTMF keypress input frame from transport."""
|
|
830
1186
|
|
|
831
1187
|
pass
|
|
832
1188
|
|
|
833
1189
|
|
|
834
1190
|
@dataclass
|
|
835
1191
|
class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
|
|
836
|
-
"""
|
|
1192
|
+
"""DTMF keypress output frame for immediate sending.
|
|
1193
|
+
|
|
1194
|
+
A DTMF keypress output that will be sent right away. If your transport
|
|
837
1195
|
supports multiple dial-out destinations, use the `transport_destination`
|
|
838
1196
|
field to specify where the DTMF keypress should be sent.
|
|
839
|
-
|
|
840
1197
|
"""
|
|
841
1198
|
|
|
842
1199
|
pass
|
|
843
1200
|
|
|
844
1201
|
|
|
1202
|
+
@dataclass
|
|
1203
|
+
class SpeechControlParamsFrame(SystemFrame):
|
|
1204
|
+
"""Frame for notifying processors of speech control parameter changes.
|
|
1205
|
+
|
|
1206
|
+
This includes parameters for both VAD (Voice Activity Detection) and
|
|
1207
|
+
turn-taking analysis. It allows downstream processors to adjust their
|
|
1208
|
+
behavior based on updated interaction control settings.
|
|
1209
|
+
|
|
1210
|
+
Parameters:
|
|
1211
|
+
vad_params: Current VAD parameters.
|
|
1212
|
+
turn_params: Current turn-taking analysis parameters.
|
|
1213
|
+
"""
|
|
1214
|
+
|
|
1215
|
+
vad_params: Optional[VADParams] = None
|
|
1216
|
+
turn_params: Optional[SmartTurnParams] = None
|
|
1217
|
+
|
|
1218
|
+
|
|
845
1219
|
#
|
|
846
1220
|
# Control frames
|
|
847
1221
|
#
|
|
@@ -849,12 +1223,13 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
|
|
|
849
1223
|
|
|
850
1224
|
@dataclass
|
|
851
1225
|
class EndFrame(ControlFrame):
|
|
852
|
-
"""
|
|
1226
|
+
"""Frame indicating pipeline has ended and should shut down.
|
|
1227
|
+
|
|
1228
|
+
Indicates that a pipeline has ended and frame processors and pipelines
|
|
853
1229
|
should be shut down. If the transport receives this frame, it will stop
|
|
854
1230
|
sending frames to its output channel(s) and close all its threads. Note,
|
|
855
|
-
that this is a control frame, which means it will received in the order it
|
|
856
|
-
was sent
|
|
857
|
-
|
|
1231
|
+
that this is a control frame, which means it will be received in the order it
|
|
1232
|
+
was sent.
|
|
858
1233
|
"""
|
|
859
1234
|
|
|
860
1235
|
pass
|
|
@@ -862,10 +1237,21 @@ class EndFrame(ControlFrame):
|
|
|
862
1237
|
|
|
863
1238
|
@dataclass
|
|
864
1239
|
class StopFrame(ControlFrame):
|
|
865
|
-
"""
|
|
1240
|
+
"""Frame indicating pipeline should stop but keep processors running.
|
|
1241
|
+
|
|
1242
|
+
Indicates that a pipeline should be stopped but that the pipeline
|
|
866
1243
|
processors should be kept in a running state. This is normally queued from
|
|
867
1244
|
the pipeline task.
|
|
1245
|
+
"""
|
|
1246
|
+
|
|
1247
|
+
pass
|
|
1248
|
+
|
|
868
1249
|
|
|
1250
|
+
@dataclass
|
|
1251
|
+
class OutputTransportReadyFrame(ControlFrame):
|
|
1252
|
+
"""Frame indicating that the output transport is ready.
|
|
1253
|
+
|
|
1254
|
+
Indicates that the output transport is ready and able to receive frames.
|
|
869
1255
|
"""
|
|
870
1256
|
|
|
871
1257
|
pass
|
|
@@ -873,9 +1259,13 @@ class StopFrame(ControlFrame):
|
|
|
873
1259
|
|
|
874
1260
|
@dataclass
|
|
875
1261
|
class HeartbeatFrame(ControlFrame):
|
|
876
|
-
"""
|
|
1262
|
+
"""Frame used by pipeline task to monitor pipeline health.
|
|
1263
|
+
|
|
1264
|
+
This frame is used by the pipeline task as a mechanism to know if the
|
|
877
1265
|
pipeline is running properly.
|
|
878
1266
|
|
|
1267
|
+
Parameters:
|
|
1268
|
+
timestamp: Timestamp when the heartbeat was generated.
|
|
879
1269
|
"""
|
|
880
1270
|
|
|
881
1271
|
timestamp: int
|
|
@@ -883,11 +1273,15 @@ class HeartbeatFrame(ControlFrame):
|
|
|
883
1273
|
|
|
884
1274
|
@dataclass
|
|
885
1275
|
class FrameProcessorPauseFrame(ControlFrame):
|
|
886
|
-
"""
|
|
1276
|
+
"""Frame to pause frame processing for a specific processor.
|
|
1277
|
+
|
|
1278
|
+
This frame is used to pause frame processing for the given
|
|
887
1279
|
processor. Pausing frame processing will keep frames in the internal queue
|
|
888
1280
|
which will then be processed when frame processing is resumed with
|
|
889
1281
|
`FrameProcessorResumeFrame`.
|
|
890
1282
|
|
|
1283
|
+
Parameters:
|
|
1284
|
+
processor: The frame processor to pause.
|
|
891
1285
|
"""
|
|
892
1286
|
|
|
893
1287
|
processor: "FrameProcessor"
|
|
@@ -895,10 +1289,14 @@ class FrameProcessorPauseFrame(ControlFrame):
|
|
|
895
1289
|
|
|
896
1290
|
@dataclass
|
|
897
1291
|
class FrameProcessorResumeFrame(ControlFrame):
|
|
898
|
-
"""
|
|
1292
|
+
"""Frame to resume frame processing for a specific processor.
|
|
1293
|
+
|
|
1294
|
+
This frame is used to resume frame processing for the given processor if
|
|
899
1295
|
it was previously paused. After resuming frame processing all queued frames
|
|
900
1296
|
will be processed in the order received.
|
|
901
1297
|
|
|
1298
|
+
Parameters:
|
|
1299
|
+
processor: The frame processor to resume.
|
|
902
1300
|
"""
|
|
903
1301
|
|
|
904
1302
|
processor: "FrameProcessor"
|
|
@@ -906,8 +1304,10 @@ class FrameProcessorResumeFrame(ControlFrame):
|
|
|
906
1304
|
|
|
907
1305
|
@dataclass
|
|
908
1306
|
class LLMFullResponseStartFrame(ControlFrame):
|
|
909
|
-
"""
|
|
910
|
-
|
|
1307
|
+
"""Frame indicating the beginning of an LLM response.
|
|
1308
|
+
|
|
1309
|
+
Used to indicate the beginning of an LLM response. Followed by one or
|
|
1310
|
+
more TextFrames and a final LLMFullResponseEndFrame.
|
|
911
1311
|
"""
|
|
912
1312
|
|
|
913
1313
|
pass
|
|
@@ -915,19 +1315,20 @@ class LLMFullResponseStartFrame(ControlFrame):
|
|
|
915
1315
|
|
|
916
1316
|
@dataclass
|
|
917
1317
|
class LLMFullResponseEndFrame(ControlFrame):
|
|
918
|
-
"""
|
|
1318
|
+
"""Frame indicating the end of an LLM response."""
|
|
919
1319
|
|
|
920
1320
|
pass
|
|
921
1321
|
|
|
922
1322
|
|
|
923
1323
|
@dataclass
|
|
924
1324
|
class TTSStartedFrame(ControlFrame):
|
|
925
|
-
"""
|
|
926
|
-
|
|
1325
|
+
"""Frame indicating the beginning of a TTS response.
|
|
1326
|
+
|
|
1327
|
+
Used to indicate the beginning of a TTS response. Following
|
|
1328
|
+
TTSAudioRawFrames are part of the TTS response until a
|
|
927
1329
|
TTSStoppedFrame. These frames can be used for aggregating audio frames in a
|
|
928
1330
|
transport to optimize the size of frames sent to the session, without
|
|
929
1331
|
needing to control this in the TTS service.
|
|
930
|
-
|
|
931
1332
|
"""
|
|
932
1333
|
|
|
933
1334
|
pass
|
|
@@ -935,37 +1336,54 @@ class TTSStartedFrame(ControlFrame):
|
|
|
935
1336
|
|
|
936
1337
|
@dataclass
|
|
937
1338
|
class TTSStoppedFrame(ControlFrame):
|
|
938
|
-
"""
|
|
1339
|
+
"""Frame indicating the end of a TTS response."""
|
|
939
1340
|
|
|
940
1341
|
pass
|
|
941
1342
|
|
|
942
1343
|
|
|
943
1344
|
@dataclass
|
|
944
1345
|
class ServiceUpdateSettingsFrame(ControlFrame):
|
|
945
|
-
"""
|
|
1346
|
+
"""Base frame for updating service settings.
|
|
1347
|
+
|
|
1348
|
+
A control frame containing a request to update service settings.
|
|
1349
|
+
|
|
1350
|
+
Parameters:
|
|
1351
|
+
settings: Dictionary of setting name to value mappings.
|
|
1352
|
+
"""
|
|
946
1353
|
|
|
947
1354
|
settings: Mapping[str, Any]
|
|
948
1355
|
|
|
949
1356
|
|
|
950
1357
|
@dataclass
|
|
951
1358
|
class LLMUpdateSettingsFrame(ServiceUpdateSettingsFrame):
|
|
1359
|
+
"""Frame for updating LLM service settings."""
|
|
1360
|
+
|
|
952
1361
|
pass
|
|
953
1362
|
|
|
954
1363
|
|
|
955
1364
|
@dataclass
|
|
956
1365
|
class TTSUpdateSettingsFrame(ServiceUpdateSettingsFrame):
|
|
1366
|
+
"""Frame for updating TTS service settings."""
|
|
1367
|
+
|
|
957
1368
|
pass
|
|
958
1369
|
|
|
959
1370
|
|
|
960
1371
|
@dataclass
|
|
961
1372
|
class STTUpdateSettingsFrame(ServiceUpdateSettingsFrame):
|
|
1373
|
+
"""Frame for updating STT service settings."""
|
|
1374
|
+
|
|
962
1375
|
pass
|
|
963
1376
|
|
|
964
1377
|
|
|
965
1378
|
@dataclass
|
|
966
1379
|
class VADParamsUpdateFrame(SystemFrame):
|
|
967
|
-
"""
|
|
1380
|
+
"""Frame for updating VAD parameters.
|
|
1381
|
+
|
|
1382
|
+
A system frame containing a request to update VAD params. Intended
|
|
968
1383
|
to be pushed upstream from RTVI processor.
|
|
1384
|
+
|
|
1385
|
+
Parameters:
|
|
1386
|
+
params: New VAD parameters to apply.
|
|
969
1387
|
"""
|
|
970
1388
|
|
|
971
1389
|
params: VADParams
|
|
@@ -973,42 +1391,58 @@ class VADParamsUpdateFrame(SystemFrame):
|
|
|
973
1391
|
|
|
974
1392
|
@dataclass
|
|
975
1393
|
class FilterControlFrame(ControlFrame):
|
|
976
|
-
"""Base control frame for
|
|
1394
|
+
"""Base control frame for audio filter operations."""
|
|
977
1395
|
|
|
978
1396
|
pass
|
|
979
1397
|
|
|
980
1398
|
|
|
981
1399
|
@dataclass
|
|
982
1400
|
class FilterUpdateSettingsFrame(FilterControlFrame):
|
|
983
|
-
"""
|
|
1401
|
+
"""Frame for updating audio filter settings.
|
|
1402
|
+
|
|
1403
|
+
Parameters:
|
|
1404
|
+
settings: Dictionary of filter setting name to value mappings.
|
|
1405
|
+
"""
|
|
984
1406
|
|
|
985
1407
|
settings: Mapping[str, Any]
|
|
986
1408
|
|
|
987
1409
|
|
|
988
1410
|
@dataclass
|
|
989
1411
|
class FilterEnableFrame(FilterControlFrame):
|
|
990
|
-
"""
|
|
1412
|
+
"""Frame for enabling/disabling audio filters at runtime.
|
|
1413
|
+
|
|
1414
|
+
Parameters:
|
|
1415
|
+
enable: Whether to enable (True) or disable (False) the filter.
|
|
1416
|
+
"""
|
|
991
1417
|
|
|
992
1418
|
enable: bool
|
|
993
1419
|
|
|
994
1420
|
|
|
995
1421
|
@dataclass
|
|
996
1422
|
class MixerControlFrame(ControlFrame):
|
|
997
|
-
"""Base control frame for
|
|
1423
|
+
"""Base control frame for audio mixer operations."""
|
|
998
1424
|
|
|
999
1425
|
pass
|
|
1000
1426
|
|
|
1001
1427
|
|
|
1002
1428
|
@dataclass
|
|
1003
1429
|
class MixerUpdateSettingsFrame(MixerControlFrame):
|
|
1004
|
-
"""
|
|
1430
|
+
"""Frame for updating audio mixer settings.
|
|
1431
|
+
|
|
1432
|
+
Parameters:
|
|
1433
|
+
settings: Dictionary of mixer setting name to value mappings.
|
|
1434
|
+
"""
|
|
1005
1435
|
|
|
1006
1436
|
settings: Mapping[str, Any]
|
|
1007
1437
|
|
|
1008
1438
|
|
|
1009
1439
|
@dataclass
|
|
1010
1440
|
class MixerEnableFrame(MixerControlFrame):
|
|
1011
|
-
"""
|
|
1441
|
+
"""Frame for enabling/disabling audio mixer at runtime.
|
|
1442
|
+
|
|
1443
|
+
Parameters:
|
|
1444
|
+
enable: Whether to enable (True) or disable (False) the mixer.
|
|
1445
|
+
"""
|
|
1012
1446
|
|
|
1013
1447
|
enable: bool
|
|
1014
1448
|
|