dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic; see the registry's advisory page for more details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/frames/frames.py CHANGED
@@ -4,6 +4,13 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Core frame definitions for the Pipecat AI framework.
8
+
9
+ This module contains all frame types used throughout the Pipecat pipeline system,
10
+ including data frames, system frames, and control frames for audio, video, text,
11
+ and LLM processing.
12
+ """
13
+
7
14
  from dataclasses import dataclass, field
8
15
  from enum import Enum
9
16
  from typing import (
@@ -21,6 +28,7 @@ from typing import (
21
28
  )
22
29
 
23
30
  from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
31
+ from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
24
32
  from pipecat.audio.vad.vad_analyzer import VADParams
25
33
  from pipecat.metrics.metrics import MetricsData
26
34
  from pipecat.transcriptions.language import Language
@@ -32,7 +40,22 @@ if TYPE_CHECKING:
32
40
 
33
41
 
34
42
  class KeypadEntry(str, Enum):
35
- """DTMF entries."""
43
+ """DTMF keypad entries for phone system integration.
44
+
45
+ Parameters:
46
+ ONE: Number key 1.
47
+ TWO: Number key 2.
48
+ THREE: Number key 3.
49
+ FOUR: Number key 4.
50
+ FIVE: Number key 5.
51
+ SIX: Number key 6.
52
+ SEVEN: Number key 7.
53
+ EIGHT: Number key 8.
54
+ NINE: Number key 9.
55
+ ZERO: Number key 0.
56
+ POUND: Pound/hash key (#).
57
+ STAR: Star/asterisk key (*).
58
+ """
36
59
 
37
60
  ONE = "1"
38
61
  TWO = "2"
@@ -49,12 +72,31 @@ class KeypadEntry(str, Enum):
49
72
 
50
73
 
51
74
  def format_pts(pts: Optional[int]):
75
+ """Format presentation timestamp (PTS) in nanoseconds to a human-readable string.
76
+
77
+ Converts a PTS value in nanoseconds to a string representation.
78
+
79
+ Args:
80
+ pts: Presentation timestamp in nanoseconds, or None if not set.
81
+ """
52
82
  return nanoseconds_to_str(pts) if pts else None
53
83
 
54
84
 
55
85
  @dataclass
56
86
  class Frame:
57
- """Base frame class."""
87
+ """Base frame class for all frames in the Pipecat pipeline.
88
+
89
+ All frames inherit from this base class and automatically receive
90
+ unique identifiers, names, and metadata support.
91
+
92
+ Parameters:
93
+ id: Unique identifier for the frame instance.
94
+ name: Human-readable name combining class name and instance count.
95
+ pts: Presentation timestamp in nanoseconds.
96
+ metadata: Dictionary for arbitrary frame metadata.
97
+ transport_source: Name of the transport source that created this frame.
98
+ transport_destination: Name of the transport destination for this frame.
99
+ """
58
100
 
59
101
  id: int = field(init=False)
60
102
  name: str = field(init=False)
@@ -77,9 +119,10 @@ class Frame:
77
119
 
78
120
  @dataclass
79
121
  class SystemFrame(Frame):
80
- """System frames are frames that are not internally queued by any of the
81
- frame processors and should be processed immediately.
122
+ """System frame class for immediate processing.
82
123
 
124
+ A frame that takes higher priority than other frames. System frames are
125
+ handled in order and are not affected by user interruptions.
83
126
  """
84
127
 
85
128
  pass
@@ -87,9 +130,11 @@ class SystemFrame(Frame):
87
130
 
88
131
  @dataclass
89
132
  class DataFrame(Frame):
90
- """Data frames are frames that will be processed in order and usually
91
- contain data such as LLM context, text, audio or images.
133
+ """Data frame class for processing data in order.
92
134
 
135
+ A frame that is processed in order and usually contains data such as LLM
136
+ context, text, audio or images. Data frames are cancelled by user
137
+ interruptions.
93
138
  """
94
139
 
95
140
  pass
@@ -97,9 +142,12 @@ class DataFrame(Frame):
97
142
 
98
143
  @dataclass
99
144
  class ControlFrame(Frame):
100
- """Control frames are frames that, similar to data frames, will be processed
101
- in order and usually contain control information such as frames to update
102
- settings or to end the pipeline.
145
+ """Control frame class for processing control information in order.
146
+
147
+ A frame that, similar to data frames, is processed in order and usually
148
+ contains control information such as update settings or to end the pipeline
149
+ after everything is flushed. Control frames are cancelled by user
150
+ interruptions.
103
151
 
104
152
  """
105
153
 
@@ -113,7 +161,14 @@ class ControlFrame(Frame):
113
161
 
114
162
  @dataclass
115
163
  class AudioRawFrame:
116
- """A chunk of audio."""
164
+ """A frame containing a chunk of raw audio.
165
+
166
+ Parameters:
167
+ audio: Raw audio bytes in PCM format.
168
+ sample_rate: Audio sample rate in Hz.
169
+ num_channels: Number of audio channels.
170
+ num_frames: Number of audio frames (calculated automatically).
171
+ """
117
172
 
118
173
  audio: bytes
119
174
  sample_rate: int
@@ -126,7 +181,13 @@ class AudioRawFrame:
126
181
 
127
182
  @dataclass
128
183
  class ImageRawFrame:
129
- """A raw image."""
184
+ """A frame containing a raw image.
185
+
186
+ Parameters:
187
+ image: Raw image bytes.
188
+ size: Image dimensions as (width, height) tuple.
189
+ format: Image format (e.g., 'JPEG', 'PNG').
190
+ """
130
191
 
131
192
  image: bytes
132
193
  size: Tuple[int, int]
@@ -140,10 +201,11 @@ class ImageRawFrame:
140
201
 
141
202
  @dataclass
142
203
  class OutputAudioRawFrame(DataFrame, AudioRawFrame):
143
- """A chunk of audio. Will be played by the output transport. If the
144
- transport supports multiple audio destinations (e.g. multiple audio tracks) the
145
- destination name can be specified.
204
+ """Audio data frame for output to transport.
146
205
 
206
+ A chunk of raw audio that will be played by the output transport. If the
207
+ transport supports multiple audio destinations (e.g. multiple audio tracks)
208
+ the destination name can be specified in transport_destination.
147
209
  """
148
210
 
149
211
  def __post_init__(self):
@@ -157,29 +219,49 @@ class OutputAudioRawFrame(DataFrame, AudioRawFrame):
157
219
 
158
220
  @dataclass
159
221
  class OutputImageRawFrame(DataFrame, ImageRawFrame):
160
- """An image that will be shown by the transport. If the transport supports
161
- multiple video destinations (e.g. multiple video tracks) the destination
162
- name can be specified.
222
+ """Image data frame for output to transport.
163
223
 
224
+ An image that will be shown by the transport. If the transport supports
225
+ multiple video destinations (e.g. multiple video tracks) the destination
226
+ name can be specified in transport_destination.
164
227
  """
165
228
 
166
229
  def __str__(self):
167
230
  pts = format_pts(self.pts)
168
- return f"{self.name}(pts: {pts}, size: {self.size}, format: {self.format})"
231
+ return f"{self.name}(pts: {pts}, destination: {self.transport_destination}, size: {self.size}, format: {self.format})"
169
232
 
170
233
 
171
234
  @dataclass
172
235
  class TTSAudioRawFrame(OutputAudioRawFrame):
173
- """A chunk of output audio generated by a TTS service."""
236
+ """Audio data frame generated by Text-to-Speech services.
237
+
238
+ A chunk of output audio generated by a TTS service, ready for playback.
239
+ """
240
+
241
+ pass
242
+
243
+
244
+ @dataclass
245
+ class SpeechOutputAudioRawFrame(OutputAudioRawFrame):
246
+ """An audio frame part of a speech audio stream.
247
+
248
+ This frame is part of a continuous stream of audio frames containing speech.
249
+ The audio stream might also contain silence frames, so a process to distinguish
250
+ between speech and silence might be needed.
251
+ """
174
252
 
175
253
  pass
176
254
 
177
255
 
178
256
  @dataclass
179
257
  class URLImageRawFrame(OutputImageRawFrame):
180
- """An output image with an associated URL. These images are usually
258
+ """Image frame with an associated URL.
259
+
260
+ An output image with an associated URL. These images are usually
181
261
  generated by third-party services that provide a URL to download the image.
182
262
 
263
+ Parameters:
264
+ url: URL where the image can be downloaded from.
183
265
  """
184
266
 
185
267
  url: Optional[str] = None
@@ -191,10 +273,14 @@ class URLImageRawFrame(OutputImageRawFrame):
191
273
 
192
274
  @dataclass
193
275
  class SpriteFrame(DataFrame):
194
- """An animated sprite. Will be shown by the transport if the transport's
276
+ """Animated sprite frame containing multiple images.
277
+
278
+ An animated sprite that will be shown by the transport if the transport's
195
279
  camera is enabled. Will play at the framerate specified in the transport's
196
280
  `camera_out_framerate` constructor parameter.
197
281
 
282
+ Parameters:
283
+ images: List of image frames that make up the sprite animation.
198
284
  """
199
285
 
200
286
  images: List[OutputImageRawFrame]
@@ -206,9 +292,14 @@ class SpriteFrame(DataFrame):
206
292
 
207
293
  @dataclass
208
294
  class TextFrame(DataFrame):
209
- """A chunk of text. Emitted by LLM services, consumed by TTS services, can
210
- be used to send text through processors.
295
+ """Text data frame for passing text through the pipeline.
211
296
 
297
+ A chunk of text. Emitted by LLM services, consumed by context
298
+ aggregators, TTS services and more. Can be used to send text
299
+ through processors.
300
+
301
+ Parameters:
302
+ text: The text content.
212
303
  """
213
304
 
214
305
  text: str
@@ -220,23 +311,30 @@ class TextFrame(DataFrame):
220
311
 
221
312
  @dataclass
222
313
  class LLMTextFrame(TextFrame):
223
- """A text frame generated by LLM services."""
314
+ """Text frame generated by LLM services."""
224
315
 
225
316
  pass
226
317
 
227
318
 
228
319
  @dataclass
229
320
  class TTSTextFrame(TextFrame):
230
- """A text frame generated by TTS services."""
321
+ """Text frame generated by Text-to-Speech services."""
231
322
 
232
323
  pass
233
324
 
234
325
 
235
326
  @dataclass
236
327
  class TranscriptionFrame(TextFrame):
237
- """A text frame with transcription-specific data. The `result` field
328
+ """Text frame containing speech transcription data.
329
+
330
+ A text frame with transcription-specific data. The `result` field
238
331
  contains the result from the STT service if available.
239
332
 
333
+ Parameters:
334
+ user_id: Identifier for the user who spoke.
335
+ timestamp: When the transcription occurred.
336
+ language: Detected or specified language of the speech.
337
+ result: Raw result from the STT service.
240
338
  """
241
339
 
242
340
  user_id: str
@@ -250,9 +348,17 @@ class TranscriptionFrame(TextFrame):
250
348
 
251
349
  @dataclass
252
350
  class InterimTranscriptionFrame(TextFrame):
253
- """A text frame with interim transcription-specific data. The `result` field
351
+ """Text frame containing partial/interim transcription data.
352
+
353
+ A text frame with interim transcription-specific data that represents
354
+ partial results before final transcription. The `result` field
254
355
  contains the result from the STT service if available.
255
356
 
357
+ Parameters:
358
+ user_id: Identifier for the user who spoke.
359
+ timestamp: When the interim transcription occurred.
360
+ language: Detected or specified language of the speech.
361
+ result: Raw result from the STT service.
256
362
  """
257
363
 
258
364
  text: str
@@ -267,10 +373,15 @@ class InterimTranscriptionFrame(TextFrame):
267
373
 
268
374
  @dataclass
269
375
  class TranslationFrame(TextFrame):
270
- """A text frame with translated transcription data.
376
+ """Text frame containing translated transcription data.
271
377
 
272
- Will be placed in the transport's receive queue when a participant speaks.
378
+ A text frame with translated transcription data that will be placed
379
+ in the transport's receive queue when a participant speaks.
273
380
 
381
+ Parameters:
382
+ user_id: Identifier for the user who spoke.
383
+ timestamp: When the translation occurred.
384
+ language: Target language of the translation.
274
385
  """
275
386
 
276
387
  user_id: str
@@ -283,16 +394,27 @@ class TranslationFrame(TextFrame):
283
394
 
284
395
  @dataclass
285
396
  class OpenAILLMContextAssistantTimestampFrame(DataFrame):
286
- """Timestamp information for assistant message in LLM context."""
397
+ """Timestamp information for assistant messages in LLM context.
398
+
399
+ Parameters:
400
+ timestamp: Timestamp when the assistant message was created.
401
+ """
287
402
 
288
403
  timestamp: str
289
404
 
290
405
 
291
406
  @dataclass
292
407
  class TranscriptionMessage:
293
- """A message in a conversation transcript containing the role and content.
408
+ """A message in a conversation transcript.
294
409
 
410
+ A message in a conversation transcript containing the role and content.
295
411
  Messages are in standard format with roles normalized to user/assistant.
412
+
413
+ Parameters:
414
+ role: The role of the message sender (user or assistant).
415
+ content: The message content/text.
416
+ user_id: Optional identifier for the user.
417
+ timestamp: Optional timestamp when the message was created.
296
418
  """
297
419
 
298
420
  role: Literal["user", "assistant"]
@@ -303,39 +425,46 @@ class TranscriptionMessage:
303
425
 
304
426
  @dataclass
305
427
  class TranscriptionUpdateFrame(DataFrame):
306
- """A frame containing new messages added to the conversation transcript.
428
+ """Frame containing new messages added to conversation transcript.
307
429
 
430
+ A frame containing new messages added to the conversation transcript.
308
431
  This frame is emitted when new messages are added to the conversation history,
309
432
  containing only the newly added messages rather than the full transcript.
310
433
  Messages have normalized roles (user/assistant) regardless of the LLM service used.
311
434
  Messages are always in the OpenAI standard message format, which supports both:
312
435
 
313
- Simple format:
314
- [
315
- {
316
- "role": "user",
317
- "content": "Hi, how are you?"
318
- },
319
- {
320
- "role": "assistant",
321
- "content": "Great! And you?"
322
- }
323
- ]
324
-
325
- Content list format:
326
- [
327
- {
328
- "role": "user",
329
- "content": [{"type": "text", "text": "Hi, how are you?"}]
330
- },
331
- {
332
- "role": "assistant",
333
- "content": [{"type": "text", "text": "Great! And you?"}]
334
- }
335
- ]
436
+ Examples:
437
+ Simple format::
438
+
439
+ [
440
+ {
441
+ "role": "user",
442
+ "content": "Hi, how are you?"
443
+ },
444
+ {
445
+ "role": "assistant",
446
+ "content": "Great! And you?"
447
+ }
448
+ ]
449
+
450
+ Content list format::
451
+
452
+ [
453
+ {
454
+ "role": "user",
455
+ "content": [{"type": "text", "text": "Hi, how are you?"}]
456
+ },
457
+ {
458
+ "role": "assistant",
459
+ "content": [{"type": "text", "text": "Great! And you?"}]
460
+ }
461
+ ]
336
462
 
337
463
  OpenAI supports both formats. Anthropic and Google messages are converted to the
338
464
  content list format.
465
+
466
+ Parameters:
467
+ messages: List of new transcript messages that were added.
339
468
  """
340
469
 
341
470
  messages: List[TranscriptionMessage]
@@ -347,43 +476,84 @@ class TranscriptionUpdateFrame(DataFrame):
347
476
 
348
477
  @dataclass
349
478
  class LLMMessagesFrame(DataFrame):
350
- """A frame containing a list of LLM messages. Used to signal that an LLM
479
+ """Frame containing LLM messages for chat completion.
480
+
481
+ .. deprecated:: 0.0.79
482
+ This class is deprecated and will be removed in a future version.
483
+ Instead, use either:
484
+ - `LLMMessagesUpdateFrame` with `run_llm=True`
485
+ - `OpenAILLMContextFrame` with desired messages in a new context
486
+
487
+ A frame containing a list of LLM messages. Used to signal that an LLM
351
488
  service should run a chat completion and emit an LLMFullResponseStartFrame,
352
489
  TextFrames and an LLMFullResponseEndFrame. Note that the `messages`
353
- property in this class is mutable, and will be be updated by various
490
+ property in this class is mutable, and will be updated by various
354
491
  aggregators.
355
492
 
493
+ Parameters:
494
+ messages: List of message dictionaries in LLM format.
356
495
  """
357
496
 
358
497
  messages: List[dict]
359
498
 
499
+ def __post_init__(self):
500
+ super().__post_init__()
501
+ import warnings
502
+
503
+ warnings.simplefilter("always")
504
+ warnings.warn(
505
+ "LLMMessagesFrame is deprecated and will be removed in a future version. "
506
+ "Instead, use either "
507
+ "`LLMMessagesUpdateFrame` with `run_llm=True`, or "
508
+ "`OpenAILLMContextFrame` with desired messages in a new context",
509
+ DeprecationWarning,
510
+ stacklevel=2,
511
+ )
512
+
360
513
 
361
514
  @dataclass
362
515
  class LLMMessagesAppendFrame(DataFrame):
363
- """A frame containing a list of LLM messages that need to be added to the
516
+ """Frame containing LLM messages to append to current context.
517
+
518
+ A frame containing a list of LLM messages that need to be added to the
364
519
  current context.
365
520
 
521
+ Parameters:
522
+ messages: List of message dictionaries to append.
523
+ run_llm: Whether the context update should be sent to the LLM.
366
524
  """
367
525
 
368
526
  messages: List[dict]
527
+ run_llm: Optional[bool] = None
369
528
 
370
529
 
371
530
  @dataclass
372
531
  class LLMMessagesUpdateFrame(DataFrame):
373
- """A frame containing a list of new LLM messages. These messages will
532
+ """Frame containing LLM messages to replace current context.
533
+
534
+ A frame containing a list of new LLM messages. These messages will
374
535
  replace the current context LLM messages and should generate a new
375
536
  LLMMessagesFrame.
376
537
 
538
+ Parameters:
539
+ messages: List of message dictionaries to replace current context.
540
+ run_llm: Whether the context update should be sent to the LLM.
377
541
  """
378
542
 
379
543
  messages: List[dict]
544
+ run_llm: Optional[bool] = None
380
545
 
381
546
 
382
547
  @dataclass
383
548
  class LLMSetToolsFrame(DataFrame):
384
- """A frame containing a list of tools for an LLM to use for function calling.
549
+ """Frame containing tools for LLM function calling.
550
+
551
+ A frame containing a list of tools for an LLM to use for function calling.
385
552
  The specific format depends on the LLM being used, but it should typically
386
553
  contain JSON Schema objects.
554
+
555
+ Parameters:
556
+ tools: List of tool/function definitions for the LLM.
387
557
  """
388
558
 
389
559
  tools: List[dict]
@@ -391,23 +561,35 @@ class LLMSetToolsFrame(DataFrame):
391
561
 
392
562
  @dataclass
393
563
  class LLMSetToolChoiceFrame(DataFrame):
394
- """A frame containing a tool choice for an LLM to use for function calling."""
564
+ """Frame containing tool choice configuration for LLM function calling.
565
+
566
+ Parameters:
567
+ tool_choice: Tool choice setting - 'none', 'auto', 'required', or specific tool dict.
568
+ """
395
569
 
396
570
  tool_choice: Literal["none", "auto", "required"] | dict
397
571
 
398
572
 
399
573
  @dataclass
400
574
  class LLMEnablePromptCachingFrame(DataFrame):
401
- """A frame to enable/disable prompt caching in certain LLMs."""
575
+ """Frame to enable/disable prompt caching in LLMs.
576
+
577
+ Parameters:
578
+ enable: Whether to enable prompt caching.
579
+ """
402
580
 
403
581
  enable: bool
404
582
 
405
583
 
406
584
  @dataclass
407
585
  class TTSSpeakFrame(DataFrame):
408
- """A frame that contains a text that should be spoken by the TTS in the
409
- pipeline (if any).
586
+ """Frame containing text that should be spoken by TTS.
410
587
 
588
+ A frame that contains text that should be spoken by the TTS service
589
+ in the pipeline (if any).
590
+
591
+ Parameters:
592
+ text: The text to be spoken.
411
593
  """
412
594
 
413
595
  text: str
@@ -415,6 +597,12 @@ class TTSSpeakFrame(DataFrame):
415
597
 
416
598
  @dataclass
417
599
  class TransportMessageFrame(DataFrame):
600
+ """Frame containing transport-specific message data.
601
+
602
+ Parameters:
603
+ message: The transport message payload.
604
+ """
605
+
418
606
  message: Any
419
607
 
420
608
  def __str__(self):
@@ -423,17 +611,22 @@ class TransportMessageFrame(DataFrame):
423
611
 
424
612
  @dataclass
425
613
  class DTMFFrame:
426
- """A DTMF button frame"""
614
+ """Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
615
+
616
+ Parameters:
617
+ button: The DTMF keypad entry that was pressed.
618
+ """
427
619
 
428
620
  button: KeypadEntry
429
621
 
430
622
 
431
623
  @dataclass
432
624
  class OutputDTMFFrame(DTMFFrame, DataFrame):
433
- """A DTMF keypress output that will be queued. If your transport supports
625
+ """DTMF keypress output frame for transport queuing.
626
+
627
+ A DTMF keypress output that will be queued. If your transport supports
434
628
  multiple dial-out destinations, use the `transport_destination` field to
435
629
  specify where the DTMF keypress should be sent.
436
-
437
630
  """
438
631
 
439
632
  pass
@@ -446,12 +639,27 @@ class OutputDTMFFrame(DTMFFrame, DataFrame):
446
639
 
447
640
  @dataclass
448
641
  class StartFrame(SystemFrame):
449
- """This is the first frame that should be pushed down a pipeline."""
642
+ """Initial frame to start pipeline processing.
643
+
644
+ This is the first frame that should be pushed down a pipeline to
645
+ initialize all processors with their configuration parameters.
646
+
647
+ Parameters:
648
+ audio_in_sample_rate: Input audio sample rate in Hz.
649
+ audio_out_sample_rate: Output audio sample rate in Hz.
650
+ allow_interruptions: Whether to allow user interruptions.
651
+ enable_metrics: Whether to enable performance metrics collection.
652
+ enable_tracing: Whether to enable OpenTelemetry tracing.
653
+ enable_usage_metrics: Whether to enable usage metrics collection.
654
+ interruption_strategies: List of interruption handling strategies.
655
+ report_only_initial_ttfb: Whether to report only initial time-to-first-byte.
656
+ """
450
657
 
451
658
  audio_in_sample_rate: int = 16000
452
659
  audio_out_sample_rate: int = 24000
453
660
  allow_interruptions: bool = False
454
661
  enable_metrics: bool = False
662
+ enable_tracing: bool = False
455
663
  enable_usage_metrics: bool = False
456
664
  interruption_strategies: List[BaseInterruptionStrategy] = field(default_factory=list)
457
665
  report_only_initial_ttfb: bool = False
@@ -459,21 +667,32 @@ class StartFrame(SystemFrame):
459
667
 
460
668
  @dataclass
461
669
  class CancelFrame(SystemFrame):
462
- """Indicates that a pipeline needs to stop right away."""
670
+ """Frame indicating pipeline should stop immediately.
671
+
672
+ Indicates that a pipeline needs to stop right away without
673
+ processing remaining queued frames.
674
+ """
463
675
 
464
676
  pass
465
677
 
466
678
 
467
679
  @dataclass
468
680
  class ErrorFrame(SystemFrame):
469
- """This is used notify upstream that an error has occurred downstream the
470
- pipeline. A fatal error indicates the error is unrecoverable and that the
681
+ """Frame notifying of errors in the pipeline.
682
+
683
+ This is used to notify upstream that an error has occurred downstream in
684
+ the pipeline. A fatal error indicates the error is unrecoverable and that the
471
685
  bot should exit.
472
686
 
687
+ Parameters:
688
+ error: Description of the error that occurred.
689
+ fatal: Whether the error is fatal and requires bot shutdown.
690
+ processor: The frame processor that generated the error.
473
691
  """
474
692
 
475
693
  error: str
476
694
  fatal: bool = False
695
+ processor: Optional["FrameProcessor"] = None
477
696
 
478
697
  def __str__(self):
479
698
  return f"{self.name}(error: {self.error}, fatal: {self.fatal})"
@@ -481,9 +700,13 @@ class ErrorFrame(SystemFrame):
481
700
 
482
701
  @dataclass
483
702
  class FatalErrorFrame(ErrorFrame):
484
- """This is used notify upstream that an unrecoverable error has occurred and
485
- that the bot should exit.
703
+ """Frame notifying of unrecoverable errors requiring bot shutdown.
486
704
 
705
+ This is used to notify upstream that an unrecoverable error has occurred and
706
+ that the bot should exit immediately.
707
+
708
+ Parameters:
709
+ fatal: Always True for fatal errors.
487
710
  """
488
711
 
489
712
  fatal: bool = field(default=True, init=False)
@@ -491,10 +714,11 @@ class FatalErrorFrame(ErrorFrame):
491
714
 
492
715
  @dataclass
493
716
  class EndTaskFrame(SystemFrame):
494
- """This is used to notify the pipeline task that the pipeline should be
495
- closed nicely (flushing all the queued frames) by pushing an EndFrame
496
- downstream.
717
+ """Frame to request graceful pipeline task closure.
497
718
 
719
+ This is used to notify the pipeline task that the pipeline should be
720
+ closed nicely (flushing all the queued frames) by pushing an EndFrame
721
+ downstream. This frame should be pushed upstream.
498
722
  """
499
723
 
500
724
  pass
@@ -502,9 +726,11 @@ class EndTaskFrame(SystemFrame):
502
726
 
503
727
  @dataclass
504
728
  class CancelTaskFrame(SystemFrame):
505
- """This is used to notify the pipeline task that the pipeline should be
506
- stopped immediately by pushing a CancelFrame downstream.
729
+ """Frame to request immediate pipeline task cancellation.
507
730
 
731
+ This is used to notify the pipeline task that the pipeline should be
732
+ stopped immediately by pushing a CancelFrame downstream. This frame
733
+ should be pushed upstream.
508
734
  """
509
735
 
510
736
  pass
@@ -512,10 +738,12 @@ class CancelTaskFrame(SystemFrame):
512
738
 
513
739
  @dataclass
514
740
  class StopTaskFrame(SystemFrame):
515
- """This is used to notify the pipeline task that it should be stopped as
516
- soon as possible (flushing all the queued frames) but that the pipeline
517
- processors should be kept in a running state.
741
+ """Frame to request pipeline task stop while keeping processors running.
518
742
 
743
+ This is used to notify the pipeline task that it should be stopped as
744
+ soon as possible (flushing all the queued frames) but that the pipeline
745
+ processors should be kept in a running state. This frame should be pushed
746
+ upstream.
519
747
  """
520
748
 
521
749
  pass
@@ -523,11 +751,15 @@ class StopTaskFrame(SystemFrame):
523
751
 
524
752
  @dataclass
525
753
  class FrameProcessorPauseUrgentFrame(SystemFrame):
526
- """This frame is used to pause frame processing for the given processor as
754
+ """Frame to pause frame processing immediately.
755
+
756
+ This frame is used to pause frame processing for the given processor as
527
757
  fast as possible. Pausing frame processing will keep frames in the internal
528
758
  queue which will then be processed when frame processing is resumed with
529
759
  `FrameProcessorResumeFrame`.
530
760
 
761
+ Parameters:
762
+ processor: The frame processor to pause.
531
763
  """
532
764
 
533
765
  processor: "FrameProcessor"
@@ -535,10 +767,14 @@ class FrameProcessorPauseUrgentFrame(SystemFrame):
535
767
 
536
768
  @dataclass
537
769
  class FrameProcessorResumeUrgentFrame(SystemFrame):
538
- """This frame is used to resume frame processing for the given processor
770
+ """Frame to resume frame processing immediately.
771
+
772
+ This frame is used to resume frame processing for the given processor
539
773
  if it was previously paused as fast as possible. After resuming frame
540
774
  processing all queued frames will be processed in the order received.
541
775
 
776
+ Parameters:
777
+ processor: The frame processor to resume.
542
778
  """
543
779
 
544
780
  processor: "FrameProcessor"
@@ -546,11 +782,12 @@ class FrameProcessorResumeUrgentFrame(SystemFrame):
546
782
 
547
783
  @dataclass
548
784
  class StartInterruptionFrame(SystemFrame):
549
- """Emitted by VAD to indicate that a user has started speaking (i.e. is
550
- interruption). This is similar to UserStartedSpeakingFrame except that it
551
- should be pushed concurrently with other frames (so the order is not
552
- guaranteed).
785
+ """Frame indicating user started speaking (interruption detected).
553
786
 
787
+ Emitted by the BaseInputTransport to indicate that a user has started
788
+ speaking (i.e. is interrupting). This is similar to
789
+ UserStartedSpeakingFrame except that it should be pushed concurrently
790
+ with other frames (so the order is not guaranteed).
554
791
  """
555
792
 
556
793
  pass
@@ -558,11 +795,12 @@ class StartInterruptionFrame(SystemFrame):
558
795
 
559
796
  @dataclass
560
797
  class StopInterruptionFrame(SystemFrame):
561
- """Emitted by VAD to indicate that a user has stopped speaking (i.e. no more
562
- interruptions). This is similar to UserStoppedSpeakingFrame except that it
563
- should be pushed concurrently with other frames (so the order is not
564
- guaranteed).
798
+ """Frame indicating user stopped speaking (interruption ended).
565
799
 
800
+ Emitted by the BaseInputTransport to indicate that a user has stopped
801
+ speaking (i.e. no more interruptions). This is similar to
802
+ UserStoppedSpeakingFrame except that it should be pushed concurrently
803
+ with other frames (so the order is not guaranteed).
566
804
  """
567
805
 
568
806
  pass
@@ -570,11 +808,15 @@ class StopInterruptionFrame(SystemFrame):
570
808
 
571
809
  @dataclass
572
810
  class UserStartedSpeakingFrame(SystemFrame):
573
- """Emitted by VAD to indicate that a user has started speaking. This can be
811
+ """Frame indicating user has started speaking.
812
+
813
+ Emitted by VAD to indicate that a user has started speaking. This can be
574
814
  used for interruptions or other times when detecting that someone is
575
815
  speaking is more important than knowing what they're saying (as you will
576
- with a TranscriptionFrame)
816
+ get with a TranscriptionFrame).
577
817
 
818
+ Parameters:
819
+ emulated: Whether this event was emulated rather than detected by VAD.
578
820
  """
579
821
 
580
822
  emulated: bool = False
@@ -582,14 +824,22 @@ class UserStartedSpeakingFrame(SystemFrame):
582
824
 
583
825
  @dataclass
584
826
  class UserStoppedSpeakingFrame(SystemFrame):
585
- """Emitted by the VAD to indicate that a user stopped speaking."""
827
+ """Frame indicating user has stopped speaking.
828
+
829
+ Emitted by the VAD to indicate that a user stopped speaking.
830
+
831
+ Parameters:
832
+ emulated: Whether this event was emulated rather than detected by VAD.
833
+ """
586
834
 
587
835
  emulated: bool = False
588
836
 
589
837
 
590
838
  @dataclass
591
839
  class EmulateUserStartedSpeakingFrame(SystemFrame):
592
- """Emitted by internal processors upstream to emulate VAD behavior when a
840
+ """Frame to emulate user started speaking behavior.
841
+
842
+ Emitted by internal processors upstream to emulate VAD behavior when a
593
843
  user starts speaking.
594
844
  """
595
845
 
@@ -598,7 +848,9 @@ class EmulateUserStartedSpeakingFrame(SystemFrame):
598
848
 
599
849
  @dataclass
600
850
  class EmulateUserStoppedSpeakingFrame(SystemFrame):
601
- """Emitted by internal processors upstream to emulate VAD behavior when a
851
+ """Frame to emulate user stopped speaking behavior.
852
+
853
+ Emitted by internal processors upstream to emulate VAD behavior when a
602
854
  user stops speaking.
603
855
  """
604
856
 
@@ -607,24 +859,27 @@ class EmulateUserStoppedSpeakingFrame(SystemFrame):
607
859
 
608
860
  @dataclass
609
861
  class VADUserStartedSpeakingFrame(SystemFrame):
610
- """Frame emitted when VAD detects the user has definitively started speaking."""
862
+ """Frame emitted when VAD definitively detects user started speaking."""
611
863
 
612
864
  pass
613
865
 
614
866
 
615
867
  @dataclass
616
868
  class VADUserStoppedSpeakingFrame(SystemFrame):
617
- """Frame emitted when VAD detects the user has definitively stopped speaking."""
869
+ """Frame emitted when VAD definitively detects user stopped speaking."""
618
870
 
619
871
  pass
620
872
 
621
873
 
622
874
  @dataclass
623
875
  class BotInterruptionFrame(SystemFrame):
624
- """Emitted by when the bot should be interrupted. This will mainly cause the
876
+ """Frame indicating the bot should be interrupted.
877
+
878
+ Emitted when the bot should be interrupted. This will mainly cause the
625
879
  same actions as if the user interrupted except that the
626
880
  UserStartedSpeakingFrame and UserStoppedSpeakingFrame won't be generated.
627
-
881
+ This frame should be pushed upstreams. It results in the BaseInputTransport
882
+ starting an interruption by pushing a StartInterruptionFrame downstream.
628
883
  """
629
884
 
630
885
  pass
@@ -632,25 +887,34 @@ class BotInterruptionFrame(SystemFrame):
632
887
 
633
888
  @dataclass
634
889
  class BotStartedSpeakingFrame(SystemFrame):
635
- """Emitted upstream by transport outputs to indicate the bot started speaking."""
890
+ """Frame indicating the bot started speaking.
891
+
892
+ Emitted upstream and downstream by the BaseTransportOutput to indicate the
893
+ bot started speaking.
894
+ """
636
895
 
637
896
  pass
638
897
 
639
898
 
640
899
  @dataclass
641
900
  class BotStoppedSpeakingFrame(SystemFrame):
642
- """Emitted upstream by transport outputs to indicate the bot stopped speaking."""
901
+ """Frame indicating the bot stopped speaking.
902
+
903
+ Emitted upstream and downstream by the BaseTransportOutput to indicate the
904
+ bot stopped speaking.
905
+ """
643
906
 
644
907
  pass
645
908
 
646
909
 
647
910
  @dataclass
648
911
  class BotSpeakingFrame(SystemFrame):
649
- """Emitted upstream by transport outputs while the bot is still
650
- speaking. This can be used, for example, to detect when a user is idle. That
651
- is, while the bot is speaking we don't want to trigger any user idle timeout
652
- since the user might be listening.
912
+ """Frame indicating the bot is currently speaking.
653
913
 
914
+ Emitted upstream and downstream by the BaseOutputTransport while the bot is
915
+ still speaking. This can be used, for example, to detect when a user is
916
+ idle. That is, while the bot is speaking we don't want to trigger any user
917
+ idle timeout since the user might be listening.
654
918
  """
655
919
 
656
920
  pass
@@ -658,21 +922,28 @@ class BotSpeakingFrame(SystemFrame):
658
922
 
659
923
  @dataclass
660
924
  class MetricsFrame(SystemFrame):
661
- """Emitted by processor that can compute metrics like latencies."""
925
+ """Frame containing performance metrics data.
926
+
927
+ Emitted by processors that can compute metrics like latencies.
928
+
929
+ Parameters:
930
+ data: List of metrics data collected by the processor.
931
+ """
662
932
 
663
933
  data: List[MetricsData]
664
934
 
665
935
 
666
936
  @dataclass
667
937
  class FunctionCallFromLLM:
668
- """Represents a function call returned by the LLM to be registered for execution.
938
+ """Represents a function call returned by the LLM.
669
939
 
670
- Attributes:
671
- function_name (str): The name of the function.
672
- tool_call_id (str): A unique identifier for the function call.
673
- arguments (Mapping[str, Any]): The arguments for the function.
674
- context (OpenAILLMContext): The LLM context.
940
+ Represents a function call returned by the LLM to be registered for execution.
675
941
 
942
+ Parameters:
943
+ function_name: The name of the function to call.
944
+ tool_call_id: A unique identifier for the function call.
945
+ arguments: The arguments to pass to the function.
946
+ context: The LLM context when the function call was made.
676
947
  """
677
948
 
678
949
  function_name: str
@@ -683,15 +954,28 @@ class FunctionCallFromLLM:
683
954
 
684
955
  @dataclass
685
956
  class FunctionCallsStartedFrame(SystemFrame):
686
- """A frame signaling that one or more function call execution is going to
687
- start."""
957
+ """Frame signaling that function call execution is starting.
958
+
959
+ A frame signaling that one or more function call execution is going to
960
+ start.
961
+
962
+ Parameters:
963
+ function_calls: Sequence of function calls that will be executed.
964
+ """
688
965
 
689
966
  function_calls: Sequence[FunctionCallFromLLM]
690
967
 
691
968
 
692
969
  @dataclass
693
970
  class FunctionCallInProgressFrame(SystemFrame):
694
- """A frame signaling that a function call is in progress."""
971
+ """Frame signaling that a function call is currently executing.
972
+
973
+ Parameters:
974
+ function_name: Name of the function being executed.
975
+ tool_call_id: Unique identifier for this function call.
976
+ arguments: Arguments passed to the function.
977
+ cancel_on_interruption: Whether to cancel this call if interrupted.
978
+ """
695
979
 
696
980
  function_name: str
697
981
  tool_call_id: str
@@ -701,7 +985,12 @@ class FunctionCallInProgressFrame(SystemFrame):
701
985
 
702
986
  @dataclass
703
987
  class FunctionCallCancelFrame(SystemFrame):
704
- """A frame to signal a function call has been cancelled."""
988
+ """Frame signaling that a function call has been cancelled.
989
+
990
+ Parameters:
991
+ function_name: Name of the function that was cancelled.
992
+ tool_call_id: Unique identifier for the cancelled function call.
993
+ """
705
994
 
706
995
  function_name: str
707
996
  tool_call_id: str
@@ -709,7 +998,12 @@ class FunctionCallCancelFrame(SystemFrame):
709
998
 
710
999
  @dataclass
711
1000
  class FunctionCallResultProperties:
712
- """Properties for a function call result frame."""
1001
+ """Properties for configuring function call result behavior.
1002
+
1003
+ Parameters:
1004
+ run_llm: Whether to run the LLM after receiving this result.
1005
+ on_context_updated: Callback to execute when context is updated.
1006
+ """
713
1007
 
714
1008
  run_llm: Optional[bool] = None
715
1009
  on_context_updated: Optional[Callable[[], Awaitable[None]]] = None
@@ -717,7 +1011,16 @@ class FunctionCallResultProperties:
717
1011
 
718
1012
  @dataclass
719
1013
  class FunctionCallResultFrame(SystemFrame):
720
- """A frame containing the result of an LLM function (tool) call."""
1014
+ """Frame containing the result of an LLM function call.
1015
+
1016
+ Parameters:
1017
+ function_name: Name of the function that was executed.
1018
+ tool_call_id: Unique identifier for the function call.
1019
+ arguments: Arguments that were passed to the function.
1020
+ result: The result returned by the function.
1021
+ run_llm: Whether to run the LLM after this result.
1022
+ properties: Additional properties for result handling.
1023
+ """
721
1024
 
722
1025
  function_name: str
723
1026
  tool_call_id: str
@@ -729,13 +1032,23 @@ class FunctionCallResultFrame(SystemFrame):
729
1032
 
730
1033
  @dataclass
731
1034
  class STTMuteFrame(SystemFrame):
732
- """System frame to mute/unmute the STT service."""
1035
+ """Frame to mute/unmute the Speech-to-Text service.
1036
+
1037
+ Parameters:
1038
+ mute: Whether to mute (True) or unmute (False) the STT service.
1039
+ """
733
1040
 
734
1041
  mute: bool
735
1042
 
736
1043
 
737
1044
  @dataclass
738
1045
  class TransportMessageUrgentFrame(SystemFrame):
1046
+ """Frame for urgent transport messages that need immediate processing.
1047
+
1048
+ Parameters:
1049
+ message: The urgent transport message payload.
1050
+ """
1051
+
739
1052
  message: Any
740
1053
 
741
1054
  def __str__(self):
@@ -744,10 +1057,18 @@ class TransportMessageUrgentFrame(SystemFrame):
744
1057
 
745
1058
  @dataclass
746
1059
  class UserImageRequestFrame(SystemFrame):
747
- """A frame to request an image from the given user. The frame might be
1060
+ """Frame requesting an image from a specific user.
1061
+
1062
+ A frame to request an image from the given user. The frame might be
748
1063
  generated by a function call in which case the corresponding fields will be
749
1064
  properly set.
750
1065
 
1066
+ Parameters:
1067
+ user_id: Identifier of the user to request image from.
1068
+ context: Optional context for the image request.
1069
+ function_name: Name of function that generated this request (if any).
1070
+ tool_call_id: Tool call ID if generated by function call.
1071
+ video_source: Specific video source to capture from.
751
1072
  """
752
1073
 
753
1074
  user_id: str
@@ -762,10 +1083,11 @@ class UserImageRequestFrame(SystemFrame):
762
1083
 
763
1084
  @dataclass
764
1085
  class InputAudioRawFrame(SystemFrame, AudioRawFrame):
765
- """A chunk of audio usually coming from an input transport. If the transport
766
- supports multiple audio sources (e.g. multiple audio tracks) the source name
767
- will be specified.
1086
+ """Raw audio input frame from transport.
768
1087
 
1088
+ A chunk of audio usually coming from an input transport. If the transport
1089
+ supports multiple audio sources (e.g. multiple audio tracks) the source name
1090
+ will be specified in transport_source.
769
1091
  """
770
1092
 
771
1093
  def __post_init__(self):
@@ -779,10 +1101,11 @@ class InputAudioRawFrame(SystemFrame, AudioRawFrame):
779
1101
 
780
1102
  @dataclass
781
1103
  class InputImageRawFrame(SystemFrame, ImageRawFrame):
782
- """An image usually coming from an input transport. If the transport
783
- supports multiple video sources (e.g. multiple video tracks) the source name
784
- will be specified.
1104
+ """Raw image input frame from transport.
785
1105
 
1106
+ An image usually coming from an input transport. If the transport
1107
+ supports multiple video sources (e.g. multiple video tracks) the source name
1108
+ will be specified in transport_source.
786
1109
  """
787
1110
 
788
1111
  def __str__(self):
@@ -790,9 +1113,29 @@ class InputImageRawFrame(SystemFrame, ImageRawFrame):
790
1113
  return f"{self.name}(pts: {pts}, source: {self.transport_source}, size: {self.size}, format: {self.format})"
791
1114
 
792
1115
 
1116
+ @dataclass
1117
+ class InputTextRawFrame(SystemFrame, TextFrame):
1118
+ """Raw text input frame from transport.
1119
+
1120
+ Text input usually coming from user typing or programmatic text injection
1121
+ that should be sent to LLM services as input, similar to how InputAudioRawFrame
1122
+ and InputImageRawFrame represent user audio and video input.
1123
+ """
1124
+
1125
+ def __str__(self):
1126
+ pts = format_pts(self.pts)
1127
+ return f"{self.name}(pts: {pts}, source: {self.transport_source}, text: [{self.text}])"
1128
+
1129
+
793
1130
  @dataclass
794
1131
  class UserAudioRawFrame(InputAudioRawFrame):
795
- """A chunk of audio, usually coming from an input transport, associated to a user."""
1132
+ """Raw audio input frame associated with a specific user.
1133
+
1134
+ A chunk of audio, usually coming from an input transport, associated to a user.
1135
+
1136
+ Parameters:
1137
+ user_id: Identifier of the user who provided this audio.
1138
+ """
796
1139
 
797
1140
  user_id: str = ""
798
1141
 
@@ -803,7 +1146,14 @@ class UserAudioRawFrame(InputAudioRawFrame):
803
1146
 
804
1147
  @dataclass
805
1148
  class UserImageRawFrame(InputImageRawFrame):
806
- """An image associated to a user."""
1149
+ """Raw image input frame associated with a specific user.
1150
+
1151
+ An image associated to a user, potentially in response to an image request.
1152
+
1153
+ Parameters:
1154
+ user_id: Identifier of the user who provided this image.
1155
+ request: The original image request frame if this is a response.
1156
+ """
807
1157
 
808
1158
  user_id: str = ""
809
1159
  request: Optional[UserImageRequestFrame] = None
@@ -815,7 +1165,13 @@ class UserImageRawFrame(InputImageRawFrame):
815
1165
 
816
1166
  @dataclass
817
1167
  class VisionImageRawFrame(InputImageRawFrame):
818
- """An image with an associated text to ask for a description of it."""
1168
+ """Image frame for vision/image analysis with associated text prompt.
1169
+
1170
+ An image with an associated text to ask for a description of it.
1171
+
1172
+ Parameters:
1173
+ text: Optional text prompt describing what to analyze in the image.
1174
+ """
819
1175
 
820
1176
  text: Optional[str] = None
821
1177
 
@@ -826,22 +1182,40 @@ class VisionImageRawFrame(InputImageRawFrame):
826
1182
 
827
1183
  @dataclass
828
1184
  class InputDTMFFrame(DTMFFrame, SystemFrame):
829
- """A DTMF keypress input."""
1185
+ """DTMF keypress input frame from transport."""
830
1186
 
831
1187
  pass
832
1188
 
833
1189
 
834
1190
  @dataclass
835
1191
  class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
836
- """A DTMF keypress output that will be sent right away. If your transport
1192
+ """DTMF keypress output frame for immediate sending.
1193
+
1194
+ A DTMF keypress output that will be sent right away. If your transport
837
1195
  supports multiple dial-out destinations, use the `transport_destination`
838
1196
  field to specify where the DTMF keypress should be sent.
839
-
840
1197
  """
841
1198
 
842
1199
  pass
843
1200
 
844
1201
 
1202
+ @dataclass
1203
+ class SpeechControlParamsFrame(SystemFrame):
1204
+ """Frame for notifying processors of speech control parameter changes.
1205
+
1206
+ This includes parameters for both VAD (Voice Activity Detection) and
1207
+ turn-taking analysis. It allows downstream processors to adjust their
1208
+ behavior based on updated interaction control settings.
1209
+
1210
+ Parameters:
1211
+ vad_params: Current VAD parameters.
1212
+ turn_params: Current turn-taking analysis parameters.
1213
+ """
1214
+
1215
+ vad_params: Optional[VADParams] = None
1216
+ turn_params: Optional[SmartTurnParams] = None
1217
+
1218
+
845
1219
  #
846
1220
  # Control frames
847
1221
  #
@@ -849,12 +1223,13 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
849
1223
 
850
1224
  @dataclass
851
1225
  class EndFrame(ControlFrame):
852
- """Indicates that a pipeline has ended and frame processors and pipelines
1226
+ """Frame indicating pipeline has ended and should shut down.
1227
+
1228
+ Indicates that a pipeline has ended and frame processors and pipelines
853
1229
  should be shut down. If the transport receives this frame, it will stop
854
1230
  sending frames to its output channel(s) and close all its threads. Note,
855
- that this is a control frame, which means it will received in the order it
856
- was sent (unline system frames).
857
-
1231
+ that this is a control frame, which means it will be received in the order it
1232
+ was sent.
858
1233
  """
859
1234
 
860
1235
  pass
@@ -862,10 +1237,21 @@ class EndFrame(ControlFrame):
862
1237
 
863
1238
  @dataclass
864
1239
  class StopFrame(ControlFrame):
865
- """Indicates that a pipeline should be stopped but that the pipeline
1240
+ """Frame indicating pipeline should stop but keep processors running.
1241
+
1242
+ Indicates that a pipeline should be stopped but that the pipeline
866
1243
  processors should be kept in a running state. This is normally queued from
867
1244
  the pipeline task.
1245
+ """
1246
+
1247
+ pass
1248
+
868
1249
 
1250
+ @dataclass
1251
+ class OutputTransportReadyFrame(ControlFrame):
1252
+ """Frame indicating that the output transport is ready.
1253
+
1254
+ Indicates that the output transport is ready and able to receive frames.
869
1255
  """
870
1256
 
871
1257
  pass
@@ -873,9 +1259,13 @@ class StopFrame(ControlFrame):
873
1259
 
874
1260
  @dataclass
875
1261
  class HeartbeatFrame(ControlFrame):
876
- """This frame is used by the pipeline task as a mechanism to know if the
1262
+ """Frame used by pipeline task to monitor pipeline health.
1263
+
1264
+ This frame is used by the pipeline task as a mechanism to know if the
877
1265
  pipeline is running properly.
878
1266
 
1267
+ Parameters:
1268
+ timestamp: Timestamp when the heartbeat was generated.
879
1269
  """
880
1270
 
881
1271
  timestamp: int
@@ -883,11 +1273,15 @@ class HeartbeatFrame(ControlFrame):
883
1273
 
884
1274
  @dataclass
885
1275
  class FrameProcessorPauseFrame(ControlFrame):
886
- """This frame is used to pause frame processing for the given
1276
+ """Frame to pause frame processing for a specific processor.
1277
+
1278
+ This frame is used to pause frame processing for the given
887
1279
  processor. Pausing frame processing will keep frames in the internal queue
888
1280
  which will then be processed when frame processing is resumed with
889
1281
  `FrameProcessorResumeFrame`.
890
1282
 
1283
+ Parameters:
1284
+ processor: The frame processor to pause.
891
1285
  """
892
1286
 
893
1287
  processor: "FrameProcessor"
@@ -895,10 +1289,14 @@ class FrameProcessorPauseFrame(ControlFrame):
895
1289
 
896
1290
  @dataclass
897
1291
  class FrameProcessorResumeFrame(ControlFrame):
898
- """This frame is used to resume frame processing for the given processor if
1292
+ """Frame to resume frame processing for a specific processor.
1293
+
1294
+ This frame is used to resume frame processing for the given processor if
899
1295
  it was previously paused. After resuming frame processing all queued frames
900
1296
  will be processed in the order received.
901
1297
 
1298
+ Parameters:
1299
+ processor: The frame processor to resume.
902
1300
  """
903
1301
 
904
1302
  processor: "FrameProcessor"
@@ -906,8 +1304,10 @@ class FrameProcessorResumeFrame(ControlFrame):
906
1304
 
907
1305
  @dataclass
908
1306
  class LLMFullResponseStartFrame(ControlFrame):
909
- """Used to indicate the beginning of an LLM response. Following by one or
910
- more TextFrame and a final LLMFullResponseEndFrame.
1307
+ """Frame indicating the beginning of an LLM response.
1308
+
1309
+ Used to indicate the beginning of an LLM response. Followed by one or
1310
+ more TextFrames and a final LLMFullResponseEndFrame.
911
1311
  """
912
1312
 
913
1313
  pass
@@ -915,19 +1315,20 @@ class LLMFullResponseStartFrame(ControlFrame):
915
1315
 
916
1316
  @dataclass
917
1317
  class LLMFullResponseEndFrame(ControlFrame):
918
- """Indicates the end of an LLM response."""
1318
+ """Frame indicating the end of an LLM response."""
919
1319
 
920
1320
  pass
921
1321
 
922
1322
 
923
1323
  @dataclass
924
1324
  class TTSStartedFrame(ControlFrame):
925
- """Used to indicate the beginning of a TTS response. Following
926
- TTSAudioRawFrames are part of the TTS response until an
1325
+ """Frame indicating the beginning of a TTS response.
1326
+
1327
+ Used to indicate the beginning of a TTS response. Following
1328
+ TTSAudioRawFrames are part of the TTS response until a
927
1329
  TTSStoppedFrame. These frames can be used for aggregating audio frames in a
928
1330
  transport to optimize the size of frames sent to the session, without
929
1331
  needing to control this in the TTS service.
930
-
931
1332
  """
932
1333
 
933
1334
  pass
@@ -935,37 +1336,54 @@ class TTSStartedFrame(ControlFrame):
935
1336
 
936
1337
  @dataclass
937
1338
  class TTSStoppedFrame(ControlFrame):
938
- """Indicates the end of a TTS response."""
1339
+ """Frame indicating the end of a TTS response."""
939
1340
 
940
1341
  pass
941
1342
 
942
1343
 
943
1344
  @dataclass
944
1345
  class ServiceUpdateSettingsFrame(ControlFrame):
945
- """A control frame containing a request to update service settings."""
1346
+ """Base frame for updating service settings.
1347
+
1348
+ A control frame containing a request to update service settings.
1349
+
1350
+ Parameters:
1351
+ settings: Dictionary of setting name to value mappings.
1352
+ """
946
1353
 
947
1354
  settings: Mapping[str, Any]
948
1355
 
949
1356
 
950
1357
  @dataclass
951
1358
  class LLMUpdateSettingsFrame(ServiceUpdateSettingsFrame):
1359
+ """Frame for updating LLM service settings."""
1360
+
952
1361
  pass
953
1362
 
954
1363
 
955
1364
  @dataclass
956
1365
  class TTSUpdateSettingsFrame(ServiceUpdateSettingsFrame):
1366
+ """Frame for updating TTS service settings."""
1367
+
957
1368
  pass
958
1369
 
959
1370
 
960
1371
  @dataclass
961
1372
  class STTUpdateSettingsFrame(ServiceUpdateSettingsFrame):
1373
+ """Frame for updating STT service settings."""
1374
+
962
1375
  pass
963
1376
 
964
1377
 
965
1378
  @dataclass
966
1379
  class VADParamsUpdateFrame(SystemFrame):
967
- """A control frame containing a request to update VAD params. Intended
1380
+ """Frame for updating VAD parameters.
1381
+
1382
+ A control frame containing a request to update VAD params. Intended
968
1383
  to be pushed upstream from RTVI processor.
1384
+
1385
+ Parameters:
1386
+ params: New VAD parameters to apply.
969
1387
  """
970
1388
 
971
1389
  params: VADParams
@@ -973,42 +1391,58 @@ class VADParamsUpdateFrame(SystemFrame):
973
1391
 
974
1392
  @dataclass
975
1393
  class FilterControlFrame(ControlFrame):
976
- """Base control frame for other audio filter frames."""
1394
+ """Base control frame for audio filter operations."""
977
1395
 
978
1396
  pass
979
1397
 
980
1398
 
981
1399
  @dataclass
982
1400
  class FilterUpdateSettingsFrame(FilterControlFrame):
983
- """Control frame to update filter settings."""
1401
+ """Frame for updating audio filter settings.
1402
+
1403
+ Parameters:
1404
+ settings: Dictionary of filter setting name to value mappings.
1405
+ """
984
1406
 
985
1407
  settings: Mapping[str, Any]
986
1408
 
987
1409
 
988
1410
  @dataclass
989
1411
  class FilterEnableFrame(FilterControlFrame):
990
- """Control frame to enable or disable the filter at runtime."""
1412
+ """Frame for enabling/disabling audio filters at runtime.
1413
+
1414
+ Parameters:
1415
+ enable: Whether to enable (True) or disable (False) the filter.
1416
+ """
991
1417
 
992
1418
  enable: bool
993
1419
 
994
1420
 
995
1421
  @dataclass
996
1422
  class MixerControlFrame(ControlFrame):
997
- """Base control frame for other audio mixer frames."""
1423
+ """Base control frame for audio mixer operations."""
998
1424
 
999
1425
  pass
1000
1426
 
1001
1427
 
1002
1428
  @dataclass
1003
1429
  class MixerUpdateSettingsFrame(MixerControlFrame):
1004
- """Control frame to update mixer settings."""
1430
+ """Frame for updating audio mixer settings.
1431
+
1432
+ Parameters:
1433
+ settings: Dictionary of mixer setting name to value mappings.
1434
+ """
1005
1435
 
1006
1436
  settings: Mapping[str, Any]
1007
1437
 
1008
1438
 
1009
1439
  @dataclass
1010
1440
  class MixerEnableFrame(MixerControlFrame):
1011
- """Control frame to enable or disable the mixer at runtime."""
1441
+ """Frame for enabling/disabling audio mixer at runtime.
1442
+
1443
+ Parameters:
1444
+ enable: Whether to enable (True) or disable (False) the mixer.
1445
+ """
1012
1446
 
1013
1447
  enable: bool
1014
1448