dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195) hide show
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -32,7 +32,6 @@ from pipecat.frames.frames import (
32
32
  LLMMessagesFrame,
33
33
  LLMTextFrame,
34
34
  LLMUpdateSettingsFrame,
35
- VisionImageRawFrame,
36
35
  )
37
36
  from pipecat.metrics.metrics import LLMTokenUsage
38
37
  from pipecat.processors.aggregators.llm_context import LLMContext
@@ -67,6 +66,7 @@ class BaseOpenAILLMService(LLMService):
67
66
  top_p: Top-p (nucleus) sampling parameter (0.0 to 1.0).
68
67
  max_tokens: Maximum tokens in response (deprecated, use max_completion_tokens).
69
68
  max_completion_tokens: Maximum completion tokens to generate.
69
+ service_tier: Service tier to use (e.g., "auto", "flex", "priority").
70
70
  extra: Additional model-specific parameters.
71
71
  """
72
72
 
@@ -84,6 +84,7 @@ class BaseOpenAILLMService(LLMService):
84
84
  top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
85
85
  max_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1)
86
86
  max_completion_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1)
87
+ service_tier: Optional[str] = Field(default_factory=lambda: NOT_GIVEN)
87
88
  extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
88
89
 
89
90
  def __init__(
@@ -126,6 +127,7 @@ class BaseOpenAILLMService(LLMService):
126
127
  "top_p": params.top_p,
127
128
  "max_tokens": params.max_tokens,
128
129
  "max_completion_tokens": params.max_completion_tokens,
130
+ "service_tier": params.service_tier,
129
131
  "extra": params.extra if isinstance(params.extra, dict) else {},
130
132
  }
131
133
  self._retry_timeout_secs = retry_timeout_secs
@@ -237,6 +239,7 @@ class BaseOpenAILLMService(LLMService):
237
239
  "top_p": self._settings["top_p"],
238
240
  "max_tokens": self._settings["max_tokens"],
239
241
  "max_completion_tokens": self._settings["max_completion_tokens"],
242
+ "service_tier": self._settings["service_tier"],
240
243
  }
241
244
 
242
245
  # Messages, tools, tool_choice
@@ -282,8 +285,10 @@ class BaseOpenAILLMService(LLMService):
282
285
  # base64 encode any images
283
286
  for message in messages:
284
287
  if message.get("mime_type") == "image/jpeg":
285
- encoded_image = base64.b64encode(message["data"].getvalue()).decode("utf-8")
286
- text = message["content"]
288
+ # Avoid .getvalue() which makes a full copy of BytesIO
289
+ raw_bytes = message["data"].read()
290
+ encoded_image = base64.b64encode(raw_bytes).decode("utf-8")
291
+ text = message.get("content", "")
287
292
  message["content"] = [
288
293
  {"type": "text", "text": text},
289
294
  {
@@ -291,6 +296,7 @@ class BaseOpenAILLMService(LLMService):
291
296
  "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
292
297
  },
293
298
  ]
299
+ # Explicit cleanup
294
300
  del message["data"]
295
301
  del message["mime_type"]
296
302
 
@@ -335,10 +341,16 @@ class BaseOpenAILLMService(LLMService):
335
341
 
336
342
  async for chunk in chunk_stream:
337
343
  if chunk.usage:
344
+ cached_tokens = (
345
+ chunk.usage.prompt_tokens_details.cached_tokens
346
+ if chunk.usage.prompt_tokens_details
347
+ else None
348
+ )
338
349
  tokens = LLMTokenUsage(
339
350
  prompt_tokens=chunk.usage.prompt_tokens,
340
351
  completion_tokens=chunk.usage.completion_tokens,
341
352
  total_tokens=chunk.usage.total_tokens,
353
+ cache_read_input_tokens=cached_tokens,
342
354
  )
343
355
  await self.start_llm_usage_metrics(tokens)
344
356
 
@@ -417,13 +429,18 @@ class BaseOpenAILLMService(LLMService):
417
429
  async def process_frame(self, frame: Frame, direction: FrameDirection):
418
430
  """Process frames for LLM completion requests.
419
431
 
420
- Handles OpenAILLMContextFrame, LLMContextFrame, LLMMessagesFrame,
421
- VisionImageRawFrame, and LLMUpdateSettingsFrame to trigger LLM
422
- completions and manage settings.
423
-
424
- Args:
425
- frame: The frame to process.
426
- direction: The direction of frame processing.
432
+ Handles OpenAILLMContextFrame, LLMContextFrame, LLMMessagesFrame,
433
+ and LLMUpdateSettingsFrame to trigger LLM completions and manage
434
+ settings.
440
+
441
+ Args:
442
+ frame: The frame to process.
443
+ direction: The direction of frame processing.
427
444
  """
428
445
  await super().process_frame(frame, direction)
429
446
 
@@ -438,16 +455,6 @@ class BaseOpenAILLMService(LLMService):
438
455
  # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal
439
456
  # LLMContext with it
440
457
  context = OpenAILLMContext.from_messages(frame.messages)
441
- elif isinstance(frame, VisionImageRawFrame):
442
- # This is only useful in very simple pipelines because it creates
443
- # a new context. Generally we want a context manager to catch
444
- # UserImageRawFrames coming through the pipeline and add them
445
- # to the context.
446
- # TODO: support the newer universal LLMContext with a VisionImageRawFrame equivalent?
447
- context = OpenAILLMContext()
448
- context.add_image_frame_message(
449
- format=frame.format, size=frame.size, image=frame.image, text=frame.text
450
- )
451
458
  elif isinstance(frame, LLMUpdateSettingsFrame):
452
459
  await self._update_settings(frame.settings)
453
460
  else:
File without changes
@@ -0,0 +1,272 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """OpenAI Realtime LLM context and aggregator implementations."""
8
+
9
+ import copy
10
+ import json
11
+
12
+ from loguru import logger
13
+
14
+ from pipecat.frames.frames import (
15
+ Frame,
16
+ FunctionCallResultFrame,
17
+ InterimTranscriptionFrame,
18
+ LLMMessagesUpdateFrame,
19
+ LLMSetToolsFrame,
20
+ LLMTextFrame,
21
+ TranscriptionFrame,
22
+ )
23
+ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
24
+ from pipecat.processors.frame_processor import FrameDirection
25
+ from pipecat.services.openai.llm import (
26
+ OpenAIAssistantContextAggregator,
27
+ OpenAIUserContextAggregator,
28
+ )
29
+
30
+ from . import events
31
+ from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame
32
+
33
+
34
+ class OpenAIRealtimeLLMContext(OpenAILLMContext):
35
+ """OpenAI Realtime LLM context with session management and message conversion.
36
+
37
+ Extends the standard OpenAI LLM context to support real-time session properties,
38
+ instruction management, and conversion between standard message formats and
39
+ realtime conversation items.
40
+ """
41
+
42
+ def __init__(self, messages=None, tools=None, **kwargs):
43
+ """Initialize the OpenAIRealtimeLLMContext.
44
+
45
+ Args:
46
+ messages: Initial conversation messages. Defaults to None.
47
+ tools: Available function tools. Defaults to None.
48
+ **kwargs: Additional arguments passed to parent OpenAILLMContext.
49
+ """
50
+ super().__init__(messages=messages, tools=tools, **kwargs)
51
+ self.__setup_local()
52
+
53
+ def __setup_local(self):
54
+ self.llm_needs_settings_update = True
55
+ self.llm_needs_initial_messages = True
56
+ self._session_instructions = ""
57
+
58
+ return
59
+
60
+ @staticmethod
61
+ def upgrade_to_realtime(obj: OpenAILLMContext) -> "OpenAIRealtimeLLMContext":
62
+ """Upgrade a standard OpenAI LLM context to a realtime context.
63
+
64
+ Args:
65
+ obj: The OpenAILLMContext instance to upgrade.
66
+
67
+ Returns:
68
+ The upgraded OpenAIRealtimeLLMContext instance.
69
+ """
70
+ if isinstance(obj, OpenAILLMContext) and not isinstance(obj, OpenAIRealtimeLLMContext):
71
+ obj.__class__ = OpenAIRealtimeLLMContext
72
+ obj.__setup_local()
73
+ return obj
74
+
75
+ # todo
76
+ # - finish implementing all frames
77
+
78
+ def from_standard_message(self, message):
79
+ """Convert a standard message format to a realtime conversation item.
80
+
81
+ Args:
82
+ message: The standard message dictionary to convert.
83
+
84
+ Returns:
85
+ A ConversationItem instance for the realtime API.
86
+ """
87
+ if message.get("role") == "user":
88
+ content = message.get("content")
89
+ if isinstance(message.get("content"), list):
90
+ content = ""
91
+ for c in message.get("content"):
92
+ if c.get("type") == "text":
93
+ content += " " + c.get("text")
94
+ else:
95
+ logger.error(
96
+ f"Unhandled content type in context message: {c.get('type')} - {message}"
97
+ )
98
+ return events.ConversationItem(
99
+ role="user",
100
+ type="message",
101
+ content=[events.ItemContent(type="input_text", text=content)],
102
+ )
103
+ if message.get("role") == "assistant" and message.get("tool_calls"):
104
+ tc = message.get("tool_calls")[0]
105
+ return events.ConversationItem(
106
+ type="function_call",
107
+ call_id=tc["id"],
108
+ name=tc["function"]["name"],
109
+ arguments=tc["function"]["arguments"],
110
+ )
111
+ logger.error(f"Unhandled message type in from_standard_message: {message}")
112
+
113
+ def get_messages_for_initializing_history(self):
114
+ """Get conversation items for initializing the realtime session history.
115
+
116
+ Converts the context's messages to a format suitable for the realtime API,
117
+ handling system instructions and conversation history packaging.
118
+
119
+ Returns:
120
+ List of conversation items for session initialization.
121
+ """
122
+ # We can't load a long conversation history into the openai realtime api yet. (The API/model
123
+ # forgets that it can do audio, if you do a series of `conversation.item.create` calls.) So
124
+ # our general strategy until this is fixed is just to put everything into a first "user"
125
+ # message as a single input.
126
+ if not self.messages:
127
+ return []
128
+
129
+ messages = copy.deepcopy(self.messages)
130
+
131
+ # If we have a "system" message as our first message, let's pull that out into session
132
+ # "instructions"
133
+ if messages[0].get("role") == "system":
134
+ self.llm_needs_settings_update = True
135
+ system = messages.pop(0)
136
+ content = system.get("content")
137
+ if isinstance(content, str):
138
+ self._session_instructions = content
139
+ elif isinstance(content, list):
140
+ self._session_instructions = content[0].get("text")
141
+ if not messages:
142
+ return []
143
+
144
+ # If we have just a single "user" item, we can just send it normally
145
+ if len(messages) == 1 and messages[0].get("role") == "user":
146
+ return [self.from_standard_message(messages[0])]
147
+
148
+ # Otherwise, let's pack everything into a single "user" message with a bit of
149
+ # explanation for the LLM
150
+ intro_text = """
151
+ This is a previously saved conversation. Please treat this conversation history as a
152
+ starting point for the current conversation."""
153
+
154
+ trailing_text = """
155
+ This is the end of the previously saved conversation. Please continue the conversation
156
+ from here. If the last message is a user instruction or question, act on that instruction
157
+ or answer the question. If the last message is an assistant response, simple say that you
158
+ are ready to continue the conversation."""
159
+
160
+ return [
161
+ {
162
+ "role": "user",
163
+ "type": "message",
164
+ "content": [
165
+ {
166
+ "type": "input_text",
167
+ "text": "\n\n".join(
168
+ [intro_text, json.dumps(messages, indent=2), trailing_text]
169
+ ),
170
+ }
171
+ ],
172
+ }
173
+ ]
174
+
175
+ def add_user_content_item_as_message(self, item):
176
+ """Add a user content item as a standard message to the context.
177
+
178
+ Args:
179
+ item: The conversation item to add as a user message.
180
+ """
181
+ message = {
182
+ "role": "user",
183
+ "content": [{"type": "text", "text": item.content[0].transcript}],
184
+ }
185
+ self.add_message(message)
186
+
187
+
188
+ class OpenAIRealtimeUserContextAggregator(OpenAIUserContextAggregator):
189
+ """User context aggregator for OpenAI Realtime API.
190
+
191
+ Handles user input frames and generates appropriate context updates
192
+ for the realtime conversation, including message updates and tool settings.
193
+
194
+ Args:
195
+ context: The OpenAI realtime LLM context.
196
+ **kwargs: Additional arguments passed to parent aggregator.
197
+ """
198
+
199
+ async def process_frame(
200
+ self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
201
+ ):
202
+ """Process incoming frames and handle realtime-specific frame types.
203
+
204
+ Args:
205
+ frame: The frame to process.
206
+ direction: The direction of frame flow in the pipeline.
207
+ """
208
+ await super().process_frame(frame, direction)
209
+ # Parent does not push LLMMessagesUpdateFrame. This ensures that in a typical pipeline,
210
+ # messages are only processed by the user context aggregator, which is generally what we want. But
211
+ # we also need to send new messages over the websocket, so the openai realtime API has them
212
+ # in its context.
213
+ if isinstance(frame, LLMMessagesUpdateFrame):
214
+ await self.push_frame(RealtimeMessagesUpdateFrame(context=self._context))
215
+
216
+ # Parent also doesn't push the LLMSetToolsFrame.
217
+ if isinstance(frame, LLMSetToolsFrame):
218
+ await self.push_frame(frame, direction)
219
+
220
+ async def push_aggregation(self):
221
+ """Push user input aggregation.
222
+
223
+ Currently ignores all user input coming into the pipeline as realtime
224
+ audio input is handled directly by the service.
225
+ """
226
+ # for the moment, ignore all user input coming into the pipeline.
227
+ # todo: think about whether/how to fix this to allow for text input from
228
+ # upstream (transport/transcription, or other sources)
229
+ pass
230
+
231
+
232
+ class OpenAIRealtimeAssistantContextAggregator(OpenAIAssistantContextAggregator):
233
+ """Assistant context aggregator for OpenAI Realtime API.
234
+
235
+ Handles assistant output frames from the realtime service, filtering
236
+ out duplicate text frames and managing function call results.
237
+
238
+ Args:
239
+ context: The OpenAI realtime LLM context.
240
+ **kwargs: Additional arguments passed to parent aggregator.
241
+ """
242
+
243
+ # The LLMAssistantContextAggregator uses TextFrames to aggregate the LLM output,
244
+ # but the OpenAIRealtimeLLMService pushes LLMTextFrames and TTSTextFrames. We
245
+ # need to override this process_frame for LLMTextFrame, so that only the TTSTextFrames
246
+ # are processed. This ensures that the context gets only one set of messages.
247
+ # OpenAIRealtimeLLMService also pushes TranscriptionFrames and InterimTranscriptionFrames,
248
+ # so we need to ignore pushing those as well, as they're also TextFrames.
249
+ async def process_frame(self, frame: Frame, direction: FrameDirection):
250
+ """Process assistant frames, filtering out duplicate text content.
251
+
252
+ Args:
253
+ frame: The frame to process.
254
+ direction: The direction of frame flow in the pipeline.
255
+ """
256
+ if not isinstance(frame, (LLMTextFrame, TranscriptionFrame, InterimTranscriptionFrame)):
257
+ await super().process_frame(frame, direction)
258
+
259
+ async def handle_function_call_result(self, frame: FunctionCallResultFrame):
260
+ """Handle function call result and notify the realtime service.
261
+
262
+ Args:
263
+ frame: The function call result frame to handle.
264
+ """
265
+ await super().handle_function_call_result(frame)
266
+
267
+ # The standard function callback code path pushes the FunctionCallResultFrame from the llm itself,
268
+ # so we didn't have a chance to add the result to the openai realtime api context. Let's push a
269
+ # special frame to do that.
270
+ await self.push_frame(
271
+ RealtimeFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM
272
+ )