letta-nightly 0.11.7.dev20250910104051__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. letta/adapters/letta_llm_request_adapter.py +4 -2
  2. letta/adapters/letta_llm_stream_adapter.py +4 -2
  3. letta/agents/agent_loop.py +23 -0
  4. letta/agents/letta_agent_v2.py +5 -4
  5. letta/functions/helpers.py +3 -2
  6. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  7. letta/groups/sleeptime_multi_agent_v3.py +4 -2
  8. letta/interfaces/anthropic_streaming_interface.py +10 -6
  9. letta/llm_api/google_vertex_client.py +1 -1
  10. letta/orm/agent.py +4 -1
  11. letta/orm/block.py +1 -0
  12. letta/orm/blocks_agents.py +1 -0
  13. letta/orm/sources_agents.py +2 -1
  14. letta/orm/tools_agents.py +5 -2
  15. letta/schemas/message.py +19 -2
  16. letta/server/rest_api/interface.py +34 -2
  17. letta/server/rest_api/json_parser.py +2 -0
  18. letta/server/rest_api/redis_stream_manager.py +2 -1
  19. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  20. letta/server/rest_api/routers/v1/agents.py +47 -180
  21. letta/server/rest_api/routers/v1/folders.py +2 -2
  22. letta/server/rest_api/routers/v1/sources.py +2 -2
  23. letta/server/rest_api/streaming_response.py +2 -1
  24. letta/server/server.py +7 -5
  25. letta/services/agent_serialization_manager.py +4 -3
  26. letta/services/mcp_manager.py +2 -2
  27. letta/services/summarizer/summarizer.py +2 -1
  28. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  29. letta/services/tool_sandbox/local_sandbox.py +2 -2
  30. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  31. letta/streaming_utils.py +29 -4
  32. letta/utils.py +72 -3
  33. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +1 -1
  34. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +37 -36
  35. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  36. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  37. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
@@ -8,6 +8,7 @@ from letta.schemas.letta_message_content import OmittedReasoningContent, Reasoni
8
8
  from letta.schemas.provider_trace import ProviderTraceCreate
9
9
  from letta.schemas.user import User
10
10
  from letta.settings import settings
11
+ from letta.utils import safe_create_task
11
12
 
12
13
 
13
14
  class LettaLLMRequestAdapter(LettaLLMAdapter):
@@ -98,7 +99,7 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
98
99
  if step_id is None or actor is None or not settings.track_provider_trace:
99
100
  return
100
101
 
101
- asyncio.create_task(
102
+ safe_create_task(
102
103
  self.telemetry_manager.create_provider_trace_async(
103
104
  actor=actor,
104
105
  provider_trace_create=ProviderTraceCreate(
@@ -107,5 +108,6 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
107
108
  step_id=step_id, # Use original step_id for telemetry
108
109
  organization_id=actor.organization_id,
109
110
  ),
110
- )
111
+ ),
112
+ label="create_provider_trace",
111
113
  )
@@ -13,6 +13,7 @@ from letta.schemas.provider_trace import ProviderTraceCreate
13
13
  from letta.schemas.usage import LettaUsageStatistics
14
14
  from letta.schemas.user import User
15
15
  from letta.settings import settings
16
+ from letta.utils import safe_create_task
16
17
 
17
18
 
18
19
  class LettaLLMStreamAdapter(LettaLLMAdapter):
@@ -141,7 +142,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
141
142
  if step_id is None or actor is None or not settings.track_provider_trace:
142
143
  return
143
144
 
144
- asyncio.create_task(
145
+ safe_create_task(
145
146
  self.telemetry_manager.create_provider_trace_async(
146
147
  actor=actor,
147
148
  provider_trace_create=ProviderTraceCreate(
@@ -165,5 +166,6 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
165
166
  step_id=step_id, # Use original step_id for telemetry
166
167
  organization_id=actor.organization_id,
167
168
  ),
168
- )
169
+ ),
170
+ label="create_provider_trace",
169
171
  )
@@ -0,0 +1,23 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from letta.agents.base_agent_v2 import BaseAgentV2
4
+ from letta.agents.letta_agent_v2 import LettaAgentV2
5
+ from letta.groups.sleeptime_multi_agent_v3 import SleeptimeMultiAgentV3
6
+ from letta.schemas.agent import AgentState, AgentType
7
+
8
+ if TYPE_CHECKING:
9
+ from letta.orm import User
10
+
11
+
12
+ class AgentLoop:
13
+ """Factory class for instantiating the agent execution loop based on agent type"""
14
+
15
+ @staticmethod
16
+ def load(agent_state: AgentState, actor: "User") -> BaseAgentV2:
17
+ if agent_state.enable_sleeptime and agent_state.agent_type != AgentType.voice_convo_agent:
18
+ return SleeptimeMultiAgentV3(agent_state=agent_state, actor=actor, group=agent_state.multi_agent_group)
19
+ else:
20
+ return LettaAgentV2(
21
+ agent_state=agent_state,
22
+ actor=actor,
23
+ )
@@ -58,7 +58,7 @@ from letta.services.tool_executor.tool_execution_manager import ToolExecutionMan
58
58
  from letta.settings import model_settings, settings, summarizer_settings
59
59
  from letta.system import package_function_response
60
60
  from letta.types import JsonDict
61
- from letta.utils import log_telemetry, united_diff, validate_function_response
61
+ from letta.utils import log_telemetry, safe_create_task, united_diff, validate_function_response
62
62
 
63
63
 
64
64
  class LettaAgentV2(BaseAgentV2):
@@ -850,7 +850,7 @@ class LettaAgentV2(BaseAgentV2):
850
850
  tool_call_messages = create_letta_messages_from_llm_response(
851
851
  agent_id=agent_state.id,
852
852
  model=agent_state.llm_config.model,
853
- function_name="",
853
+ function_name=tool_call.function.name,
854
854
  function_arguments={},
855
855
  tool_execution_result=ToolExecutionResult(status="error"),
856
856
  tool_call_id=tool_call_id,
@@ -1151,7 +1151,7 @@ class LettaAgentV2(BaseAgentV2):
1151
1151
  step_metrics: StepMetrics,
1152
1152
  run_id: str | None = None,
1153
1153
  ):
1154
- task = asyncio.create_task(
1154
+ task = safe_create_task(
1155
1155
  self.step_manager.record_step_metrics_async(
1156
1156
  actor=self.actor,
1157
1157
  step_id=step_id,
@@ -1163,7 +1163,8 @@ class LettaAgentV2(BaseAgentV2):
1163
1163
  project_id=self.agent_state.project_id,
1164
1164
  template_id=self.agent_state.template_id,
1165
1165
  base_template_id=self.agent_state.base_template_id,
1166
- )
1166
+ ),
1167
+ label="record_step_metrics",
1167
1168
  )
1168
1169
  return task
1169
1170
 
@@ -19,6 +19,7 @@ from letta.schemas.message import Message, MessageCreate
19
19
  from letta.schemas.user import User
20
20
  from letta.server.rest_api.utils import get_letta_server
21
21
  from letta.settings import settings
22
+ from letta.utils import safe_create_task
22
23
 
23
24
 
24
25
  # TODO needed?
@@ -447,7 +448,7 @@ async def _send_message_to_agents_matching_tags_async(
447
448
  timeout=settings.multi_agent_send_message_timeout,
448
449
  )
449
450
 
450
- tasks = [asyncio.create_task(_send_single(agent_state)) for agent_state in matching_agents]
451
+ tasks = [safe_create_task(_send_single(agent_state), label=f"send_to_agent_{agent_state.id}") for agent_state in matching_agents]
451
452
  results = await asyncio.gather(*tasks, return_exceptions=True)
452
453
  final = []
453
454
  for r in results:
@@ -488,7 +489,7 @@ async def _send_message_to_all_agents_in_group_async(sender_agent: "Agent", mess
488
489
  timeout=settings.multi_agent_send_message_timeout,
489
490
  )
490
491
 
491
- tasks = [asyncio.create_task(_send_single(agent_state)) for agent_state in worker_agents]
492
+ tasks = [safe_create_task(_send_single(agent_state), label=f"send_to_worker_{agent_state.id}") for agent_state in worker_agents]
492
493
  results = await asyncio.gather(*tasks, return_exceptions=True)
493
494
  final = []
494
495
  for r in results:
@@ -24,6 +24,7 @@ from letta.services.message_manager import MessageManager
24
24
  from letta.services.passage_manager import PassageManager
25
25
  from letta.services.step_manager import NoopStepManager, StepManager
26
26
  from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
27
+ from letta.utils import safe_create_task
27
28
 
28
29
 
29
30
  class SleeptimeMultiAgentV2(BaseAgent):
@@ -236,7 +237,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
236
237
  )
237
238
  run = await self.job_manager.create_job_async(pydantic_job=run, actor=self.actor)
238
239
 
239
- asyncio.create_task(
240
+ safe_create_task(
240
241
  self._participant_agent_step(
241
242
  foreground_agent_id=self.agent_id,
242
243
  sleeptime_agent_id=sleeptime_agent_id,
@@ -244,7 +245,8 @@ class SleeptimeMultiAgentV2(BaseAgent):
244
245
  last_processed_message_id=last_processed_message_id,
245
246
  run_id=run.id,
246
247
  use_assistant_message=True,
247
- )
248
+ ),
249
+ label=f"participant_agent_step_{sleeptime_agent_id}",
248
250
  )
249
251
  return run.id
250
252
 
@@ -17,6 +17,7 @@ from letta.schemas.message import Message, MessageCreate
17
17
  from letta.schemas.run import Run
18
18
  from letta.schemas.user import User
19
19
  from letta.services.group_manager import GroupManager
20
+ from letta.utils import safe_create_task
20
21
 
21
22
 
22
23
  class SleeptimeMultiAgentV3(LettaAgentV2):
@@ -142,7 +143,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
142
143
  )
143
144
  run = await self.job_manager.create_job_async(pydantic_job=run, actor=self.actor)
144
145
 
145
- asyncio.create_task(
146
+ safe_create_task(
146
147
  self._participant_agent_step(
147
148
  foreground_agent_id=self.agent_state.id,
148
149
  sleeptime_agent_id=sleeptime_agent_id,
@@ -150,7 +151,8 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
150
151
  last_processed_message_id=last_processed_message_id,
151
152
  run_id=run.id,
152
153
  use_assistant_message=use_assistant_message,
153
- )
154
+ ),
155
+ label=f"participant_agent_step_{sleeptime_agent_id}",
154
156
  )
155
157
  return run.id
156
158
 
@@ -106,15 +106,19 @@ class AnthropicStreamingInterface:
106
106
  try:
107
107
  tool_input = json.loads(self.accumulated_tool_call_args)
108
108
  except json.JSONDecodeError as e:
109
- logger.warning(
110
- f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
111
- f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
112
- )
113
- raise
109
+ # Attempt to use OptimisticJSONParser to handle incomplete/malformed JSON
110
+ try:
111
+ tool_input = self.json_parser.parse(self.accumulated_tool_call_args)
112
+ except:
113
+ logger.warning(
114
+ f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
115
+ f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
116
+ )
117
+ raise e
114
118
  if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
115
119
  arguments = str(json.dumps(tool_input["function"]["arguments"], indent=2))
116
120
  else:
117
- arguments = self.accumulated_tool_call_args
121
+ arguments = str(json.dumps(tool_input, indent=2))
118
122
  return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name))
119
123
 
120
124
  def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
@@ -272,7 +272,7 @@ class GoogleVertexClient(LLMClientBase):
272
272
  tool_names = []
273
273
 
274
274
  contents = self.add_dummy_model_messages(
275
- [m.to_google_ai_dict() for m in messages],
275
+ PydanticMessage.to_google_dicts_from_list(messages),
276
276
  )
277
277
 
278
278
  request_data = {
letta/orm/agent.py CHANGED
@@ -34,7 +34,10 @@ if TYPE_CHECKING:
34
34
  class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin, TemplateMixin, AsyncAttrs):
35
35
  __tablename__ = "agents"
36
36
  __pydantic_model__ = PydanticAgentState
37
- __table_args__ = (Index("ix_agents_created_at", "created_at", "id"),)
37
+ __table_args__ = (
38
+ Index("ix_agents_created_at", "created_at", "id"),
39
+ Index("ix_agents_organization_id", "organization_id"),
40
+ )
38
41
 
39
42
  # agent generates its own id
40
43
  # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
letta/orm/block.py CHANGED
@@ -24,6 +24,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
24
24
  __table_args__ = (
25
25
  UniqueConstraint("id", "label", name="unique_block_id_label"),
26
26
  Index("created_at_label_idx", "created_at", "label"),
27
+ Index("ix_block_label", "label"),
27
28
  )
28
29
 
29
30
  template_name: Mapped[Optional[str]] = mapped_column(
@@ -20,6 +20,7 @@ class BlocksAgents(Base):
20
20
  UniqueConstraint("agent_id", "block_id", name="unique_agent_block"),
21
21
  Index("ix_blocks_agents_block_label_agent_id", "block_label", "agent_id"),
22
22
  Index("ix_blocks_block_label", "block_label"),
23
+ Index("ix_blocks_agents_block_id", "block_id"),
23
24
  )
24
25
 
25
26
  # unique agent + block label
@@ -1,4 +1,4 @@
1
- from sqlalchemy import ForeignKey, String
1
+ from sqlalchemy import ForeignKey, Index, String
2
2
  from sqlalchemy.orm import Mapped, mapped_column
3
3
 
4
4
  from letta.orm.base import Base
@@ -8,6 +8,7 @@ class SourcesAgents(Base):
8
8
  """Agents can have zero to many sources"""
9
9
 
10
10
  __tablename__ = "sources_agents"
11
+ __table_args__ = (Index("ix_sources_agents_source_id", "source_id"),)
11
12
 
12
13
  agent_id: Mapped[String] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
13
14
  source_id: Mapped[String] = mapped_column(String, ForeignKey("sources.id", ondelete="CASCADE"), primary_key=True)
letta/orm/tools_agents.py CHANGED
@@ -1,4 +1,4 @@
1
- from sqlalchemy import ForeignKey, String, UniqueConstraint
1
+ from sqlalchemy import ForeignKey, Index, String, UniqueConstraint
2
2
  from sqlalchemy.orm import Mapped, mapped_column
3
3
 
4
4
  from letta.orm import Base
@@ -8,7 +8,10 @@ class ToolsAgents(Base):
8
8
  """Agents can have one or many tools associated with them."""
9
9
 
10
10
  __tablename__ = "tools_agents"
11
- __table_args__ = (UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),)
11
+ __table_args__ = (
12
+ UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),
13
+ Index("ix_tools_agents_tool_id", "tool_id"),
14
+ )
12
15
 
13
16
  # Each agent must have unique tool names
14
17
  agent_id: Mapped[str] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
letta/schemas/message.py CHANGED
@@ -1027,10 +1027,13 @@ class Message(BaseMessage):
1027
1027
  result = [m for m in result if m is not None]
1028
1028
  return result
1029
1029
 
1030
- def to_google_ai_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict:
1030
+ def to_google_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict | None:
1031
1031
  """
1032
1032
  Go from Message class to Google AI REST message object
1033
1033
  """
1034
+ if self.role == "approval" and self.tool_calls is None:
1035
+ return None
1036
+
1034
1037
  # type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content
1035
1038
  # parts[]: Part
1036
1039
  # role: str ('user' or 'model')
@@ -1076,7 +1079,7 @@ class Message(BaseMessage):
1076
1079
  "parts": content_parts,
1077
1080
  }
1078
1081
 
1079
- elif self.role == "assistant":
1082
+ elif self.role == "assistant" or self.role == "approval":
1080
1083
  assert self.tool_calls is not None or text_content is not None
1081
1084
  google_ai_message = {
1082
1085
  "role": "model", # NOTE: different
@@ -1164,6 +1167,20 @@ class Message(BaseMessage):
1164
1167
 
1165
1168
  return google_ai_message
1166
1169
 
1170
+ @staticmethod
1171
+ def to_google_dicts_from_list(
1172
+ messages: List[Message],
1173
+ put_inner_thoughts_in_kwargs: bool = True,
1174
+ ):
1175
+ result = [
1176
+ m.to_google_dict(
1177
+ put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
1178
+ )
1179
+ for m in messages
1180
+ ]
1181
+ result = [m for m in result if m is not None]
1182
+ return result
1183
+
1167
1184
  @staticmethod
1168
1185
  def generate_otid_from_id(message_id: str, index: int) -> str:
1169
1186
  """
@@ -295,6 +295,25 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
295
295
  self.optimistic_json_parser = OptimisticJSONParser()
296
296
  self.current_json_parse_result = {}
297
297
 
298
+ # NOTE (fix): OpenAI deltas may split a key and its value across chunks
299
+ # (e.g. '"request_heartbeat"' in one chunk, ': true' in the next). The
300
+ # old behavior passed through each fragment verbatim, which could emit
301
+ # a bare key (or a key+opening quote) without its value, producing
302
+ # invalid JSON slices and the "missing end-quote" symptom downstream.
303
+ #
304
+ # To make streamed arguments robust, we add a JSON-aware incremental
305
+ # reader that only releases safe updates for the "main" JSON portion of
306
+ # the tool_call arguments. This prevents partial-key emissions while
307
+ # preserving incremental streaming for consumers.
308
+ #
309
+ # We still stream 'name' fragments as-is (safe), but 'arguments' are
310
+ # parsed incrementally and emitted only when a boundary is safe.
311
+ self._raw_args_reader = JSONInnerThoughtsExtractor(
312
+ inner_thoughts_key=inner_thoughts_kwarg,
313
+ wait_for_first_key=False,
314
+ )
315
+ self._raw_args_tool_call_id = None
316
+
298
317
  # Store metadata passed from server
299
318
  self.metadata = {}
300
319
 
@@ -654,11 +673,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
654
673
  tool_call_delta = {}
655
674
  if tool_call.id:
656
675
  tool_call_delta["id"] = tool_call.id
676
+ # Reset raw args reader per tool_call id
677
+ if self._raw_args_tool_call_id != tool_call.id:
678
+ self._raw_args_tool_call_id = tool_call.id
679
+ self._raw_args_reader = JSONInnerThoughtsExtractor(
680
+ inner_thoughts_key=self.inner_thoughts_kwarg,
681
+ wait_for_first_key=False,
682
+ )
657
683
  if tool_call.function:
658
- if tool_call.function.arguments:
659
- tool_call_delta["arguments"] = tool_call.function.arguments
684
+ # Stream name fragments as-is (names are short and harmless to emit)
660
685
  if tool_call.function.name:
661
686
  tool_call_delta["name"] = tool_call.function.name
687
+ # For arguments, incrementally parse to avoid emitting partial keys
688
+ if tool_call.function.arguments:
689
+ self.current_function_arguments += tool_call.function.arguments
690
+ updates_main_json, _ = self._raw_args_reader.process_fragment(tool_call.function.arguments)
691
+ # Only emit argument updates when a safe boundary is reached
692
+ if updates_main_json:
693
+ tool_call_delta["arguments"] = updates_main_json
662
694
 
663
695
  # We might end up with a no-op, in which case we should omit
664
696
  if (
@@ -63,6 +63,8 @@ class OptimisticJSONParser(JSONParser):
63
63
  '"': self._parse_string,
64
64
  "t": self._parse_true,
65
65
  "f": self._parse_false,
66
+ "T": self._parse_true,
67
+ "F": self._parse_false,
66
68
  "n": self._parse_null,
67
69
  }
68
70
  # Register number parser for digits and signs
@@ -8,6 +8,7 @@ from typing import AsyncIterator, Dict, List, Optional
8
8
 
9
9
  from letta.data_sources.redis_client import AsyncRedisClient
10
10
  from letta.log import get_logger
11
+ from letta.utils import safe_create_task
11
12
 
12
13
  logger = get_logger(__name__)
13
14
 
@@ -62,7 +63,7 @@ class RedisSSEStreamWriter:
62
63
  """Start the background flush task."""
63
64
  if not self._running:
64
65
  self._running = True
65
- self._flush_task = asyncio.create_task(self._periodic_flush())
66
+ self._flush_task = safe_create_task(self._periodic_flush(), label="redis_periodic_flush")
66
67
 
67
68
  async def stop(self):
68
69
  """Stop the background flush task and flush remaining data."""
@@ -14,6 +14,7 @@ from letta.server.rest_api.chat_completions_interface import ChatCompletionsStre
14
14
 
15
15
  # TODO this belongs in a controller!
16
16
  from letta.server.rest_api.utils import get_letta_server, get_user_message_from_chat_completions_request, sse_async_generator
17
+ from letta.utils import safe_create_task
17
18
 
18
19
  if TYPE_CHECKING:
19
20
  from letta.server.server import SyncServer
@@ -98,7 +99,7 @@ async def send_message_to_agent_chat_completions(
98
99
 
99
100
  # Offload the synchronous message_func to a separate thread
100
101
  streaming_interface.stream_start()
101
- asyncio.create_task(
102
+ safe_create_task(
102
103
  asyncio.to_thread(
103
104
  server.send_messages,
104
105
  actor=actor,
@@ -106,7 +107,8 @@ async def send_message_to_agent_chat_completions(
106
107
  input_messages=messages,
107
108
  interface=streaming_interface,
108
109
  put_inner_thoughts_first=False,
109
- )
110
+ ),
111
+ label="openai_send_messages",
110
112
  )
111
113
 
112
114
  # return a stream