letta-nightly 0.11.7.dev20250910104051__py3-none-any.whl → 0.11.7.dev20250912104045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. letta/adapters/letta_llm_request_adapter.py +4 -2
  2. letta/adapters/letta_llm_stream_adapter.py +4 -2
  3. letta/agents/agent_loop.py +23 -0
  4. letta/agents/letta_agent_v2.py +34 -12
  5. letta/functions/helpers.py +3 -2
  6. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  7. letta/groups/sleeptime_multi_agent_v3.py +4 -2
  8. letta/helpers/tpuf_client.py +41 -9
  9. letta/interfaces/anthropic_streaming_interface.py +10 -6
  10. letta/interfaces/openai_streaming_interface.py +9 -74
  11. letta/llm_api/google_vertex_client.py +6 -1
  12. letta/llm_api/openai_client.py +9 -8
  13. letta/orm/agent.py +4 -1
  14. letta/orm/block.py +1 -0
  15. letta/orm/blocks_agents.py +1 -0
  16. letta/orm/job.py +5 -1
  17. letta/orm/organization.py +2 -0
  18. letta/orm/sources_agents.py +2 -1
  19. letta/orm/tools_agents.py +5 -2
  20. letta/schemas/message.py +19 -2
  21. letta/server/rest_api/interface.py +34 -2
  22. letta/server/rest_api/json_parser.py +2 -0
  23. letta/server/rest_api/redis_stream_manager.py +17 -3
  24. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  25. letta/server/rest_api/routers/v1/agents.py +49 -180
  26. letta/server/rest_api/routers/v1/folders.py +2 -2
  27. letta/server/rest_api/routers/v1/sources.py +2 -2
  28. letta/server/rest_api/routers/v1/tools.py +23 -39
  29. letta/server/rest_api/streaming_response.py +2 -1
  30. letta/server/server.py +7 -5
  31. letta/services/agent_serialization_manager.py +4 -3
  32. letta/services/job_manager.py +5 -2
  33. letta/services/mcp_manager.py +66 -5
  34. letta/services/summarizer/summarizer.py +2 -1
  35. letta/services/tool_executor/files_tool_executor.py +2 -2
  36. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  37. letta/services/tool_sandbox/local_sandbox.py +2 -2
  38. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  39. letta/streaming_utils.py +29 -4
  40. letta/utils.py +72 -3
  41. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/METADATA +3 -3
  42. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/RECORD +45 -44
  43. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/WHEEL +0 -0
  44. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/entry_points.txt +0 -0
  45. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/licenses/LICENSE +0 -0
letta/adapters/letta_llm_request_adapter.py CHANGED
@@ -8,6 +8,7 @@ from letta.schemas.letta_message_content import OmittedReasoningContent, Reasoni
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.schemas.user import User
 from letta.settings import settings
+from letta.utils import safe_create_task
 
 
 class LettaLLMRequestAdapter(LettaLLMAdapter):
@@ -98,7 +99,7 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
         if step_id is None or actor is None or not settings.track_provider_trace:
             return
 
-        asyncio.create_task(
+        safe_create_task(
             self.telemetry_manager.create_provider_trace_async(
                 actor=actor,
                 provider_trace_create=ProviderTraceCreate(
@@ -107,5 +108,6 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
                     step_id=step_id,  # Use original step_id for telemetry
                     organization_id=actor.organization_id,
                 ),
-            )
+            ),
+            label="create_provider_trace",
         )
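
Both adapters (and several files below) make the same substitution: bare `asyncio.create_task(...)` calls become `safe_create_task(..., label=...)` from `letta/utils.py` (changed in this release as `letta/utils.py +72 -3`, not shown in this excerpt). The implementation is not visible here; a minimal sketch of what such a wrapper typically does, assuming the label is used for task naming and failure logging:

```python
import asyncio
import logging
from typing import Any, Coroutine

logger = logging.getLogger(__name__)


def safe_create_task(coro: Coroutine[Any, Any, Any], label: str = "unnamed_task") -> asyncio.Task:
    """Sketch only: schedule `coro` so that exceptions are logged under `label`
    rather than silently dropped when the task object is garbage-collected."""

    async def _wrapped():
        try:
            return await coro
        except Exception:
            logger.exception("background task %r failed", label)
            raise

    return asyncio.create_task(_wrapped(), name=label)
```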
letta/adapters/letta_llm_stream_adapter.py CHANGED
@@ -13,6 +13,7 @@ from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.settings import settings
+from letta.utils import safe_create_task
 
 
 class LettaLLMStreamAdapter(LettaLLMAdapter):
@@ -141,7 +142,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
         if step_id is None or actor is None or not settings.track_provider_trace:
             return
 
-        asyncio.create_task(
+        safe_create_task(
             self.telemetry_manager.create_provider_trace_async(
                 actor=actor,
                 provider_trace_create=ProviderTraceCreate(
@@ -165,5 +166,6 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
                     step_id=step_id,  # Use original step_id for telemetry
                     organization_id=actor.organization_id,
                 ),
-            )
+            ),
+            label="create_provider_trace",
        )
letta/agents/agent_loop.py ADDED
@@ -0,0 +1,23 @@
+from typing import TYPE_CHECKING
+
+from letta.agents.base_agent_v2 import BaseAgentV2
+from letta.agents.letta_agent_v2 import LettaAgentV2
+from letta.groups.sleeptime_multi_agent_v3 import SleeptimeMultiAgentV3
+from letta.schemas.agent import AgentState, AgentType
+
+if TYPE_CHECKING:
+    from letta.orm import User
+
+
+class AgentLoop:
+    """Factory class for instantiating the agent execution loop based on agent type"""
+
+    @staticmethod
+    def load(agent_state: AgentState, actor: "User") -> BaseAgentV2:
+        if agent_state.enable_sleeptime and agent_state.agent_type != AgentType.voice_convo_agent:
+            return SleeptimeMultiAgentV3(agent_state=agent_state, actor=actor, group=agent_state.multi_agent_group)
+        else:
+            return LettaAgentV2(
+                agent_state=agent_state,
+                actor=actor,
+            )
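
The new factory gives callers one entry point instead of choosing between `LettaAgentV2` and `SleeptimeMultiAgentV3` inline. A hypothetical call site (where `agent_state` and `actor` come from, e.g. the agent manager, is an assumption):

```python
from letta.agents.agent_loop import AgentLoop

# agent_state and actor loaded elsewhere (assumption)
agent_loop = AgentLoop.load(agent_state=agent_state, actor=actor)
# -> SleeptimeMultiAgentV3 when sleeptime is enabled (except for voice
#    convo agents), otherwise LettaAgentV2
```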
letta/agents/letta_agent_v2.py CHANGED
@@ -58,7 +58,7 @@ from letta.services.tool_executor.tool_execution_manager import ToolExecutionMan
 from letta.settings import model_settings, settings, summarizer_settings
 from letta.system import package_function_response
 from letta.types import JsonDict
-from letta.utils import log_telemetry, united_diff, validate_function_response
+from letta.utils import log_telemetry, safe_create_task, united_diff, validate_function_response
 
 
 class LettaAgentV2(BaseAgentV2):
@@ -213,8 +213,17 @@
 
         if self.stop_reason is None:
             self.stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-        self._request_checkpoint_finish(request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns)
-        return LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+
+        result = LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+        if run_id:
+            if self.job_update_metadata is None:
+                self.job_update_metadata = {}
+            self.job_update_metadata["result"] = result.model_dump(mode="json")
+
+        await self._request_checkpoint_finish(
+            request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns, run_id=run_id
+        )
+        return result
 
     @trace_method
     async def stream(
@@ -301,7 +310,20 @@
             yield f"data: {self.stop_reason.model_dump_json()}\n\n"
             raise
 
-        self._request_checkpoint_finish(request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns)
+        if run_id:
+            letta_messages = Message.to_letta_messages_from_list(
+                self.response_messages,
+                use_assistant_message=use_assistant_message,
+                reverse=False,
+            )
+            result = LettaResponse(messages=letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+            if self.job_update_metadata is None:
+                self.job_update_metadata = {}
+            self.job_update_metadata["result"] = result.model_dump(mode="json")
+
+        await self._request_checkpoint_finish(
+            request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns, run_id=run_id
+        )
         for finish_chunk in self.get_finish_chunks_for_stream(self.usage, self.stop_reason):
             yield f"data: {finish_chunk}\n\n"
 
@@ -736,11 +758,10 @@
         return None
 
     @trace_method
-    def _request_checkpoint_finish(self, request_span: Span | None, request_start_timestamp_ns: int | None) -> None:
-        if request_span is not None:
-            duration_ns = get_utc_timestamp_ns() - request_start_timestamp_ns
-            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
-            request_span.end()
+    async def _request_checkpoint_finish(
+        self, request_span: Span | None, request_start_timestamp_ns: int | None, run_id: str | None
+    ) -> None:
+        await self._log_request(request_start_timestamp_ns, request_span, self.job_update_metadata, is_error=False, run_id=run_id)
         return None
 
     @trace_method
@@ -850,7 +871,7 @@
             tool_call_messages = create_letta_messages_from_llm_response(
                 agent_id=agent_state.id,
                 model=agent_state.llm_config.model,
-                function_name="",
+                function_name=tool_call.function.name,
                 function_arguments={},
                 tool_execution_result=ToolExecutionResult(status="error"),
                 tool_call_id=tool_call_id,
@@ -1151,7 +1172,7 @@
         step_metrics: StepMetrics,
         run_id: str | None = None,
     ):
-        task = asyncio.create_task(
+        task = safe_create_task(
             self.step_manager.record_step_metrics_async(
                 actor=self.actor,
                 step_id=step_id,
@@ -1163,7 +1184,8 @@
                 project_id=self.agent_state.project_id,
                 template_id=self.agent_state.template_id,
                 base_template_id=self.agent_state.base_template_id,
-            )
+            ),
+            label="record_step_metrics",
         )
         return task
 
letta/functions/helpers.py CHANGED
@@ -19,6 +19,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.settings import settings
+from letta.utils import safe_create_task
 
 
 # TODO needed?
@@ -447,7 +448,7 @@ async def _send_message_to_agents_matching_tags_async(
         timeout=settings.multi_agent_send_message_timeout,
     )
 
-    tasks = [asyncio.create_task(_send_single(agent_state)) for agent_state in matching_agents]
+    tasks = [safe_create_task(_send_single(agent_state), label=f"send_to_agent_{agent_state.id}") for agent_state in matching_agents]
     results = await asyncio.gather(*tasks, return_exceptions=True)
     final = []
     for r in results:
@@ -488,7 +489,7 @@ async def _send_message_to_all_agents_in_group_async(sender_agent: "Agent", mess
         timeout=settings.multi_agent_send_message_timeout,
     )
 
-    tasks = [asyncio.create_task(_send_single(agent_state)) for agent_state in worker_agents]
+    tasks = [safe_create_task(_send_single(agent_state), label=f"send_to_worker_{agent_state.id}") for agent_state in worker_agents]
     results = await asyncio.gather(*tasks, return_exceptions=True)
     final = []
     for r in results:
letta/groups/sleeptime_multi_agent_v2.py CHANGED
@@ -24,6 +24,7 @@ from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.step_manager import NoopStepManager, StepManager
 from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
+from letta.utils import safe_create_task
 
 
 class SleeptimeMultiAgentV2(BaseAgent):
@@ -236,7 +237,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
         )
         run = await self.job_manager.create_job_async(pydantic_job=run, actor=self.actor)
 
-        asyncio.create_task(
+        safe_create_task(
             self._participant_agent_step(
                 foreground_agent_id=self.agent_id,
                 sleeptime_agent_id=sleeptime_agent_id,
@@ -244,7 +245,8 @@
                 last_processed_message_id=last_processed_message_id,
                 run_id=run.id,
                 use_assistant_message=True,
-            )
+            ),
+            label=f"participant_agent_step_{sleeptime_agent_id}",
         )
         return run.id
 
letta/groups/sleeptime_multi_agent_v3.py CHANGED
@@ -17,6 +17,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.run import Run
 from letta.schemas.user import User
 from letta.services.group_manager import GroupManager
+from letta.utils import safe_create_task
 
 
 class SleeptimeMultiAgentV3(LettaAgentV2):
@@ -142,7 +143,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
         )
         run = await self.job_manager.create_job_async(pydantic_job=run, actor=self.actor)
 
-        asyncio.create_task(
+        safe_create_task(
             self._participant_agent_step(
                 foreground_agent_id=self.agent_state.id,
                 sleeptime_agent_id=sleeptime_agent_id,
@@ -150,7 +151,8 @@
                 last_processed_message_id=last_processed_message_id,
                 run_id=run.id,
                 use_assistant_message=use_assistant_message,
-            )
+            ),
+            label=f"participant_agent_step_{sleeptime_agent_id}",
         )
         return run.id
 
letta/helpers/tpuf_client.py CHANGED
@@ -62,11 +62,18 @@ class TurbopufferClient:
         """
         from letta.llm_api.llm_client import LLMClient
 
+        # filter out empty strings after stripping
+        filtered_texts = [text for text in texts if text.strip()]
+
+        # skip embedding if no valid texts
+        if not filtered_texts:
+            return []
+
         embedding_client = LLMClient.create(
             provider_type=self.default_embedding_config.embedding_endpoint_type,
             actor=actor,
         )
-        embeddings = await embedding_client.request_embeddings(texts, self.default_embedding_config)
+        embeddings = await embedding_client.request_embeddings(filtered_texts, self.default_embedding_config)
         return embeddings
 
     @trace_method
@@ -119,8 +126,16 @@
         """
         from turbopuffer import AsyncTurbopuffer
 
+        # filter out empty text chunks
+        filtered_chunks = [(i, text) for i, text in enumerate(text_chunks) if text.strip()]
+
+        if not filtered_chunks:
+            logger.warning("All text chunks were empty, skipping insertion")
+            return []
+
         # generate embeddings using the default config
-        embeddings = await self._generate_embeddings(text_chunks, actor)
+        filtered_texts = [text for _, text in filtered_chunks]
+        embeddings = await self._generate_embeddings(filtered_texts, actor)
 
         namespace_name = await self._get_archive_namespace_name(archive_id)
 
@@ -152,8 +167,8 @@
         tags_arrays = []  # Store tags as arrays
         passages = []
 
-        for idx, (text, embedding) in enumerate(zip(text_chunks, embeddings)):
-            passage_id = passage_ids[idx]
+        for (original_idx, text), embedding in zip(filtered_chunks, embeddings):
+            passage_id = passage_ids[original_idx]
 
             # append to columns
             ids.append(passage_id)
@@ -240,8 +255,16 @@
         """
         from turbopuffer import AsyncTurbopuffer
 
+        # filter out empty message texts
+        filtered_messages = [(i, text) for i, text in enumerate(message_texts) if text.strip()]
+
+        if not filtered_messages:
+            logger.warning("All message texts were empty, skipping insertion")
+            return True
+
         # generate embeddings using the default config
-        embeddings = await self._generate_embeddings(message_texts, actor)
+        filtered_texts = [text for _, text in filtered_messages]
+        embeddings = await self._generate_embeddings(filtered_texts, actor)
 
         namespace_name = await self._get_message_namespace_name(organization_id)
 
@@ -266,8 +289,10 @@
         project_ids = []
         template_ids = []
 
-        for idx, (text, embedding, role, created_at) in enumerate(zip(message_texts, embeddings, roles, created_ats)):
-            message_id = message_ids[idx]
+        for (original_idx, text), embedding in zip(filtered_messages, embeddings):
+            message_id = message_ids[original_idx]
+            role = roles[original_idx]
+            created_at = created_ats[original_idx]
 
             # ensure the provided timestamp is timezone-aware and in UTC
             if created_at.tzinfo is None:
@@ -1162,8 +1187,15 @@
         if not text_chunks:
             return []
 
+        # filter out empty text chunks
+        filtered_chunks = [text for text in text_chunks if text.strip()]
+
+        if not filtered_chunks:
+            logger.warning("All text chunks were empty, skipping file passage insertion")
+            return []
+
         # generate embeddings using the default config
-        embeddings = await self._generate_embeddings(text_chunks, actor)
+        embeddings = await self._generate_embeddings(filtered_chunks, actor)
 
         namespace_name = await self._get_file_passages_namespace_name(organization_id)
 
@@ -1189,7 +1221,7 @@
         created_ats = []
         passages = []
 
-        for idx, (text, embedding) in enumerate(zip(text_chunks, embeddings)):
+        for text, embedding in zip(filtered_chunks, embeddings):
             passage = PydanticPassage(
                 text=text,
                 file_id=file_id,
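
Every tpuf_client hunk applies the same pattern: drop whitespace-only texts before embedding, but carry each survivor's original index so the parallel arrays (`passage_ids`, `roles`, `created_ats`) stay aligned. A self-contained illustration of the pattern:

```python
text_chunks = ["first passage", "   ", "second passage", ""]
passage_ids = ["p-0", "p-1", "p-2", "p-3"]

# keep (original_index, text) pairs so positions survive the filter
filtered_chunks = [(i, t) for i, t in enumerate(text_chunks) if t.strip()]
# -> [(0, "first passage"), (2, "second passage")]

embeddings = [[0.1], [0.2]]  # stand-in for _generate_embeddings output

for (original_idx, text), embedding in zip(filtered_chunks, embeddings):
    print(passage_ids[original_idx], text, embedding)
# p-0 first passage [0.1]
# p-2 second passage [0.2]
```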
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -106,15 +106,19 @@ class AnthropicStreamingInterface:
         try:
             tool_input = json.loads(self.accumulated_tool_call_args)
         except json.JSONDecodeError as e:
-            logger.warning(
-                f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
-                f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
-            )
-            raise
+            # Attempt to use OptimisticJSONParser to handle incomplete/malformed JSON
+            try:
+                tool_input = self.json_parser.parse(self.accumulated_tool_call_args)
+            except:
+                logger.warning(
+                    f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
+                    f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
+                )
+                raise e
         if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
             arguments = str(json.dumps(tool_input["function"]["arguments"], indent=2))
         else:
-            arguments = self.accumulated_tool_call_args
+            arguments = str(json.dumps(tool_input, indent=2))
         return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name))
 
     def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
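
The new control flow tries strict `json.loads` first, falls back to the lenient parser for truncated or malformed streamed JSON, and re-raises the original `JSONDecodeError` only if both fail. The shape of the pattern, with a caller-supplied stand-in for `OptimisticJSONParser`:

```python
import json
from typing import Any, Callable


def parse_tool_args(raw: str, lenient_parse: Callable[[str], dict]) -> dict[str, Any]:
    try:
        return json.loads(raw)
    except json.JSONDecodeError as e:
        try:
            # best-effort parse for incomplete/malformed streamed JSON
            return lenient_parse(raw)
        except Exception:
            raise e  # surface the original decode error
```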
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -24,7 +24,7 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
-from letta.streaming_utils import JSONInnerThoughtsExtractor
+from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
 from letta.utils import count_tokens
 
 logger = get_logger(__name__)
@@ -53,6 +53,8 @@ class OpenAIStreamingInterface:
 
         self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
         self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=put_inner_thoughts_in_kwarg)
+        # Reader that extracts only the assistant message value from send_message args
+        self.assistant_message_json_reader = FunctionArgumentsStreamHandler(json_key=self.assistant_message_tool_kwarg)
         self.function_name_buffer = None
         self.function_args_buffer = None
         self.function_id_buffer = None
@@ -274,6 +276,8 @@ class OpenAIStreamingInterface:
                         # Store the ID of the tool call so allow skipping the corresponding response
                         if self.function_id_buffer:
                             self.prev_assistant_message_id = self.function_id_buffer
+                        # Reset message reader at the start of a new send_message stream
+                        self.assistant_message_json_reader.reset()
 
                     else:
                         if prev_message_type and prev_message_type != "tool_call_message":
@@ -328,39 +332,15 @@
                         self.last_flushed_function_name is not None
                         and self.last_flushed_function_name == self.assistant_message_tool_name
                     ):
-                        # do an additional parse on the updates_main_json
-                        if self.function_args_buffer:
-                            updates_main_json = self.function_args_buffer + updates_main_json
-                            self.function_args_buffer = None
-
-                        # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
-                        match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
-                        if updates_main_json == match_str:
-                            updates_main_json = None
-
-                        else:
-                            # Some hardcoding to strip off the trailing "}"
-                            if updates_main_json in ["}", '"}']:
-                                updates_main_json = None
-                            if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                                updates_main_json = updates_main_json[:-1]
-
-                        if not updates_main_json:
-                            # early exit to turn into content mode
-                            pass
-
-                        # There may be a buffer from a previous chunk, for example
-                        # if the previous chunk had arguments but we needed to flush name
-                        if self.function_args_buffer:
-                            # In this case, we should release the buffer + new data at once
-                            combined_chunk = self.function_args_buffer + updates_main_json
-
+                        # Minimal, robust extraction: only emit the value of "message"
+                        extracted = self.assistant_message_json_reader.process_json_chunk(tool_call.function.arguments)
+                        if extracted:
                             if prev_message_type and prev_message_type != "assistant_message":
                                 message_index += 1
                             assistant_message = AssistantMessage(
                                 id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
-                                content=combined_chunk,
+                                content=extracted,
                                 otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = assistant_message.message_type
@@ -368,51 +348,6 @@
                         # Store the ID of the tool call so allow skipping the corresponding response
                         if self.function_id_buffer:
                             self.prev_assistant_message_id = self.function_id_buffer
-                            # clear buffer
-                            self.function_args_buffer = None
-                            self.function_id_buffer = None
-
-                        else:
-                            # If there's no buffer to clear, just output a new chunk with new data
-                            # TODO: THIS IS HORRIBLE
-                            # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                            # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                            parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                            if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                                self.assistant_message_tool_kwarg
-                            ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                                new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                                prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                                # TODO: Assumes consistent state and that prev_content is subset of new_content
-                                diff = new_content.replace(prev_content, "", 1)
-
-                                # quick patch to mitigate double message streaming error
-                                # TODO: root cause this issue and remove patch
-                                if diff != "" and "\\n" not in new_content:
-                                    converted_new_content = new_content.replace("\n", "\\n")
-                                    converted_content_diff = converted_new_content.replace(prev_content, "", 1)
-                                    if converted_content_diff == "":
-                                        diff = converted_content_diff
-
-                                self.current_json_parse_result = parsed_args
-                                if prev_message_type and prev_message_type != "assistant_message":
-                                    message_index += 1
-                                assistant_message = AssistantMessage(
-                                    id=self.letta_message_id,
-                                    date=datetime.now(timezone.utc),
-                                    content=diff,
-                                    # name=name,
-                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                )
-                                prev_message_type = assistant_message.message_type
-                                yield assistant_message
-
-                            # Store the ID of the tool call so allow skipping the corresponding response
-                            if self.function_id_buffer:
-                                self.prev_assistant_message_id = self.function_id_buffer
-                            # clear buffers
-                            self.function_id_buffer = None
                 else:
                     # There may be a buffer from a previous chunk, for example
                     # if the previous chunk had arguments but we needed to flush name
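
`FunctionArgumentsStreamHandler` comes from `letta/streaming_utils.py` (changed in this release, diff not shown in this excerpt); conceptually it scans streamed `send_message` argument chunks and emits only the value of the configured JSON key. A toy version under that assumption, ignoring escaped quotes and nesting that a production handler must deal with:

```python
class StringValueExtractor:
    """Toy stand-in: emit only the string value of `json_key` from streamed JSON."""

    def __init__(self, json_key: str = "message"):
        self.marker = f'"{json_key}":'
        self.reset()

    def reset(self):
        self.buffer, self.in_value, self.done = "", False, False

    def feed(self, chunk: str) -> str:
        if self.done:
            return ""
        self.buffer += chunk
        if not self.in_value:
            idx = self.buffer.find(self.marker)
            if idx == -1:
                return ""
            rest = self.buffer[idx + len(self.marker):].lstrip()
            if not rest.startswith('"'):
                return ""  # opening quote of the value not streamed yet
            self.in_value = True
            self.buffer = rest[1:]
        end = self.buffer.find('"')
        if end == -1:
            out, self.buffer = self.buffer, ""
            return out
        out, self.done = self.buffer[:end], True
        return out


ext = StringValueExtractor("message")
print("".join(ext.feed(c) for c in ['{"mess', 'age": "Hi', ' there"}']))  # Hi there
```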
letta/llm_api/google_vertex_client.py CHANGED
@@ -67,6 +67,7 @@ class GoogleVertexClient(LLMClientBase):
         # https://github.com/googleapis/python-aiplatform/issues/4472
         retry_count = 1
         should_retry = True
+        response_data = None
         while should_retry and retry_count <= self.MAX_RETRIES:
             try:
                 response = await client.aio.models.generate_content(
@@ -79,6 +80,8 @@
                 if e.code == 503 or e.code == 500:
                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
                     retry_count += 1
+                    if retry_count > self.MAX_RETRIES:
+                        raise e
                     continue
                 raise e
             except Exception as e:
@@ -114,6 +117,8 @@
             should_retry = is_malformed_function_call
             retry_count += 1
 
+        if response_data is None:
+            raise RuntimeError("Failed to get response data after all retries")
         return response_data
 
     @staticmethod
@@ -272,7 +277,7 @@
         tool_names = []
 
         contents = self.add_dummy_model_messages(
-            [m.to_google_ai_dict() for m in messages],
+            PydanticMessage.to_google_dicts_from_list(messages),
         )
 
         request_data = {
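
Two failure modes are closed here: a retryable 5xx on the last attempt no longer falls out of the loop silently, and `response_data` can no longer be referenced before assignment. A reduced sketch of the hardened loop shape (a dummy call stands in for `generate_content`):

```python
MAX_RETRIES = 3


def flaky_call(attempt: int) -> dict:
    # stand-in for client.aio.models.generate_content
    if attempt < 3:
        raise ConnectionError("transient 503")
    return {"candidates": []}


response_data = None  # initialized before the loop, as in the diff
retry_count = 1
while retry_count <= MAX_RETRIES:
    try:
        response_data = flaky_call(retry_count)
        break
    except ConnectionError as e:
        retry_count += 1
        if retry_count > MAX_RETRIES:
            raise e  # no longer swallowed on the final attempt

# guard mirrors the diff: never return with response_data unset
if response_data is None:
    raise RuntimeError("Failed to get response data after all retries")
```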
letta/llm_api/openai_client.py CHANGED
@@ -198,14 +198,15 @@ class OpenAIClient(LLMClientBase):
         # TODO(matt) move into LLMConfig
         # TODO: This vllm checking is very brittle and is a patch at most
         tool_choice = None
-        if self.requires_auto_tool_choice(llm_config):
-            tool_choice = "auto"
-        elif tools:
-            # only set if tools is non-Null
-            tool_choice = "required"
-
-        if force_tool_call is not None:
-            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
+        if tools:  # only set tool_choice if tools exist
+            if self.requires_auto_tool_choice(llm_config):
+                tool_choice = "auto"
+            else:
+                # only set if tools is non-Null
+                tool_choice = "required"
+
+            if force_tool_call is not None:
+                tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
 
         data = ChatCompletionRequest(
             model=model,
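
The restructured branch guarantees `tool_choice` stays `None` whenever no tools are passed; previously `force_tool_call` (or a provider requiring auto tool choice) could set it even with an empty tool list. The decision condensed into a standalone function (a sketch; the real code builds a `ToolFunctionChoice` object rather than a dict):

```python
def pick_tool_choice(tools: list | None, requires_auto: bool, force_tool_call: str | None):
    if not tools:
        return None  # never send tool_choice without tools
    if force_tool_call is not None:
        return {"type": "function", "function": {"name": force_tool_call}}
    return "auto" if requires_auto else "required"
```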
letta/orm/agent.py CHANGED
@@ -34,7 +34,10 @@ if TYPE_CHECKING:
 class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin, TemplateMixin, AsyncAttrs):
     __tablename__ = "agents"
     __pydantic_model__ = PydanticAgentState
-    __table_args__ = (Index("ix_agents_created_at", "created_at", "id"),)
+    __table_args__ = (
+        Index("ix_agents_created_at", "created_at", "id"),
+        Index("ix_agents_organization_id", "organization_id"),
+    )
 
     # agent generates its own id
     # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
letta/orm/block.py CHANGED
@@ -24,6 +24,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
     __table_args__ = (
         UniqueConstraint("id", "label", name="unique_block_id_label"),
         Index("created_at_label_idx", "created_at", "label"),
+        Index("ix_block_label", "label"),
     )
 
     template_name: Mapped[Optional[str]] = mapped_column(
letta/orm/blocks_agents.py CHANGED
@@ -20,6 +20,7 @@ class BlocksAgents(Base):
         UniqueConstraint("agent_id", "block_id", name="unique_agent_block"),
         Index("ix_blocks_agents_block_label_agent_id", "block_label", "agent_id"),
         Index("ix_blocks_block_label", "block_label"),
+        Index("ix_blocks_agents_block_id", "block_id"),
     )
 
     # unique agent + block label
letta/orm/job.py CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, List, Optional
 
-from sqlalchemy import JSON, BigInteger, Index, String
+from sqlalchemy import JSON, BigInteger, ForeignKey, Index, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from letta.orm.mixins import UserMixin
@@ -12,6 +12,7 @@ from letta.schemas.job import Job as PydanticJob, LettaRequestConfig
 if TYPE_CHECKING:
     from letta.orm.job_messages import JobMessage
     from letta.orm.message import Message
+    from letta.orm.organization import Organization
     from letta.orm.step import Step
     from letta.orm.user import User
 
@@ -36,6 +37,7 @@ class Job(SqlalchemyBase, UserMixin):
     request_config: Mapped[Optional[LettaRequestConfig]] = mapped_column(
         JSON, nullable=True, doc="The request configuration for the job, stored as JSON."
     )
+    organization_id: Mapped[Optional[str]] = mapped_column(String, ForeignKey("organizations.id"))
 
     # callback related columns
     callback_url: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="When set, POST to this URL after job completion.")
@@ -53,6 +55,8 @@
     user: Mapped["User"] = relationship("User", back_populates="jobs")
     job_messages: Mapped[List["JobMessage"]] = relationship("JobMessage", back_populates="job", cascade="all, delete-orphan")
     steps: Mapped[List["Step"]] = relationship("Step", back_populates="job", cascade="save-update")
+    # organization relationship (nullable for backward compatibility)
+    organization: Mapped[Optional["Organization"]] = relationship("Organization", back_populates="jobs")
 
     @property
     def messages(self) -> List["Message"]:
letta/orm/organization.py CHANGED
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
     from letta.orm.block import Block
     from letta.orm.group import Group
     from letta.orm.identity import Identity
+    from letta.orm.job import Job
     from letta.orm.llm_batch_items import LLMBatchItem
     from letta.orm.llm_batch_job import LLMBatchJob
     from letta.orm.message import Message
@@ -66,3 +67,4 @@ class Organization(SqlalchemyBase):
     llm_batch_items: Mapped[List["LLMBatchItem"]] = relationship(
         "LLMBatchItem", back_populates="organization", cascade="all, delete-orphan"
     )
+    jobs: Mapped[List["Job"]] = relationship("Job", back_populates="organization", cascade="all, delete-orphan")
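
The `Organization.jobs` collection pairs with the `Job.organization` scalar added above through matching `back_populates` names. A minimal standalone model pair showing the same wiring (illustrative only, not the letta models):

```python
from typing import List, Optional

from sqlalchemy import ForeignKey, String
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship


class Base(DeclarativeBase):
    pass


class Organization(Base):
    __tablename__ = "organizations"
    id: Mapped[str] = mapped_column(String, primary_key=True)
    # deleting an organization deletes its jobs
    jobs: Mapped[List["Job"]] = relationship("Job", back_populates="organization", cascade="all, delete-orphan")


class Job(Base):
    __tablename__ = "jobs"
    id: Mapped[str] = mapped_column(String, primary_key=True)
    # nullable FK keeps pre-existing job rows valid (backward compatibility)
    organization_id: Mapped[Optional[str]] = mapped_column(String, ForeignKey("organizations.id"))
    organization: Mapped[Optional["Organization"]] = relationship("Organization", back_populates="jobs")
```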