letta-nightly 0.11.7.dev20250910104051__py3-none-any.whl → 0.11.7.dev20250912104045__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_request_adapter.py +4 -2
- letta/adapters/letta_llm_stream_adapter.py +4 -2
- letta/agents/agent_loop.py +23 -0
- letta/agents/letta_agent_v2.py +34 -12
- letta/functions/helpers.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +4 -2
- letta/groups/sleeptime_multi_agent_v3.py +4 -2
- letta/helpers/tpuf_client.py +41 -9
- letta/interfaces/anthropic_streaming_interface.py +10 -6
- letta/interfaces/openai_streaming_interface.py +9 -74
- letta/llm_api/google_vertex_client.py +6 -1
- letta/llm_api/openai_client.py +9 -8
- letta/orm/agent.py +4 -1
- letta/orm/block.py +1 -0
- letta/orm/blocks_agents.py +1 -0
- letta/orm/job.py +5 -1
- letta/orm/organization.py +2 -0
- letta/orm/sources_agents.py +2 -1
- letta/orm/tools_agents.py +5 -2
- letta/schemas/message.py +19 -2
- letta/server/rest_api/interface.py +34 -2
- letta/server/rest_api/json_parser.py +2 -0
- letta/server/rest_api/redis_stream_manager.py +17 -3
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/agents.py +49 -180
- letta/server/rest_api/routers/v1/folders.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +2 -2
- letta/server/rest_api/routers/v1/tools.py +23 -39
- letta/server/rest_api/streaming_response.py +2 -1
- letta/server/server.py +7 -5
- letta/services/agent_serialization_manager.py +4 -3
- letta/services/job_manager.py +5 -2
- letta/services/mcp_manager.py +66 -5
- letta/services/summarizer/summarizer.py +2 -1
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
- letta/services/tool_sandbox/local_sandbox.py +2 -2
- letta/services/tool_sandbox/modal_version_manager.py +2 -1
- letta/streaming_utils.py +29 -4
- letta/utils.py +72 -3
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/METADATA +3 -3
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/RECORD +45 -44
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/licenses/LICENSE +0 -0
letta/adapters/letta_llm_request_adapter.py
CHANGED
@@ -8,6 +8,7 @@ from letta.schemas.letta_message_content import OmittedReasoningContent, Reasoni
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.schemas.user import User
 from letta.settings import settings
+from letta.utils import safe_create_task


 class LettaLLMRequestAdapter(LettaLLMAdapter):
@@ -98,7 +99,7 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
         if step_id is None or actor is None or not settings.track_provider_trace:
             return

-
+        safe_create_task(
             self.telemetry_manager.create_provider_trace_async(
                 actor=actor,
                 provider_trace_create=ProviderTraceCreate(
@@ -107,5 +108,6 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
                     step_id=step_id,  # Use original step_id for telemetry
                     organization_id=actor.organization_id,
                 ),
-            )
+            ),
+            label="create_provider_trace",
         )
letta/adapters/letta_llm_stream_adapter.py
CHANGED
@@ -13,6 +13,7 @@ from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.settings import settings
+from letta.utils import safe_create_task


 class LettaLLMStreamAdapter(LettaLLMAdapter):
@@ -141,7 +142,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
         if step_id is None or actor is None or not settings.track_provider_trace:
             return

-
+        safe_create_task(
             self.telemetry_manager.create_provider_trace_async(
                 actor=actor,
                 provider_trace_create=ProviderTraceCreate(
@@ -165,5 +166,6 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
                     step_id=step_id,  # Use original step_id for telemetry
                     organization_id=actor.organization_id,
                 ),
-            )
+            ),
+            label="create_provider_trace",
         )
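Note: the `safe_create_task(..., label=...)` calls introduced throughout this release replace bare task scheduling; the wrapper lives in letta/utils.py (+72 -3 in the file list), whose body is not included in this excerpt. A minimal sketch of what such a labeled fire-and-forget helper could look like, assuming only the name and signature visible at the call sites:

import asyncio
import logging
from typing import Any, Coroutine


def safe_create_task(coro: Coroutine[Any, Any, Any], label: str = "unlabeled") -> "asyncio.Task[Any]":
    # Hypothetical sketch, not the letta implementation: run the coroutine in
    # a task whose failures are logged under a readable label instead of
    # surfacing as "Task exception was never retrieved" warnings at exit.
    async def _runner() -> Any:
        try:
            return await coro
        except Exception:
            logging.getLogger(__name__).exception("background task %r failed", label)
            raise

    return asyncio.create_task(_runner(), name=label)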
letta/agents/agent_loop.py
ADDED
@@ -0,0 +1,23 @@
+from typing import TYPE_CHECKING
+
+from letta.agents.base_agent_v2 import BaseAgentV2
+from letta.agents.letta_agent_v2 import LettaAgentV2
+from letta.groups.sleeptime_multi_agent_v3 import SleeptimeMultiAgentV3
+from letta.schemas.agent import AgentState, AgentType
+
+if TYPE_CHECKING:
+    from letta.orm import User
+
+
+class AgentLoop:
+    """Factory class for instantiating the agent execution loop based on agent type"""
+
+    @staticmethod
+    def load(agent_state: AgentState, actor: "User") -> BaseAgentV2:
+        if agent_state.enable_sleeptime and agent_state.agent_type != AgentType.voice_convo_agent:
+            return SleeptimeMultiAgentV3(agent_state=agent_state, actor=actor, group=agent_state.multi_agent_group)
+        else:
+            return LettaAgentV2(
+                agent_state=agent_state,
+                actor=actor,
+            )
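Note: `AgentLoop.load` centralizes a dispatch that callers previously inlined (compare the -180 lines removed from letta/server/rest_api/routers/v1/agents.py in the file list). A hedged usage sketch; `agent_state`, `actor`, `input_messages`, and the `step` method are assumptions about the caller, not shown in this diff:

from letta.agents.agent_loop import AgentLoop

# Sleeptime-enabled agents and plain agents both come back behind the same
# BaseAgentV2 interface, so the caller no longer branches on agent type.
agent_loop = AgentLoop.load(agent_state=agent_state, actor=actor)
response = await agent_loop.step(input_messages)  # assumed BaseAgentV2 method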
letta/agents/letta_agent_v2.py
CHANGED
@@ -58,7 +58,7 @@ from letta.services.tool_executor.tool_execution_manager import ToolExecutionMan
 from letta.settings import model_settings, settings, summarizer_settings
 from letta.system import package_function_response
 from letta.types import JsonDict
-from letta.utils import log_telemetry, united_diff, validate_function_response
+from letta.utils import log_telemetry, safe_create_task, united_diff, validate_function_response


 class LettaAgentV2(BaseAgentV2):
@@ -213,8 +213,17 @@ class LettaAgentV2(BaseAgentV2):

         if self.stop_reason is None:
             self.stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-
-
+
+        result = LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+        if run_id:
+            if self.job_update_metadata is None:
+                self.job_update_metadata = {}
+            self.job_update_metadata["result"] = result.model_dump(mode="json")
+
+        await self._request_checkpoint_finish(
+            request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns, run_id=run_id
+        )
+        return result

     @trace_method
     async def stream(
@@ -301,7 +310,20 @@ class LettaAgentV2(BaseAgentV2):
                 yield f"data: {self.stop_reason.model_dump_json()}\n\n"
                 raise

-
+        if run_id:
+            letta_messages = Message.to_letta_messages_from_list(
+                self.response_messages,
+                use_assistant_message=use_assistant_message,
+                reverse=False,
+            )
+            result = LettaResponse(messages=letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+            if self.job_update_metadata is None:
+                self.job_update_metadata = {}
+            self.job_update_metadata["result"] = result.model_dump(mode="json")
+
+        await self._request_checkpoint_finish(
+            request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns, run_id=run_id
+        )
         for finish_chunk in self.get_finish_chunks_for_stream(self.usage, self.stop_reason):
             yield f"data: {finish_chunk}\n\n"

@@ -736,11 +758,10 @@ class LettaAgentV2(BaseAgentV2):
         return None

     @trace_method
-    def _request_checkpoint_finish(
-
-
-
-        request_span.end()
+    async def _request_checkpoint_finish(
+        self, request_span: Span | None, request_start_timestamp_ns: int | None, run_id: str | None
+    ) -> None:
+        await self._log_request(request_start_timestamp_ns, request_span, self.job_update_metadata, is_error=False, run_id=run_id)
         return None

     @trace_method
@@ -850,7 +871,7 @@ class LettaAgentV2(BaseAgentV2):
             tool_call_messages = create_letta_messages_from_llm_response(
                 agent_id=agent_state.id,
                 model=agent_state.llm_config.model,
-                function_name=
+                function_name=tool_call.function.name,
                 function_arguments={},
                 tool_execution_result=ToolExecutionResult(status="error"),
                 tool_call_id=tool_call_id,
@@ -1151,7 +1172,7 @@ class LettaAgentV2(BaseAgentV2):
         step_metrics: StepMetrics,
         run_id: str | None = None,
    ):
-        task =
+        task = safe_create_task(
             self.step_manager.record_step_metrics_async(
                 actor=self.actor,
                 step_id=step_id,
@@ -1163,7 +1184,8 @@ class LettaAgentV2(BaseAgentV2):
                 project_id=self.agent_state.project_id,
                 template_id=self.agent_state.template_id,
                 base_template_id=self.agent_state.base_template_id,
-            )
+            ),
+            label="record_step_metrics",
         )
         return task
letta/functions/helpers.py
CHANGED
@@ -19,6 +19,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.settings import settings
+from letta.utils import safe_create_task


 # TODO needed?
@@ -447,7 +448,7 @@ async def _send_message_to_agents_matching_tags_async(
         timeout=settings.multi_agent_send_message_timeout,
     )

-    tasks = [
+    tasks = [safe_create_task(_send_single(agent_state), label=f"send_to_agent_{agent_state.id}") for agent_state in matching_agents]
     results = await asyncio.gather(*tasks, return_exceptions=True)
     final = []
     for r in results:
@@ -488,7 +489,7 @@ async def _send_message_to_all_agents_in_group_async(sender_agent: "Agent", mess
         timeout=settings.multi_agent_send_message_timeout,
     )

-    tasks = [
+    tasks = [safe_create_task(_send_single(agent_state), label=f"send_to_worker_{agent_state.id}") for agent_state in worker_agents]
     results = await asyncio.gather(*tasks, return_exceptions=True)
     final = []
     for r in results:
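Note: both fan-out helpers now share one shape: wrap each per-agent coroutine in a labeled task, then gather with `return_exceptions=True` so one failing send does not cancel its siblings. A standalone sketch of that shape, with `_send_single` and `safe_create_task` assumed from the diff above:

import asyncio


async def fan_out(agent_states):
    tasks = [
        safe_create_task(_send_single(agent_state), label=f"send_to_agent_{agent_state.id}")
        for agent_state in agent_states
    ]
    # return_exceptions=True keeps sibling tasks running when one raises;
    # each entry in results is then either a value or the captured exception
    results = await asyncio.gather(*tasks, return_exceptions=True)
    return [r for r in results if not isinstance(r, Exception)]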
letta/groups/sleeptime_multi_agent_v2.py
CHANGED
@@ -24,6 +24,7 @@ from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.step_manager import NoopStepManager, StepManager
 from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
+from letta.utils import safe_create_task


 class SleeptimeMultiAgentV2(BaseAgent):
@@ -236,7 +237,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
         )
         run = await self.job_manager.create_job_async(pydantic_job=run, actor=self.actor)

-
+        safe_create_task(
             self._participant_agent_step(
                 foreground_agent_id=self.agent_id,
                 sleeptime_agent_id=sleeptime_agent_id,
@@ -244,7 +245,8 @@ class SleeptimeMultiAgentV2(BaseAgent):
                 last_processed_message_id=last_processed_message_id,
                 run_id=run.id,
                 use_assistant_message=True,
-            )
+            ),
+            label=f"participant_agent_step_{sleeptime_agent_id}",
         )
         return run.id
letta/groups/sleeptime_multi_agent_v3.py
CHANGED
@@ -17,6 +17,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.run import Run
 from letta.schemas.user import User
 from letta.services.group_manager import GroupManager
+from letta.utils import safe_create_task


 class SleeptimeMultiAgentV3(LettaAgentV2):
@@ -142,7 +143,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
         )
         run = await self.job_manager.create_job_async(pydantic_job=run, actor=self.actor)

-
+        safe_create_task(
             self._participant_agent_step(
                 foreground_agent_id=self.agent_state.id,
                 sleeptime_agent_id=sleeptime_agent_id,
@@ -150,7 +151,8 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
                 last_processed_message_id=last_processed_message_id,
                 run_id=run.id,
                 use_assistant_message=use_assistant_message,
-            )
+            ),
+            label=f"participant_agent_step_{sleeptime_agent_id}",
         )
         return run.id
letta/helpers/tpuf_client.py
CHANGED
@@ -62,11 +62,18 @@ class TurbopufferClient:
         """
         from letta.llm_api.llm_client import LLMClient

+        # filter out empty strings after stripping
+        filtered_texts = [text for text in texts if text.strip()]
+
+        # skip embedding if no valid texts
+        if not filtered_texts:
+            return []
+
         embedding_client = LLMClient.create(
             provider_type=self.default_embedding_config.embedding_endpoint_type,
             actor=actor,
         )
-        embeddings = await embedding_client.request_embeddings(
+        embeddings = await embedding_client.request_embeddings(filtered_texts, self.default_embedding_config)
         return embeddings

     @trace_method
@@ -119,8 +126,16 @@ class TurbopufferClient:
         """
         from turbopuffer import AsyncTurbopuffer

+        # filter out empty text chunks
+        filtered_chunks = [(i, text) for i, text in enumerate(text_chunks) if text.strip()]
+
+        if not filtered_chunks:
+            logger.warning("All text chunks were empty, skipping insertion")
+            return []
+
         # generate embeddings using the default config
-
+        filtered_texts = [text for _, text in filtered_chunks]
+        embeddings = await self._generate_embeddings(filtered_texts, actor)

         namespace_name = await self._get_archive_namespace_name(archive_id)

@@ -152,8 +167,8 @@ class TurbopufferClient:
         tags_arrays = []  # Store tags as arrays
         passages = []

-        for
-        passage_id = passage_ids[
+        for (original_idx, text), embedding in zip(filtered_chunks, embeddings):
+            passage_id = passage_ids[original_idx]

             # append to columns
             ids.append(passage_id)
@@ -240,8 +255,16 @@ class TurbopufferClient:
         """
         from turbopuffer import AsyncTurbopuffer

+        # filter out empty message texts
+        filtered_messages = [(i, text) for i, text in enumerate(message_texts) if text.strip()]
+
+        if not filtered_messages:
+            logger.warning("All message texts were empty, skipping insertion")
+            return True
+
         # generate embeddings using the default config
-
+        filtered_texts = [text for _, text in filtered_messages]
+        embeddings = await self._generate_embeddings(filtered_texts, actor)

         namespace_name = await self._get_message_namespace_name(organization_id)

@@ -266,8 +289,10 @@ class TurbopufferClient:
         project_ids = []
         template_ids = []

-        for
-        message_id = message_ids[
+        for (original_idx, text), embedding in zip(filtered_messages, embeddings):
+            message_id = message_ids[original_idx]
+            role = roles[original_idx]
+            created_at = created_ats[original_idx]

             # ensure the provided timestamp is timezone-aware and in UTC
             if created_at.tzinfo is None:
@@ -1162,8 +1187,15 @@ class TurbopufferClient:
         if not text_chunks:
             return []

+        # filter out empty text chunks
+        filtered_chunks = [text for text in text_chunks if text.strip()]
+
+        if not filtered_chunks:
+            logger.warning("All text chunks were empty, skipping file passage insertion")
+            return []
+
         # generate embeddings using the default config
-        embeddings = await self._generate_embeddings(
+        embeddings = await self._generate_embeddings(filtered_chunks, actor)

         namespace_name = await self._get_file_passages_namespace_name(organization_id)

@@ -1189,7 +1221,7 @@ class TurbopufferClient:
         created_ats = []
         passages = []

-        for
+        for text, embedding in zip(filtered_chunks, embeddings):
             passage = PydanticPassage(
                 text=text,
                 file_id=file_id,
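Note: every insertion path above gets the same fix: drop empty strings before requesting embeddings (empty inputs are typically rejected by embedding endpoints), but remember each survivor's original index so parallel arrays such as `passage_ids`, `roles`, and `created_ats` stay aligned. A self-contained illustration of the index-preserving filter, with a stub `embed` standing in for `_generate_embeddings`:

def insert_non_empty(texts, ids, embed):
    # keep (original_index, text) pairs so positions survive the filtering
    filtered = [(i, t) for i, t in enumerate(texts) if t.strip()]
    if not filtered:
        return []  # nothing embeddable; mirrors the early returns above

    vectors = embed([t for _, t in filtered])
    # zip survivors back to their original ids via the saved index
    return [(ids[i], t, v) for (i, t), v in zip(filtered, vectors)]


rows = insert_non_empty(["hello", "  ", "world"], ["p-0", "p-1", "p-2"], embed=lambda ts: [[0.0]] * len(ts))
assert [r[0] for r in rows] == ["p-0", "p-2"]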
letta/interfaces/anthropic_streaming_interface.py
CHANGED
@@ -106,15 +106,19 @@ class AnthropicStreamingInterface:
         try:
             tool_input = json.loads(self.accumulated_tool_call_args)
         except json.JSONDecodeError as e:
-
-
-
-
-
+            # Attempt to use OptimisticJSONParser to handle incomplete/malformed JSON
+            try:
+                tool_input = self.json_parser.parse(self.accumulated_tool_call_args)
+            except:
+                logger.warning(
+                    f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
+                    f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
+                )
+                raise e
         if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
             arguments = str(json.dumps(tool_input["function"]["arguments"], indent=2))
         else:
-            arguments =
+            arguments = str(json.dumps(tool_input, indent=2))
         return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name))

     def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -24,7 +24,7 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
-from letta.streaming_utils import JSONInnerThoughtsExtractor
+from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
 from letta.utils import count_tokens

 logger = get_logger(__name__)
@@ -53,6 +53,8 @@ class OpenAIStreamingInterface:

         self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
         self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=put_inner_thoughts_in_kwarg)
+        # Reader that extracts only the assistant message value from send_message args
+        self.assistant_message_json_reader = FunctionArgumentsStreamHandler(json_key=self.assistant_message_tool_kwarg)
         self.function_name_buffer = None
         self.function_args_buffer = None
         self.function_id_buffer = None
@@ -274,6 +276,8 @@ class OpenAIStreamingInterface:
                         # Store the ID of the tool call so allow skipping the corresponding response
                         if self.function_id_buffer:
                             self.prev_assistant_message_id = self.function_id_buffer
+                        # Reset message reader at the start of a new send_message stream
+                        self.assistant_message_json_reader.reset()

                     else:
                         if prev_message_type and prev_message_type != "tool_call_message":
@@ -328,39 +332,15 @@ class OpenAIStreamingInterface:
                         self.last_flushed_function_name is not None
                         and self.last_flushed_function_name == self.assistant_message_tool_name
                     ):
-                        #
-
-
-                        self.function_args_buffer = None
-
-                        # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
-                        match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
-                        if updates_main_json == match_str:
-                            updates_main_json = None
-
-                        else:
-                            # Some hardcoding to strip off the trailing "}"
-                            if updates_main_json in ["}", '"}']:
-                                updates_main_json = None
-                            if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                                updates_main_json = updates_main_json[:-1]
-
-                        if not updates_main_json:
-                            # early exit to turn into content mode
-                            pass
-
-                        # There may be a buffer from a previous chunk, for example
-                        # if the previous chunk had arguments but we needed to flush name
-                        if self.function_args_buffer:
-                            # In this case, we should release the buffer + new data at once
-                            combined_chunk = self.function_args_buffer + updates_main_json
-
+                        # Minimal, robust extraction: only emit the value of "message"
+                        extracted = self.assistant_message_json_reader.process_json_chunk(tool_call.function.arguments)
+                        if extracted:
                             if prev_message_type and prev_message_type != "assistant_message":
                                 message_index += 1
                             assistant_message = AssistantMessage(
                                 id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
-                                content=
+                                content=extracted,
                                 otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = assistant_message.message_type
@@ -368,51 +348,6 @@ class OpenAIStreamingInterface:
                             # Store the ID of the tool call so allow skipping the corresponding response
                             if self.function_id_buffer:
                                 self.prev_assistant_message_id = self.function_id_buffer
-                                # clear buffer
-                                self.function_args_buffer = None
-                                self.function_id_buffer = None
-
-                        else:
-                            # If there's no buffer to clear, just output a new chunk with new data
-                            # TODO: THIS IS HORRIBLE
-                            # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                            # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                            parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                            if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                                self.assistant_message_tool_kwarg
-                            ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                                new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                                prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                                # TODO: Assumes consistent state and that prev_content is subset of new_content
-                                diff = new_content.replace(prev_content, "", 1)
-
-                                # quick patch to mitigate double message streaming error
-                                # TODO: root cause this issue and remove patch
-                                if diff != "" and "\\n" not in new_content:
-                                    converted_new_content = new_content.replace("\n", "\\n")
-                                    converted_content_diff = converted_new_content.replace(prev_content, "", 1)
-                                    if converted_content_diff == "":
-                                        diff = converted_content_diff
-
-                                self.current_json_parse_result = parsed_args
-                                if prev_message_type and prev_message_type != "assistant_message":
-                                    message_index += 1
-                                assistant_message = AssistantMessage(
-                                    id=self.letta_message_id,
-                                    date=datetime.now(timezone.utc),
-                                    content=diff,
-                                    # name=name,
-                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                )
-                                prev_message_type = assistant_message.message_type
-                                yield assistant_message
-
-                            # Store the ID of the tool call so allow skipping the corresponding response
-                            if self.function_id_buffer:
-                                self.prev_assistant_message_id = self.function_id_buffer
-                            # clear buffers
-                            self.function_id_buffer = None
                         else:
                             # There may be a buffer from a previous chunk, for example
                             # if the previous chunk had arguments but we needed to flush name
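Note: the rewrite replaces roughly seventy lines of prefix stripping and optimistic re-parsing with one streaming reader. Going only by the calls visible above (`reset()` and `process_json_chunk(...)` against `json_key="message"`), a hedged sketch of how such a reader is driven; the chunk boundaries and output are illustrative assumptions, not observed behavior:

# Hypothetical driver: feed each streamed tool-call argument delta to the
# reader; it is expected to return only newly completed characters of the
# "message" value, and nothing while still inside other JSON structure.
reader = FunctionArgumentsStreamHandler(json_key="message")
reader.reset()  # start of a new send_message tool call

for chunk in ['{"mess', 'age": "Hel', 'lo!"}']:
    extracted = reader.process_json_chunk(chunk)
    if extracted:
        print(extracted, end="")  # streams something like: Hello!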
letta/llm_api/google_vertex_client.py
CHANGED
@@ -67,6 +67,7 @@ class GoogleVertexClient(LLMClientBase):
         # https://github.com/googleapis/python-aiplatform/issues/4472
         retry_count = 1
         should_retry = True
+        response_data = None
         while should_retry and retry_count <= self.MAX_RETRIES:
             try:
                 response = await client.aio.models.generate_content(
@@ -79,6 +80,8 @@ class GoogleVertexClient(LLMClientBase):
                 if e.code == 503 or e.code == 500:
                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
                     retry_count += 1
+                    if retry_count > self.MAX_RETRIES:
+                        raise e
                     continue
                 raise e
             except Exception as e:
@@ -114,6 +117,8 @@ class GoogleVertexClient(LLMClientBase):
             should_retry = is_malformed_function_call
             retry_count += 1

+        if response_data is None:
+            raise RuntimeError("Failed to get response data after all retries")
         return response_data

     @staticmethod
@@ -272,7 +277,7 @@ class GoogleVertexClient(LLMClientBase):
         tool_names = []

         contents = self.add_dummy_model_messages(
-
+            PydanticMessage.to_google_dicts_from_list(messages),
         )

         request_data = {
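Note: the three additions harden one failure mode: when every attempt hit a 500/503, the old loop incremented `retry_count`, fell out of the `while`, and reached `return response_data` with the variable unbound. Initializing `response_data = None`, raising on the final 5xx, and guarding the return close all three exits. The pattern in isolation, with a stand-in exception type:

class TransientServerError(Exception):
    # stands in for the e.code in (500, 503) check above
    pass


MAX_RETRIES = 3


def call_with_retries(request):
    response = None
    attempt = 1
    while attempt <= MAX_RETRIES:
        try:
            response = request()
            break
        except TransientServerError:
            attempt += 1
            if attempt > MAX_RETRIES:
                raise  # fail loudly on the last attempt instead of falling through
    if response is None:
        raise RuntimeError("no response after all retries")
    return response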
letta/llm_api/openai_client.py
CHANGED
@@ -198,14 +198,15 @@ class OpenAIClient(LLMClientBase):
         # TODO(matt) move into LLMConfig
         # TODO: This vllm checking is very brittle and is a patch at most
         tool_choice = None
-        if
-
-
-
-
-
-
-
+        if tools:  # only set tool_choice if tools exist
+            if self.requires_auto_tool_choice(llm_config):
+                tool_choice = "auto"
+            else:
+                # only set if tools is non-Null
+                tool_choice = "required"
+
+            if force_tool_call is not None:
+                tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))

         data = ChatCompletionRequest(
             model=model,
letta/orm/agent.py
CHANGED
@@ -34,7 +34,10 @@ if TYPE_CHECKING:
 class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin, TemplateMixin, AsyncAttrs):
     __tablename__ = "agents"
     __pydantic_model__ = PydanticAgentState
-    __table_args__ = (
+    __table_args__ = (
+        Index("ix_agents_created_at", "created_at", "id"),
+        Index("ix_agents_organization_id", "organization_id"),
+    )

     # agent generates its own id
     # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
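Note: the composite `(created_at, id)` index lines up with keyset pagination over agents, where `id` breaks ties between rows sharing a timestamp, and the single-column `organization_id` index serves per-org listing filters. An illustrative SQLAlchemy query shape the composite index can satisfy; the cursor variables are assumed, and the row-value comparison as written requires a backend such as PostgreSQL:

from sqlalchemy import select, tuple_

# last_created_at / last_id come from the final row of the previous page
page_query = (
    select(Agent)
    .where(tuple_(Agent.created_at, Agent.id) > (last_created_at, last_id))
    .order_by(Agent.created_at, Agent.id)
    .limit(50)
)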
letta/orm/block.py
CHANGED
@@ -24,6 +24,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
     __table_args__ = (
         UniqueConstraint("id", "label", name="unique_block_id_label"),
         Index("created_at_label_idx", "created_at", "label"),
+        Index("ix_block_label", "label"),
     )

     template_name: Mapped[Optional[str]] = mapped_column(
letta/orm/blocks_agents.py
CHANGED
@@ -20,6 +20,7 @@ class BlocksAgents(Base):
         UniqueConstraint("agent_id", "block_id", name="unique_agent_block"),
         Index("ix_blocks_agents_block_label_agent_id", "block_label", "agent_id"),
         Index("ix_blocks_block_label", "block_label"),
+        Index("ix_blocks_agents_block_id", "block_id"),
     )

     # unique agent + block label
letta/orm/job.py
CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, List, Optional

-from sqlalchemy import JSON, BigInteger, Index, String
+from sqlalchemy import JSON, BigInteger, ForeignKey, Index, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship

 from letta.orm.mixins import UserMixin
@@ -12,6 +12,7 @@ from letta.schemas.job import Job as PydanticJob, LettaRequestConfig
 if TYPE_CHECKING:
     from letta.orm.job_messages import JobMessage
     from letta.orm.message import Message
+    from letta.orm.organization import Organization
     from letta.orm.step import Step
     from letta.orm.user import User

@@ -36,6 +37,7 @@ class Job(SqlalchemyBase, UserMixin):
     request_config: Mapped[Optional[LettaRequestConfig]] = mapped_column(
         JSON, nullable=True, doc="The request configuration for the job, stored as JSON."
     )
+    organization_id: Mapped[Optional[str]] = mapped_column(String, ForeignKey("organizations.id"))

     # callback related columns
     callback_url: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="When set, POST to this URL after job completion.")
@@ -53,6 +55,8 @@ class Job(SqlalchemyBase, UserMixin):
     user: Mapped["User"] = relationship("User", back_populates="jobs")
     job_messages: Mapped[List["JobMessage"]] = relationship("JobMessage", back_populates="job", cascade="all, delete-orphan")
     steps: Mapped[List["Step"]] = relationship("Step", back_populates="job", cascade="save-update")
+    # organization relationship (nullable for backward compatibility)
+    organization: Mapped[Optional["Organization"]] = relationship("Organization", back_populates="jobs")

     @property
     def messages(self) -> List["Message"]:
letta/orm/organization.py
CHANGED
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
     from letta.orm.block import Block
     from letta.orm.group import Group
     from letta.orm.identity import Identity
+    from letta.orm.job import Job
     from letta.orm.llm_batch_items import LLMBatchItem
     from letta.orm.llm_batch_job import LLMBatchJob
     from letta.orm.message import Message
@@ -66,3 +67,4 @@ class Organization(SqlalchemyBase):
     llm_batch_items: Mapped[List["LLMBatchItem"]] = relationship(
         "LLMBatchItem", back_populates="organization", cascade="all, delete-orphan"
     )
+    jobs: Mapped[List["Job"]] = relationship("Job", back_populates="organization", cascade="all, delete-orphan")