letta-nightly 0.8.15.dev20250720104313__py3-none-any.whl → 0.8.16.dev20250721104533__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- letta/__init__.py +1 -1
- letta/agent.py +27 -11
- letta/agents/helpers.py +1 -1
- letta/agents/letta_agent.py +518 -322
- letta/agents/letta_agent_batch.py +1 -2
- letta/agents/voice_agent.py +15 -17
- letta/client/client.py +3 -3
- letta/constants.py +5 -0
- letta/embeddings.py +0 -2
- letta/errors.py +8 -0
- letta/functions/function_sets/base.py +3 -3
- letta/functions/helpers.py +2 -3
- letta/groups/sleeptime_multi_agent.py +0 -1
- letta/helpers/composio_helpers.py +2 -2
- letta/helpers/converters.py +1 -1
- letta/helpers/pinecone_utils.py +8 -0
- letta/helpers/tool_rule_solver.py +13 -18
- letta/llm_api/aws_bedrock.py +16 -2
- letta/llm_api/cohere.py +1 -1
- letta/llm_api/openai_client.py +1 -1
- letta/local_llm/grammars/gbnf_grammar_generator.py +1 -1
- letta/local_llm/llm_chat_completion_wrappers/zephyr.py +14 -14
- letta/local_llm/utils.py +1 -2
- letta/orm/agent.py +3 -3
- letta/orm/block.py +4 -4
- letta/orm/files_agents.py +0 -1
- letta/orm/identity.py +2 -0
- letta/orm/mcp_server.py +0 -2
- letta/orm/message.py +140 -14
- letta/orm/organization.py +5 -5
- letta/orm/passage.py +4 -4
- letta/orm/source.py +1 -1
- letta/orm/sqlalchemy_base.py +61 -39
- letta/orm/step.py +2 -0
- letta/otel/db_pool_monitoring.py +308 -0
- letta/otel/metric_registry.py +94 -1
- letta/otel/sqlalchemy_instrumentation.py +548 -0
- letta/otel/sqlalchemy_instrumentation_integration.py +124 -0
- letta/otel/tracing.py +37 -1
- letta/schemas/agent.py +0 -3
- letta/schemas/agent_file.py +283 -0
- letta/schemas/block.py +0 -3
- letta/schemas/file.py +28 -26
- letta/schemas/letta_message.py +15 -4
- letta/schemas/memory.py +1 -1
- letta/schemas/message.py +31 -26
- letta/schemas/openai/chat_completion_response.py +0 -1
- letta/schemas/providers.py +20 -0
- letta/schemas/source.py +11 -13
- letta/schemas/step.py +12 -0
- letta/schemas/tool.py +0 -4
- letta/serialize_schemas/marshmallow_agent.py +14 -1
- letta/serialize_schemas/marshmallow_block.py +23 -1
- letta/serialize_schemas/marshmallow_message.py +1 -3
- letta/serialize_schemas/marshmallow_tool.py +23 -1
- letta/server/db.py +110 -6
- letta/server/rest_api/app.py +85 -73
- letta/server/rest_api/routers/v1/agents.py +68 -53
- letta/server/rest_api/routers/v1/blocks.py +2 -2
- letta/server/rest_api/routers/v1/jobs.py +3 -0
- letta/server/rest_api/routers/v1/organizations.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +18 -2
- letta/server/rest_api/routers/v1/tools.py +11 -12
- letta/server/rest_api/routers/v1/users.py +1 -1
- letta/server/rest_api/streaming_response.py +13 -5
- letta/server/rest_api/utils.py +8 -25
- letta/server/server.py +11 -4
- letta/server/ws_api/server.py +2 -2
- letta/services/agent_file_manager.py +616 -0
- letta/services/agent_manager.py +133 -46
- letta/services/block_manager.py +38 -17
- letta/services/file_manager.py +106 -21
- letta/services/file_processor/file_processor.py +93 -0
- letta/services/files_agents_manager.py +28 -0
- letta/services/group_manager.py +4 -5
- letta/services/helpers/agent_manager_helper.py +57 -9
- letta/services/identity_manager.py +22 -0
- letta/services/job_manager.py +210 -91
- letta/services/llm_batch_manager.py +9 -6
- letta/services/mcp/stdio_client.py +1 -2
- letta/services/mcp_manager.py +0 -1
- letta/services/message_manager.py +49 -26
- letta/services/passage_manager.py +0 -1
- letta/services/provider_manager.py +1 -1
- letta/services/source_manager.py +114 -5
- letta/services/step_manager.py +36 -4
- letta/services/telemetry_manager.py +9 -2
- letta/services/tool_executor/builtin_tool_executor.py +5 -1
- letta/services/tool_executor/core_tool_executor.py +3 -3
- letta/services/tool_manager.py +95 -20
- letta/services/user_manager.py +4 -12
- letta/settings.py +23 -6
- letta/system.py +1 -1
- letta/utils.py +26 -2
- {letta_nightly-0.8.15.dev20250720104313.dist-info → letta_nightly-0.8.16.dev20250721104533.dist-info}/METADATA +3 -2
- {letta_nightly-0.8.15.dev20250720104313.dist-info → letta_nightly-0.8.16.dev20250721104533.dist-info}/RECORD +99 -94
- {letta_nightly-0.8.15.dev20250720104313.dist-info → letta_nightly-0.8.16.dev20250721104533.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.15.dev20250720104313.dist-info → letta_nightly-0.8.16.dev20250721104533.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.15.dev20250720104313.dist-info → letta_nightly-0.8.16.dev20250721104533.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent.py
CHANGED
Note: indentation is not preserved in this view; removed lines whose content is not rendered appear as bare "-" markers.

@@ -1,4 +1,3 @@
-import asyncio
 import json
 import uuid
 from collections.abc import AsyncGenerator
@@ -44,6 +43,7 @@ from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.schemas.step import StepProgression
 from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
@@ -239,100 +239,164 @@ class LettaAgent(BaseAgent):
 agent_step_span = tracer.start_span("agent_step", start_time=step_start)
 agent_step_span.set_attributes({"step_id": step_id})

-
-
-
-
-
-
-
-
+step_progression = StepProgression.START
+should_continue = False
+try:
+request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+await self._build_and_request_from_llm(
+current_in_context_messages,
+new_in_context_messages,
+agent_state,
+llm_client,
+tool_rules_solver,
+agent_step_span,
+)
 )
-
-in_context_messages = current_in_context_messages + new_in_context_messages
-
-log_event("agent.stream_no_tokens.llm_response.received") # [3^]
-
-response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
-
-# update usage
-usage.step_count += 1
-usage.completion_tokens += response.usage.completion_tokens
-usage.prompt_tokens += response.usage.prompt_tokens
-usage.total_tokens += response.usage.total_tokens
-MetricRegistry().message_output_tokens.record(
-response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
-)
+in_context_messages = current_in_context_messages + new_in_context_messages

-
-#
-raise ValueError("No tool calls found in response, model must make a tool call")
-tool_call = response.choices[0].message.tool_calls[0]
-if response.choices[0].message.reasoning_content:
-reasoning = [
-ReasoningContent(
-reasoning=response.choices[0].message.reasoning_content,
-is_native=True,
-signature=response.choices[0].message.reasoning_content_signature,
-)
-]
-elif response.choices[0].message.omitted_reasoning_content:
-reasoning = [OmittedReasoningContent()]
-elif response.choices[0].message.content:
-reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
-else:
-self.logger.info("No reasoning content found.")
-reasoning = None
-
-persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
-tool_call,
-valid_tool_names,
-agent_state,
-tool_rules_solver,
-response.usage,
-reasoning_content=reasoning,
-step_id=step_id,
-initial_messages=initial_messages,
-agent_step_span=agent_step_span,
-is_final_step=(i == max_steps - 1),
-)
+step_progression = StepProgression.RESPONSE_RECEIVED
+log_event("agent.stream_no_tokens.llm_response.received") # [3^]

-
-new_message_idx = len(initial_messages) if initial_messages else 0
-self.response_messages.extend(persisted_messages[new_message_idx:])
-new_in_context_messages.extend(persisted_messages[new_message_idx:])
-initial_messages = None
-log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
+response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

-
-
-
-
-
+# update usage
+usage.step_count += 1
+usage.completion_tokens += response.usage.completion_tokens
+usage.prompt_tokens += response.usage.prompt_tokens
+usage.total_tokens += response.usage.total_tokens
+MetricRegistry().message_output_tokens.record(
+response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+)

-
-
-
-
-
-
+if not response.choices[0].message.tool_calls:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+raise ValueError("No tool calls found in response, model must make a tool call")
+tool_call = response.choices[0].message.tool_calls[0]
+if response.choices[0].message.reasoning_content:
+reasoning = [
+ReasoningContent(
+reasoning=response.choices[0].message.reasoning_content,
+is_native=True,
+signature=response.choices[0].message.reasoning_content_signature,
+)
+]
+elif response.choices[0].message.omitted_reasoning_content:
+reasoning = [OmittedReasoningContent()]
+elif response.choices[0].message.content:
+reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+else:
+self.logger.info("No reasoning content found.")
+reasoning = None
+
+persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+tool_call,
+valid_tool_names,
+agent_state,
+tool_rules_solver,
+response.usage,
+reasoning_content=reasoning,
 step_id=step_id,
-
-
-
+initial_messages=initial_messages,
+agent_step_span=agent_step_span,
+is_final_step=(i == max_steps - 1),
+)
+step_progression = StepProgression.STEP_LOGGED
+
+# TODO (cliandy): handle message contexts with larger refactor and dedupe logic
+new_message_idx = len(initial_messages) if initial_messages else 0
+self.response_messages.extend(persisted_messages[new_message_idx:])
+new_in_context_messages.extend(persisted_messages[new_message_idx:])
+initial_messages = None
+log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
+
+# log step time
+now = get_utc_timestamp_ns()
+step_ns = now - step_start
+agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+agent_step_span.end()
+
+# Log LLM Trace
+await self.telemetry_manager.create_provider_trace_async(
+actor=self.actor,
+provider_trace_create=ProviderTraceCreate(
+request_json=request_data,
+response_json=response_data,
+step_id=step_id,
+organization_id=self.actor.organization_id,
+),
+)
+step_progression = StepProgression.LOGGED_TRACE

-
-
-
-
-
-
+# stream step
+# TODO: improve TTFT
+filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+letta_messages = Message.to_letta_messages_from_list(
+filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
+)

-
-
-
+for message in letta_messages:
+if include_return_message_types is None or message.message_type in include_return_message_types:
+yield f"data: {message.model_dump_json()}\n\n"

-
+MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+step_progression = StepProgression.FINISHED
+except Exception as e:
+# Handle any unexpected errors during step processing
+self.logger.error(f"Error during step processing: {e}")
+
+# This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+if not stop_reason:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
+raise ValueError(f"Invalid Stop Reason: {stop_reason}")
+
+# Send error stop reason to client and re-raise
+yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+raise
+
+# Update step if it needs to be updated
+finally:
+if settings.track_stop_reason:
+self.logger.info("Running final update. Step Progression: %s", step_progression)
+try:
+if step_progression < StepProgression.STEP_LOGGED:
+await self.step_manager.log_step_async(
+actor=self.actor,
+agent_id=agent_state.id,
+provider_name=agent_state.llm_config.model_endpoint_type,
+provider_category=agent_state.llm_config.provider_category or "base",
+model=agent_state.llm_config.model,
+model_endpoint=agent_state.llm_config.model_endpoint,
+context_window_limit=agent_state.llm_config.context_window,
+usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+provider_id=None,
+job_id=self.current_run_id if self.current_run_id else None,
+step_id=step_id,
+project_id=agent_state.project_id,
+stop_reason=stop_reason,
+)
+if step_progression <= StepProgression.RESPONSE_RECEIVED:
+# TODO (cliandy): persist response if we get it back
+if settings.track_errored_messages:
+for message in initial_messages:
+message.is_err = True
+message.step_id = step_id
+await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+elif step_progression <= StepProgression.LOGGED_TRACE:
+if stop_reason is None:
+self.logger.error("Error in step after logging step")
+stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+elif step_progression == StepProgression.FINISHED and not should_continue:
+if stop_reason is None:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+else:
+self.logger.error("Invalid StepProgression value")
+except Exception as e:
+self.logger.error("Failed to update step: %s", e)

 if not should_continue:
 break
@@ -397,17 +461,6 @@ class LettaAgent(BaseAgent):
 stop_reason = None
 usage = LettaUsageStatistics()
 for i in range(max_steps):
-# Check for job cancellation at the start of each step
-if await self._check_run_cancellation():
-stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
-logger.info(f"Agent execution cancelled for run {self.current_run_id}")
-break
-
-step_id = generate_step_id()
-step_start = get_utc_timestamp_ns()
-agent_step_span = tracer.start_span("agent_step", start_time=step_start)
-agent_step_span.set_attributes({"step_id": step_id})
-
 # If dry run, build request data and return it without making LLM call
 if dry_run:
 request_data, valid_tool_names = await self._create_llm_request_data_async(
@@ -418,84 +471,159 @@ class LettaAgent(BaseAgent):
 )
 return request_data

-
-
-
-)
-
-in_context_messages = current_in_context_messages + new_in_context_messages
-
-log_event("agent.step.llm_response.received") # [3^]
+# Check for job cancellation at the start of each step
+if await self._check_run_cancellation():
+stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+break

-
+step_id = generate_step_id()
+step_start = get_utc_timestamp_ns()
+agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+agent_step_span.set_attributes({"step_id": step_id})

-
-
-usage.prompt_tokens += response.usage.prompt_tokens
-usage.total_tokens += response.usage.total_tokens
-usage.run_ids = [run_id] if run_id else None
-MetricRegistry().message_output_tokens.record(
-response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
-)
+step_progression = StepProgression.START
+should_continue = False

-
-
-
-
-if response.choices[0].message.reasoning_content:
-reasoning = [
-ReasoningContent(
-reasoning=response.choices[0].message.reasoning_content,
-is_native=True,
-signature=response.choices[0].message.reasoning_content_signature,
+try:
+request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+await self._build_and_request_from_llm(
+current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver, agent_step_span
 )
-
-
-reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
-elif response.choices[0].message.omitted_reasoning_content:
-reasoning = [OmittedReasoningContent()]
-else:
-self.logger.info("No reasoning content found.")
-reasoning = None
-
-persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
-tool_call,
-valid_tool_names,
-agent_state,
-tool_rules_solver,
-response.usage,
-reasoning_content=reasoning,
-step_id=step_id,
-initial_messages=initial_messages,
-agent_step_span=agent_step_span,
-is_final_step=(i == max_steps - 1),
-run_id=run_id,
-)
-new_message_idx = len(initial_messages) if initial_messages else 0
-self.response_messages.extend(persisted_messages[new_message_idx:])
-new_in_context_messages.extend(persisted_messages[new_message_idx:])
+)
+in_context_messages = current_in_context_messages + new_in_context_messages

-
-
+step_progression = StepProgression.RESPONSE_RECEIVED
+log_event("agent.step.llm_response.received") # [3^]

-
-now = get_utc_timestamp_ns()
-step_ns = now - step_start
-agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
-agent_step_span.end()
+response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

-
-
-
-
-
-
+usage.step_count += 1
+usage.completion_tokens += response.usage.completion_tokens
+usage.prompt_tokens += response.usage.prompt_tokens
+usage.total_tokens += response.usage.total_tokens
+usage.run_ids = [run_id] if run_id else None
+MetricRegistry().message_output_tokens.record(
+response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+)
+
+if not response.choices[0].message.tool_calls:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+raise ValueError("No tool calls found in response, model must make a tool call")
+tool_call = response.choices[0].message.tool_calls[0]
+if response.choices[0].message.reasoning_content:
+reasoning = [
+ReasoningContent(
+reasoning=response.choices[0].message.reasoning_content,
+is_native=True,
+signature=response.choices[0].message.reasoning_content_signature,
+)
+]
+elif response.choices[0].message.content:
+reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+elif response.choices[0].message.omitted_reasoning_content:
+reasoning = [OmittedReasoningContent()]
+else:
+self.logger.info("No reasoning content found.")
+reasoning = None
+
+persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+tool_call,
+valid_tool_names,
+agent_state,
+tool_rules_solver,
+response.usage,
+reasoning_content=reasoning,
 step_id=step_id,
-
-
-
+initial_messages=initial_messages,
+agent_step_span=agent_step_span,
+is_final_step=(i == max_steps - 1),
+run_id=run_id,
+)
+step_progression = StepProgression.STEP_LOGGED
+
+new_message_idx = len(initial_messages) if initial_messages else 0
+self.response_messages.extend(persisted_messages[new_message_idx:])
+new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+initial_messages = None
+log_event("agent.step.llm_response.processed") # [4^]
+
+# log step time
+now = get_utc_timestamp_ns()
+step_ns = now - step_start
+agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+agent_step_span.end()
+
+# Log LLM Trace
+await self.telemetry_manager.create_provider_trace_async(
+actor=self.actor,
+provider_trace_create=ProviderTraceCreate(
+request_json=request_data,
+response_json=response_data,
+step_id=step_id,
+organization_id=self.actor.organization_id,
+),
+)

-
+step_progression = StepProgression.LOGGED_TRACE
+MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+step_progression = StepProgression.FINISHED
+
+except Exception as e:
+# Handle any unexpected errors during step processing
+self.logger.error(f"Error during step processing: {e}")
+
+# This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+if not stop_reason:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
+raise ValueError(f"Invalid Stop Reason: {stop_reason}")
+raise
+
+# Update step if it needs to be updated
+finally:
+if settings.track_stop_reason:
+self.logger.info("Running final update. Step Progression: %s", step_progression)
+try:
+if step_progression < StepProgression.STEP_LOGGED:
+await self.step_manager.log_step_async(
+actor=self.actor,
+agent_id=agent_state.id,
+provider_name=agent_state.llm_config.model_endpoint_type,
+provider_category=agent_state.llm_config.provider_category or "base",
+model=agent_state.llm_config.model,
+model_endpoint=agent_state.llm_config.model_endpoint,
+context_window_limit=agent_state.llm_config.context_window,
+usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+provider_id=None,
+job_id=self.current_run_id if self.current_run_id else None,
+step_id=step_id,
+project_id=agent_state.project_id,
+stop_reason=stop_reason,
+)
+if step_progression <= StepProgression.RESPONSE_RECEIVED:
+# TODO (cliandy): persist response if we get it back
+if settings.track_errored_messages:
+for message in initial_messages:
+message.is_err = True
+message.step_id = step_id
+await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+elif step_progression <= StepProgression.LOGGED_TRACE:
+if stop_reason is None:
+self.logger.error("Error in step after logging step")
+stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+elif step_progression == StepProgression.FINISHED and not should_continue:
+if stop_reason is None:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+else:
+self.logger.error("Invalid StepProgression value")
+except Exception as e:
+self.logger.error("Failed to update step: %s", e)

 if not should_continue:
 break
@@ -577,6 +705,7 @@ class LettaAgent(BaseAgent):
 request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

 for i in range(max_steps):
+step_id = generate_step_id()
 # Check for job cancellation at the start of each step
 if await self._check_run_cancellation():
 stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
@@ -584,163 +713,230 @@ class LettaAgent(BaseAgent):
 yield f"data: {stop_reason.model_dump_json()}\n\n"
 break

-step_id = generate_step_id()
 step_start = get_utc_timestamp_ns()
 agent_step_span = tracer.start_span("agent_step", start_time=step_start)
 agent_step_span.set_attributes({"step_id": step_id})

-
-
-stream,
-current_in_context_messages,
-new_in_context_messages,
-valid_tool_names,
-provider_request_start_timestamp_ns,
-) = await self._build_and_request_from_llm_streaming(
-first_chunk,
-agent_step_span,
-request_start_timestamp_ns,
-current_in_context_messages,
-new_in_context_messages,
-agent_state,
-llm_client,
-tool_rules_solver,
-)
-log_event("agent.stream.llm_response.received") # [3^]
-
-# TODO: THIS IS INCREDIBLY UGLY
-# TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
-if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
-interface = AnthropicStreamingInterface(
-use_assistant_message=use_assistant_message,
-put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
-)
-elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
-interface = OpenAIStreamingInterface(
-use_assistant_message=use_assistant_message,
-put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
-)
-else:
-raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
-
-async for chunk in interface.process(
-stream,
-ttft_span=request_span,
-provider_request_start_timestamp_ns=provider_request_start_timestamp_ns,
-):
-# Measure time to first token
-if first_chunk and request_span is not None:
-now = get_utc_timestamp_ns()
-ttft_ns = now - request_start_timestamp_ns
-request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
-metric_attributes = get_ctx_attributes()
-metric_attributes["model.name"] = agent_state.llm_config.model
-MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
-first_chunk = False
-
-if include_return_message_types is None or chunk.message_type in include_return_message_types:
-# filter down returned data
-yield f"data: {chunk.model_dump_json()}\n\n"
-
-stream_end_time_ns = get_utc_timestamp_ns()
-
-# update usage
-usage.step_count += 1
-usage.completion_tokens += interface.output_tokens
-usage.prompt_tokens += interface.input_tokens
-usage.total_tokens += interface.input_tokens + interface.output_tokens
-MetricRegistry().message_output_tokens.record(
-interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
-)
-
-# log LLM request time
-llm_request_ms = ns_to_ms(stream_end_time_ns - provider_request_start_timestamp_ns)
-agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
-MetricRegistry().llm_execution_time_ms_histogram.record(
-llm_request_ms,
-dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
-)
-
-# Process resulting stream content
+step_progression = StepProgression.START
+should_continue = False
 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-prompt_tokens=interface.input_tokens,
-total_tokens=interface.input_tokens + interface.output_tokens,
-),
-reasoning_content=reasoning_content,
-pre_computed_assistant_message_id=interface.letta_message_id,
-step_id=step_id,
-initial_messages=initial_messages,
-agent_step_span=agent_step_span,
-is_final_step=(i == max_steps - 1),
-)
-new_message_idx = len(initial_messages) if initial_messages else 0
-self.response_messages.extend(persisted_messages[new_message_idx:])
-new_in_context_messages.extend(persisted_messages[new_message_idx:])
+(
+request_data,
+stream,
+current_in_context_messages,
+new_in_context_messages,
+valid_tool_names,
+provider_request_start_timestamp_ns,
+) = await self._build_and_request_from_llm_streaming(
+first_chunk,
+agent_step_span,
+request_start_timestamp_ns,
+current_in_context_messages,
+new_in_context_messages,
+agent_state,
+llm_client,
+tool_rules_solver,
+)

-
+step_progression = StepProgression.STREAM_RECEIVED
+log_event("agent.stream.llm_response.received") # [3^]

-
-
-
-
-
+# TODO: THIS IS INCREDIBLY UGLY
+# TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
+if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
+interface = AnthropicStreamingInterface(
+use_assistant_message=use_assistant_message,
+put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+)
+elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
+interface = OpenAIStreamingInterface(
+use_assistant_message=use_assistant_message,
+put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+)
+else:
+raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
+
+async for chunk in interface.process(
+stream,
+ttft_span=request_span,
+provider_request_start_timestamp_ns=provider_request_start_timestamp_ns,
+):
+# Measure time to first token
+if first_chunk and request_span is not None:
+now = get_utc_timestamp_ns()
+ttft_ns = now - request_start_timestamp_ns
+request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
+metric_attributes = get_ctx_attributes()
+metric_attributes["model.name"] = agent_state.llm_config.model
+MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
+first_chunk = False
+
+if include_return_message_types is None or chunk.message_type in include_return_message_types:
+# filter down returned data
+yield f"data: {chunk.model_dump_json()}\n\n"
+
+stream_end_time_ns = get_utc_timestamp_ns()
+
+# update usage
+usage.step_count += 1
+usage.completion_tokens += interface.output_tokens
+usage.prompt_tokens += interface.input_tokens
+usage.total_tokens += interface.input_tokens + interface.output_tokens
+MetricRegistry().message_output_tokens.record(
+interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+)

-
-
+# log LLM request time
+llm_request_ms = ns_to_ms(stream_end_time_ns - provider_request_start_timestamp_ns)
+agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
+MetricRegistry().llm_execution_time_ms_histogram.record(
+llm_request_ms,
+dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# Process resulting stream content
+try:
+tool_call = interface.get_tool_call_object()
+except ValueError as e:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+raise e
+except Exception as e:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
+raise e
+reasoning_content = interface.get_reasoning_content()
+persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+tool_call,
+valid_tool_names,
+agent_state,
+tool_rules_solver,
+UsageStatistics(
+completion_tokens=interface.output_tokens,
+prompt_tokens=interface.input_tokens,
+total_tokens=interface.input_tokens + interface.output_tokens,
+),
+reasoning_content=reasoning_content,
+pre_computed_assistant_message_id=interface.letta_message_id,
 step_id=step_id,
-
-
-
+initial_messages=initial_messages,
+agent_step_span=agent_step_span,
+is_final_step=(i == max_steps - 1),
+)
+step_progression = StepProgression.STEP_LOGGED
+
+new_message_idx = len(initial_messages) if initial_messages else 0
+self.response_messages.extend(persisted_messages[new_message_idx:])
+new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+initial_messages = None
+
+# log total step time
+now = get_utc_timestamp_ns()
+step_ns = now - step_start
+agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+agent_step_span.end()
+
+# TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
+# log_event("agent.stream.llm_response.processed") # [4^]
+
+# Log LLM Trace
+# We are piecing together the streamed response here.
+# Content here does not match the actual response schema as streams come in chunks.
+await self.telemetry_manager.create_provider_trace_async(
+actor=self.actor,
+provider_trace_create=ProviderTraceCreate(
+request_json=request_data,
+response_json={
+"content": {
+"tool_call": tool_call.model_dump_json(),
+"reasoning": [content.model_dump_json() for content in reasoning_content],
+},
+"id": interface.message_id,
+"model": interface.model,
+"role": "assistant",
+# "stop_reason": "",
+# "stop_sequence": None,
+"type": "message",
+"usage": {
+"input_tokens": interface.input_tokens,
+"output_tokens": interface.output_tokens,
+},
+},
+step_id=step_id,
+organization_id=self.actor.organization_id,
+),
+)
+step_progression = StepProgression.LOGGED_TRACE

-
-
-
-
-
+# yields tool response as this is handled from Letta and not the response from the LLM provider
+tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+if not (use_assistant_message and tool_return.name == "send_message"):
+# Apply message type filtering if specified
+if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+yield f"data: {tool_return.model_dump_json()}\n\n"

-
-
+# TODO (cliandy): consolidate and expand with trace
+MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+step_progression = StepProgression.FINISHED
+
+except Exception as e:
+# Handle any unexpected errors during step processing
+self.logger.error(f"Error during step processing: {e}")
+
+# This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+if not stop_reason:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
+raise ValueError(f"Invalid Stop Reason: {stop_reason}")
+
+# Send error stop reason to client and re-raise with expected response code
+yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+raise
+
+# Update step if it needs to be updated
+finally:
+if settings.track_stop_reason:
+self.logger.info("Running final update. Step Progression: %s", step_progression)
+try:
+if step_progression < StepProgression.STEP_LOGGED:
+await self.step_manager.log_step_async(
+actor=self.actor,
+agent_id=agent_state.id,
+provider_name=agent_state.llm_config.model_endpoint_type,
+provider_category=agent_state.llm_config.provider_category or "base",
+model=agent_state.llm_config.model,
+model_endpoint=agent_state.llm_config.model_endpoint,
+context_window_limit=agent_state.llm_config.context_window,
+usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+provider_id=None,
+job_id=self.current_run_id if self.current_run_id else None,
+step_id=step_id,
+project_id=agent_state.project_id,
+stop_reason=stop_reason,
+)
+if step_progression <= StepProgression.STREAM_RECEIVED:
+if first_chunk and settings.track_errored_messages:
+for message in initial_messages:
+message.is_err = True
+message.step_id = step_id
+await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+elif step_progression <= StepProgression.LOGGED_TRACE:
+if stop_reason is None:
+self.logger.error("Error in step after logging step")
+stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+elif step_progression == StepProgression.FINISHED and not should_continue:
+if stop_reason is None:
+stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+else:
+self.logger.error("Invalid StepProgression value")
+except Exception as e:
+self.logger.error("Failed to update step: %s", e)

 if not should_continue:
 break
-
 # Extend the in context message ids
 if not agent_state.message_buffer_autoclear:
 await self._rebuild_context_window(
@@ -919,7 +1115,7 @@ class LettaAgent(BaseAgent):
 in_context_messages=in_context_messages,
 new_letta_messages=new_letta_messages,
 )
-await self.agent_manager.
+await self.agent_manager.update_message_ids_async(
 agent_id=self.agent_id,
 message_ids=[m.id for m in new_in_context_messages],
 actor=self.actor,
@@ -936,7 +1132,7 @@ class LettaAgent(BaseAgent):
 new_in_context_messages, updated = await self.summarizer.summarize(
 in_context_messages=in_context_messages, new_letta_messages=[], force=True
 )
-return await self.agent_manager.
+return await self.agent_manager.update_message_ids_async(
 agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
 )

@@ -948,18 +1144,17 @@ class LettaAgent(BaseAgent):
 agent_state: AgentState,
 tool_rules_solver: ToolRulesSolver,
 ) -> tuple[dict, list[str]]:
-self.num_messages
-(
-
-
-
-
-(
-
-
-
-
-)
+if not self.num_messages:
+self.num_messages = await self.message_manager.size_async(
+agent_id=agent_state.id,
+actor=self.actor,
+)
+if not self.num_archival_memories:
+self.num_archival_memories = await self.passage_manager.agent_passage_size_async(
+agent_id=agent_state.id,
+actor=self.actor,
+)
+
 in_context_messages = await self._rebuild_memory_async(
 in_context_messages,
 agent_state,
@@ -1108,6 +1303,7 @@ class LettaAgent(BaseAgent):
 job_id=run_id if run_id else self.current_run_id,
 step_id=step_id,
 project_id=agent_state.project_id,
+stop_reason=stop_reason,
 )

 tool_call_messages = create_letta_messages_from_llm_response(
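The recurring pattern in the hunks above is a step-lifecycle marker that the new except/finally blocks compare against to decide how much bookkeeping is still owed for a step. The sketch below is illustrative only: the real StepProgression lives in letta/schemas/step.py (also changed in this release but not shown here), so the member values and the reconcile_step helper are assumptions, not Letta's actual API.

# Hypothetical sketch of the lifecycle tracking used above; member names mirror the diff,
# but the ordering/values are assumed rather than taken from letta/schemas/step.py.
from enum import IntEnum


class StepProgression(IntEnum):
    START = 0
    STREAM_RECEIVED = 1    # streaming path
    RESPONSE_RECEIVED = 2  # non-streaming path (relative order of 1 and 2 is assumed)
    STEP_LOGGED = 3
    LOGGED_TRACE = 4
    FINISHED = 5


def reconcile_step(progression: StepProgression, should_continue: bool) -> str:
    """Condensed view of the `finally` logic above: pick the cleanup owed for a partially finished step."""
    if progression < StepProgression.STEP_LOGGED:
        # The step was never logged: record a placeholder step with zero usage and the stop reason.
        return "log a placeholder step carrying the stop reason"
    if progression <= StepProgression.LOGGED_TRACE:
        # The step (and possibly its trace) was logged but the step did not finish cleanly.
        return "update the already-logged step with an error stop reason"
    if progression == StepProgression.FINISHED and not should_continue:
        # Normal termination of the loop: persist the final stop reason.
        return "update the step with its final stop reason (end_turn by default)"
    return "nothing to do"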