letta-nightly 0.13.0.dev20251030104218__py3-none-any.whl → 0.13.1.dev20251031234110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +1 -1
- letta/adapters/simple_llm_stream_adapter.py +1 -0
- letta/agents/letta_agent_v2.py +8 -0
- letta/agents/letta_agent_v3.py +120 -27
- letta/agents/temporal/activities/__init__.py +25 -0
- letta/agents/temporal/activities/create_messages.py +26 -0
- letta/agents/temporal/activities/create_step.py +57 -0
- letta/agents/temporal/activities/example_activity.py +9 -0
- letta/agents/temporal/activities/execute_tool.py +130 -0
- letta/agents/temporal/activities/llm_request.py +114 -0
- letta/agents/temporal/activities/prepare_messages.py +27 -0
- letta/agents/temporal/activities/refresh_context.py +160 -0
- letta/agents/temporal/activities/summarize_conversation_history.py +77 -0
- letta/agents/temporal/activities/update_message_ids.py +25 -0
- letta/agents/temporal/activities/update_run.py +43 -0
- letta/agents/temporal/constants.py +59 -0
- letta/agents/temporal/temporal_agent_workflow.py +704 -0
- letta/agents/temporal/types.py +275 -0
- letta/constants.py +8 -0
- letta/errors.py +4 -0
- letta/functions/function_sets/base.py +0 -11
- letta/groups/helpers.py +7 -1
- letta/groups/sleeptime_multi_agent_v4.py +4 -3
- letta/interfaces/anthropic_streaming_interface.py +0 -1
- letta/interfaces/openai_streaming_interface.py +103 -100
- letta/llm_api/anthropic_client.py +57 -12
- letta/llm_api/bedrock_client.py +1 -0
- letta/llm_api/deepseek_client.py +3 -2
- letta/llm_api/google_vertex_client.py +1 -0
- letta/llm_api/groq_client.py +1 -0
- letta/llm_api/llm_client_base.py +15 -1
- letta/llm_api/openai.py +2 -2
- letta/llm_api/openai_client.py +17 -3
- letta/llm_api/xai_client.py +1 -0
- letta/orm/organization.py +4 -0
- letta/orm/sqlalchemy_base.py +7 -0
- letta/otel/tracing.py +131 -4
- letta/schemas/agent_file.py +10 -10
- letta/schemas/block.py +22 -3
- letta/schemas/enums.py +21 -0
- letta/schemas/environment_variables.py +3 -2
- letta/schemas/group.py +3 -3
- letta/schemas/letta_response.py +36 -4
- letta/schemas/llm_batch_job.py +3 -3
- letta/schemas/llm_config.py +27 -3
- letta/schemas/mcp.py +3 -2
- letta/schemas/mcp_server.py +3 -2
- letta/schemas/message.py +167 -49
- letta/schemas/organization.py +2 -1
- letta/schemas/passage.py +2 -1
- letta/schemas/provider_trace.py +2 -1
- letta/schemas/providers/openrouter.py +1 -2
- letta/schemas/run_metrics.py +2 -1
- letta/schemas/sandbox_config.py +3 -1
- letta/schemas/step_metrics.py +2 -1
- letta/schemas/tool_rule.py +2 -2
- letta/schemas/user.py +2 -1
- letta/server/rest_api/app.py +5 -1
- letta/server/rest_api/routers/v1/__init__.py +4 -0
- letta/server/rest_api/routers/v1/agents.py +71 -9
- letta/server/rest_api/routers/v1/blocks.py +7 -7
- letta/server/rest_api/routers/v1/groups.py +40 -0
- letta/server/rest_api/routers/v1/identities.py +2 -2
- letta/server/rest_api/routers/v1/internal_agents.py +31 -0
- letta/server/rest_api/routers/v1/internal_blocks.py +177 -0
- letta/server/rest_api/routers/v1/internal_runs.py +25 -1
- letta/server/rest_api/routers/v1/runs.py +2 -22
- letta/server/rest_api/routers/v1/tools.py +10 -0
- letta/server/server.py +5 -2
- letta/services/agent_manager.py +4 -4
- letta/services/archive_manager.py +16 -0
- letta/services/group_manager.py +44 -0
- letta/services/helpers/run_manager_helper.py +2 -2
- letta/services/lettuce/lettuce_client.py +148 -0
- letta/services/mcp/base_client.py +9 -3
- letta/services/run_manager.py +148 -37
- letta/services/source_manager.py +91 -3
- letta/services/step_manager.py +2 -3
- letta/services/streaming_service.py +52 -13
- letta/services/summarizer/summarizer.py +28 -2
- letta/services/tool_executor/builtin_tool_executor.py +1 -1
- letta/services/tool_executor/core_tool_executor.py +2 -117
- letta/services/tool_schema_generator.py +2 -2
- letta/validators.py +21 -0
- {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/METADATA +1 -1
- {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/RECORD +89 -84
- letta/agent.py +0 -1758
- letta/cli/cli_load.py +0 -16
- letta/client/__init__.py +0 -0
- letta/client/streaming.py +0 -95
- letta/client/utils.py +0 -78
- letta/functions/async_composio_toolset.py +0 -109
- letta/functions/composio_helpers.py +0 -96
- letta/helpers/composio_helpers.py +0 -38
- letta/orm/job_messages.py +0 -33
- letta/schemas/providers.py +0 -1617
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -132
- letta/services/tool_executor/composio_tool_executor.py +0 -57
- {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/WHEEL +0 -0
- {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from temporalio import activity
|
|
2
|
+
from temporalio.exceptions import ApplicationError
|
|
3
|
+
|
|
4
|
+
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
|
5
|
+
from letta.agents.temporal.types import LLMCallResult, LLMRequestParams
|
|
6
|
+
from letta.errors import (
|
|
7
|
+
ContextWindowExceededError,
|
|
8
|
+
LLMAuthenticationError,
|
|
9
|
+
LLMBadRequestError,
|
|
10
|
+
LLMConnectionError,
|
|
11
|
+
LLMError,
|
|
12
|
+
LLMJSONParsingError,
|
|
13
|
+
LLMNotFoundError,
|
|
14
|
+
LLMPermissionDeniedError,
|
|
15
|
+
LLMRateLimitError,
|
|
16
|
+
LLMServerError,
|
|
17
|
+
LLMTimeoutError,
|
|
18
|
+
LLMUnprocessableEntityError,
|
|
19
|
+
)
|
|
20
|
+
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
|
|
21
|
+
from letta.llm_api.llm_client import LLMClient
|
|
22
|
+
from letta.schemas.openai.chat_completion_response import UsageStatistics
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@activity.defn(name="llm_request")
async def llm_request(params: LLMRequestParams) -> LLMCallResult:
    """Build and execute a non-streaming LLM request and return the parsed result.

    Exceptions raised while invoking the provider are not propagated raw: each
    one is re-raised as a Temporal ``ApplicationError`` whose ``type`` is the
    name of the original exception class and whose ``non_retryable`` flag
    states whether the activity may be retried. The original exception is
    chained as the cause so it is not lost when the failure is reported.

    Note that the client/adapter construction and ``build_request_data`` run
    before the ``try`` block, so errors raised there are not wrapped.

    Args:
        params: Agent state, messages, allowed tools and step metadata for
            this call.

    Returns:
        The tool call, reasoning content, assistant message id, usage
        statistics and timing information captured by the adapter.

    Raises:
        ApplicationError: Non-retryable for ``ValueError``,
            ``LLMJSONParsingError``, ``ContextWindowExceededError``, every
            ``LLMError`` that is not a connection / rate-limit / server /
            timeout error, and any unexpected exception. Retryable for those
            four transient ``LLMError`` subtypes and for ``TimeoutError``.
    """
    agent_state = params.agent_state
    llm_config = agent_state.llm_config

    llm_client = LLMClient.create(
        provider_type=llm_config.model_endpoint_type,
        put_inner_thoughts_first=True,
        actor=params.actor,
    )
    llm_adapter = LettaLLMRequestAdapter(llm_client=llm_client, llm_config=llm_config)

    request_data = llm_client.build_request_data(
        agent_type=agent_state.agent_type,
        messages=params.messages,
        llm_config=llm_config,
        tools=params.allowed_tools,
        force_tool_call=params.force_tool_call,
    )

    try:
        # Track LLM request timing
        llm_request_start_ns = get_utc_timestamp_ns()

        # execute the llm request
        invocation = llm_adapter.invoke_llm(
            request_data=request_data,
            messages=params.messages,
            tools=params.allowed_tools,
            use_assistant_message=params.use_assistant_message,
            requires_approval_tools=params.requires_approval_tools or [],
            step_id=params.step_id,
            actor=params.actor,
        )

        # Drain the async generator (non-streaming mode yields once with None);
        # the results are read off the adapter afterwards.
        async for _ in invocation:
            pass

        # Calculate LLM request duration
        llm_request_end_ns = get_utc_timestamp_ns()
        llm_request_ns = llm_request_end_ns - llm_request_start_ns

        # extract results from the adapter after invocation completes
        usage = llm_adapter.chat_completions_response.usage if llm_adapter.chat_completions_response else UsageStatistics()
        return LLMCallResult(
            tool_call=llm_adapter.tool_call,
            reasoning_content=llm_adapter.reasoning_content,
            assistant_message_id=llm_adapter.message_id,
            usage=usage,
            request_finish_ns=llm_adapter.llm_request_finish_timestamp_ns,
            llm_request_start_ns=llm_request_start_ns,
            llm_request_ns=llm_request_ns,
        )
    except (ValueError, LLMJSONParsingError) as e:
        # Invalid or unparseable LLM response — non-retryable at activity layer
        raise ApplicationError(str(e), type=type(e).__name__, non_retryable=True) from e
    except ContextWindowExceededError as e:
        # Context window overflow — non-retryable at activity layer; handled by workflow summarization
        raise ApplicationError(str(e), type="ContextWindowExceededError", non_retryable=True) from e
    except LLMError as e:
        retryable_subtypes = (
            LLMConnectionError,
            LLMRateLimitError,
            LLMServerError,
            LLMTimeoutError,
        )
        non_retryable_subtypes = (
            LLMBadRequestError,
            LLMAuthenticationError,
            LLMPermissionDeniedError,
            LLMNotFoundError,
            LLMUnprocessableEntityError,
        )
        if isinstance(e, retryable_subtypes):
            non_retryable = False
        elif isinstance(e, non_retryable_subtypes):
            non_retryable = True
        else:
            # Default conservatively: do not retry unknown LLMError types
            non_retryable = True
        raise ApplicationError(str(e), type=type(e).__name__, non_retryable=non_retryable) from e
    except TimeoutError as e:
        # Plain timeouts are treated as transient and left retryable
        raise ApplicationError(str(e), type=type(e).__name__, non_retryable=False) from e
    except Exception as e:
        # Any unexpected error — do not retry at activity layer
        raise ApplicationError(str(e), type=type(e).__name__, non_retryable=True) from e
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from temporalio import activity
|
|
2
|
+
|
|
3
|
+
from letta.agents.helpers import _prepare_in_context_messages_no_persist_async
|
|
4
|
+
from letta.agents.temporal.types import PreparedMessages, WorkflowInputParams
|
|
5
|
+
from letta.services.message_manager import MessageManager
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@activity.defn(name="prepare_messages")
async def prepare_messages(input_: WorkflowInputParams) -> PreparedMessages:
    """Build the in-context and to-be-persisted input messages without saving them.

    Delegates to `_prepare_in_context_messages_no_persist_async` (the helper
    the v2 agent uses), wrapped as a Temporal activity so the workflow itself
    stays deterministic.

    Args:
        input_: Workflow input carrying the messages, agent state, actor and
            run id.

    Returns:
        The in-context messages together with the new input messages that
        still need to be persisted.
    """
    in_context, to_persist = await _prepare_in_context_messages_no_persist_async(
        input_messages=input_.messages,
        agent_state=input_.agent_state,
        message_manager=MessageManager(),
        actor=input_.actor,
        run_id=input_.run_id,
    )
    return PreparedMessages(in_context_messages=in_context, input_messages_to_persist=to_persist)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from temporalio import activity
|
|
4
|
+
|
|
5
|
+
from letta.agents.temporal.types import RefreshContextParams, RefreshContextResult
|
|
6
|
+
from letta.helpers import ToolRulesSolver
|
|
7
|
+
from letta.helpers.datetime_helpers import get_utc_time
|
|
8
|
+
from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages
|
|
9
|
+
from letta.prompts.prompt_generator import PromptGenerator
|
|
10
|
+
from letta.schemas.agent import AgentState
|
|
11
|
+
from letta.schemas.message import Message, MessageUpdate
|
|
12
|
+
from letta.schemas.user import User
|
|
13
|
+
from letta.services.agent_manager import AgentManager
|
|
14
|
+
from letta.services.archive_manager import ArchiveManager
|
|
15
|
+
from letta.services.message_manager import MessageManager
|
|
16
|
+
from letta.services.passage_manager import PassageManager
|
|
17
|
+
from letta.utils import united_diff
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _extract_dynamic_section(text: str) -> str:
|
|
21
|
+
start_marker = "</base_instructions>"
|
|
22
|
+
end_marker = "<memory_metadata>"
|
|
23
|
+
start_idx = text.find(start_marker)
|
|
24
|
+
end_idx = text.find(end_marker)
|
|
25
|
+
if start_idx != -1 and end_idx != -1:
|
|
26
|
+
return text[start_marker.__len__() + start_idx : end_idx]
|
|
27
|
+
return text
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
async def _rebuild_memory(
    agent_state: AgentState,
    in_context_messages: list[Message],
    num_messages: int,
    num_archival_memories: int,
    actor: User,
    message_manager: MessageManager,
    passage_manager: PassageManager,
    agent_manager: AgentManager,
    archive_manager: ArchiveManager,
    tool_rules_solver: ToolRulesSolver,
) -> tuple[list[Message], AgentState]:
    """Refresh the agent's memory and rewrite the system message if it changed.

    The first in-context message is treated as the system message. Its dynamic
    section is compared with the freshly compiled memory; only when they
    differ is a new system message string generated and, if that string
    differs from the stored text, written back through ``message_manager``.

    Args:
        agent_state: Agent state to refresh.
        in_context_messages: Current in-context messages; index 0 is read as
            the system message.
        num_messages: Total message count; looked up when ``None``.
        num_archival_memories: Archival passage count; looked up when ``None``.
        actor: User on whose behalf the managers are called.
        message_manager: Used for counting and for updating the system message.
        passage_manager: Used for archive tags and the passage count.
        agent_manager: Used to refresh the agent's memory.
        archive_manager: Used to find the agent's default archive.
        tool_rules_solver: Source of the tool-rule prompt block; may be ``None``.

    Returns:
        The (possibly updated) in-context messages and the refreshed agent
        state.
    """
    agent_state = await agent_manager.refresh_memory_async(agent_state=agent_state, actor=actor)

    tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts() if tool_rules_solver is not None else None

    archive = await archive_manager.get_default_archive_for_agent_async(agent_id=agent_state.id, actor=actor)
    archive_tags = None
    if archive:
        archive_tags = await passage_manager.get_unique_tags_for_archive_async(archive_id=archive.id, actor=actor)

    # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
    system_message = in_context_messages[0]
    system_text = system_message.content[0].text

    def _between_markers(text):
        # Slice out the dynamic section (memory blocks, tool rules, directories)
        # so the comparison below is not thrown off by timestamps. The slice
        # starts AT the start marker; the full text is the fallback when either
        # marker is missing.
        start_idx = text.find("</base_instructions>")
        end_idx = text.find("<memory_metadata>")
        if start_idx == -1 or end_idx == -1:
            return text
        return text[start_idx:end_idx]

    stored_section = _between_markers(system_text)

    # Compile just the memory string from the current state for comparison.
    compiled_memory = agent_state.memory.compile(
        tool_usage_rules=tool_constraint_block,
        sources=agent_state.sources,
        max_files_open=agent_state.max_files_open,
        llm_config=agent_state.llm_config,
    )

    # Nothing in the dynamic section changed: keep the messages as they are.
    if stored_section == _between_markers(compiled_memory):
        return in_context_messages, agent_state

    memory_edit_timestamp = get_utc_time()

    # Fill in the message / archival memory counts when the caller did not.
    if num_messages is None:
        num_messages = await message_manager.size_async(actor=actor, agent_id=agent_state.id)
    if num_archival_memories is None:
        num_archival_memories = await passage_manager.agent_passage_size_async(actor=actor, agent_id=agent_state.id)

    new_system_text = PromptGenerator.get_system_message_from_compiled_memory(
        system_prompt=agent_state.system,
        memory_with_sources=compiled_memory,
        in_context_memory_last_edit=memory_edit_timestamp,
        timezone=agent_state.timezone,
        previous_message_count=num_messages - len(in_context_messages),
        archival_memory_size=num_archival_memories,
        archive_tags=archive_tags,
    )

    if len(united_diff(system_text, new_system_text)) == 0:
        return in_context_messages, agent_state

    # [DB Call] Update Messages
    # NOTE: this is the only write in the activity; the original author
    # considered rewriting the system message acceptable ("idempotent afaik").
    new_system_message = await message_manager.update_message_by_id_async(
        system_message.id, message_update=MessageUpdate(content=new_system_text), actor=actor
    )
    return [new_system_message] + in_context_messages[1:], agent_state
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@activity.defn(name="refresh_context_and_system_message")
async def refresh_context_and_system_message(params: RefreshContextParams) -> RefreshContextResult:
    """Mirror LettaAgentV2._refresh_messages + _rebuild_memory as an activity.

    Looks up the agent's message and archival passage counts, rebuilds the
    memory / system message via ``_rebuild_memory``, then scrubs inner
    thoughts from the resulting messages.

    Args:
        params: Agent state, in-context messages, tool rules solver and actor.

    Returns:
        The refreshed in-context messages and agent state.
    """
    agent_state = params.agent_state
    # Work on a copy of the incoming message sequence.
    in_context_messages = list(params.in_context_messages)
    tool_rules_solver = params.tool_rules_solver
    actor = params.actor

    message_manager = MessageManager()
    passage_manager = PassageManager()
    agent_manager = AgentManager()
    archive_manager = ArchiveManager()

    num_messages = await message_manager.size_async(
        agent_id=agent_state.id,
        actor=actor,
    )
    num_archival_memories = await passage_manager.agent_passage_size_async(
        agent_id=agent_state.id,
        actor=actor,
    )
    in_context_messages, agent_state = await _rebuild_memory(
        agent_state=agent_state,
        in_context_messages=in_context_messages,
        num_messages=num_messages,
        num_archival_memories=num_archival_memories,
        actor=actor,
        message_manager=message_manager,
        passage_manager=passage_manager,
        agent_manager=agent_manager,
        archive_manager=archive_manager,
        tool_rules_solver=tool_rules_solver,
    )

    in_context_messages = scrub_inner_thoughts_from_messages(in_context_messages, agent_state.llm_config)
    return RefreshContextResult(messages=in_context_messages, agent_state=agent_state)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from temporalio import activity
|
|
4
|
+
|
|
5
|
+
from letta.agents.temporal.types import SummarizeParams
|
|
6
|
+
from letta.schemas.agent import AgentType
|
|
7
|
+
from letta.schemas.message import Message
|
|
8
|
+
from letta.services.agent_manager import AgentManager
|
|
9
|
+
from letta.services.message_manager import MessageManager
|
|
10
|
+
from letta.services.summarizer.enums import SummarizationMode
|
|
11
|
+
from letta.services.summarizer.summarizer import Summarizer
|
|
12
|
+
from letta.settings import summarizer_settings
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@activity.defn(name="summarize_conversation_history")
async def summarize_conversation_history(params: SummarizeParams) -> List[Message]:
    """Summarize/evict history and store the resulting in-context message ids.

    Modeled on LettaAgentV2.summarize_conversation_history:
    - When ``params.force`` is set, ``Summarizer.summarize`` is called with
      ``force=True, clear=True``; otherwise it is called without those flags.
    - The ids of the returned messages are written to the agent through
      ``AgentManager.update_message_ids_async``.
    - The new in-context messages are returned.

    Notes:
    - This performs DB updates and should remain an activity for determinism.
    - The Summarizer is created inside the activity from ``summarizer_settings``
      and the agent state; no summarization agent is attached.
    """
    agent_state = params.agent_state

    # instantiate managers
    agent_manager = AgentManager()
    message_manager = MessageManager()

    # Voice conversation agents always use the static message buffer; every
    # other agent type follows the configured mode.
    if agent_state.agent_type == AgentType.voice_convo_agent:
        mode = SummarizationMode.STATIC_MESSAGE_BUFFER
    else:
        mode = summarizer_settings.mode

    summarizer = Summarizer(
        mode=mode,
        summarizer_agent=None,  # temporal doesn't use summarization agents yet
        message_buffer_limit=summarizer_settings.message_buffer_limit,
        message_buffer_min=summarizer_settings.message_buffer_min,
        partial_evict_summarizer_percentage=summarizer_settings.partial_evict_summarizer_percentage,
        agent_manager=agent_manager,
        message_manager=message_manager,
        actor=params.actor,
        agent_id=agent_state.id,
    )

    # The forced path passes force/clear explicitly; the regular path leaves
    # both at the summarizer's own defaults.
    force_kwargs = {"force": True, "clear": True} if params.force else {}
    new_in_context_messages, _ = await summarizer.summarize(
        in_context_messages=params.in_context_messages,
        new_letta_messages=params.new_letta_messages,
        **force_kwargs,
    )

    # update agent message ids in database
    await agent_manager.update_message_ids_async(
        agent_id=agent_state.id,
        message_ids=[message.id for message in new_in_context_messages],
        actor=params.actor,
    )

    return new_in_context_messages
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from temporalio import activity
|
|
2
|
+
|
|
3
|
+
from letta.agents.temporal.types import UpdateMessageIdsParams, UpdateMessageIdsResult
|
|
4
|
+
from letta.services.agent_manager import AgentManager
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@activity.defn(name="update_message_ids")
async def update_message_ids(params: UpdateMessageIdsParams) -> UpdateMessageIdsResult:
    """Store the agent's message ids, then return the re-fetched agent state.

    Args:
        params: Agent id, the message ids to store, and the acting user.

    Returns:
        A result with ``success=True``, the agent state fetched after the
        update, and an empty ``persisted_messages`` list.
    """
    manager = AgentManager()
    agent_id, actor = params.agent_id, params.actor

    # Write the new ids first, then read the agent back so the returned state
    # comes from after the update.
    await manager.update_message_ids_async(agent_id=agent_id, message_ids=params.message_ids, actor=actor)
    refreshed_state = await manager.get_agent_async(agent_id=agent_id, actor=actor)

    return UpdateMessageIdsResult(success=True, agent_state=refreshed_state, persisted_messages=[])
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from temporalio import activity
|
|
2
|
+
|
|
3
|
+
from letta.agents.temporal.types import UpdateRunParams
|
|
4
|
+
from letta.schemas.letta_response import LettaResponse
|
|
5
|
+
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
|
6
|
+
from letta.schemas.message import Message
|
|
7
|
+
from letta.schemas.run import RunUpdate
|
|
8
|
+
from letta.services.run_manager import RunManager
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@activity.defn(name="update_run")
async def update_run(params: UpdateRunParams) -> None:
    """Update a run's status, stop reason, duration and stored result.

    The persisted messages are converted to Letta messages and stored, as
    part of a ``LettaResponse``, under the ``"result"`` key of the run's
    metadata. When no stop reason is supplied, ``end_turn`` is used.

    Args:
        params: Run id, actor, run status, optional stop reason, usage,
            persisted messages and total duration.
    """
    run_manager = RunManager()

    # Resolve the default into a local instead of writing it back onto the
    # caller-supplied params object.
    stop_reason = params.stop_reason
    if stop_reason is None:
        stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)

    messages = Message.to_letta_messages_from_list(params.persisted_messages, use_assistant_message=True, reverse=False)
    result = LettaResponse(messages=messages, stop_reason=stop_reason, usage=params.usage)

    # Update run status
    update = RunUpdate(
        status=params.run_status,
        stop_reason=stop_reason.stop_reason,
        metadata_={"result": result.model_dump(mode="json")},
        total_duration_ns=params.total_duration_ns,
    )

    await run_manager.update_run_by_id_async(
        run_id=params.run_id,
        update=update,
        actor=params.actor,
        refresh_result_messages=False,  # TODO: Temporary field while there's discrepancies between Temporal / rest of message invocation endpoints
    )

    # TODO: fix idempotency thoroughly so that jobs can be re-run safely.
    # NOTE: messages are not attached to the run here. Per the original
    # author, RunManager has no add_messages method and messages are typically
    # associated with the run through steps; this may need different handling.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from datetime import timedelta
|
|
2
|
+
|
|
3
|
+
from temporalio.common import RetryPolicy
|
|
4
|
+
|
|
5
|
+
# Timeouts and retry policy for the Temporal agent activities. Each activity
# has a START_TO_CLOSE and a SCHEDULE_TO_CLOSE timeout.

# prepare_messages: reads context and builds the input messages
PREPARE_MESSAGES_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(seconds=30)
PREPARE_MESSAGES_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=2)

# refresh_context_and_system_message: rebuilds memory / system prompt, scrubs
REFRESH_CONTEXT_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=3)
REFRESH_CONTEXT_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=5)

# llm_request: the provider call; can be retried with summarization
LLM_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=5)
LLM_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=30)

# Temporal-native retry policy for LLM activity calls:
# - exponential backoff from 1s, doubling, capped at 30s, at most 5 attempts
# - no auto-retry on context window issues (handled in workflow via summarization)
# - no auto-retry on invalid/unsuccessful response parsing
# - no auto-retry on the listed non-retryable LLM API errors
LLM_ACTIVITY_RETRY_POLICY = RetryPolicy(
    initial_interval=timedelta(seconds=1),
    backoff_coefficient=2.0,
    maximum_interval=timedelta(seconds=30),
    maximum_attempts=5,
    non_retryable_error_types=[
        # Handled explicitly in workflow to alter inputs then re-call
        "ContextWindowExceededError",
        # Treat parsing/invalid response as non-retryable at activity layer
        "ValueError",
        "LLMJSONParsingError",
        # Non-retryable LLM API errors
        "LLMBadRequestError",
        "LLMAuthenticationError",
        "LLMPermissionDeniedError",
        "LLMNotFoundError",
        "LLMUnprocessableEntityError",
    ],
)

# summarize_conversation_history: evicts history, updates message IDs
SUMMARIZE_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=5)
SUMMARIZE_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=10)

# tool execution: used later during _handle_ai_response
TOOL_EXECUTION_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=10)
TOOL_EXECUTION_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=30)

# create_step: saves step to agent state
CREATE_STEP_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=1)
CREATE_STEP_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=30)

# create_messages: saves messages to agent state
CREATE_MESSAGES_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=1)
CREATE_MESSAGES_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=30)

# update run metadata: saves status to run
UPDATE_RUN_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=1)
UPDATE_RUN_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=30)
|