letta-nightly 0.7.20.dev20250520104253__py3-none-any.whl → 0.7.21.dev20250521233415__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- letta/__init__.py +1 -1
- letta/agent.py +290 -3
- letta/agents/base_agent.py +0 -55
- letta/agents/helpers.py +5 -0
- letta/agents/letta_agent.py +314 -64
- letta/agents/letta_agent_batch.py +102 -55
- letta/agents/voice_agent.py +5 -5
- letta/client/client.py +9 -18
- letta/constants.py +55 -1
- letta/functions/function_sets/builtin.py +27 -0
- letta/functions/mcp_client/stdio_client.py +1 -1
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +10 -1
- letta/interfaces/openai_streaming_interface.py +9 -2
- letta/llm_api/anthropic.py +21 -2
- letta/llm_api/anthropic_client.py +33 -6
- letta/llm_api/google_ai_client.py +136 -423
- letta/llm_api/google_vertex_client.py +173 -22
- letta/llm_api/llm_api_tools.py +27 -0
- letta/llm_api/llm_client.py +1 -1
- letta/llm_api/llm_client_base.py +32 -21
- letta/llm_api/openai.py +57 -0
- letta/llm_api/openai_client.py +7 -11
- letta/memory.py +0 -1
- letta/orm/__init__.py +1 -0
- letta/orm/enums.py +1 -0
- letta/orm/provider_trace.py +26 -0
- letta/orm/step.py +1 -0
- letta/schemas/provider_trace.py +43 -0
- letta/schemas/providers.py +210 -65
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +37 -19
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +57 -34
- letta/server/rest_api/routers/v1/blocks.py +3 -3
- letta/server/rest_api/routers/v1/identities.py +24 -26
- letta/server/rest_api/routers/v1/jobs.py +3 -3
- letta/server/rest_api/routers/v1/llms.py +13 -8
- letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
- letta/server/rest_api/routers/v1/tags.py +3 -3
- letta/server/rest_api/routers/v1/telemetry.py +18 -0
- letta/server/rest_api/routers/v1/tools.py +6 -6
- letta/server/rest_api/streaming_response.py +105 -0
- letta/server/rest_api/utils.py +4 -0
- letta/server/server.py +140 -0
- letta/services/agent_manager.py +251 -18
- letta/services/block_manager.py +52 -37
- letta/services/helpers/noop_helper.py +10 -0
- letta/services/identity_manager.py +43 -38
- letta/services/job_manager.py +29 -0
- letta/services/message_manager.py +111 -0
- letta/services/sandbox_config_manager.py +36 -0
- letta/services/step_manager.py +146 -0
- letta/services/telemetry_manager.py +58 -0
- letta/services/tool_executor/tool_execution_manager.py +49 -5
- letta/services/tool_executor/tool_execution_sandbox.py +47 -0
- letta/services/tool_executor/tool_executor.py +236 -7
- letta/services/tool_manager.py +160 -1
- letta/services/tool_sandbox/e2b_sandbox.py +65 -3
- letta/settings.py +10 -2
- letta/tracing.py +5 -5
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/METADATA +3 -2
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/RECORD +67 -60
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -1,4 +1,6 @@
+import asyncio
 import json
+import os
 import time
 import traceback
 import warnings
@@ -7,6 +9,7 @@ from typing import Dict, List, Optional, Tuple, Union
 
 from openai.types.beta.function_tool import FunctionTool as OpenAITool
 
+from letta.agents.helpers import generate_step_id
 from letta.constants import (
     CLI_WARNING_PREFIX,
     COMPOSIO_ENTITY_ENV_VAR_KEY,
@@ -16,6 +19,7 @@ from letta.constants import (
     LETTA_CORE_TOOL_MODULE_NAME,
     LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
     LLM_MAX_TOKENS,
+    READ_ONLY_BLOCK_EDIT_ERROR,
     REQ_HEARTBEAT_MESSAGE,
     SEND_MESSAGE_TOOL_NAME,
 )
@@ -41,7 +45,7 @@ from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
 from letta.schemas.block import BlockUpdate
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import MessageRole
+from letta.schemas.enums import MessageRole, ProviderType
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message, MessageCreate, ToolReturn
@@ -61,9 +65,10 @@ from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.provider_manager import ProviderManager
 from letta.services.step_manager import StepManager
+from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
 from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSandbox
 from letta.services.tool_manager import ToolManager
-from letta.settings import summarizer_settings
+from letta.settings import settings, summarizer_settings
 from letta.streaming_interface import StreamingRefreshCLIInterface
 from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message
 from letta.tracing import log_event, trace_method
@@ -141,6 +146,7 @@ class Agent(BaseAgent):
         self.agent_manager = AgentManager()
         self.job_manager = JobManager()
         self.step_manager = StepManager()
+        self.telemetry_manager = TelemetryManager() if settings.llm_api_logging else NoopTelemetryManager()
 
         # State needed for heartbeat pausing
 
@@ -298,6 +304,7 @@ class Agent(BaseAgent):
         step_count: Optional[int] = None,
         last_function_failed: bool = False,
         put_inner_thoughts_first: bool = True,
+        step_id: Optional[str] = None,
     ) -> ChatCompletionResponse | None:
         """Get response from LLM API with robust retry mechanism."""
         log_telemetry(self.logger, "_get_ai_reply start")
@@ -347,8 +354,9 @@
                         messages=message_sequence,
                         llm_config=self.agent_state.llm_config,
                         tools=allowed_functions,
-                        stream=stream,
                         force_tool_call=force_tool_call,
+                        telemetry_manager=self.telemetry_manager,
+                        step_id=step_id,
                     )
                 else:
                     # Fallback to existing flow
@@ -365,6 +373,9 @@
                         stream_interface=self.interface,
                         put_inner_thoughts_first=put_inner_thoughts_first,
                         name=self.agent_state.name,
+                        telemetry_manager=self.telemetry_manager,
+                        step_id=step_id,
+                        actor=self.user,
                     )
                 log_telemetry(self.logger, "_get_ai_reply create finish")
 
@@ -840,6 +851,9 @@
             # Extract job_id from metadata if present
             job_id = metadata.get("job_id") if metadata else None
 
+            # Declare step_id for the given step to be used as the step is processing.
+            step_id = generate_step_id()
+
             # Step 0: update core memory
             # only pulling latest block data if shared memory is being used
             current_persisted_memory = Memory(
@@ -870,6 +884,7 @@
                 step_count=step_count,
                 last_function_failed=last_function_failed,
                 put_inner_thoughts_first=put_inner_thoughts_first,
+                step_id=step_id,
             )
             if not response:
                 # EDGE CASE: Function call failed AND there's no tools left for agent to call -> return early
@@ -944,6 +959,7 @@
                 actor=self.user,
                 agent_id=self.agent_state.id,
                 provider_name=self.agent_state.llm_config.model_endpoint_type,
+                provider_category=self.agent_state.llm_config.provider_category or "base",
                 model=self.agent_state.llm_config.model,
                 model_endpoint=self.agent_state.llm_config.model_endpoint,
                 context_window_limit=self.agent_state.llm_config.context_window,
@@ -953,6 +969,7 @@
                     actor=self.user,
                 ),
                 job_id=job_id,
+                step_id=step_id,
             )
             for message in all_new_messages:
                 message.step_id = step.id
@@ -1255,6 +1272,276 @@
             functions_definitions=available_functions_definitions,
         )
 
+    async def get_context_window_async(self) -> ContextWindowOverview:
+        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION":
+            return await self.get_context_window_from_anthropic_async()
+        return await self.get_context_window_from_tiktoken_async()
+
+    async def get_context_window_from_tiktoken_async(self) -> ContextWindowOverview:
+        """Get the context window of the agent"""
+        # Grab the in-context messages
+        # conversion of messages to OpenAI dict format, which is passed to the token counter
+        (in_context_messages, passage_manager_size, message_manager_size) = await asyncio.gather(
+            self.agent_manager.get_in_context_messages_async(agent_id=self.agent_state.id, actor=self.user),
+            self.passage_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
+            self.message_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
+        )
+        in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
+
+        # Extract system, memory and external summary
+        if (
+            len(in_context_messages) > 0
+            and in_context_messages[0].role == MessageRole.system
+            and in_context_messages[0].content
+            and len(in_context_messages[0].content) == 1
+            and isinstance(in_context_messages[0].content[0], TextContent)
+        ):
+            system_message = in_context_messages[0].content[0].text
+
+            external_memory_marker_pos = system_message.find("###")
+            core_memory_marker_pos = system_message.find("<", external_memory_marker_pos)
+            if external_memory_marker_pos != -1 and core_memory_marker_pos != -1:
+                system_prompt = system_message[:external_memory_marker_pos].strip()
+                external_memory_summary = system_message[external_memory_marker_pos:core_memory_marker_pos].strip()
+                core_memory = system_message[core_memory_marker_pos:].strip()
+            else:
+                # if no markers found, put everything in system message
+                system_prompt = system_message
+                external_memory_summary = ""
+                core_memory = ""
+        else:
+            # if no system message, fall back on agent's system prompt
+            system_prompt = self.agent_state.system
+            external_memory_summary = ""
+            core_memory = ""
+
+        num_tokens_system = count_tokens(system_prompt)
+        num_tokens_core_memory = count_tokens(core_memory)
+        num_tokens_external_memory_summary = count_tokens(external_memory_summary)
+
+        # Check if there's a summary message in the message queue
+        if (
+            len(in_context_messages) > 1
+            and in_context_messages[1].role == MessageRole.user
+            and in_context_messages[1].content
+            and len(in_context_messages[1].content) == 1
+            and isinstance(in_context_messages[1].content[0], TextContent)
+            # TODO remove hardcoding
+            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
+        ):
+            # Summary message exists
+            text_content = in_context_messages[1].content[0].text
+            assert text_content is not None
+            summary_memory = text_content
+            num_tokens_summary_memory = count_tokens(text_content)
+            # with a summary message, the real messages start at index 2
+            num_tokens_messages = (
+                num_tokens_from_messages(messages=in_context_messages_openai[2:], model=self.model)
+                if len(in_context_messages_openai) > 2
+                else 0
+            )
+
+        else:
+            summary_memory = None
+            num_tokens_summary_memory = 0
+            # with no summary message, the real messages start at index 1
+            num_tokens_messages = (
+                num_tokens_from_messages(messages=in_context_messages_openai[1:], model=self.model)
+                if len(in_context_messages_openai) > 1
+                else 0
+            )
+
+        # tokens taken up by function definitions
+        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
+        if agent_state_tool_jsons:
+            available_functions_definitions = [OpenAITool(type="function", function=f) for f in agent_state_tool_jsons]
+            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=agent_state_tool_jsons, model=self.model)
+        else:
+            available_functions_definitions = []
+            num_tokens_available_functions_definitions = 0
+
+        num_tokens_used_total = (
+            num_tokens_system  # system prompt
+            + num_tokens_available_functions_definitions  # function definitions
+            + num_tokens_core_memory  # core memory
+            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
+            + num_tokens_summary_memory  # summary of ongoing conversation
+            + num_tokens_messages  # tokens taken by messages
+        )
+        assert isinstance(num_tokens_used_total, int)
+
+        return ContextWindowOverview(
+            # context window breakdown (in messages)
+            num_messages=len(in_context_messages),
+            num_archival_memory=passage_manager_size,
+            num_recall_memory=message_manager_size,
+            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
+            external_memory_summary=external_memory_summary,
+            # top-level information
+            context_window_size_max=self.agent_state.llm_config.context_window,
+            context_window_size_current=num_tokens_used_total,
+            # context window breakdown (in tokens)
+            num_tokens_system=num_tokens_system,
+            system_prompt=system_prompt,
+            num_tokens_core_memory=num_tokens_core_memory,
+            core_memory=core_memory,
+            num_tokens_summary_memory=num_tokens_summary_memory,
+            summary_memory=summary_memory,
+            num_tokens_messages=num_tokens_messages,
+            messages=in_context_messages,
+            # related to functions
+            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
+            functions_definitions=available_functions_definitions,
+        )
+
+    async def get_context_window_from_anthropic_async(self) -> ContextWindowOverview:
+        """Get the context window of the agent"""
+        anthropic_client = LLMClient.create(provider_type=ProviderType.anthropic, actor=self.user)
+        model = self.agent_state.llm_config.model if self.agent_state.llm_config.model_endpoint_type == "anthropic" else None
+
+        # Grab the in-context messages
+        # conversion of messages to anthropic dict format, which is passed to the token counter
+        (in_context_messages, passage_manager_size, message_manager_size) = await asyncio.gather(
+            self.agent_manager.get_in_context_messages_async(agent_id=self.agent_state.id, actor=self.user),
+            self.passage_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
+            self.message_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
+        )
+        in_context_messages_anthropic = [m.to_anthropic_dict() for m in in_context_messages]
+
+        # Extract system, memory and external summary
+        if (
+            len(in_context_messages) > 0
+            and in_context_messages[0].role == MessageRole.system
+            and in_context_messages[0].content
+            and len(in_context_messages[0].content) == 1
+            and isinstance(in_context_messages[0].content[0], TextContent)
+        ):
+            system_message = in_context_messages[0].content[0].text
+
+            external_memory_marker_pos = system_message.find("###")
+            core_memory_marker_pos = system_message.find("<", external_memory_marker_pos)
+            if external_memory_marker_pos != -1 and core_memory_marker_pos != -1:
+                system_prompt = system_message[:external_memory_marker_pos].strip()
+                external_memory_summary = system_message[external_memory_marker_pos:core_memory_marker_pos].strip()
+                core_memory = system_message[core_memory_marker_pos:].strip()
+            else:
+                # if no markers found, put everything in system message
+                system_prompt = system_message
+                external_memory_summary = None
+                core_memory = None
+        else:
+            # if no system message, fall back on agent's system prompt
+            system_prompt = self.agent_state.system
+            external_memory_summary = None
+            core_memory = None
+
+        num_tokens_system_coroutine = anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": system_prompt}])
+        num_tokens_core_memory_coroutine = (
+            anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": core_memory}])
+            if core_memory
+            else asyncio.sleep(0, result=0)
+        )
+        num_tokens_external_memory_summary_coroutine = (
+            anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": external_memory_summary}])
+            if external_memory_summary
+            else asyncio.sleep(0, result=0)
+        )
+
+        # Check if there's a summary message in the message queue
+        if (
+            len(in_context_messages) > 1
+            and in_context_messages[1].role == MessageRole.user
+            and in_context_messages[1].content
+            and len(in_context_messages[1].content) == 1
+            and isinstance(in_context_messages[1].content[0], TextContent)
+            # TODO remove hardcoding
+            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
+        ):
+            # Summary message exists
+            text_content = in_context_messages[1].content[0].text
+            assert text_content is not None
+            summary_memory = text_content
+            num_tokens_summary_memory_coroutine = anthropic_client.count_tokens(
+                model=model, messages=[{"role": "user", "content": summary_memory}]
+            )
+            # with a summary message, the real messages start at index 2
+            num_tokens_messages_coroutine = (
+                anthropic_client.count_tokens(model=model, messages=in_context_messages_anthropic[2:])
+                if len(in_context_messages_anthropic) > 2
+                else asyncio.sleep(0, result=0)
+            )
+
+        else:
+            summary_memory = None
+            num_tokens_summary_memory_coroutine = asyncio.sleep(0, result=0)
+            # with no summary message, the real messages start at index 1
+            num_tokens_messages_coroutine = (
+                anthropic_client.count_tokens(model=model, messages=in_context_messages_anthropic[1:])
+                if len(in_context_messages_anthropic) > 1
+                else asyncio.sleep(0, result=0)
+            )
+
+        # tokens taken up by function definitions
+        if self.agent_state.tools and len(self.agent_state.tools) > 0:
+            available_functions_definitions = [OpenAITool(type="function", function=f.json_schema) for f in self.agent_state.tools]
+            num_tokens_available_functions_definitions_coroutine = anthropic_client.count_tokens(
+                model=model,
+                tools=available_functions_definitions,
+            )
+        else:
+            available_functions_definitions = []
+            num_tokens_available_functions_definitions_coroutine = asyncio.sleep(0, result=0)
+
+        (
+            num_tokens_system,
+            num_tokens_core_memory,
+            num_tokens_external_memory_summary,
+            num_tokens_summary_memory,
+            num_tokens_messages,
+            num_tokens_available_functions_definitions,
+        ) = await asyncio.gather(
+            num_tokens_system_coroutine,
+            num_tokens_core_memory_coroutine,
+            num_tokens_external_memory_summary_coroutine,
+            num_tokens_summary_memory_coroutine,
+            num_tokens_messages_coroutine,
+            num_tokens_available_functions_definitions_coroutine,
+        )
+
+        num_tokens_used_total = (
+            num_tokens_system  # system prompt
+            + num_tokens_available_functions_definitions  # function definitions
+            + num_tokens_core_memory  # core memory
+            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
+            + num_tokens_summary_memory  # summary of ongoing conversation
+            + num_tokens_messages  # tokens taken by messages
+        )
+        assert isinstance(num_tokens_used_total, int)
+
+        return ContextWindowOverview(
+            # context window breakdown (in messages)
+            num_messages=len(in_context_messages),
+            num_archival_memory=passage_manager_size,
+            num_recall_memory=message_manager_size,
+            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
+            external_memory_summary=external_memory_summary,
+            # top-level information
+            context_window_size_max=self.agent_state.llm_config.context_window,
+            context_window_size_current=num_tokens_used_total,
+            # context window breakdown (in tokens)
+            num_tokens_system=num_tokens_system,
+            system_prompt=system_prompt,
+            num_tokens_core_memory=num_tokens_core_memory,
+            core_memory=core_memory,
+            num_tokens_summary_memory=num_tokens_summary_memory,
+            summary_memory=summary_memory,
+            num_tokens_messages=num_tokens_messages,
+            messages=in_context_messages,
+            # related to functions
+            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
+            functions_definitions=available_functions_definitions,
+        )
+
     def count_tokens(self) -> int:
         """Count the tokens in the current context window"""
         context_window_breakdown = self.get_context_window()
letta/agents/base_agent.py
CHANGED
@@ -72,61 +72,6 @@ class BaseAgent(ABC):
 
         return [{"role": input_message.role.value, "content": get_content(input_message)} for input_message in input_messages]
 
-    def _rebuild_memory(
-        self,
-        in_context_messages: List[Message],
-        agent_state: AgentState,
-        num_messages: int | None = None,  # storing these calculations is specific to the voice agent
-        num_archival_memories: int | None = None,
-    ) -> List[Message]:
-        try:
-            # Refresh Memory
-            # TODO: This only happens for the summary block (voice?)
-            # [DB Call] loading blocks (modifies: agent_state.memory.blocks)
-            self.agent_manager.refresh_memory(agent_state=agent_state, actor=self.actor)
-
-            # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
-            curr_system_message = in_context_messages[0]
-            curr_memory_str = agent_state.memory.compile()
-            curr_system_message_text = curr_system_message.content[0].text
-            if curr_memory_str in curr_system_message_text:
-                # NOTE: could this cause issues if a block is removed? (substring match would still work)
-                logger.debug(
-                    f"Memory hasn't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
-                )
-                return in_context_messages
-
-            memory_edit_timestamp = get_utc_time()
-
-            # [DB Call] size of messages and archival memories
-            num_messages = num_messages or self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
-            num_archival_memories = num_archival_memories or self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
-
-            new_system_message_str = compile_system_message(
-                system_prompt=agent_state.system,
-                in_context_memory=agent_state.memory,
-                in_context_memory_last_edit=memory_edit_timestamp,
-                previous_message_count=num_messages,
-                archival_memory_size=num_archival_memories,
-            )
-
-            diff = united_diff(curr_system_message_text, new_system_message_str)
-            if len(diff) > 0:
-                logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
-
-                # [DB Call] Update Messages
-                new_system_message = self.message_manager.update_message_by_id(
-                    curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
-                )
-                # Skip pulling down the agent's memory again to save on a db call
-                return [new_system_message] + in_context_messages[1:]
-
-            else:
-                return in_context_messages
-        except:
-            logger.exception(f"Failed to rebuild memory for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name})")
-            raise
-
     async def _rebuild_memory_async(
         self,
         in_context_messages: List[Message],
letta/agents/helpers.py
CHANGED
@@ -1,3 +1,4 @@
+import uuid
 import xml.etree.ElementTree as ET
 from typing import List, Tuple
 
@@ -150,3 +151,7 @@ def deserialize_message_history(xml_str: str) -> Tuple[List[str], str]:
     context = sum_el.text or ""
 
     return messages, context
+
+
+def generate_step_id():
+    return f"step-{uuid.uuid4()}"