letta-nightly 0.8.13.dev20250714104447__py3-none-any.whl → 0.8.15.dev20250715080149__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +1 -1
- letta/constants.py +6 -0
- letta/functions/function_sets/base.py +2 -2
- letta/functions/function_sets/files.py +11 -11
- letta/helpers/decorators.py +1 -1
- letta/helpers/pinecone_utils.py +164 -11
- letta/orm/agent.py +1 -1
- letta/orm/file.py +2 -17
- letta/orm/files_agents.py +9 -10
- letta/orm/organization.py +0 -4
- letta/orm/passage.py +0 -10
- letta/orm/source.py +3 -20
- letta/prompts/system/memgpt_v2_chat.txt +28 -10
- letta/schemas/file.py +1 -0
- letta/schemas/memory.py +2 -2
- letta/server/rest_api/routers/v1/agents.py +4 -4
- letta/server/rest_api/routers/v1/messages.py +2 -6
- letta/server/rest_api/routers/v1/sources.py +3 -3
- letta/server/server.py +0 -3
- letta/services/agent_manager.py +194 -147
- letta/services/block_manager.py +18 -18
- letta/services/context_window_calculator/context_window_calculator.py +15 -10
- letta/services/context_window_calculator/token_counter.py +40 -0
- letta/services/file_manager.py +37 -0
- letta/services/file_processor/chunker/line_chunker.py +17 -0
- letta/services/file_processor/embedder/openai_embedder.py +50 -5
- letta/services/files_agents_manager.py +12 -2
- letta/services/group_manager.py +11 -11
- letta/services/source_manager.py +19 -3
- letta/services/tool_executor/core_tool_executor.py +2 -2
- letta/services/tool_executor/files_tool_executor.py +6 -1
- {letta_nightly-0.8.13.dev20250714104447.dist-info → letta_nightly-0.8.15.dev20250715080149.dist-info}/METADATA +1 -1
- {letta_nightly-0.8.13.dev20250714104447.dist-info → letta_nightly-0.8.15.dev20250715080149.dist-info}/RECORD +36 -36
- {letta_nightly-0.8.13.dev20250714104447.dist-info → letta_nightly-0.8.15.dev20250715080149.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.13.dev20250714104447.dist-info → letta_nightly-0.8.15.dev20250715080149.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.13.dev20250714104447.dist-info → letta_nightly-0.8.15.dev20250715080149.dist-info}/entry_points.txt +0 -0
letta/services/block_manager.py
CHANGED
|
@@ -23,8 +23,8 @@ logger = get_logger(__name__)
|
|
|
23
23
|
class BlockManager:
|
|
24
24
|
"""Manager class to handle business logic related to Blocks."""
|
|
25
25
|
|
|
26
|
-
@trace_method
|
|
27
26
|
@enforce_types
|
|
27
|
+
@trace_method
|
|
28
28
|
def create_or_update_block(self, block: PydanticBlock, actor: PydanticUser) -> PydanticBlock:
|
|
29
29
|
"""Create a new block based on the Block schema."""
|
|
30
30
|
db_block = self.get_block_by_id(block.id, actor)
|
|
@@ -38,8 +38,8 @@ class BlockManager:
|
|
|
38
38
|
block.create(session, actor=actor)
|
|
39
39
|
return block.to_pydantic()
|
|
40
40
|
|
|
41
|
-
@trace_method
|
|
42
41
|
@enforce_types
|
|
42
|
+
@trace_method
|
|
43
43
|
async def create_or_update_block_async(self, block: PydanticBlock, actor: PydanticUser) -> PydanticBlock:
|
|
44
44
|
"""Create a new block based on the Block schema."""
|
|
45
45
|
db_block = await self.get_block_by_id_async(block.id, actor)
|
|
@@ -53,8 +53,8 @@ class BlockManager:
|
|
|
53
53
|
await block.create_async(session, actor=actor)
|
|
54
54
|
return block.to_pydantic()
|
|
55
55
|
|
|
56
|
-
@trace_method
|
|
57
56
|
@enforce_types
|
|
57
|
+
@trace_method
|
|
58
58
|
def batch_create_blocks(self, blocks: List[PydanticBlock], actor: PydanticUser) -> List[PydanticBlock]:
|
|
59
59
|
"""
|
|
60
60
|
Batch-create multiple Blocks in one transaction for better performance.
|
|
@@ -77,8 +77,8 @@ class BlockManager:
|
|
|
77
77
|
# Convert back to Pydantic
|
|
78
78
|
return [m.to_pydantic() for m in created_models]
|
|
79
79
|
|
|
80
|
-
@trace_method
|
|
81
80
|
@enforce_types
|
|
81
|
+
@trace_method
|
|
82
82
|
async def batch_create_blocks_async(self, blocks: List[PydanticBlock], actor: PydanticUser) -> List[PydanticBlock]:
|
|
83
83
|
"""
|
|
84
84
|
Batch-create multiple Blocks in one transaction for better performance.
|
|
@@ -101,8 +101,8 @@ class BlockManager:
|
|
|
101
101
|
# Convert back to Pydantic
|
|
102
102
|
return [m.to_pydantic() for m in created_models]
|
|
103
103
|
|
|
104
|
-
@trace_method
|
|
105
104
|
@enforce_types
|
|
105
|
+
@trace_method
|
|
106
106
|
def update_block(self, block_id: str, block_update: BlockUpdate, actor: PydanticUser) -> PydanticBlock:
|
|
107
107
|
"""Update a block by its ID with the given BlockUpdate object."""
|
|
108
108
|
# Safety check for block
|
|
@@ -117,8 +117,8 @@ class BlockManager:
|
|
|
117
117
|
block.update(db_session=session, actor=actor)
|
|
118
118
|
return block.to_pydantic()
|
|
119
119
|
|
|
120
|
-
@trace_method
|
|
121
120
|
@enforce_types
|
|
121
|
+
@trace_method
|
|
122
122
|
async def update_block_async(self, block_id: str, block_update: BlockUpdate, actor: PydanticUser) -> PydanticBlock:
|
|
123
123
|
"""Update a block by its ID with the given BlockUpdate object."""
|
|
124
124
|
# Safety check for block
|
|
@@ -133,8 +133,8 @@ class BlockManager:
|
|
|
133
133
|
await block.update_async(db_session=session, actor=actor)
|
|
134
134
|
return block.to_pydantic()
|
|
135
135
|
|
|
136
|
-
@trace_method
|
|
137
136
|
@enforce_types
|
|
137
|
+
@trace_method
|
|
138
138
|
def delete_block(self, block_id: str, actor: PydanticUser) -> PydanticBlock:
|
|
139
139
|
"""Delete a block by its ID."""
|
|
140
140
|
with db_registry.session() as session:
|
|
@@ -142,8 +142,8 @@ class BlockManager:
|
|
|
142
142
|
block.hard_delete(db_session=session, actor=actor)
|
|
143
143
|
return block.to_pydantic()
|
|
144
144
|
|
|
145
|
-
@trace_method
|
|
146
145
|
@enforce_types
|
|
146
|
+
@trace_method
|
|
147
147
|
async def delete_block_async(self, block_id: str, actor: PydanticUser) -> PydanticBlock:
|
|
148
148
|
"""Delete a block by its ID."""
|
|
149
149
|
async with db_registry.async_session() as session:
|
|
@@ -151,8 +151,8 @@ class BlockManager:
|
|
|
151
151
|
await block.hard_delete_async(db_session=session, actor=actor)
|
|
152
152
|
return block.to_pydantic()
|
|
153
153
|
|
|
154
|
-
@trace_method
|
|
155
154
|
@enforce_types
|
|
155
|
+
@trace_method
|
|
156
156
|
async def get_blocks_async(
|
|
157
157
|
self,
|
|
158
158
|
actor: PydanticUser,
|
|
@@ -214,8 +214,8 @@ class BlockManager:
|
|
|
214
214
|
|
|
215
215
|
return [block.to_pydantic() for block in blocks]
|
|
216
216
|
|
|
217
|
-
@trace_method
|
|
218
217
|
@enforce_types
|
|
218
|
+
@trace_method
|
|
219
219
|
def get_block_by_id(self, block_id: str, actor: Optional[PydanticUser] = None) -> Optional[PydanticBlock]:
|
|
220
220
|
"""Retrieve a block by its ID."""
|
|
221
221
|
with db_registry.session() as session:
|
|
@@ -225,8 +225,8 @@ class BlockManager:
|
|
|
225
225
|
except NoResultFound:
|
|
226
226
|
return None
|
|
227
227
|
|
|
228
|
-
@trace_method
|
|
229
228
|
@enforce_types
|
|
229
|
+
@trace_method
|
|
230
230
|
async def get_block_by_id_async(self, block_id: str, actor: Optional[PydanticUser] = None) -> Optional[PydanticBlock]:
|
|
231
231
|
"""Retrieve a block by its ID."""
|
|
232
232
|
async with db_registry.async_session() as session:
|
|
@@ -236,8 +236,8 @@ class BlockManager:
|
|
|
236
236
|
except NoResultFound:
|
|
237
237
|
return None
|
|
238
238
|
|
|
239
|
-
@trace_method
|
|
240
239
|
@enforce_types
|
|
240
|
+
@trace_method
|
|
241
241
|
async def get_all_blocks_by_ids_async(self, block_ids: List[str], actor: Optional[PydanticUser] = None) -> List[PydanticBlock]:
|
|
242
242
|
"""Retrieve blocks by their ids without loading unnecessary relationships. Async implementation."""
|
|
243
243
|
from sqlalchemy import select
|
|
@@ -284,8 +284,8 @@ class BlockManager:
|
|
|
284
284
|
|
|
285
285
|
return pydantic_blocks
|
|
286
286
|
|
|
287
|
-
@trace_method
|
|
288
287
|
@enforce_types
|
|
288
|
+
@trace_method
|
|
289
289
|
async def get_agents_for_block_async(
|
|
290
290
|
self,
|
|
291
291
|
block_id: str,
|
|
@@ -301,8 +301,8 @@ class BlockManager:
|
|
|
301
301
|
agents = await asyncio.gather(*[agent.to_pydantic_async(include_relationships=include_relationships) for agent in agents_orm])
|
|
302
302
|
return agents
|
|
303
303
|
|
|
304
|
-
@trace_method
|
|
305
304
|
@enforce_types
|
|
305
|
+
@trace_method
|
|
306
306
|
async def size_async(self, actor: PydanticUser) -> int:
|
|
307
307
|
"""
|
|
308
308
|
Get the total count of blocks for the given user.
|
|
@@ -312,8 +312,8 @@ class BlockManager:
|
|
|
312
312
|
|
|
313
313
|
# Block History Functions
|
|
314
314
|
|
|
315
|
-
@trace_method
|
|
316
315
|
@enforce_types
|
|
316
|
+
@trace_method
|
|
317
317
|
def checkpoint_block(
|
|
318
318
|
self,
|
|
319
319
|
block_id: str,
|
|
@@ -416,8 +416,8 @@ class BlockManager:
|
|
|
416
416
|
updated_block = block.update(db_session=session, actor=actor, no_commit=True)
|
|
417
417
|
return updated_block
|
|
418
418
|
|
|
419
|
-
@trace_method
|
|
420
419
|
@enforce_types
|
|
420
|
+
@trace_method
|
|
421
421
|
def undo_checkpoint_block(self, block_id: str, actor: PydanticUser, use_preloaded_block: Optional[BlockModel] = None) -> PydanticBlock:
|
|
422
422
|
"""
|
|
423
423
|
Move the block to the immediately previous checkpoint in BlockHistory.
|
|
@@ -459,8 +459,8 @@ class BlockManager:
|
|
|
459
459
|
session.commit()
|
|
460
460
|
return block.to_pydantic()
|
|
461
461
|
|
|
462
|
-
@trace_method
|
|
463
462
|
@enforce_types
|
|
463
|
+
@trace_method
|
|
464
464
|
def redo_checkpoint_block(self, block_id: str, actor: PydanticUser, use_preloaded_block: Optional[BlockModel] = None) -> PydanticBlock:
|
|
465
465
|
"""
|
|
466
466
|
Move the block to the next checkpoint if it exists.
|
|
@@ -498,8 +498,8 @@ class BlockManager:
|
|
|
498
498
|
session.commit()
|
|
499
499
|
return block.to_pydantic()
|
|
500
500
|
|
|
501
|
-
@trace_method
|
|
502
501
|
@enforce_types
|
|
502
|
+
@trace_method
|
|
503
503
|
async def bulk_update_block_values_async(
|
|
504
504
|
self, updates: Dict[str, str], actor: PydanticUser, return_hydrated: bool = False
|
|
505
505
|
) -> Optional[List[PydanticBlock]]:
|
|
@@ -4,11 +4,14 @@ from typing import Any, List, Optional, Tuple
|
|
|
4
4
|
from openai.types.beta.function_tool import FunctionTool as OpenAITool
|
|
5
5
|
|
|
6
6
|
from letta.log import get_logger
|
|
7
|
+
from letta.schemas.agent import AgentState
|
|
7
8
|
from letta.schemas.enums import MessageRole
|
|
8
9
|
from letta.schemas.letta_message_content import TextContent
|
|
9
10
|
from letta.schemas.memory import ContextWindowOverview
|
|
11
|
+
from letta.schemas.message import Message
|
|
10
12
|
from letta.schemas.user import User as PydanticUser
|
|
11
13
|
from letta.services.context_window_calculator.token_counter import TokenCounter
|
|
14
|
+
from letta.services.message_manager import MessageManager
|
|
12
15
|
|
|
13
16
|
logger = get_logger(__name__)
|
|
14
17
|
|
|
@@ -56,16 +59,18 @@ class ContextWindowCalculator:
|
|
|
56
59
|
return None, 1
|
|
57
60
|
|
|
58
61
|
async def calculate_context_window(
|
|
59
|
-
self,
|
|
62
|
+
self,
|
|
63
|
+
agent_state: AgentState,
|
|
64
|
+
actor: PydanticUser,
|
|
65
|
+
token_counter: TokenCounter,
|
|
66
|
+
message_manager: MessageManager,
|
|
67
|
+
system_message_compiled: Message,
|
|
68
|
+
num_archival_memories: int,
|
|
69
|
+
num_messages: int,
|
|
60
70
|
) -> ContextWindowOverview:
|
|
61
71
|
"""Calculate context window information using the provided token counter"""
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
(in_context_messages, passage_manager_size, message_manager_size) = await asyncio.gather(
|
|
65
|
-
message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=actor),
|
|
66
|
-
passage_manager.agent_passage_size_async(actor=actor, agent_id=agent_state.id),
|
|
67
|
-
message_manager.size_async(actor=actor, agent_id=agent_state.id),
|
|
68
|
-
)
|
|
72
|
+
messages = await message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids[1:], actor=actor)
|
|
73
|
+
in_context_messages = [system_message_compiled] + messages
|
|
69
74
|
|
|
70
75
|
# Convert messages to appropriate format
|
|
71
76
|
converted_messages = token_counter.convert_messages(in_context_messages)
|
|
@@ -128,8 +133,8 @@ class ContextWindowCalculator:
|
|
|
128
133
|
return ContextWindowOverview(
|
|
129
134
|
# context window breakdown (in messages)
|
|
130
135
|
num_messages=len(in_context_messages),
|
|
131
|
-
num_archival_memory=
|
|
132
|
-
num_recall_memory=
|
|
136
|
+
num_archival_memory=num_archival_memories,
|
|
137
|
+
num_recall_memory=num_messages,
|
|
133
138
|
num_tokens_external_memory_summary=num_tokens_external_memory_summary,
|
|
134
139
|
external_memory_summary=external_memory_summary,
|
|
135
140
|
# top-level information
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
4
|
from typing import Any, Dict, List
|
|
3
5
|
|
|
6
|
+
from letta.helpers.decorators import async_redis_cache
|
|
4
7
|
from letta.llm_api.anthropic_client import AnthropicClient
|
|
8
|
+
from letta.otel.tracing import trace_method
|
|
5
9
|
from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
|
|
6
10
|
from letta.utils import count_tokens
|
|
7
11
|
|
|
@@ -33,16 +37,34 @@ class AnthropicTokenCounter(TokenCounter):
|
|
|
33
37
|
self.client = anthropic_client
|
|
34
38
|
self.model = model
|
|
35
39
|
|
|
40
|
+
@trace_method
|
|
41
|
+
@async_redis_cache(
|
|
42
|
+
key_func=lambda self, text: f"anthropic_text_tokens:{self.model}:{hashlib.sha256(text.encode()).hexdigest()[:16]}",
|
|
43
|
+
prefix="token_counter",
|
|
44
|
+
ttl_s=3600, # cache for 1 hour
|
|
45
|
+
)
|
|
36
46
|
async def count_text_tokens(self, text: str) -> int:
|
|
37
47
|
if not text:
|
|
38
48
|
return 0
|
|
39
49
|
return await self.client.count_tokens(model=self.model, messages=[{"role": "user", "content": text}])
|
|
40
50
|
|
|
51
|
+
@trace_method
|
|
52
|
+
@async_redis_cache(
|
|
53
|
+
key_func=lambda self, messages: f"anthropic_message_tokens:{self.model}:{hashlib.sha256(json.dumps(messages, sort_keys=True).encode()).hexdigest()[:16]}",
|
|
54
|
+
prefix="token_counter",
|
|
55
|
+
ttl_s=3600, # cache for 1 hour
|
|
56
|
+
)
|
|
41
57
|
async def count_message_tokens(self, messages: List[Dict[str, Any]]) -> int:
|
|
42
58
|
if not messages:
|
|
43
59
|
return 0
|
|
44
60
|
return await self.client.count_tokens(model=self.model, messages=messages)
|
|
45
61
|
|
|
62
|
+
@trace_method
|
|
63
|
+
@async_redis_cache(
|
|
64
|
+
key_func=lambda self, tools: f"anthropic_tool_tokens:{self.model}:{hashlib.sha256(json.dumps([t.model_dump() for t in tools], sort_keys=True).encode()).hexdigest()[:16]}",
|
|
65
|
+
prefix="token_counter",
|
|
66
|
+
ttl_s=3600, # cache for 1 hour
|
|
67
|
+
)
|
|
46
68
|
async def count_tool_tokens(self, tools: List[OpenAITool]) -> int:
|
|
47
69
|
if not tools:
|
|
48
70
|
return 0
|
|
@@ -58,11 +80,23 @@ class TiktokenCounter(TokenCounter):
|
|
|
58
80
|
def __init__(self, model: str):
|
|
59
81
|
self.model = model
|
|
60
82
|
|
|
83
|
+
@trace_method
|
|
84
|
+
@async_redis_cache(
|
|
85
|
+
key_func=lambda self, text: f"tiktoken_text_tokens:{self.model}:{hashlib.sha256(text.encode()).hexdigest()[:16]}",
|
|
86
|
+
prefix="token_counter",
|
|
87
|
+
ttl_s=3600, # cache for 1 hour
|
|
88
|
+
)
|
|
61
89
|
async def count_text_tokens(self, text: str) -> int:
|
|
62
90
|
if not text:
|
|
63
91
|
return 0
|
|
64
92
|
return count_tokens(text)
|
|
65
93
|
|
|
94
|
+
@trace_method
|
|
95
|
+
@async_redis_cache(
|
|
96
|
+
key_func=lambda self, messages: f"tiktoken_message_tokens:{self.model}:{hashlib.sha256(json.dumps(messages, sort_keys=True).encode()).hexdigest()[:16]}",
|
|
97
|
+
prefix="token_counter",
|
|
98
|
+
ttl_s=3600, # cache for 1 hour
|
|
99
|
+
)
|
|
66
100
|
async def count_message_tokens(self, messages: List[Dict[str, Any]]) -> int:
|
|
67
101
|
if not messages:
|
|
68
102
|
return 0
|
|
@@ -70,6 +104,12 @@ class TiktokenCounter(TokenCounter):
|
|
|
70
104
|
|
|
71
105
|
return num_tokens_from_messages(messages=messages, model=self.model)
|
|
72
106
|
|
|
107
|
+
@trace_method
|
|
108
|
+
@async_redis_cache(
|
|
109
|
+
key_func=lambda self, tools: f"tiktoken_tool_tokens:{self.model}:{hashlib.sha256(json.dumps([t.model_dump() for t in tools], sort_keys=True).encode()).hexdigest()[:16]}",
|
|
110
|
+
prefix="token_counter",
|
|
111
|
+
ttl_s=3600, # cache for 1 hour
|
|
112
|
+
)
|
|
73
113
|
async def count_tool_tokens(self, tools: List[OpenAITool]) -> int:
|
|
74
114
|
if not tools:
|
|
75
115
|
return 0
|
letta/services/file_manager.py
CHANGED
|
@@ -8,6 +8,7 @@ from sqlalchemy.exc import IntegrityError
|
|
|
8
8
|
from sqlalchemy.orm import selectinload
|
|
9
9
|
|
|
10
10
|
from letta.constants import MAX_FILENAME_LENGTH
|
|
11
|
+
from letta.helpers.decorators import async_redis_cache
|
|
11
12
|
from letta.orm.errors import NoResultFound
|
|
12
13
|
from letta.orm.file import FileContent as FileContentModel
|
|
13
14
|
from letta.orm.file import FileMetadata as FileMetadataModel
|
|
@@ -34,6 +35,16 @@ class DuplicateFileError(Exception):
|
|
|
34
35
|
class FileManager:
|
|
35
36
|
"""Manager class to handle business logic related to files."""
|
|
36
37
|
|
|
38
|
+
async def _invalidate_file_caches(self, file_id: str, actor: PydanticUser, original_filename: str = None, source_id: str = None):
|
|
39
|
+
"""Invalidate all caches related to a file."""
|
|
40
|
+
# invalidate file content cache (all variants)
|
|
41
|
+
await self.get_file_by_id.cache_invalidate(self, file_id, actor, include_content=True)
|
|
42
|
+
await self.get_file_by_id.cache_invalidate(self, file_id, actor, include_content=False)
|
|
43
|
+
|
|
44
|
+
# invalidate filename-based cache if we have the info
|
|
45
|
+
if original_filename and source_id:
|
|
46
|
+
await self.get_file_by_original_name_and_source.cache_invalidate(self, original_filename, source_id, actor)
|
|
47
|
+
|
|
37
48
|
@enforce_types
|
|
38
49
|
@trace_method
|
|
39
50
|
async def create_file(
|
|
@@ -61,6 +72,10 @@ class FileManager:
|
|
|
61
72
|
|
|
62
73
|
await session.commit()
|
|
63
74
|
await session.refresh(file_orm)
|
|
75
|
+
|
|
76
|
+
# invalidate cache for this new file
|
|
77
|
+
await self._invalidate_file_caches(file_orm.id, actor, file_orm.original_file_name, file_orm.source_id)
|
|
78
|
+
|
|
64
79
|
return await file_orm.to_pydantic_async()
|
|
65
80
|
|
|
66
81
|
except IntegrityError:
|
|
@@ -70,6 +85,12 @@ class FileManager:
|
|
|
70
85
|
# TODO: We make actor optional for now, but should most likely be enforced due to security reasons
|
|
71
86
|
@enforce_types
|
|
72
87
|
@trace_method
|
|
88
|
+
@async_redis_cache(
|
|
89
|
+
key_func=lambda self, file_id, actor=None, include_content=False, strip_directory_prefix=False: f"{file_id}:{actor.organization_id if actor else 'none'}:{include_content}:{strip_directory_prefix}",
|
|
90
|
+
prefix="file_content",
|
|
91
|
+
ttl_s=3600,
|
|
92
|
+
model_class=PydanticFileMetadata,
|
|
93
|
+
)
|
|
73
94
|
async def get_file_by_id(
|
|
74
95
|
self, file_id: str, actor: Optional[PydanticUser] = None, *, include_content: bool = False, strip_directory_prefix: bool = False
|
|
75
96
|
) -> Optional[PydanticFileMetadata]:
|
|
@@ -155,6 +176,9 @@ class FileManager:
|
|
|
155
176
|
await session.execute(stmt)
|
|
156
177
|
await session.commit()
|
|
157
178
|
|
|
179
|
+
# invalidate cache for this file
|
|
180
|
+
await self._invalidate_file_caches(file_id, actor)
|
|
181
|
+
|
|
158
182
|
# Reload via normal accessor so we return a fully-attached object
|
|
159
183
|
file_orm = await FileMetadataModel.read_async(
|
|
160
184
|
db_session=session,
|
|
@@ -200,6 +224,9 @@ class FileManager:
|
|
|
200
224
|
|
|
201
225
|
await session.commit()
|
|
202
226
|
|
|
227
|
+
# invalidate cache for this file since content changed
|
|
228
|
+
await self._invalidate_file_caches(file_id, actor)
|
|
229
|
+
|
|
203
230
|
# Reload with content
|
|
204
231
|
query = select(FileMetadataModel).options(selectinload(FileMetadataModel.content)).where(FileMetadataModel.id == file_id)
|
|
205
232
|
result = await session.execute(query)
|
|
@@ -239,6 +266,10 @@ class FileManager:
|
|
|
239
266
|
"""Delete a file by its ID."""
|
|
240
267
|
async with db_registry.async_session() as session:
|
|
241
268
|
file = await FileMetadataModel.read_async(db_session=session, identifier=file_id)
|
|
269
|
+
|
|
270
|
+
# invalidate cache for this file before deletion
|
|
271
|
+
await self._invalidate_file_caches(file_id, actor, file.original_file_name, file.source_id)
|
|
272
|
+
|
|
242
273
|
await file.hard_delete_async(db_session=session, actor=actor)
|
|
243
274
|
return await file.to_pydantic_async()
|
|
244
275
|
|
|
@@ -285,6 +316,12 @@ class FileManager:
|
|
|
285
316
|
|
|
286
317
|
@enforce_types
|
|
287
318
|
@trace_method
|
|
319
|
+
@async_redis_cache(
|
|
320
|
+
key_func=lambda self, original_filename, source_id, actor: f"{original_filename}:{source_id}:{actor.organization_id}",
|
|
321
|
+
prefix="file_by_name",
|
|
322
|
+
ttl_s=3600,
|
|
323
|
+
model_class=PydanticFileMetadata,
|
|
324
|
+
)
|
|
288
325
|
async def get_file_by_original_name_and_source(
|
|
289
326
|
self, original_filename: str, source_id: str, actor: PydanticUser
|
|
290
327
|
) -> Optional[PydanticFileMetadata]:
|
|
@@ -40,6 +40,10 @@ class LineChunker:
|
|
|
40
40
|
|
|
41
41
|
def _chunk_by_lines(self, text: str, preserve_indentation: bool = False) -> List[str]:
|
|
42
42
|
"""Traditional line-based chunking for code and structured data"""
|
|
43
|
+
# early stop, can happen if there's nothing in a specific file
|
|
44
|
+
if not text:
|
|
45
|
+
return []
|
|
46
|
+
|
|
43
47
|
lines = []
|
|
44
48
|
for line in text.splitlines():
|
|
45
49
|
if preserve_indentation:
|
|
@@ -57,6 +61,10 @@ class LineChunker:
|
|
|
57
61
|
|
|
58
62
|
def _chunk_by_sentences(self, text: str) -> List[str]:
|
|
59
63
|
"""Sentence-based chunking for documentation and markup"""
|
|
64
|
+
# early stop, can happen if there's nothing in a specific file
|
|
65
|
+
if not text:
|
|
66
|
+
return []
|
|
67
|
+
|
|
60
68
|
# Simple sentence splitting on periods, exclamation marks, and question marks
|
|
61
69
|
# followed by whitespace or end of string
|
|
62
70
|
sentence_pattern = r"(?<=[.!?])\s+(?=[A-Z])"
|
|
@@ -75,6 +83,10 @@ class LineChunker:
|
|
|
75
83
|
|
|
76
84
|
def _chunk_by_characters(self, text: str, target_line_length: int = 100) -> List[str]:
|
|
77
85
|
"""Character-based wrapping for prose text"""
|
|
86
|
+
# early stop, can happen if there's nothing in a specific file
|
|
87
|
+
if not text:
|
|
88
|
+
return []
|
|
89
|
+
|
|
78
90
|
words = text.split()
|
|
79
91
|
lines = []
|
|
80
92
|
current_line = []
|
|
@@ -110,6 +122,11 @@ class LineChunker:
|
|
|
110
122
|
strategy = self._determine_chunking_strategy(file_metadata)
|
|
111
123
|
text = file_metadata.content
|
|
112
124
|
|
|
125
|
+
# early stop, can happen if there's nothing in a specific file
|
|
126
|
+
if not text:
|
|
127
|
+
logger.warning(f"File ({file_metadata}) has no content")
|
|
128
|
+
return []
|
|
129
|
+
|
|
113
130
|
# Apply the appropriate chunking strategy
|
|
114
131
|
if strategy == ChunkingStrategy.DOCUMENTATION:
|
|
115
132
|
content_lines = self._chunk_by_sentences(text)
|
|
@@ -25,7 +25,6 @@ class OpenAIEmbedder(BaseEmbedder):
|
|
|
25
25
|
else EmbeddingConfig.default_config(model_name="letta")
|
|
26
26
|
)
|
|
27
27
|
self.embedding_config = embedding_config or self.default_embedding_config
|
|
28
|
-
self.max_concurrent_requests = 20
|
|
29
28
|
|
|
30
29
|
# TODO: Unify to global OpenAI client
|
|
31
30
|
self.client: OpenAIClient = cast(
|
|
@@ -48,9 +47,55 @@ class OpenAIEmbedder(BaseEmbedder):
|
|
|
48
47
|
"embedding_endpoint_type": self.embedding_config.embedding_endpoint_type,
|
|
49
48
|
},
|
|
50
49
|
)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
|
|
51
|
+
try:
|
|
52
|
+
embeddings = await self.client.request_embeddings(inputs=batch, embedding_config=self.embedding_config)
|
|
53
|
+
log_event("embedder.batch_completed", {"batch_size": len(batch), "embeddings_generated": len(embeddings)})
|
|
54
|
+
return [(idx, e) for idx, e in zip(batch_indices, embeddings)]
|
|
55
|
+
except Exception as e:
|
|
56
|
+
# if it's a token limit error and we can split, do it
|
|
57
|
+
if self._is_token_limit_error(e) and len(batch) > 1:
|
|
58
|
+
logger.warning(f"Token limit exceeded for batch of size {len(batch)}, splitting in half and retrying")
|
|
59
|
+
log_event(
|
|
60
|
+
"embedder.batch_split_retry",
|
|
61
|
+
{
|
|
62
|
+
"original_batch_size": len(batch),
|
|
63
|
+
"error": str(e),
|
|
64
|
+
"split_size": len(batch) // 2,
|
|
65
|
+
},
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# split batch in half
|
|
69
|
+
mid = len(batch) // 2
|
|
70
|
+
batch1 = batch[:mid]
|
|
71
|
+
batch1_indices = batch_indices[:mid]
|
|
72
|
+
batch2 = batch[mid:]
|
|
73
|
+
batch2_indices = batch_indices[mid:]
|
|
74
|
+
|
|
75
|
+
# retry with smaller batches
|
|
76
|
+
result1 = await self._embed_batch(batch1, batch1_indices)
|
|
77
|
+
result2 = await self._embed_batch(batch2, batch2_indices)
|
|
78
|
+
|
|
79
|
+
return result1 + result2
|
|
80
|
+
else:
|
|
81
|
+
# re-raise for other errors or if batch size is already 1
|
|
82
|
+
raise
|
|
83
|
+
|
|
84
|
+
def _is_token_limit_error(self, error: Exception) -> bool:
|
|
85
|
+
"""Check if the error is due to token limit exceeded"""
|
|
86
|
+
# convert to string and check for token limit patterns
|
|
87
|
+
error_str = str(error).lower()
|
|
88
|
+
|
|
89
|
+
# TODO: This is quite brittle, works for now
|
|
90
|
+
# check for the specific patterns we see in token limit errors
|
|
91
|
+
is_token_limit = (
|
|
92
|
+
"max_tokens_per_request" in error_str
|
|
93
|
+
or ("requested" in error_str and "tokens" in error_str and "max" in error_str and "per request" in error_str)
|
|
94
|
+
or "token limit" in error_str
|
|
95
|
+
or ("bad request to openai" in error_str and "tokens" in error_str and "max" in error_str)
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
return is_token_limit
|
|
54
99
|
|
|
55
100
|
@trace_method
|
|
56
101
|
async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
|
|
@@ -100,7 +145,7 @@ class OpenAIEmbedder(BaseEmbedder):
|
|
|
100
145
|
|
|
101
146
|
log_event(
|
|
102
147
|
"embedder.concurrent_processing_started",
|
|
103
|
-
{"concurrent_tasks": len(tasks)
|
|
148
|
+
{"concurrent_tasks": len(tasks)},
|
|
104
149
|
)
|
|
105
150
|
results = await asyncio.gather(*tasks)
|
|
106
151
|
log_event("embedder.concurrent_processing_completed", {"batches_processed": len(results)})
|
|
@@ -29,6 +29,7 @@ class FileAgentManager:
|
|
|
29
29
|
agent_id: str,
|
|
30
30
|
file_id: str,
|
|
31
31
|
file_name: str,
|
|
32
|
+
source_id: str,
|
|
32
33
|
actor: PydanticUser,
|
|
33
34
|
is_open: bool = True,
|
|
34
35
|
visible_content: Optional[str] = None,
|
|
@@ -47,7 +48,12 @@ class FileAgentManager:
|
|
|
47
48
|
if is_open:
|
|
48
49
|
# Use the efficient LRU + open method
|
|
49
50
|
closed_files, was_already_open = await self.enforce_max_open_files_and_open(
|
|
50
|
-
agent_id=agent_id,
|
|
51
|
+
agent_id=agent_id,
|
|
52
|
+
file_id=file_id,
|
|
53
|
+
file_name=file_name,
|
|
54
|
+
source_id=source_id,
|
|
55
|
+
actor=actor,
|
|
56
|
+
visible_content=visible_content or "",
|
|
51
57
|
)
|
|
52
58
|
|
|
53
59
|
# Get the updated file agent to return
|
|
@@ -85,6 +91,7 @@ class FileAgentManager:
|
|
|
85
91
|
agent_id=agent_id,
|
|
86
92
|
file_id=file_id,
|
|
87
93
|
file_name=file_name,
|
|
94
|
+
source_id=source_id,
|
|
88
95
|
organization_id=actor.organization_id,
|
|
89
96
|
is_open=is_open,
|
|
90
97
|
visible_content=visible_content,
|
|
@@ -327,7 +334,7 @@ class FileAgentManager:
|
|
|
327
334
|
@enforce_types
|
|
328
335
|
@trace_method
|
|
329
336
|
async def enforce_max_open_files_and_open(
|
|
330
|
-
self, *, agent_id: str, file_id: str, file_name: str, actor: PydanticUser, visible_content: str
|
|
337
|
+
self, *, agent_id: str, file_id: str, file_name: str, source_id: str, actor: PydanticUser, visible_content: str
|
|
331
338
|
) -> tuple[List[str], bool]:
|
|
332
339
|
"""
|
|
333
340
|
Efficiently handle LRU eviction and file opening in a single transaction.
|
|
@@ -336,6 +343,7 @@ class FileAgentManager:
|
|
|
336
343
|
agent_id: ID of the agent
|
|
337
344
|
file_id: ID of the file to open
|
|
338
345
|
file_name: Name of the file to open
|
|
346
|
+
source_id: ID of the source (denormalized from files.source_id)
|
|
339
347
|
actor: User performing the action
|
|
340
348
|
visible_content: Content to set for the opened file
|
|
341
349
|
|
|
@@ -418,6 +426,7 @@ class FileAgentManager:
|
|
|
418
426
|
agent_id=agent_id,
|
|
419
427
|
file_id=file_id,
|
|
420
428
|
file_name=file_name,
|
|
429
|
+
source_id=source_id,
|
|
421
430
|
organization_id=actor.organization_id,
|
|
422
431
|
is_open=True,
|
|
423
432
|
visible_content=visible_content,
|
|
@@ -516,6 +525,7 @@ class FileAgentManager:
|
|
|
516
525
|
agent_id=agent_id,
|
|
517
526
|
file_id=meta.id,
|
|
518
527
|
file_name=meta.file_name,
|
|
528
|
+
source_id=meta.source_id,
|
|
519
529
|
organization_id=actor.organization_id,
|
|
520
530
|
is_open=is_now_open,
|
|
521
531
|
visible_content=vc,
|
letta/services/group_manager.py
CHANGED
|
@@ -19,8 +19,8 @@ from letta.utils import enforce_types
|
|
|
19
19
|
|
|
20
20
|
class GroupManager:
|
|
21
21
|
|
|
22
|
-
@trace_method
|
|
23
22
|
@enforce_types
|
|
23
|
+
@trace_method
|
|
24
24
|
def list_groups(
|
|
25
25
|
self,
|
|
26
26
|
actor: PydanticUser,
|
|
@@ -45,22 +45,22 @@ class GroupManager:
|
|
|
45
45
|
)
|
|
46
46
|
return [group.to_pydantic() for group in groups]
|
|
47
47
|
|
|
48
|
-
@trace_method
|
|
49
48
|
@enforce_types
|
|
49
|
+
@trace_method
|
|
50
50
|
def retrieve_group(self, group_id: str, actor: PydanticUser) -> PydanticGroup:
|
|
51
51
|
with db_registry.session() as session:
|
|
52
52
|
group = GroupModel.read(db_session=session, identifier=group_id, actor=actor)
|
|
53
53
|
return group.to_pydantic()
|
|
54
54
|
|
|
55
|
-
@trace_method
|
|
56
55
|
@enforce_types
|
|
56
|
+
@trace_method
|
|
57
57
|
async def retrieve_group_async(self, group_id: str, actor: PydanticUser) -> PydanticGroup:
|
|
58
58
|
async with db_registry.async_session() as session:
|
|
59
59
|
group = await GroupModel.read_async(db_session=session, identifier=group_id, actor=actor)
|
|
60
60
|
return group.to_pydantic()
|
|
61
61
|
|
|
62
|
-
@trace_method
|
|
63
62
|
@enforce_types
|
|
63
|
+
@trace_method
|
|
64
64
|
def create_group(self, group: GroupCreate, actor: PydanticUser) -> PydanticGroup:
|
|
65
65
|
with db_registry.session() as session:
|
|
66
66
|
new_group = GroupModel()
|
|
@@ -150,8 +150,8 @@ class GroupManager:
|
|
|
150
150
|
await new_group.create_async(session, actor=actor)
|
|
151
151
|
return new_group.to_pydantic()
|
|
152
152
|
|
|
153
|
-
@trace_method
|
|
154
153
|
@enforce_types
|
|
154
|
+
@trace_method
|
|
155
155
|
async def modify_group_async(self, group_id: str, group_update: GroupUpdate, actor: PydanticUser) -> PydanticGroup:
|
|
156
156
|
async with db_registry.async_session() as session:
|
|
157
157
|
group = await GroupModel.read_async(db_session=session, identifier=group_id, actor=actor)
|
|
@@ -213,16 +213,16 @@ class GroupManager:
|
|
|
213
213
|
await group.update_async(session, actor=actor)
|
|
214
214
|
return group.to_pydantic()
|
|
215
215
|
|
|
216
|
-
@trace_method
|
|
217
216
|
@enforce_types
|
|
217
|
+
@trace_method
|
|
218
218
|
def delete_group(self, group_id: str, actor: PydanticUser) -> None:
|
|
219
219
|
with db_registry.session() as session:
|
|
220
220
|
# Retrieve the agent
|
|
221
221
|
group = GroupModel.read(db_session=session, identifier=group_id, actor=actor)
|
|
222
222
|
group.hard_delete(session)
|
|
223
223
|
|
|
224
|
-
@trace_method
|
|
225
224
|
@enforce_types
|
|
225
|
+
@trace_method
|
|
226
226
|
def list_group_messages(
|
|
227
227
|
self,
|
|
228
228
|
actor: PydanticUser,
|
|
@@ -258,8 +258,8 @@ class GroupManager:
|
|
|
258
258
|
|
|
259
259
|
return messages
|
|
260
260
|
|
|
261
|
-
@trace_method
|
|
262
261
|
@enforce_types
|
|
262
|
+
@trace_method
|
|
263
263
|
def reset_messages(self, group_id: str, actor: PydanticUser) -> None:
|
|
264
264
|
with db_registry.session() as session:
|
|
265
265
|
# Ensure group is loadable by user
|
|
@@ -272,8 +272,8 @@ class GroupManager:
|
|
|
272
272
|
|
|
273
273
|
session.commit()
|
|
274
274
|
|
|
275
|
-
@trace_method
|
|
276
275
|
@enforce_types
|
|
276
|
+
@trace_method
|
|
277
277
|
def bump_turns_counter(self, group_id: str, actor: PydanticUser) -> int:
|
|
278
278
|
with db_registry.session() as session:
|
|
279
279
|
# Ensure group is loadable by user
|
|
@@ -284,8 +284,8 @@ class GroupManager:
|
|
|
284
284
|
group.update(session, actor=actor)
|
|
285
285
|
return group.turns_counter
|
|
286
286
|
|
|
287
|
-
@trace_method
|
|
288
287
|
@enforce_types
|
|
288
|
+
@trace_method
|
|
289
289
|
async def bump_turns_counter_async(self, group_id: str, actor: PydanticUser) -> int:
|
|
290
290
|
async with db_registry.async_session() as session:
|
|
291
291
|
# Ensure group is loadable by user
|
|
@@ -309,8 +309,8 @@ class GroupManager:
|
|
|
309
309
|
|
|
310
310
|
return prev_last_processed_message_id
|
|
311
311
|
|
|
312
|
-
@trace_method
|
|
313
312
|
@enforce_types
|
|
313
|
+
@trace_method
|
|
314
314
|
async def get_last_processed_message_id_and_update_async(
|
|
315
315
|
self, group_id: str, last_processed_message_id: str, actor: PydanticUser
|
|
316
316
|
) -> str:
|