letta-nightly 0.8.3.dev20250612104349__py3-none-any.whl → 0.8.4.dev20250613104250__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +11 -1
- letta/agents/base_agent.py +11 -4
- letta/agents/ephemeral_summary_agent.py +3 -2
- letta/agents/letta_agent.py +109 -78
- letta/agents/letta_agent_batch.py +4 -3
- letta/agents/voice_agent.py +3 -3
- letta/agents/voice_sleeptime_agent.py +3 -2
- letta/client/client.py +6 -3
- letta/constants.py +6 -0
- letta/data_sources/connectors.py +3 -5
- letta/functions/async_composio_toolset.py +4 -1
- letta/functions/function_sets/files.py +4 -3
- letta/functions/schema_generator.py +5 -2
- letta/groups/sleeptime_multi_agent_v2.py +4 -3
- letta/helpers/converters.py +7 -1
- letta/helpers/message_helper.py +31 -11
- letta/helpers/tool_rule_solver.py +69 -4
- letta/interfaces/anthropic_streaming_interface.py +8 -1
- letta/interfaces/openai_streaming_interface.py +4 -1
- letta/llm_api/anthropic_client.py +4 -4
- letta/llm_api/openai_client.py +56 -11
- letta/local_llm/utils.py +3 -20
- letta/orm/sqlalchemy_base.py +7 -1
- letta/otel/metric_registry.py +26 -0
- letta/otel/metrics.py +78 -14
- letta/schemas/letta_message_content.py +64 -3
- letta/schemas/letta_request.py +5 -1
- letta/schemas/message.py +61 -14
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/providers.py +41 -14
- letta/schemas/tool_rule.py +67 -0
- letta/schemas/user.py +2 -2
- letta/server/rest_api/routers/v1/agents.py +22 -12
- letta/server/rest_api/routers/v1/sources.py +13 -25
- letta/server/server.py +10 -5
- letta/services/agent_manager.py +5 -1
- letta/services/file_manager.py +219 -0
- letta/services/file_processor/chunker/line_chunker.py +119 -14
- letta/services/file_processor/file_processor.py +8 -8
- letta/services/file_processor/file_types.py +303 -0
- letta/services/file_processor/parser/mistral_parser.py +2 -11
- letta/services/helpers/agent_manager_helper.py +6 -0
- letta/services/message_manager.py +32 -0
- letta/services/organization_manager.py +4 -6
- letta/services/passage_manager.py +1 -0
- letta/services/source_manager.py +0 -208
- letta/services/tool_executor/composio_tool_executor.py +5 -1
- letta/services/tool_executor/files_tool_executor.py +291 -15
- letta/services/user_manager.py +8 -8
- letta/system.py +3 -1
- letta/utils.py +7 -13
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/METADATA +2 -2
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/RECORD +57 -55
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/entry_points.txt +0 -0
letta/services/source_manager.py
CHANGED
@@ -1,21 +1,10 @@
 import asyncio
-from datetime import datetime
 from typing import List, Optional

-from sqlalchemy import select, update
-from sqlalchemy.dialects.postgresql import insert as pg_insert
-from sqlalchemy.exc import IntegrityError
-from sqlalchemy.orm import selectinload
-
 from letta.orm.errors import NoResultFound
-from letta.orm.file import FileContent as FileContentModel
-from letta.orm.file import FileMetadata as FileMetadataModel
 from letta.orm.source import Source as SourceModel
-from letta.orm.sqlalchemy_base import AccessType
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState as PydanticAgentState
-from letta.schemas.enums import FileProcessingStatus
-from letta.schemas.file import FileMetadata as PydanticFileMetadata
 from letta.schemas.source import Source as PydanticSource
 from letta.schemas.source import SourceUpdate
 from letta.schemas.user import User as PydanticUser
@@ -148,200 +137,3 @@ class SourceManager:
             return None
         else:
             return sources[0].to_pydantic()
-
-    @enforce_types
-    @trace_method
-    async def create_file(
-        self,
-        file_metadata: PydanticFileMetadata,
-        actor: PydanticUser,
-        *,
-        text: Optional[str] = None,
-    ) -> PydanticFileMetadata:
-
-        # short-circuit if it already exists
-        existing = await self.get_file_by_id(file_metadata.id, actor=actor)
-        if existing:
-            return existing
-
-        async with db_registry.async_session() as session:
-            try:
-                file_metadata.organization_id = actor.organization_id
-                file_orm = FileMetadataModel(**file_metadata.model_dump(to_orm=True, exclude_none=True))
-                await file_orm.create_async(session, actor=actor, no_commit=True)
-
-                if text is not None:
-                    content_orm = FileContentModel(file_id=file_orm.id, text=text)
-                    await content_orm.create_async(session, actor=actor, no_commit=True)
-
-                await session.commit()
-                await session.refresh(file_orm)
-                return await file_orm.to_pydantic_async()
-
-            except IntegrityError:
-                await session.rollback()
-                return await self.get_file_by_id(file_metadata.id, actor=actor)
-
-    # TODO: We make actor optional for now, but should most likely be enforced due to security reasons
-    @enforce_types
-    @trace_method
-    async def get_file_by_id(
-        self,
-        file_id: str,
-        actor: Optional[PydanticUser] = None,
-        *,
-        include_content: bool = False,
-    ) -> Optional[PydanticFileMetadata]:
-        """Retrieve a file by its ID.
-
-        If `include_content=True`, the FileContent relationship is eagerly
-        loaded so `to_pydantic(include_content=True)` never triggers a
-        lazy SELECT (avoids MissingGreenlet).
-        """
-        async with db_registry.async_session() as session:
-            try:
-                if include_content:
-                    # explicit eager load
-                    query = (
-                        select(FileMetadataModel).where(FileMetadataModel.id == file_id).options(selectinload(FileMetadataModel.content))
-                    )
-                    # apply org-scoping if actor provided
-                    if actor:
-                        query = FileMetadataModel.apply_access_predicate(
-                            query,
-                            actor,
-                            access=["read"],
-                            access_type=AccessType.ORGANIZATION,
-                        )
-
-                    result = await session.execute(query)
-                    file_orm = result.scalar_one()
-                else:
-                    # fast path (metadata only)
-                    file_orm = await FileMetadataModel.read_async(
-                        db_session=session,
-                        identifier=file_id,
-                        actor=actor,
-                    )
-
-                return await file_orm.to_pydantic_async(include_content=include_content)
-
-            except NoResultFound:
-                return None
-
-    @enforce_types
-    @trace_method
-    async def update_file_status(
-        self,
-        *,
-        file_id: str,
-        actor: PydanticUser,
-        processing_status: Optional[FileProcessingStatus] = None,
-        error_message: Optional[str] = None,
-    ) -> PydanticFileMetadata:
-        """
-        Update processing_status and/or error_message on a FileMetadata row.
-
-        * 1st round-trip → UPDATE
-        * 2nd round-trip → SELECT fresh row (same as read_async)
-        """
-
-        if processing_status is None and error_message is None:
-            raise ValueError("Nothing to update")
-
-        values: dict[str, object] = {"updated_at": datetime.utcnow()}
-        if processing_status is not None:
-            values["processing_status"] = processing_status
-        if error_message is not None:
-            values["error_message"] = error_message
-
-        async with db_registry.async_session() as session:
-            # Fast in-place update – no ORM hydration
-            stmt = (
-                update(FileMetadataModel)
-                .where(
-                    FileMetadataModel.id == file_id,
-                    FileMetadataModel.organization_id == actor.organization_id,
-                )
-                .values(**values)
-            )
-            await session.execute(stmt)
-            await session.commit()
-
-            # Reload via normal accessor so we return a fully-attached object
-            file_orm = await FileMetadataModel.read_async(
-                db_session=session,
-                identifier=file_id,
-                actor=actor,
-            )
-            return await file_orm.to_pydantic_async()
-
-    @enforce_types
-    @trace_method
-    async def upsert_file_content(
-        self,
-        *,
-        file_id: str,
-        text: str,
-        actor: PydanticUser,
-    ) -> PydanticFileMetadata:
-        async with db_registry.async_session() as session:
-            await FileMetadataModel.read_async(session, file_id, actor)
-
-            dialect_name = session.bind.dialect.name
-
-            if dialect_name == "postgresql":
-                stmt = (
-                    pg_insert(FileContentModel)
-                    .values(file_id=file_id, text=text)
-                    .on_conflict_do_update(
-                        index_elements=[FileContentModel.file_id],
-                        set_={"text": text},
-                    )
-                )
-                await session.execute(stmt)
-            else:
-                # Emulate upsert for SQLite and others
-                stmt = select(FileContentModel).where(FileContentModel.file_id == file_id)
-                result = await session.execute(stmt)
-                existing = result.scalar_one_or_none()
-
-                if existing:
-                    await session.execute(update(FileContentModel).where(FileContentModel.file_id == file_id).values(text=text))
-                else:
-                    session.add(FileContentModel(file_id=file_id, text=text))
-
-            await session.commit()
-
-            # Reload with content
-            query = select(FileMetadataModel).options(selectinload(FileMetadataModel.content)).where(FileMetadataModel.id == file_id)
-            result = await session.execute(query)
-            return await result.scalar_one().to_pydantic_async(include_content=True)
-
-    @enforce_types
-    @trace_method
-    async def list_files(
-        self, source_id: str, actor: PydanticUser, after: Optional[str] = None, limit: Optional[int] = 50, include_content: bool = False
-    ) -> List[PydanticFileMetadata]:
-        """List all files with optional pagination."""
-        async with db_registry.async_session() as session:
-            options = [selectinload(FileMetadataModel.content)] if include_content else None
-
-            files = await FileMetadataModel.list_async(
-                db_session=session,
-                after=after,
-                limit=limit,
-                organization_id=actor.organization_id,
-                source_id=source_id,
-                query_options=options,
-            )
-            return [await file.to_pydantic_async(include_content=include_content) for file in files]
-
-    @enforce_types
-    @trace_method
-    async def delete_file(self, file_id: str, actor: PydanticUser) -> PydanticFileMetadata:
-        """Delete a file by its ID."""
-        async with db_registry.async_session() as session:
-            file = await FileMetadataModel.read_async(db_session=session, identifier=file_id)
-            await file.hard_delete_async(db_session=session, actor=actor)
-            return await file.to_pydantic_async()
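The file CRUD removed from SourceManager above now lives in the new letta/services/file_manager.py (added in this release, +219 lines). A minimal call-site sketch, assuming FileManager keeps the get_file_by_id signature used by the updated files_tool_executor.py further down; the file_id and actor values are illustrative:

```python
from typing import Optional

from letta.schemas.user import User
from letta.services.file_manager import FileManager


async def load_file_text(file_id: str, actor: User) -> Optional[str]:
    # FileManager replaces the file methods that were removed from SourceManager.
    file_manager = FileManager()
    # include_content=True eagerly loads the FileContent relationship (see the removed
    # get_file_by_id docstring above), so file.content is populated without a lazy SELECT.
    file = await file_manager.get_file_by_id(file_id=file_id, actor=actor, include_content=True)
    return file.content if file else None
```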
letta/services/tool_executor/composio_tool_executor.py
CHANGED
@@ -26,7 +26,11 @@ class ExternalComposioToolExecutor(ToolExecutor):
         sandbox_config: Optional[SandboxConfig] = None,
         sandbox_env_vars: Optional[Dict[str, Any]] = None,
     ) -> ToolExecutionResult:
-
+        if agent_state is None:
+            return ToolExecutionResult(
+                status="error",
+                func_return="Agent state is required for external Composio tools. Please contact Letta support if you see this error.",
+            )
         action_name = generate_composio_action_from_func_name(tool.name)

         # Get entity ID from the agent_state
letta/services/tool_executor/files_tool_executor.py
CHANGED
@@ -1,12 +1,17 @@
+import asyncio
+import re
 from typing import Any, Dict, List, Optional, Tuple

+from letta.log import get_logger
 from letta.schemas.agent import AgentState
+from letta.schemas.file import FileMetadata
 from letta.schemas.sandbox_config import SandboxConfig
 from letta.schemas.tool import Tool
 from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
+from letta.services.file_manager import FileManager
 from letta.services.file_processor.chunker.line_chunker import LineChunker
 from letta.services.files_agents_manager import FileAgentManager
 from letta.services.message_manager import MessageManager
@@ -19,6 +24,15 @@ from letta.utils import get_friendly_error_msg
 class LettaFileToolExecutor(ToolExecutor):
     """Executor for Letta file tools with direct implementation of functions."""

+    # Production safety constants
+    MAX_FILE_SIZE_BYTES = 50 * 1024 * 1024  # 50MB limit per file
+    MAX_TOTAL_CONTENT_SIZE = 200 * 1024 * 1024  # 200MB total across all files
+    MAX_REGEX_COMPLEXITY = 1000  # Prevent catastrophic backtracking
+    MAX_MATCHES_PER_FILE = 20  # Limit matches per file
+    MAX_TOTAL_MATCHES = 50  # Global match limit
+    GREP_TIMEOUT_SECONDS = 30  # Max time for grep operation
+    MAX_CONTEXT_LINES = 1  # Lines of context around matches
+
     def __init__(
         self,
         message_manager: MessageManager,
@@ -37,7 +51,9 @@ class LettaFileToolExecutor(ToolExecutor):

         # TODO: This should be passed in to for testing purposes
         self.files_agents_manager = FileAgentManager()
+        self.file_manager = FileManager()
         self.source_manager = SourceManager()
+        self.logger = get_logger(__name__)

     async def execute(
         self,
@@ -99,12 +115,12 @@ class LettaFileToolExecutor(ToolExecutor):
         )

         file_id = file_agent.file_id
-        file = await self.
+        file = await self.file_manager.get_file_by_id(file_id=file_id, actor=self.actor, include_content=True)

         # TODO: Inefficient, maybe we can pre-compute this
         # TODO: This is also not the best way to split things - would be cool to have "content aware" splitting
         # TODO: Split code differently from large text blurbs
-        content_lines = LineChunker().chunk_text(text=file.content, start=start, end=end)
+        content_lines = LineChunker().chunk_text(text=file.content, file_metadata=file, start=start, end=end)
         visible_content = "\n".join(content_lines)

         await self.files_agents_manager.update_file_agent_by_id(
@@ -120,19 +136,279 @@ class LettaFileToolExecutor(ToolExecutor):
         )
         return "Success"

-
-    """
-
+    def _validate_regex_pattern(self, pattern: str) -> None:
+        """Validate regex pattern to prevent catastrophic backtracking."""
+        if len(pattern) > self.MAX_REGEX_COMPLEXITY:
+            raise ValueError(f"Pattern too complex: {len(pattern)} chars > {self.MAX_REGEX_COMPLEXITY} limit")
+
+        # Test compile the pattern to catch syntax errors early
+        try:
+            re.compile(pattern, re.IGNORECASE | re.MULTILINE)
+        except re.error as e:
+            raise ValueError(f"Invalid regex pattern: {e}")
+
+    def _get_context_lines(self, text: str, file_metadata: FileMetadata, match_line_idx: int, total_lines: int) -> List[str]:
+        """Get context lines around a match using LineChunker."""
+        start_idx = max(0, match_line_idx - self.MAX_CONTEXT_LINES)
+        end_idx = min(total_lines, match_line_idx + self.MAX_CONTEXT_LINES + 1)
+
+        # Use LineChunker to get formatted lines with numbers
+        chunker = LineChunker()
+        context_lines = chunker.chunk_text(text, file_metadata=file_metadata, start=start_idx, end=end_idx, add_metadata=False)
+
+        # Add match indicator
+        formatted_lines = []
+        for line in context_lines:
+            if line and ":" in line:
+                line_num_str = line.split(":")[0].strip()
+                try:
+                    line_num = int(line_num_str)
+                    prefix = ">" if line_num == match_line_idx + 1 else " "
+                    formatted_lines.append(f"{prefix} {line}")
+                except ValueError:
+                    formatted_lines.append(f"  {line}")
+            else:
+                formatted_lines.append(f"  {line}")
+
+        return formatted_lines
+
+    async def grep(self, agent_state: AgentState, pattern: str, include: Optional[str] = None) -> str:
+        """
+        Search for pattern in all attached files and return matches with context.
+
+        Args:
+            agent_state: Current agent state
+            pattern: Regular expression pattern to search for
+            include: Optional pattern to filter filenames to include in the search
+
+        Returns:
+            Formatted string with search results, file names, line numbers, and context
+        """
+        if not pattern or not pattern.strip():
+            raise ValueError("Empty search pattern provided")
+
+        pattern = pattern.strip()
+        self._validate_regex_pattern(pattern)
+
+        # Validate include pattern if provided
+        include_regex = None
+        if include and include.strip():
+            include = include.strip()
+            # Convert glob pattern to regex if it looks like a glob pattern
+            if "*" in include and not any(c in include for c in ["^", "$", "(", ")", "[", "]", "{", "}", "\\", "+"]):
+                # Simple glob to regex conversion
+                include_pattern = include.replace(".", r"\.").replace("*", ".*").replace("?", ".")
+                if not include_pattern.endswith("$"):
+                    include_pattern += "$"
+            else:
+                include_pattern = include
+
+            self._validate_regex_pattern(include_pattern)
+            include_regex = re.compile(include_pattern, re.IGNORECASE)
+
+        # Get all attached files for this agent
+        file_agents = await self.files_agents_manager.list_files_for_agent(agent_id=agent_state.id, actor=self.actor)
+
+        if not file_agents:
+            return "No files are currently attached to search"
+
+        # Filter files by filename pattern if include is specified
+        if include_regex:
+            original_count = len(file_agents)
+            file_agents = [fa for fa in file_agents if include_regex.search(fa.file_name)]
+            if not file_agents:
+                return f"No files match the filename pattern '{include}' (filtered {original_count} files)"
+
+        # Compile regex pattern with appropriate flags
+        regex_flags = re.MULTILINE
+        regex_flags |= re.IGNORECASE
+
+        pattern_regex = re.compile(pattern, regex_flags)
+
+        results = []
+        total_matches = 0
+        total_content_size = 0
+        files_processed = 0
+        files_skipped = 0
+
+        # Use asyncio timeout to prevent hanging
+        async def _search_files():
+            nonlocal results, total_matches, total_content_size, files_processed, files_skipped
+
+            for file_agent in file_agents:
+                # Load file content
+                file = await self.file_manager.get_file_by_id(file_id=file_agent.file_id, actor=self.actor, include_content=True)
+
+                if not file or not file.content:
+                    files_skipped += 1
+                    self.logger.warning(f"Grep: Skipping file {file_agent.file_name} - no content available")
+                    continue
+
+                # Check individual file size
+                content_size = len(file.content.encode("utf-8"))
+                if content_size > self.MAX_FILE_SIZE_BYTES:
+                    files_skipped += 1
+                    self.logger.warning(
+                        f"Grep: Skipping file {file.file_name} - too large ({content_size:,} bytes > {self.MAX_FILE_SIZE_BYTES:,} limit)"
+                    )
+                    results.append(f"[SKIPPED] {file.file_name}: File too large ({content_size:,} bytes)")
+                    continue
+
+                # Check total content size across all files
+                total_content_size += content_size
+                if total_content_size > self.MAX_TOTAL_CONTENT_SIZE:
+                    files_skipped += 1
+                    self.logger.warning(
+                        f"Grep: Skipping file {file.file_name} - total content size limit exceeded ({total_content_size:,} bytes > {self.MAX_TOTAL_CONTENT_SIZE:,} limit)"
+                    )
+                    results.append(f"[SKIPPED] {file.file_name}: Total content size limit exceeded")
+                    break
+
+                files_processed += 1
+                file_matches = 0
+
+                # Use LineChunker to get all lines with proper formatting
+                chunker = LineChunker()
+                formatted_lines = chunker.chunk_text(file.content, file_metadata=file)
+
+                # Remove metadata header
+                if formatted_lines and formatted_lines[0].startswith("[Viewing"):
+                    formatted_lines = formatted_lines[1:]
+
+                # Search for matches in formatted lines
+                for formatted_line in formatted_lines:
+                    if total_matches >= self.MAX_TOTAL_MATCHES:
+                        results.append(f"[TRUNCATED] Maximum total matches ({self.MAX_TOTAL_MATCHES}) reached")
+                        return
+
+                    if file_matches >= self.MAX_MATCHES_PER_FILE:
+                        results.append(f"[TRUNCATED] {file.file_name}: Maximum matches per file ({self.MAX_MATCHES_PER_FILE}) reached")
+                        break
+
+                    # Extract line number and content from formatted line
+                    if ":" in formatted_line:
+                        try:
+                            line_parts = formatted_line.split(":", 1)
+                            line_num = int(line_parts[0].strip())
+                            line_content = line_parts[1].strip() if len(line_parts) > 1 else ""
+                        except (ValueError, IndexError):
+                            continue

-
-
-
+                        if pattern_regex.search(line_content):
+                            # Get context around the match (convert back to 0-based indexing)
+                            context_lines = self._get_context_lines(file.content, file, line_num - 1, len(file.content.splitlines()))
+
+                            # Format the match result
+                            match_header = f"\n=== {file.file_name}:{line_num} ==="
+                            match_content = "\n".join(context_lines)
+                            results.append(f"{match_header}\n{match_content}")
+
+                            file_matches += 1
+                            total_matches += 1
+
+                # Break if global limits reached
+                if total_matches >= self.MAX_TOTAL_MATCHES:
+                    break
+
+        # Execute with timeout
+        await asyncio.wait_for(_search_files(), timeout=self.GREP_TIMEOUT_SECONDS)
+
+        # Format final results
+        if not results or total_matches == 0:
+            summary = f"No matches found for pattern: '{pattern}'"
+            if include:
+                summary += f" in files matching '{include}'"
+            if files_skipped > 0:
+                summary += f" (searched {files_processed} files, skipped {files_skipped})"
+            return summary
+
+        # Add summary header
+        summary_parts = [f"Found {total_matches} matches"]
+        if files_processed > 0:
+            summary_parts.append(f"in {files_processed} files")
+        if files_skipped > 0:
+            summary_parts.append(f"({files_skipped} files skipped)")
+
+        summary = " ".join(summary_parts) + f" for pattern: '{pattern}'"
+        if include:
+            summary += f" in files matching '{include}'"
+
+        # Combine all results
+        formatted_results = [summary, "=" * len(summary)] + results
+
+        return "\n".join(formatted_results)
+
+    async def search_files(self, agent_state: AgentState, query: str, limit: int = 10) -> str:
+        """
+        Search for text within attached files using semantic search and return passages with their source filenames.
+
+        Args:
+            agent_state: Current agent state
+            query: Search query for semantic matching
+            limit: Maximum number of results to return (default: 10)
+
+        Returns:
+            Formatted string with search results in IDE/terminal style
+        """
+        if not query or not query.strip():
+            raise ValueError("Empty search query provided")
+
+        query = query.strip()
+
+        # Apply reasonable limit
+        limit = min(limit, self.MAX_TOTAL_MATCHES)
+
+        self.logger.info(f"Semantic search started for agent {agent_state.id} with query '{query}' (limit: {limit})")
+
+        # Get semantic search results
         passages = await self.agent_manager.list_source_passages_async(actor=self.actor, agent_id=agent_state.id, query_text=query)
-
+
+        if not passages:
+            return f"No semantic matches found for query: '{query}'"
+
+        # Limit results
+        passages = passages[:limit]
+
+        # Group passages by file for better organization
+        files_with_passages = {}
         for p in passages:
-            if p.file_name
-
-
-
-
-
+            file_name = p.file_name if p.file_name else "Unknown File"
+            if file_name not in files_with_passages:
+                files_with_passages[file_name] = []
+            files_with_passages[file_name].append(p)
+
+        results = []
+        total_passages = 0
+
+        for file_name, file_passages in files_with_passages.items():
+            for passage in file_passages:
+                total_passages += 1
+
+                # Format each passage with terminal-style header
+                passage_header = f"\n=== {file_name} (passage #{total_passages}) ==="
+
+                # Format the passage text with some basic formatting
+                passage_text = passage.text.strip()
+
+                # Format the passage text without line numbers
+                lines = passage_text.splitlines()
+                formatted_lines = []
+                for line in lines[:20]:  # Limit to first 20 lines per passage
+                    formatted_lines.append(f"  {line}")
+
+                if len(lines) > 20:
+                    formatted_lines.append(f"  ... [truncated {len(lines) - 20} more lines]")
+
+                passage_content = "\n".join(formatted_lines)
+                results.append(f"{passage_header}\n{passage_content}")
+
+        # Create summary header
+        file_count = len(files_with_passages)
+        summary = f"Found {total_passages} semantic matches in {file_count} file{'s' if file_count != 1 else ''} for query: '{query}'"
+
+        # Combine all results
+        formatted_results = [summary, "=" * len(summary)] + results
+
+        self.logger.info(f"Semantic search completed: {total_passages} matches across {file_count} files")
+
+        return "\n".join(formatted_results)
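The new grep tool's include filter converts simple glob patterns to regexes before compiling them (see the added lines above). A standalone sketch of that conversion, with an illustrative helper name, showing how "*.py" becomes ".*\.py$":

```python
import re


def glob_to_regex(include: str) -> re.Pattern:
    # Mirrors the conversion inside LettaFileToolExecutor.grep above (helper name is illustrative).
    if "*" in include and not any(c in include for c in ["^", "$", "(", ")", "[", "]", "{", "}", "\\", "+"]):
        pattern = include.replace(".", r"\.").replace("*", ".*").replace("?", ".")
        if not pattern.endswith("$"):
            pattern += "$"
    else:
        pattern = include  # already looks like a regex; used as-is
    return re.compile(pattern, re.IGNORECASE)


# "*.py" -> ".*\.py$": "utils.py" matches, "README.md" does not
assert glob_to_regex("*.py").search("utils.py")
assert not glob_to_regex("*.py").search("README.md")
```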
letta/services/user_manager.py
CHANGED
@@ -2,6 +2,7 @@ from typing import List, Optional

 from sqlalchemy import select, text

+from letta.constants import DEFAULT_ORG_ID
 from letta.orm.errors import NoResultFound
 from letta.orm.organization import Organization as OrganizationModel
 from letta.orm.user import User as UserModel
@@ -9,7 +10,6 @@ from letta.otel.tracing import trace_method
 from letta.schemas.user import User as PydanticUser
 from letta.schemas.user import UserUpdate
 from letta.server.db import db_registry
-from letta.services.organization_manager import OrganizationManager
 from letta.utils import enforce_types
 from letta.settings import settings

@@ -22,7 +22,7 @@ class UserManager:

     @enforce_types
     @trace_method
-    def create_default_user(self, org_id: str =
+    def create_default_user(self, org_id: str = DEFAULT_ORG_ID) -> PydanticUser:
         """Create the default user."""
         with db_registry.session() as session:
             # Make sure the org id exists
@@ -43,7 +43,7 @@ class UserManager:

     @enforce_types
     @trace_method
-    async def create_default_actor_async(self, org_id: str =
+    async def create_default_actor_async(self, org_id: str = DEFAULT_ORG_ID) -> PydanticUser:
         """Create the default user."""
         async with db_registry.async_session() as session:
             # Make sure the org id exists
@@ -191,19 +191,19 @@ class UserManager:
         try:
             return await self.get_actor_by_id_async(self.DEFAULT_USER_ID)
         except NoResultFound:
-            return await self.create_default_actor_async(org_id=
+            return await self.create_default_actor_async(org_id=DEFAULT_ORG_ID)

     @enforce_types
     @trace_method
     async def get_actor_or_default_async(self, actor_id: Optional[str] = None):
         """Fetch the user or default user asynchronously."""
-
-            return await self.get_default_actor_async()
+        target_id = actor_id or self.DEFAULT_USER_ID

         try:
-            return await self.get_actor_by_id_async(
+            return await self.get_actor_by_id_async(target_id)
         except NoResultFound:
-
+            user = await self.create_default_actor_async(org_id=DEFAULT_ORG_ID)
+            return user

     @enforce_types
     @trace_method
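With the changes above, the default organization ID now comes from letta.constants.DEFAULT_ORG_ID, and get_actor_or_default_async resolves the requested actor_id (falling back to DEFAULT_USER_ID) and creates the default actor when the lookup raises NoResultFound. A brief usage sketch; the wrapper name is illustrative:

```python
from typing import Optional

from letta.services.user_manager import UserManager


async def resolve_actor(actor_id: Optional[str] = None):
    # Per the diff: the given actor_id is looked up first; if it cannot be found,
    # the default actor is created under DEFAULT_ORG_ID and returned instead of raising.
    return await UserManager().get_actor_or_default_async(actor_id=actor_id)
```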
letta/system.py
CHANGED
@@ -218,11 +218,13 @@ def get_token_limit_warning():
     return json_dumps(packaged_message)


-def unpack_message(packed_message) -> str:
+def unpack_message(packed_message: str) -> str:
     """Take a packed message string and attempt to extract the inner message content"""

     try:
         message_json = json.loads(packed_message)
+        if type(message_json) is not dict:
+            return packed_message
     except:
         return packed_message

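The added check handles input that is valid JSON but not a JSON object: for example, json.loads("123") returns an int, which previously slipped past the try/except and into the dict handling that follows this hunk. A condensed sketch of the new behavior; the "message" field name is illustrative, since the rest of the function is outside this hunk:

```python
import json


def unpack_message_sketch(packed_message: str) -> str:
    # Condensed from the updated unpack_message in letta/system.py shown above.
    try:
        message_json = json.loads(packed_message)
        if type(message_json) is not dict:
            # e.g. json.loads("123") == 123: valid JSON, but not a packed message
            return packed_message
    except Exception:
        return packed_message
    # The real function goes on to pull the inner content out of the dict (not shown in the hunk).
    return message_json.get("message", packed_message)


print(unpack_message_sketch("123"))                # "123" returned unchanged
print(unpack_message_sketch('{"message": "hi"}'))  # "hi" (illustrative field name)
```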