letta-nightly 0.8.8.dev20250703104323__py3-none-any.whl → 0.8.8.dev20250703174903__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- letta/agent.py +1 -0
- letta/agents/base_agent.py +8 -2
- letta/agents/ephemeral_summary_agent.py +33 -33
- letta/agents/letta_agent.py +104 -53
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +8 -4
- letta/functions/function_sets/files.py +22 -7
- letta/functions/function_sets/multi_agent.py +34 -0
- letta/functions/types.py +1 -1
- letta/groups/helpers.py +8 -5
- letta/groups/sleeptime_multi_agent_v2.py +20 -15
- letta/interface.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +15 -8
- letta/interfaces/openai_chat_completions_streaming_interface.py +9 -6
- letta/interfaces/openai_streaming_interface.py +17 -11
- letta/llm_api/openai_client.py +2 -1
- letta/orm/agent.py +1 -0
- letta/orm/file.py +8 -2
- letta/orm/files_agents.py +36 -11
- letta/orm/mcp_server.py +3 -0
- letta/orm/source.py +2 -1
- letta/orm/step.py +3 -0
- letta/prompts/system/memgpt_v2_chat.txt +5 -8
- letta/schemas/agent.py +58 -23
- letta/schemas/embedding_config.py +3 -2
- letta/schemas/enums.py +4 -0
- letta/schemas/file.py +1 -0
- letta/schemas/letta_stop_reason.py +18 -0
- letta/schemas/mcp.py +15 -10
- letta/schemas/memory.py +35 -5
- letta/schemas/providers.py +11 -0
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +2 -1
- letta/server/rest_api/routers/v1/agents.py +320 -184
- letta/server/rest_api/routers/v1/groups.py +6 -2
- letta/server/rest_api/routers/v1/identities.py +6 -2
- letta/server/rest_api/routers/v1/jobs.py +49 -1
- letta/server/rest_api/routers/v1/sources.py +28 -19
- letta/server/rest_api/routers/v1/steps.py +7 -2
- letta/server/rest_api/routers/v1/tools.py +40 -9
- letta/server/rest_api/streaming_response.py +88 -0
- letta/server/server.py +61 -55
- letta/services/agent_manager.py +28 -16
- letta/services/file_manager.py +58 -9
- letta/services/file_processor/chunker/llama_index_chunker.py +2 -0
- letta/services/file_processor/embedder/openai_embedder.py +54 -10
- letta/services/file_processor/file_processor.py +59 -0
- letta/services/file_processor/parser/mistral_parser.py +2 -0
- letta/services/files_agents_manager.py +120 -2
- letta/services/helpers/agent_manager_helper.py +21 -4
- letta/services/job_manager.py +57 -6
- letta/services/mcp/base_client.py +1 -0
- letta/services/mcp_manager.py +13 -1
- letta/services/step_manager.py +14 -5
- letta/services/summarizer/summarizer.py +6 -22
- letta/services/tool_executor/builtin_tool_executor.py +0 -1
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/multi_agent_tool_executor.py +23 -0
- letta/services/tool_manager.py +7 -7
- letta/settings.py +11 -2
- letta/templates/summary_request_text.j2 +19 -0
- letta/utils.py +95 -14
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/METADATA +2 -2
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/RECORD +68 -67
- /letta/{agents/prompts → prompts/system}/summary_system_prompt.txt +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/entry_points.txt +0 -0
letta/services/agent_manager.py
CHANGED
@@ -16,9 +16,8 @@ from letta.constants import (
     BASE_VOICE_SLEEPTIME_CHAT_TOOLS,
     BASE_VOICE_SLEEPTIME_TOOLS,
     DEFAULT_TIMEZONE,
-
+    DEPRECATED_LETTA_TOOLS,
     FILES_TOOLS,
-    MULTI_AGENT_TOOLS,
 )
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time
@@ -79,6 +78,7 @@ from letta.services.helpers.agent_manager_helper import (
     build_passage_query,
     build_source_passage_query,
     calculate_base_tools,
+    calculate_multi_agent_tools,
     check_supports_structured_output,
     compile_system_message,
     derive_system_message,
@@ -271,7 +271,7 @@ class AgentManager:
         else:
             tool_names |= calculate_base_tools(is_v2=False)
         if agent_create.include_multi_agent_tools:
-            tool_names |=
+            tool_names |= calculate_multi_agent_tools()

         supplied_ids = set(agent_create.tool_ids or [])

@@ -294,7 +294,7 @@ class AgentManager:
         tool_rules = list(agent_create.tool_rules or [])
         if agent_create.include_base_tool_rules:
             for tn in tool_names:
-                if tn in {"send_message", "memory_finish_edits"}:
+                if tn in {"send_message", "send_message_to_agent_async", "memory_finish_edits"}:
                     tool_rules.append(TerminalToolRule(tool_name=tn))
                 elif tn in (BASE_TOOLS + BASE_MEMORY_TOOLS + BASE_SLEEPTIME_TOOLS):
                     tool_rules.append(ContinueToolRule(tool_name=tn))
@@ -438,10 +438,10 @@ class AgentManager:
         else:
             tool_names |= calculate_base_tools(is_v2=False)
         if agent_create.include_multi_agent_tools:
-            tool_names |=
+            tool_names |= calculate_multi_agent_tools()

         # take out the deprecated tool names
-        tool_names.difference_update(set(
+        tool_names.difference_update(set(DEPRECATED_LETTA_TOOLS))

         supplied_ids = set(agent_create.tool_ids or [])

@@ -479,7 +479,7 @@ class AgentManager:
         tool_rules = list(agent_create.tool_rules or [])
         if agent_create.include_base_tool_rules:
             for tn in tool_names:
-                if tn in {"send_message", "memory_finish_edits"}:
+                if tn in {"send_message", "send_message_to_agent_async", "memory_finish_edits"}:
                     tool_rules.append(TerminalToolRule(tool_name=tn))
                 elif tn in (BASE_TOOLS + BASE_MEMORY_TOOLS + BASE_MEMORY_TOOLS_V2 + BASE_SLEEPTIME_TOOLS):
                     tool_rules.append(ContinueToolRule(tool_name=tn))
@@ -1111,6 +1111,7 @@ class AgentManager:
         include_relationships: Optional[List[str]] = None,
     ) -> PydanticAgentState:
         """Fetch an agent by its ID."""
+
         async with db_registry.async_session() as session:
             agent = await AgentModel.read_async(db_session=session, identifier=agent_id, actor=actor)
             return await agent.to_pydantic_async(include_relationships=include_relationships)
@@ -1434,7 +1435,7 @@ class AgentManager:

         # note: we only update the system prompt if the core memory is changed
         # this means that the archival/recall memory statistics may be someout out of date
-        curr_memory_str = agent_state.memory.compile()
+        curr_memory_str = agent_state.memory.compile(sources=agent_state.sources)
         if curr_memory_str in curr_system_message_openai["content"] and not force:
             # NOTE: could this cause issues if a block is removed? (substring match would still work)
             logger.debug(
@@ -1461,6 +1462,7 @@ class AgentManager:
             timezone=agent_state.timezone,
             previous_message_count=num_messages - len(agent_state.message_ids),
             archival_memory_size=num_archival_memories,
+            sources=agent_state.sources,
         )

         diff = united_diff(curr_system_message_openai["content"], new_system_message_str)
@@ -1493,7 +1495,8 @@ class AgentManager:

         Updates to the memory header should *not* trigger a rebuild, since that will simply flood recall storage with excess messages
         """
-
+        # Get the current agent state
+        agent_state = await self.get_agent_by_id_async(agent_id=agent_id, include_relationships=["memory", "sources"], actor=actor)
         if not tool_rules_solver:
             tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)

@@ -1509,7 +1512,9 @@ class AgentManager:

         # note: we only update the system prompt if the core memory is changed
         # this means that the archival/recall memory statistics may be someout out of date
-        curr_memory_str = agent_state.memory.compile(
+        curr_memory_str = agent_state.memory.compile(
+            sources=agent_state.sources, tool_usage_rules=tool_rules_solver.compile_tool_rule_prompts()
+        )
         if curr_memory_str in curr_system_message_openai["content"] and not force:
             # NOTE: could this cause issues if a block is removed? (substring match would still work)
             logger.debug(
@@ -1529,6 +1534,7 @@ class AgentManager:
         num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=actor, agent_id=agent_id)

         # update memory (TODO: potentially update recall/archival stats separately)
+
         new_system_message_str = compile_system_message(
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
@@ -1537,6 +1543,7 @@ class AgentManager:
             previous_message_count=num_messages - len(agent_state.message_ids),
             archival_memory_size=num_archival_memories,
             tool_rules_solver=tool_rules_solver,
+            sources=agent_state.sources,
         )

         diff = united_diff(curr_system_message_openai["content"], new_system_message_str)
@@ -1654,7 +1661,7 @@ class AgentManager:
             # Update agent to only keep the system message
             agent.message_ids = [system_message_id]
             await agent.update_async(db_session=session, actor=actor)
-            agent_state = await agent.to_pydantic_async()
+            agent_state = await agent.to_pydantic_async(include_relationships=["sources"])

             # Optionally add default initial messages after the system message
             if add_default_initial_messages:
@@ -1688,9 +1695,13 @@ class AgentManager:
         Returns:
             modified (bool): whether the memory was updated
         """
-        agent_state = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+        agent_state = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor, include_relationships=["memory", "sources"])
         system_message = await self.message_manager.get_message_by_id_async(message_id=agent_state.message_ids[0], actor=actor)
-
+        temp_tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
+        if (
+            new_memory.compile(sources=agent_state.sources, tool_usage_rules=temp_tool_rules_solver.compile_tool_rule_prompts())
+            not in system_message.content[0].text
+        ):
             # update the blocks (LRW) in the DB
             for label in agent_state.memory.list_block_labels():
                 updated_value = new_memory.get_block(label).value
@@ -1730,7 +1741,9 @@ class AgentManager:
         agent_state.memory.blocks = [b for b in blocks if b is not None]

         if file_block_names:
-            file_blocks = await self.file_agent_manager.get_all_file_blocks_by_name(
+            file_blocks = await self.file_agent_manager.get_all_file_blocks_by_name(
+                file_names=file_block_names, agent_id=agent_state.id, actor=actor
+            )
             agent_state.memory.file_blocks = [b for b in file_blocks if b is not None]

         return agent_state
@@ -1772,8 +1785,7 @@ class AgentManager:
                 relationship_name="sources",
                 model_class=SourceModel,
                 item_ids=[source_id],
-
-                replace=False,  # Extend existing sources rather than replace
+                replace=False,
             )

             # Commit the changes
letta/services/file_manager.py
CHANGED
@@ -1,11 +1,13 @@
+import os
 from datetime import datetime
 from typing import List, Optional

-from sqlalchemy import select, update
+from sqlalchemy import func, select, update
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import selectinload

+from letta.constants import MAX_FILENAME_LENGTH
 from letta.orm.errors import NoResultFound
 from letta.orm.file import FileContent as FileContentModel
 from letta.orm.file import FileMetadata as FileMetadataModel
@@ -13,6 +15,7 @@ from letta.orm.sqlalchemy_base import AccessType
 from letta.otel.tracing import trace_method
 from letta.schemas.enums import FileProcessingStatus
 from letta.schemas.file import FileMetadata as PydanticFileMetadata
+from letta.schemas.source import Source as PydanticSource
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
 from letta.utils import enforce_types
@@ -58,11 +61,7 @@ class FileManager:
     @enforce_types
     @trace_method
     async def get_file_by_id(
-        self,
-        file_id: str,
-        actor: Optional[PydanticUser] = None,
-        *,
-        include_content: bool = False,
+        self, file_id: str, actor: Optional[PydanticUser] = None, *, include_content: bool = False, strip_directory_prefix: bool = False
     ) -> Optional[PydanticFileMetadata]:
         """Retrieve a file by its ID.

@@ -96,7 +95,7 @@ class FileManager:
                     actor=actor,
                 )

-                return await file_orm.to_pydantic_async(include_content=include_content)
+                return await file_orm.to_pydantic_async(include_content=include_content, strip_directory_prefix=strip_directory_prefix)

             except NoResultFound:
                 return None
@@ -193,7 +192,13 @@ class FileManager:
     @enforce_types
     @trace_method
     async def list_files(
-        self,
+        self,
+        source_id: str,
+        actor: PydanticUser,
+        after: Optional[str] = None,
+        limit: Optional[int] = 50,
+        include_content: bool = False,
+        strip_directory_prefix: bool = False,
     ) -> List[PydanticFileMetadata]:
         """List all files with optional pagination."""
         async with db_registry.async_session() as session:
@@ -207,7 +212,10 @@ class FileManager:
                 source_id=source_id,
                 query_options=options,
             )
-            return [
+            return [
+                await file.to_pydantic_async(include_content=include_content, strip_directory_prefix=strip_directory_prefix)
+                for file in files
+            ]

     @enforce_types
     @trace_method
@@ -217,3 +225,44 @@ class FileManager:
             file = await FileMetadataModel.read_async(db_session=session, identifier=file_id)
             await file.hard_delete_async(db_session=session, actor=actor)
             return await file.to_pydantic_async()
+
+    @enforce_types
+    @trace_method
+    async def generate_unique_filename(self, original_filename: str, source: PydanticSource, organization_id: str) -> str:
+        """
+        Generate a unique filename by checking for duplicates and adding a numeric suffix if needed.
+        Similar to how filesystems handle duplicates (e.g., file.txt, file (1).txt, file (2).txt).
+
+        Parameters:
+            original_filename (str): The original filename as uploaded.
+            source_id (str): Source ID to check for duplicates within.
+            organization_id (str): Organization ID to check for duplicates within.
+
+        Returns:
+            str: A unique filename with numeric suffix if needed.
+        """
+        base, ext = os.path.splitext(original_filename)
+
+        # Reserve space for potential suffix: " (999)" = 6 characters
+        max_base_length = MAX_FILENAME_LENGTH - len(ext) - 6
+        if len(base) > max_base_length:
+            base = base[:max_base_length]
+            original_filename = f"{base}{ext}"
+
+        async with db_registry.async_session() as session:
+            # Count existing files with the same original_file_name in this source
+            query = select(func.count(FileMetadataModel.id)).where(
+                FileMetadataModel.original_file_name == original_filename,
+                FileMetadataModel.source_id == source.id,
+                FileMetadataModel.organization_id == organization_id,
+                FileMetadataModel.is_deleted == False,
+            )
+            result = await session.execute(query)
+            count = result.scalar() or 0
+
+            if count == 0:
+                # No duplicates, return original filename with source.name
+                return f"{source.name}/{original_filename}"
+            else:
+                # Add numeric suffix
+                return f"{source.name}/{base}_({count}){ext}"
letta/services/file_processor/chunker/llama_index_chunker.py
CHANGED
@@ -3,6 +3,7 @@ from typing import List, Tuple
 from mistralai import OCRPageObject

 from letta.log import get_logger
+from letta.otel.tracing import trace_method

 logger = get_logger(__name__)

@@ -19,6 +20,7 @@ class LlamaIndexChunker:
         self.parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

     # TODO: Make this more general beyond Mistral
+    @trace_method
     def chunk_text(self, page: OCRPageObject) -> List[str]:
         """Chunk text using LlamaIndex splitter"""
         try:
letta/services/file_processor/embedder/openai_embedder.py
CHANGED
@@ -1,10 +1,12 @@
 import asyncio
-from typing import List, Optional, Tuple
-
-import openai
+from typing import List, Optional, Tuple, cast

+from letta.llm_api.llm_client import LLMClient
+from letta.llm_api.openai_client import OpenAIClient
 from letta.log import get_logger
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderType
 from letta.schemas.passage import Passage
 from letta.schemas.user import User
 from letta.settings import model_settings
@@ -24,45 +26,83 @@ class OpenAIEmbedder:
         self.embedding_config = embedding_config or self.default_embedding_config

         # TODO: Unify to global OpenAI client
-        self.client =
-
+        self.client: OpenAIClient = cast(
+            OpenAIClient,
+            LLMClient.create(
+                provider_type=ProviderType.openai,
+                put_inner_thoughts_first=False,
+                actor=None,  # Not necessary
+            ),
+        )
         self.max_concurrent_requests = 20

+    @trace_method
     async def _embed_batch(self, batch: List[str], batch_indices: List[int]) -> List[Tuple[int, List[float]]]:
         """Embed a single batch and return embeddings with their original indices"""
-
-
+        log_event(
+            "embedder.batch_started",
+            {
+                "batch_size": len(batch),
+                "model": self.embedding_config.embedding_model,
+                "embedding_endpoint_type": self.embedding_config.embedding_endpoint_type,
+            },
+        )
+        embeddings = await self.client.request_embeddings(inputs=batch, embedding_config=self.embedding_config)
+        log_event("embedder.batch_completed", {"batch_size": len(batch), "embeddings_generated": len(embeddings)})
+        return [(idx, e) for idx, e in zip(batch_indices, embeddings)]

+    @trace_method
     async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
         """Generate embeddings for chunks with batching and concurrent processing"""
         if not chunks:
             return []

         logger.info(f"Generating embeddings for {len(chunks)} chunks using {self.embedding_config.embedding_model}")
+        log_event(
+            "embedder.generation_started",
+            {
+                "total_chunks": len(chunks),
+                "model": self.embedding_config.embedding_model,
+                "embedding_endpoint_type": self.embedding_config.embedding_endpoint_type,
+                "batch_size": self.embedding_config.batch_size,
+                "file_id": file_id,
+                "source_id": source_id,
+            },
+        )

         # Create batches with their original indices
         batches = []
         batch_indices = []

-        for i in range(0, len(chunks), self.
-            batch = chunks[i : i + self.
-            indices = list(range(i, min(i + self.
+        for i in range(0, len(chunks), self.embedding_config.batch_size):
+            batch = chunks[i : i + self.embedding_config.batch_size]
+            indices = list(range(i, min(i + self.embedding_config.batch_size, len(chunks))))
             batches.append(batch)
             batch_indices.append(indices)

         logger.info(f"Processing {len(batches)} batches")
+        log_event(
+            "embedder.batching_completed",
+            {"total_batches": len(batches), "batch_size": self.embedding_config.batch_size, "total_chunks": len(chunks)},
+        )

         async def process(batch: List[str], indices: List[int]):
             try:
                 return await self._embed_batch(batch, indices)
             except Exception as e:
                 logger.error(f"Failed to embed batch of size {len(batch)}: {str(e)}")
+                log_event("embedder.batch_failed", {"batch_size": len(batch), "error": str(e), "error_type": type(e).__name__})
                 raise

         # Execute all batches concurrently with semaphore control
         tasks = [process(batch, indices) for batch, indices in zip(batches, batch_indices)]

+        log_event(
+            "embedder.concurrent_processing_started",
+            {"concurrent_tasks": len(tasks), "max_concurrent_requests": self.max_concurrent_requests},
+        )
         results = await asyncio.gather(*tasks)
+        log_event("embedder.concurrent_processing_completed", {"batches_processed": len(results)})

         # Flatten results and sort by original index
         indexed_embeddings = []
@@ -86,4 +126,8 @@ class OpenAIEmbedder:
         passages.append(passage)

         logger.info(f"Successfully generated {len(passages)} embeddings")
+        log_event(
+            "embedder.generation_completed",
+            {"passages_created": len(passages), "total_chunks_processed": len(chunks), "file_id": file_id, "source_id": source_id},
+        )
         return passages
letta/services/file_processor/file_processor.py
CHANGED
@@ -1,6 +1,7 @@
 from typing import List

 from letta.log import get_logger
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import FileProcessingStatus
 from letta.schemas.file import FileMetadata
@@ -42,6 +43,7 @@ class FileProcessor:
         self.actor = actor

     # TODO: Factor this function out of SyncServer
+    @trace_method
     async def process(
         self, server: SyncServer, agent_states: List[AgentState], source_id: str, content: bytes, file_metadata: FileMetadata
     ) -> List[Passage]:
@@ -50,6 +52,15 @@ class FileProcessor:
         # Create file as early as possible with no content
         file_metadata.processing_status = FileProcessingStatus.PARSING  # Parsing now
         file_metadata = await self.file_manager.create_file(file_metadata, self.actor)
+        log_event(
+            "file_processor.file_created",
+            {
+                "file_id": str(file_metadata.id),
+                "filename": filename,
+                "file_type": file_metadata.file_type,
+                "status": FileProcessingStatus.PARSING.value,
+            },
+        )

         try:
             # Ensure we're working with bytes
@@ -57,13 +68,22 @@ class FileProcessor:
                 content = content.encode("utf-8")

             if len(content) > self.max_file_size:
+                log_event(
+                    "file_processor.size_limit_exceeded",
+                    {"filename": filename, "file_size": len(content), "max_file_size": self.max_file_size},
+                )
                 raise ValueError(f"PDF size exceeds maximum allowed size of {self.max_file_size} bytes")

             logger.info(f"Starting OCR extraction for {filename}")
+            log_event("file_processor.ocr_started", {"filename": filename, "file_size": len(content), "mime_type": file_metadata.file_type})
             ocr_response = await self.file_parser.extract_text(content, mime_type=file_metadata.file_type)

             # update file with raw text
             raw_markdown_text = "".join([page.markdown for page in ocr_response.pages])
+            log_event(
+                "file_processor.ocr_completed",
+                {"filename": filename, "pages_extracted": len(ocr_response.pages), "text_length": len(raw_markdown_text)},
+            )
             file_metadata = await self.file_manager.update_file_status(
                 file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.EMBEDDING
             )
@@ -77,27 +97,56 @@ class FileProcessor:
             )

             if not ocr_response or len(ocr_response.pages) == 0:
+                log_event(
+                    "file_processor.ocr_no_text",
+                    {
+                        "filename": filename,
+                        "ocr_response_empty": not ocr_response,
+                        "pages_count": len(ocr_response.pages) if ocr_response else 0,
+                    },
+                )
                 raise ValueError("No text extracted from PDF")

             logger.info("Chunking extracted text")
+            log_event("file_processor.chunking_started", {"filename": filename, "pages_to_process": len(ocr_response.pages)})
             all_passages = []

             for page in ocr_response.pages:
                 chunks = self.text_chunker.chunk_text(page)

                 if not chunks:
+                    log_event("file_processor.chunking_failed", {"filename": filename, "page_index": ocr_response.pages.index(page)})
                     raise ValueError("No chunks created from text")

                 passages = await self.embedder.generate_embedded_passages(
                     file_id=file_metadata.id, source_id=source_id, chunks=chunks, actor=self.actor
                 )
+                log_event(
+                    "file_processor.page_processed",
+                    {
+                        "filename": filename,
+                        "page_index": ocr_response.pages.index(page),
+                        "chunks_created": len(chunks),
+                        "passages_generated": len(passages),
+                    },
+                )
                 all_passages.extend(passages)

             all_passages = await self.passage_manager.create_many_source_passages_async(
                 passages=all_passages, file_metadata=file_metadata, actor=self.actor
             )
+            log_event("file_processor.passages_created", {"filename": filename, "total_passages": len(all_passages)})

             logger.info(f"Successfully processed {filename}: {len(all_passages)} passages")
+            log_event(
+                "file_processor.processing_completed",
+                {
+                    "filename": filename,
+                    "file_id": str(file_metadata.id),
+                    "total_passages": len(all_passages),
+                    "status": FileProcessingStatus.COMPLETED.value,
+                },
+            )

             # update job status
             await self.file_manager.update_file_status(
@@ -108,6 +157,16 @@ class FileProcessor:

         except Exception as e:
             logger.error(f"File processing failed for {filename}: {str(e)}")
+            log_event(
+                "file_processor.processing_failed",
+                {
+                    "filename": filename,
+                    "file_id": str(file_metadata.id),
+                    "error": str(e),
+                    "error_type": type(e).__name__,
+                    "status": FileProcessingStatus.ERROR.value,
+                },
+            )
             await self.file_manager.update_file_status(
                 file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.ERROR, error_message=str(e)
             )
letta/services/file_processor/parser/mistral_parser.py
CHANGED
@@ -3,6 +3,7 @@ import base64
 from mistralai import Mistral, OCRPageObject, OCRResponse, OCRUsageInfo

 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.services.file_processor.file_types import is_simple_text_mime_type
 from letta.services.file_processor.parser.base_parser import FileParser
 from letta.settings import settings
@@ -17,6 +18,7 @@ class MistralFileParser(FileParser):
         self.model = model

     # TODO: Make this return something general if we add more file parsers
+    @trace_method
     async def extract_text(self, content: bytes, mime_type: str) -> OCRResponse:
         """Extract text using Mistral OCR or shortcut for plain text."""
         try: