letta-nightly 0.8.9.dev20250706104157__py3-none-any.whl → 0.8.10.dev20250707035305__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic; see the registry's advisory for more details.
- letta/__init__.py +1 -1
- letta/agents/letta_agent.py +24 -7
- letta/agents/voice_agent.py +1 -1
- letta/agents/voice_sleeptime_agent.py +1 -1
- letta/constants.py +7 -0
- letta/functions/function_sets/files.py +2 -1
- letta/functions/functions.py +0 -1
- letta/helpers/pinecone_utils.py +143 -0
- letta/llm_api/openai_client.py +4 -0
- letta/orm/file.py +4 -0
- letta/prompts/gpt_summarize.py +4 -6
- letta/schemas/file.py +6 -0
- letta/schemas/letta_base.py +4 -4
- letta/schemas/letta_message.py +15 -7
- letta/schemas/letta_message_content.py +15 -15
- letta/schemas/llm_config.py +4 -0
- letta/schemas/message.py +35 -31
- letta/schemas/providers.py +17 -10
- letta/server/rest_api/app.py +11 -0
- letta/server/rest_api/routers/v1/agents.py +19 -0
- letta/server/rest_api/routers/v1/sources.py +36 -7
- letta/services/file_manager.py +8 -2
- letta/services/file_processor/embedder/base_embedder.py +16 -0
- letta/services/file_processor/embedder/openai_embedder.py +3 -2
- letta/services/file_processor/embedder/pinecone_embedder.py +74 -0
- letta/services/file_processor/file_processor.py +22 -22
- letta/services/job_manager.py +0 -4
- letta/services/source_manager.py +0 -1
- letta/services/summarizer/enums.py +1 -0
- letta/services/summarizer/summarizer.py +237 -6
- letta/services/tool_executor/files_tool_executor.py +109 -3
- letta/services/user_manager.py +0 -1
- letta/settings.py +13 -1
- letta/system.py +16 -0
- {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/METADATA +2 -1
- {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/RECORD +39 -36
- {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py
CHANGED

@@ -84,11 +84,11 @@ class MessageCreate(BaseModel):
         description="The content of the message.",
         json_schema_extra=get_letta_message_content_union_str_json_schema(),
     )
-    name: Optional[str] = Field(None, description="The name of the participant.")
-    otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
-    sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
-    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
-    group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
+    name: Optional[str] = Field(default=None, description="The name of the participant.")
+    otid: Optional[str] = Field(default=None, description="The offline threading id associated with this message")
+    sender_id: Optional[str] = Field(default=None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(default=None, description="The id of the LLMBatchItem that this message is associated with")
+    group_id: Optional[str] = Field(default=None, description="The multi-agent group that the message was sent in")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)

@@ -101,9 +101,9 @@ class MessageCreate(BaseModel):
 class MessageUpdate(BaseModel):
     """Request to update a message"""
 
-    role: Optional[MessageRole] = Field(None, description="The role of the participant.")
+    role: Optional[MessageRole] = Field(default=None, description="The role of the participant.")
     content: Optional[Union[str, List[LettaMessageContentUnion]]] = Field(
-        None,
+        default=None,
         description="The content of the message.",
         json_schema_extra=get_letta_message_content_union_str_json_schema(),
     )

@@ -112,11 +112,11 @@ class MessageUpdate(BaseModel):
     # agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
     # NOTE: we probably shouldn't allow updating the model field, otherwise this loses meaning
     # model: Optional[str] = Field(None, description="The model used to make the function call.")
-    name: Optional[str] = Field(None, description="The name of the participant.")
+    name: Optional[str] = Field(default=None, description="The name of the participant.")
     # NOTE: we probably shouldn't allow updating the created_at field, right?
     # created_at: Optional[datetime] = Field(None, description="The time the message was created.")
-    tool_calls: Optional[List[OpenAIToolCall,]] = Field(None, description="The list of tool calls requested.")
-    tool_call_id: Optional[str] = Field(None, description="The id of the tool call.")
+    tool_calls: Optional[List[OpenAIToolCall,]] = Field(default=None, description="The list of tool calls requested.")
+    tool_call_id: Optional[str] = Field(default=None, description="The id of the tool call.")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)

@@ -150,28 +150,28 @@ class Message(BaseMessage):
     """
 
     id: str = BaseMessage.generate_id_field()
-    organization_id: Optional[str] = Field(None, description="The unique identifier of the organization.")
-    agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
-    model: Optional[str] = Field(None, description="The model used to make the function call.")
+    organization_id: Optional[str] = Field(default=None, description="The unique identifier of the organization.")
+    agent_id: Optional[str] = Field(default=None, description="The unique identifier of the agent.")
+    model: Optional[str] = Field(default=None, description="The model used to make the function call.")
     # Basic OpenAI-style fields
     role: MessageRole = Field(..., description="The role of the participant.")
-    content: Optional[List[LettaMessageContentUnion]] = Field(None, description="The content of the message.")
+    content: Optional[List[LettaMessageContentUnion]] = Field(default=None, description="The content of the message.")
     # NOTE: in OpenAI, this field is only used for roles 'user', 'assistant', and 'function' (now deprecated). 'tool' does not use it.
     name: Optional[str] = Field(
-        None,
+        default=None,
         description="For role user/assistant: the (optional) name of the participant. For role tool/function: the name of the function called.",
     )
     tool_calls: Optional[List[OpenAIToolCall]] = Field(
-        None, description="The list of tool calls requested. Only applicable for role assistant."
+        default=None, description="The list of tool calls requested. Only applicable for role assistant."
     )
-    tool_call_id: Optional[str] = Field(None, description="The ID of the tool call. Only applicable for role tool.")
+    tool_call_id: Optional[str] = Field(default=None, description="The ID of the tool call. Only applicable for role tool.")
     # Extras
-    step_id: Optional[str] = Field(None, description="The id of the step that this message was created in.")
-    otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
-    tool_returns: Optional[List[ToolReturn]] = Field(None, description="Tool execution return information for prior tool calls")
-    group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
-    sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
-    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
+    step_id: Optional[str] = Field(default=None, description="The id of the step that this message was created in.")
+    otid: Optional[str] = Field(default=None, description="The offline threading id associated with this message")
+    tool_returns: Optional[List[ToolReturn]] = Field(default=None, description="Tool execution return information for prior tool calls")
+    group_id: Optional[str] = Field(default=None, description="The multi-agent group that the message was sent in")
+    sender_id: Optional[str] = Field(default=None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(default=None, description="The id of the LLMBatchItem that this message is associated with")
     # This overrides the optional base orm schema, created_at MUST exist on all messages objects
     created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")

@@ -482,7 +482,9 @@ class Message(BaseMessage):
         # TODO(caren) implicit support for only non-parts/list content types
         if openai_message_dict["content"] is not None and type(openai_message_dict["content"]) is not str:
             raise ValueError(f"Invalid content type: {type(openai_message_dict['content'])}")
-        content
+        content: List[LettaMessageContentUnion] = (
+            [TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else []
+        )
 
         # TODO(caren) bad assumption here that "reasoning_content" always comes before "redacted_reasoning_content"
         if "reasoning_content" in openai_message_dict and openai_message_dict["reasoning_content"]:

@@ -491,14 +493,16 @@ class Message(BaseMessage):
                     reasoning=openai_message_dict["reasoning_content"],
                     is_native=True,
                     signature=(
-                        openai_message_dict["reasoning_content_signature"]
+                        str(openai_message_dict["reasoning_content_signature"])
+                        if "reasoning_content_signature" in openai_message_dict
+                        else None
                     ),
                 ),
             )
         if "redacted_reasoning_content" in openai_message_dict and openai_message_dict["redacted_reasoning_content"]:
             content.append(
                 RedactedReasoningContent(
-                    data=openai_message_dict["redacted_reasoning_content"]
+                    data=str(openai_message_dict["redacted_reasoning_content"]),
                 ),
             )
         if "omitted_reasoning_content" in openai_message_dict and openai_message_dict["omitted_reasoning_content"]:

@@ -694,7 +698,7 @@ class Message(BaseMessage):
         elif self.role == "assistant":
             assert self.tool_calls is not None or text_content is not None
             openai_message = {
-                "content": None if put_inner_thoughts_in_kwargs else text_content,
+                "content": None if (put_inner_thoughts_in_kwargs and self.tool_calls is not None) else text_content,
                 "role": self.role,
             }

@@ -733,7 +737,7 @@ class Message(BaseMessage):
             else:
                 warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
 
-        if parse_content_parts:
+        if parse_content_parts and self.content is not None:
             for content in self.content:
                 if isinstance(content, ReasoningContent):
                     openai_message["reasoning_content"] = content.reasoning

@@ -819,7 +823,7 @@ class Message(BaseMessage):
         }
         content = []
         # COT / reasoning / thinking
-        if len(self.content) > 1:
+        if self.content is not None and len(self.content) > 1:
             for content_part in self.content:
                 if isinstance(content_part, ReasoningContent):
                     content.append(

@@ -1154,6 +1158,6 @@ class Message(BaseMessage):
 
 class ToolReturn(BaseModel):
     status: Literal["success", "error"] = Field(..., description="The status of the tool call")
-    stdout: Optional[List[str]] = Field(None, description="Captured stdout (e.g. prints, logs) from the tool invocation")
-    stderr: Optional[List[str]] = Field(None, description="Captured stderr from the tool invocation")
+    stdout: Optional[List[str]] = Field(default=None, description="Captured stdout (e.g. prints, logs) from the tool invocation")
+    stderr: Optional[List[str]] = Field(default=None, description="Captured stderr from the tool invocation")
     # func_return: Optional[Any] = Field(None, description="The function return object")
letta/schemas/providers.py
CHANGED

@@ -324,18 +324,25 @@ class OpenAIProvider(Provider):
         else:
             handle = self.get_handle(model_name)
 
-                    provider_category=self.provider_category,
-                )
+            llm_config = LLMConfig(
+                model=model_name,
+                model_endpoint_type="openai",
+                model_endpoint=self.base_url,
+                context_window=context_window_size,
+                handle=handle,
+                provider_name=self.name,
+                provider_category=self.provider_category,
             )
 
+            # gpt-4o-mini has started to regress with pretty bad emoji spam loops
+            # this is to counteract that
+            if "gpt-4o-mini" in model_name:
+                llm_config.frequency_penalty = 1.0
+            if "gpt-4.1-mini" in model_name:
+                llm_config.frequency_penalty = 1.0
+
+            configs.append(llm_config)
+
         # for OpenAI, sort in reverse order
         if self.base_url == "https://api.openai.com/v1":
             # alphnumeric sort
letta/server/rest_api/app.py
CHANGED

@@ -17,6 +17,7 @@ from letta.__init__ import __version__ as letta_version
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
 from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError
+from letta.helpers.pinecone_utils import get_pinecone_indices, should_use_pinecone, upsert_pinecone_indices
 from letta.jobs.scheduler import start_scheduler_with_leader_election
 from letta.log import get_logger
 from letta.orm.errors import DatabaseTimeoutError, ForeignKeyConstraintViolationError, NoResultFound, UniqueConstraintViolationError

@@ -127,6 +128,16 @@ async def lifespan(app_: FastAPI):
     db_registry.initialize_async()
     logger.info(f"[Worker {worker_id}] Database connections initialized")
 
+    if should_use_pinecone():
+        if settings.upsert_pinecone_indices:
+            logger.info(f"[Worker {worker_id}] Upserting pinecone indices: {get_pinecone_indices()}")
+            await upsert_pinecone_indices()
+            logger.info(f"[Worker {worker_id}] Upserted pinecone indices")
+        else:
+            logger.info(f"[Worker {worker_id}] Enabled pinecone")
+    else:
+        logger.info(f"[Worker {worker_id}] Disabled pinecone")
+
     logger.info(f"[Worker {worker_id}] Starting scheduler with leader election")
     global server
     try:
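The new lifespan logic only touches Pinecone when it is enabled, and only upserts indices when the corresponding setting is on. A minimal FastAPI lifespan sketch of that startup hook; `should_use_pinecone` and `upsert_pinecone_indices` are stand-ins for the helpers in `letta.helpers.pinecone_utils`, not their real implementations:

```python
from contextlib import asynccontextmanager

from fastapi import FastAPI


def should_use_pinecone() -> bool:
    # stand-in: the real helper is driven by settings / API keys
    return False


async def upsert_pinecone_indices() -> None:
    # stand-in: the real helper creates or updates the Pinecone indices
    ...


@asynccontextmanager
async def lifespan(app: FastAPI):
    # optional external-index setup before the app starts serving requests
    if should_use_pinecone():
        await upsert_pinecone_indices()
    yield  # the application runs while the context is held


app = FastAPI(lifespan=lifespan)
```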
letta/server/rest_api/routers/v1/agents.py
CHANGED

@@ -38,6 +38,7 @@ from letta.schemas.user import User
 from letta.serialize_schemas.pydantic_agent_schema import AgentSchema
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
+from letta.services.summarizer.enums import SummarizationMode
 from letta.services.telemetry_manager import NoopTelemetryManager
 from letta.settings import settings
 from letta.utils import safe_create_task

@@ -750,6 +751,12 @@ async def send_message(
         step_manager=server.step_manager,
         telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
         current_run_id=run.id,
+        # summarizer settings to be added here
+        summarizer_mode=(
+            SummarizationMode.STATIC_MESSAGE_BUFFER
+            if agent.agent_type == AgentType.voice_convo_agent
+            else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
+        ),
     )
 
     result = await agent_loop.step(

@@ -878,6 +885,12 @@ async def send_message_streaming(
         step_manager=server.step_manager,
         telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
         current_run_id=run.id,
+        # summarizer settings to be added here
+        summarizer_mode=(
+            SummarizationMode.STATIC_MESSAGE_BUFFER
+            if agent.agent_type == AgentType.voice_convo_agent
+            else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
+        ),
     )
     from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode
 
@@ -1014,6 +1027,12 @@ async def _process_message_background(
         actor=actor,
         step_manager=server.step_manager,
         telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
+        # summarizer settings to be added here
+        summarizer_mode=(
+            SummarizationMode.STATIC_MESSAGE_BUFFER
+            if agent.agent_type == AgentType.voice_convo_agent
+            else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
+        ),
     )
 
     result = await agent_loop.step(
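All three entry points now pass a `summarizer_mode` chosen from the agent type: voice conversation agents keep a static message buffer, everything else partially evicts. A toy sketch of that selection; the enum member values below are made up for illustration, since only the member names appear in the diff:

```python
from enum import Enum


class SummarizationMode(str, Enum):
    # member names mirror letta.services.summarizer.enums; the string values are guesses
    STATIC_MESSAGE_BUFFER = "static_message_buffer"
    PARTIAL_EVICT_MESSAGE_BUFFER = "partial_evict_message_buffer"


def pick_summarizer_mode(agent_type: str) -> SummarizationMode:
    # voice agents keep a fixed-size buffer; other agents evict part of the buffer
    return (
        SummarizationMode.STATIC_MESSAGE_BUFFER
        if agent_type == "voice_convo_agent"
        else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
    )


print(pick_summarizer_mode("voice_convo_agent"))
```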
letta/server/rest_api/routers/v1/sources.py
CHANGED

@@ -9,6 +9,12 @@ from fastapi import APIRouter, Depends, Header, HTTPException, Query, UploadFile
 from starlette import status
 
 import letta.constants as constants
+from letta.helpers.pinecone_utils import (
+    delete_file_records_from_pinecone_index,
+    delete_source_records_from_pinecone_index,
+    list_pinecone_index_for_files,
+    should_use_pinecone,
+)
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState

@@ -22,6 +28,7 @@ from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
 from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
 from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
+from letta.services.file_processor.embedder.pinecone_embedder import PineconeEmbedder
 from letta.services.file_processor.file_processor import FileProcessor
 from letta.services.file_processor.file_types import (
     get_allowed_media_types,

@@ -163,6 +170,10 @@ async def delete_source(
     files = await server.file_manager.list_files(source_id, actor)
     file_ids = [f.id for f in files]
 
+    if should_use_pinecone():
+        logger.info(f"Deleting source {source_id} from pinecone index")
+        await delete_source_records_from_pinecone_index(source_id=source_id, actor=actor)
+
     for agent_state in agent_states:
         await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)

@@ -326,16 +337,24 @@ async def get_file_metadata(
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
 
-    # Verify the source exists and user has access
-    source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor)
-    if not source:
-        raise HTTPException(status_code=404, detail=f"Source with id={source_id} not found.")
-
     # Get file metadata using the file manager
     file_metadata = await server.file_manager.get_file_by_id(
         file_id=file_id, actor=actor, include_content=include_content, strip_directory_prefix=True
     )
 
+    if should_use_pinecone() and not file_metadata.is_processing_terminal():
+        ids = await list_pinecone_index_for_files(file_id=file_id, actor=actor, limit=file_metadata.total_chunks)
+        logger.info(f"Embedded chunks {len(ids)}/{file_metadata.total_chunks} for {file_id} in organization {actor.organization_id}")
+
+        if len(ids) != file_metadata.chunks_embedded or len(ids) == file_metadata.total_chunks:
+            if len(ids) != file_metadata.total_chunks:
+                file_status = file_metadata.processing_status
+            else:
+                file_status = FileProcessingStatus.COMPLETED
+            await server.file_manager.update_file_status(
+                file_id=file_metadata.id, actor=actor, chunks_embedded=len(ids), processing_status=file_status
+            )
+
     if not file_metadata:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")

@@ -364,6 +383,10 @@ async def delete_file_from_source(
 
     await server.remove_file_from_context_windows(source_id=source_id, file_id=deleted_file.id, actor=actor)
 
+    if should_use_pinecone():
+        logger.info(f"Deleting file {file_id} from pinecone index")
+        await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
+
     asyncio.create_task(sleeptime_document_ingest_async(server, source_id, actor, clear_history=True))
     if deleted_file is None:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")

@@ -402,8 +425,14 @@ async def load_file_to_source_cloud(
 ):
     file_processor = MistralFileParser()
     text_chunker = LlamaIndexChunker(chunk_size=embedding_config.embedding_chunk_size)
+    using_pinecone = should_use_pinecone()
+    if using_pinecone:
+        embedder = PineconeEmbedder()
+    else:
+        embedder = OpenAIEmbedder(embedding_config=embedding_config)
+    file_processor = FileProcessor(
+        file_parser=file_processor, text_chunker=text_chunker, embedder=embedder, actor=actor, using_pinecone=using_pinecone
+    )
     await file_processor.process(
         server=server, agent_states=agent_states, source_id=source_id, content=content, file_metadata=file_metadata
     )
letta/services/file_manager.py
CHANGED

@@ -109,15 +109,17 @@ class FileManager:
         actor: PydanticUser,
         processing_status: Optional[FileProcessingStatus] = None,
         error_message: Optional[str] = None,
+        total_chunks: Optional[int] = None,
+        chunks_embedded: Optional[int] = None,
     ) -> PydanticFileMetadata:
         """
-        Update processing_status and/or
+        Update processing_status, error_message, total_chunks, and/or chunks_embedded on a FileMetadata row.
 
         * 1st round-trip → UPDATE
         * 2nd round-trip → SELECT fresh row (same as read_async)
         """
 
-        if processing_status is None and error_message is None:
+        if processing_status is None and error_message is None and total_chunks is None and chunks_embedded is None:
             raise ValueError("Nothing to update")
 
         values: dict[str, object] = {"updated_at": datetime.utcnow()}

@@ -125,6 +127,10 @@ class FileManager:
             values["processing_status"] = processing_status
         if error_message is not None:
             values["error_message"] = error_message
+        if total_chunks is not None:
+            values["total_chunks"] = total_chunks
+        if chunks_embedded is not None:
+            values["chunks_embedded"] = chunks_embedded
 
         async with db_registry.async_session() as session:
             # Fast in-place update – no ORM hydration
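`update_file_status` now accepts `total_chunks` and `chunks_embedded` alongside the status and error message, and still rejects a call that provides none of them. A standalone sketch of that widened partial-update guard (a plain dict in place of the SQLAlchemy update):

```python
from datetime import datetime, timezone
from typing import Optional


def build_update_values(
    processing_status: Optional[str] = None,
    error_message: Optional[str] = None,
    total_chunks: Optional[int] = None,
    chunks_embedded: Optional[int] = None,
) -> dict:
    """Collect only the columns that were actually provided; refuse a no-op update."""
    if processing_status is None and error_message is None and total_chunks is None and chunks_embedded is None:
        raise ValueError("Nothing to update")
    values: dict = {"updated_at": datetime.now(timezone.utc)}
    for key, val in (
        ("processing_status", processing_status),
        ("error_message", error_message),
        ("total_chunks", total_chunks),
        ("chunks_embedded", chunks_embedded),
    ):
        if val is not None:
            values[key] = val
    return values


print(build_update_values(total_chunks=42, chunks_embedded=0))
```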
letta/services/file_processor/embedder/base_embedder.py
ADDED

@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import List
+
+from letta.log import get_logger
+from letta.schemas.passage import Passage
+from letta.schemas.user import User
+
+logger = get_logger(__name__)
+
+
+class BaseEmbedder(ABC):
+    """Abstract base class for embedding generation"""
+
+    @abstractmethod
+    async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
+        """Generate embeddings for chunks with batching and concurrent processing"""
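The new abstract base gives every embedder the same async entry point, so the file processor can hold a `BaseEmbedder` without caring whether vectors go to the local passage store or to Pinecone. A toy subclass, stripped of letta's `Passage`/`User` types, to show the contract:

```python
import asyncio
from abc import ABC, abstractmethod
from typing import Any, List


class BaseEmbedder(ABC):  # same shape as the new letta base class, minus letta-specific types
    @abstractmethod
    async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: Any) -> List[dict]:
        """Generate embeddings for chunks with batching and concurrent processing."""


class NoopEmbedder(BaseEmbedder):
    """Toy implementation: returns one dict per chunk instead of a letta Passage."""

    async def generate_embedded_passages(self, file_id, source_id, chunks, actor):
        return [{"file_id": file_id, "source_id": source_id, "text": chunk} for chunk in chunks]


print(asyncio.run(NoopEmbedder().generate_embedded_passages("file-1", "source-1", ["hello"], actor=None)))
```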
letta/services/file_processor/embedder/openai_embedder.py
CHANGED

@@ -9,12 +9,13 @@ from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderType
 from letta.schemas.passage import Passage
 from letta.schemas.user import User
+from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
 from letta.settings import model_settings
 
 logger = get_logger(__name__)
 
 
-class OpenAIEmbedder:
+class OpenAIEmbedder(BaseEmbedder):
     """OpenAI-based embedding generation"""
 
     def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):

@@ -24,6 +25,7 @@ class OpenAIEmbedder:
             else EmbeddingConfig.default_config(model_name="letta")
         )
         self.embedding_config = embedding_config or self.default_embedding_config
+        self.max_concurrent_requests = 20
 
         # TODO: Unify to global OpenAI client
         self.client: OpenAIClient = cast(

@@ -34,7 +36,6 @@ class OpenAIEmbedder:
                 actor=None,  # Not necessary
             ),
         )
-        self.max_concurrent_requests = 20
 
     @trace_method
     async def _embed_batch(self, batch: List[str], batch_indices: List[int]) -> List[Tuple[int, List[float]]]:
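The only behavioral change here is where `max_concurrent_requests = 20` is assigned (before the client is built) plus the new `BaseEmbedder` parent. For context, a generic sketch of what such a knob usually gates in an async embedder: a semaphore that caps in-flight embedding calls. `fake_embed` stands in for the real OpenAI request and is not letta code:

```python
import asyncio
from typing import List


async def embed_batches(batches: List[List[str]], max_concurrent_requests: int = 20) -> List[List[float]]:
    sem = asyncio.Semaphore(max_concurrent_requests)

    async def fake_embed(batch: List[str]) -> List[float]:
        await asyncio.sleep(0)  # pretend this is the network call
        return [float(len(text)) for text in batch]

    async def guarded(batch: List[str]) -> List[float]:
        async with sem:  # at most max_concurrent_requests batches in flight
            return await fake_embed(batch)

    return await asyncio.gather(*(guarded(batch) for batch in batches))


print(asyncio.run(embed_batches([["a", "bb"], ["ccc"]])))
```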
letta/services/file_processor/embedder/pinecone_embedder.py
ADDED

@@ -0,0 +1,74 @@
+from typing import List
+
+from letta.helpers.pinecone_utils import upsert_file_records_to_pinecone_index
+from letta.log import get_logger
+from letta.otel.tracing import log_event, trace_method
+from letta.schemas.passage import Passage
+from letta.schemas.user import User
+from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
+
+try:
+    PINECONE_AVAILABLE = True
+except ImportError:
+    PINECONE_AVAILABLE = False
+
+logger = get_logger(__name__)
+
+
+class PineconeEmbedder(BaseEmbedder):
+    """Pinecone-based embedding generation"""
+
+    def __init__(self):
+        if not PINECONE_AVAILABLE:
+            raise ImportError("Pinecone package is not installed. Install it with: pip install pinecone")
+
+        super().__init__()
+
+    @trace_method
+    async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
+        """Generate embeddings and upsert to Pinecone, then return Passage objects"""
+        if not chunks:
+            return []
+
+        logger.info(f"Upserting {len(chunks)} chunks to Pinecone using namespace {source_id}")
+        log_event(
+            "embedder.generation_started",
+            {
+                "total_chunks": len(chunks),
+                "file_id": file_id,
+                "source_id": source_id,
+            },
+        )
+
+        # Upsert records to Pinecone using source_id as namespace
+        try:
+            await upsert_file_records_to_pinecone_index(file_id=file_id, source_id=source_id, chunks=chunks, actor=actor)
+            logger.info(f"Successfully kicked off upserting {len(chunks)} records to Pinecone")
+            log_event(
+                "embedder.upsert_started",
+                {"records_upserted": len(chunks), "namespace": source_id, "file_id": file_id},
+            )
+        except Exception as e:
+            logger.error(f"Failed to upsert records to Pinecone: {str(e)}")
+            log_event("embedder.upsert_failed", {"error": str(e), "error_type": type(e).__name__})
+            raise
+
+        # Create Passage objects (without embeddings since Pinecone handles them)
+        passages = []
+        for i, text in enumerate(chunks):
+            passage = Passage(
+                text=text,
+                file_id=file_id,
+                source_id=source_id,
+                embedding=None,  # Pinecone handles embeddings internally
+                embedding_config=None,  # None
+                organization_id=actor.organization_id,
+            )
+            passages.append(passage)
+
+        logger.info(f"Successfully created {len(passages)} passages")
+        log_event(
+            "embedder.generation_completed",
+            {"passages_created": len(passages), "total_chunks_processed": len(chunks), "file_id": file_id, "source_id": source_id},
+        )
+        return passages
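As rendered above, the `try:` block around `PINECONE_AVAILABLE = True` does not show what is imported, so the line that can actually raise `ImportError` is missing from this view. The conventional shape of that optional-dependency guard looks like the sketch below; the `import pinecone` line is an assumption, not something visible in the diff:

```python
try:
    import pinecone  # noqa: F401  (assumed import; not shown in the rendered diff)

    PINECONE_AVAILABLE = True
except ImportError:
    PINECONE_AVAILABLE = False


class RequiresPinecone:
    """Fail with a clear message only when the Pinecone-backed class is constructed."""

    def __init__(self):
        if not PINECONE_AVAILABLE:
            raise ImportError("Pinecone package is not installed. Install it with: pip install pinecone")


print("pinecone available:", PINECONE_AVAILABLE)
```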
letta/services/file_processor/file_processor.py
CHANGED

@@ -11,7 +11,7 @@ from letta.server.server import SyncServer
 from letta.services.file_manager import FileManager
 from letta.services.file_processor.chunker.line_chunker import LineChunker
 from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
-from letta.services.file_processor.embedder.
+from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
 from letta.services.file_processor.parser.mistral_parser import MistralFileParser
 from letta.services.job_manager import JobManager
 from letta.services.passage_manager import PassageManager

@@ -27,8 +27,9 @@ class FileProcessor:
         self,
         file_parser: MistralFileParser,
         text_chunker: LlamaIndexChunker,
-        embedder:
+        embedder: BaseEmbedder,
         actor: User,
+        using_pinecone: bool,
         max_file_size: int = 50 * 1024 * 1024,  # 50MB default
     ):
         self.file_parser = file_parser

@@ -41,6 +42,7 @@ class FileProcessor:
         self.passage_manager = PassageManager()
         self.job_manager = JobManager()
         self.actor = actor
+        self.using_pinecone = using_pinecone
 
     # TODO: Factor this function out of SyncServer
     @trace_method

@@ -109,7 +111,7 @@ class FileProcessor:
 
         logger.info("Chunking extracted text")
         log_event("file_processor.chunking_started", {"filename": filename, "pages_to_process": len(ocr_response.pages)})
+        all_chunks = []
 
         for page in ocr_response.pages:
             chunks = self.text_chunker.chunk_text(page)

@@ -118,24 +120,17 @@ class FileProcessor:
                 log_event("file_processor.chunking_failed", {"filename": filename, "page_index": ocr_response.pages.index(page)})
                 raise ValueError("No chunks created from text")
 
-                file_id=file_metadata.id, source_id=source_id, chunks=chunks, actor=self.actor
-            )
-            log_event(
-                "file_processor.page_processed",
-                {
-                    "filename": filename,
-                    "page_index": ocr_response.pages.index(page),
-                    "chunks_created": len(chunks),
-                    "passages_generated": len(passages),
-                },
-            )
-            all_passages.extend(passages)
+            all_chunks.extend(self.text_chunker.chunk_text(page))
 
-        all_passages = await self.
+        all_passages = await self.embedder.generate_embedded_passages(
+            file_id=file_metadata.id, source_id=source_id, chunks=all_chunks, actor=self.actor
         )
+
+        if not self.using_pinecone:
+            all_passages = await self.passage_manager.create_many_source_passages_async(
+                passages=all_passages, file_metadata=file_metadata, actor=self.actor
+            )
+            log_event("file_processor.passages_created", {"filename": filename, "total_passages": len(all_passages)})
 
         logger.info(f"Successfully processed {filename}: {len(all_passages)} passages")
         log_event(

@@ -149,9 +144,14 @@ class FileProcessor:
         )
 
         # update job status
+        if not self.using_pinecone:
+            await self.file_manager.update_file_status(
+                file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
+            )
+        else:
+            await self.file_manager.update_file_status(
+                file_id=file_metadata.id, actor=self.actor, total_chunks=len(all_passages), chunks_embedded=0
+            )
 
         return all_passages
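The processing loop now collects chunks from every page first, embeds them in a single call, and only persists passages locally when Pinecone is not in use (Pinecone-backed files instead get `total_chunks` recorded and `chunks_embedded=0`, to be reconciled later). A condensed sketch of that control flow with stand-in callables:

```python
import asyncio
from typing import Awaitable, Callable, List


async def process_pages(
    pages: List[str],
    chunk_text: Callable[[str], List[str]],
    embed_chunks: Callable[[List[str]], Awaitable[List[dict]]],
    using_pinecone: bool,
) -> List[dict]:
    all_chunks: List[str] = []
    for page in pages:
        chunks = chunk_text(page)
        if not chunks:
            raise ValueError("No chunks created from text")
        all_chunks.extend(chunks)

    # one embedding pass over the whole file instead of one per page
    passages = await embed_chunks(all_chunks)

    if not using_pinecone:
        pass  # stand-in for passage_manager.create_many_source_passages_async(...)
    return passages


async def main():
    def chunk(page: str) -> List[str]:
        return page.split()

    async def embed(chunks: List[str]) -> List[dict]:
        return [{"text": chunk} for chunk in chunks]

    print(await process_pages(["alpha beta", "gamma"], chunk, embed, using_pinecone=True))


asyncio.run(main())
```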
letta/services/job_manager.py
CHANGED

@@ -115,10 +115,6 @@ class JobManager:
             job.completed_at = get_utc_time().replace(tzinfo=None)
             if job.callback_url:
                 await self._dispatch_callback_async(job)
-            else:
-                logger.info(f"Job does not contain callback url: {job}")
-        else:
-            logger.info(f"Job update is not terminal {job_update}")
 
         # Save the updated job to the database
         await job.update_async(db_session=session, actor=actor)
letta/services/source_manager.py
CHANGED

@@ -19,7 +19,6 @@ class SourceManager:
     @trace_method
     async def create_source(self, source: PydanticSource, actor: PydanticUser) -> PydanticSource:
         """Create a new source based on the PydanticSource schema."""
-        # Try getting the source first by id
         db_source = await self.get_source_by_id(source.id, actor=actor)
         if db_source:
             return db_source