letta-nightly 0.8.9.dev20250705104147__py3-none-any.whl → 0.8.10.dev20250707035305__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of letta-nightly might be problematic.
Files changed (39)
  1. letta/__init__.py +1 -1
  2. letta/agents/letta_agent.py +24 -7
  3. letta/agents/voice_agent.py +1 -1
  4. letta/agents/voice_sleeptime_agent.py +1 -1
  5. letta/constants.py +7 -0
  6. letta/functions/function_sets/files.py +2 -1
  7. letta/functions/functions.py +0 -1
  8. letta/helpers/pinecone_utils.py +143 -0
  9. letta/llm_api/openai_client.py +4 -0
  10. letta/orm/file.py +4 -0
  11. letta/prompts/gpt_summarize.py +4 -6
  12. letta/schemas/file.py +6 -0
  13. letta/schemas/letta_base.py +4 -4
  14. letta/schemas/letta_message.py +15 -7
  15. letta/schemas/letta_message_content.py +15 -15
  16. letta/schemas/llm_config.py +4 -0
  17. letta/schemas/message.py +35 -31
  18. letta/schemas/providers.py +17 -10
  19. letta/server/rest_api/app.py +11 -0
  20. letta/server/rest_api/routers/v1/agents.py +19 -0
  21. letta/server/rest_api/routers/v1/sources.py +36 -7
  22. letta/services/file_manager.py +8 -2
  23. letta/services/file_processor/embedder/base_embedder.py +16 -0
  24. letta/services/file_processor/embedder/openai_embedder.py +3 -2
  25. letta/services/file_processor/embedder/pinecone_embedder.py +74 -0
  26. letta/services/file_processor/file_processor.py +22 -22
  27. letta/services/job_manager.py +0 -4
  28. letta/services/source_manager.py +0 -1
  29. letta/services/summarizer/enums.py +1 -0
  30. letta/services/summarizer/summarizer.py +237 -6
  31. letta/services/tool_executor/files_tool_executor.py +109 -3
  32. letta/services/user_manager.py +0 -1
  33. letta/settings.py +13 -1
  34. letta/system.py +16 -0
  35. {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/METADATA +2 -1
  36. {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/RECORD +39 -36
  37. {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/LICENSE +0 -0
  38. {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/WHEEL +0 -0
  39. {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py CHANGED
@@ -84,11 +84,11 @@ class MessageCreate(BaseModel):
         description="The content of the message.",
         json_schema_extra=get_letta_message_content_union_str_json_schema(),
     )
-    name: Optional[str] = Field(None, description="The name of the participant.")
-    otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
-    sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
-    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
-    group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
+    name: Optional[str] = Field(default=None, description="The name of the participant.")
+    otid: Optional[str] = Field(default=None, description="The offline threading id associated with this message")
+    sender_id: Optional[str] = Field(default=None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(default=None, description="The id of the LLMBatchItem that this message is associated with")
+    group_id: Optional[str] = Field(default=None, description="The multi-agent group that the message was sent in")

     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)
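The change itself is behavior-preserving: in Pydantic, passing None as the first positional argument to Field is the same as passing default=None, so these hunks only make the default explicit. A minimal sketch of the equivalence (assuming Pydantic v2):

    from typing import Optional
    from pydantic import BaseModel, Field

    class Positional(BaseModel):
        name: Optional[str] = Field(None, description="positional default")

    class Keyword(BaseModel):
        name: Optional[str] = Field(default=None, description="keyword default")

    # Both forms leave the field optional with a None default.
    assert Positional().name is None
    assert Keyword().name is None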
@@ -101,9 +101,9 @@ class MessageCreate(BaseModel):
 class MessageUpdate(BaseModel):
     """Request to update a message"""

-    role: Optional[MessageRole] = Field(None, description="The role of the participant.")
+    role: Optional[MessageRole] = Field(default=None, description="The role of the participant.")
     content: Optional[Union[str, List[LettaMessageContentUnion]]] = Field(
-        None,
+        default=None,
         description="The content of the message.",
         json_schema_extra=get_letta_message_content_union_str_json_schema(),
     )
@@ -112,11 +112,11 @@ class MessageUpdate(BaseModel):
     # agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
     # NOTE: we probably shouldn't allow updating the model field, otherwise this loses meaning
     # model: Optional[str] = Field(None, description="The model used to make the function call.")
-    name: Optional[str] = Field(None, description="The name of the participant.")
+    name: Optional[str] = Field(default=None, description="The name of the participant.")
     # NOTE: we probably shouldn't allow updating the created_at field, right?
     # created_at: Optional[datetime] = Field(None, description="The time the message was created.")
-    tool_calls: Optional[List[OpenAIToolCall,]] = Field(None, description="The list of tool calls requested.")
-    tool_call_id: Optional[str] = Field(None, description="The id of the tool call.")
+    tool_calls: Optional[List[OpenAIToolCall,]] = Field(default=None, description="The list of tool calls requested.")
+    tool_call_id: Optional[str] = Field(default=None, description="The id of the tool call.")

     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)
@@ -150,28 +150,28 @@ class Message(BaseMessage):
     """

     id: str = BaseMessage.generate_id_field()
-    organization_id: Optional[str] = Field(None, description="The unique identifier of the organization.")
-    agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
-    model: Optional[str] = Field(None, description="The model used to make the function call.")
+    organization_id: Optional[str] = Field(default=None, description="The unique identifier of the organization.")
+    agent_id: Optional[str] = Field(default=None, description="The unique identifier of the agent.")
+    model: Optional[str] = Field(default=None, description="The model used to make the function call.")

     # Basic OpenAI-style fields
     role: MessageRole = Field(..., description="The role of the participant.")
-    content: Optional[List[LettaMessageContentUnion]] = Field(None, description="The content of the message.")
+    content: Optional[List[LettaMessageContentUnion]] = Field(default=None, description="The content of the message.")

     # NOTE: in OpenAI, this field is only used for roles 'user', 'assistant', and 'function' (now deprecated). 'tool' does not use it.
     name: Optional[str] = Field(
-        None,
+        default=None,
         description="For role user/assistant: the (optional) name of the participant. For role tool/function: the name of the function called.",
     )
     tool_calls: Optional[List[OpenAIToolCall]] = Field(
-        None, description="The list of tool calls requested. Only applicable for role assistant."
+        default=None, description="The list of tool calls requested. Only applicable for role assistant."
     )
-    tool_call_id: Optional[str] = Field(None, description="The ID of the tool call. Only applicable for role tool.")
+    tool_call_id: Optional[str] = Field(default=None, description="The ID of the tool call. Only applicable for role tool.")
     # Extras
-    step_id: Optional[str] = Field(None, description="The id of the step that this message was created in.")
-    otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
-    tool_returns: Optional[List[ToolReturn]] = Field(None, description="Tool execution return information for prior tool calls")
-    group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
-    sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
-    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
+    step_id: Optional[str] = Field(default=None, description="The id of the step that this message was created in.")
+    otid: Optional[str] = Field(default=None, description="The offline threading id associated with this message")
+    tool_returns: Optional[List[ToolReturn]] = Field(default=None, description="Tool execution return information for prior tool calls")
+    group_id: Optional[str] = Field(default=None, description="The multi-agent group that the message was sent in")
+    sender_id: Optional[str] = Field(default=None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(default=None, description="The id of the LLMBatchItem that this message is associated with")
     # This overrides the optional base orm schema, created_at MUST exist on all messages objects
     created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")

@@ -482,7 +482,9 @@ class Message(BaseMessage):
         # TODO(caren) implicit support for only non-parts/list content types
         if openai_message_dict["content"] is not None and type(openai_message_dict["content"]) is not str:
             raise ValueError(f"Invalid content type: {type(openai_message_dict['content'])}")
-        content = [TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else []
+        content: List[LettaMessageContentUnion] = (
+            [TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else []
+        )

         # TODO(caren) bad assumption here that "reasoning_content" always comes before "redacted_reasoning_content"
         if "reasoning_content" in openai_message_dict and openai_message_dict["reasoning_content"]:
@@ -491,14 +493,16 @@ class Message(BaseMessage):
                     reasoning=openai_message_dict["reasoning_content"],
                     is_native=True,
                     signature=(
-                        openai_message_dict["reasoning_content_signature"] if openai_message_dict["reasoning_content_signature"] else None
+                        str(openai_message_dict["reasoning_content_signature"])
+                        if "reasoning_content_signature" in openai_message_dict
+                        else None
                     ),
                 ),
             )
         if "redacted_reasoning_content" in openai_message_dict and openai_message_dict["redacted_reasoning_content"]:
             content.append(
                 RedactedReasoningContent(
-                    data=openai_message_dict["redacted_reasoning_content"] if "redacted_reasoning_content" in openai_message_dict else None,
+                    data=str(openai_message_dict["redacted_reasoning_content"]),
                 ),
             )
         if "omitted_reasoning_content" in openai_message_dict and openai_message_dict["omitted_reasoning_content"]:
@@ -694,7 +698,7 @@ class Message(BaseMessage):
         elif self.role == "assistant":
             assert self.tool_calls is not None or text_content is not None
             openai_message = {
-                "content": None if put_inner_thoughts_in_kwargs else text_content,
+                "content": None if (put_inner_thoughts_in_kwargs and self.tool_calls is not None) else text_content,
                 "role": self.role,
            }

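The one-line change above fixes a real bug: assistant text content used to be dropped whenever inner thoughts were routed into tool-call kwargs, even for assistant messages that made no tool calls. A toy reproduction of the fixed branch (names are illustrative, not Letta's API):

    def assistant_content(text_content, tool_calls, put_inner_thoughts_in_kwargs):
        # Mirrors the fixed condition: only suppress text when the inner
        # thoughts actually have a tool call to live in.
        return None if (put_inner_thoughts_in_kwargs and tool_calls is not None) else text_content

    assert assistant_content("hello", None, True) == "hello"          # previously lost
    assert assistant_content("hello", [{"id": "call_1"}], True) is None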
@@ -733,7 +737,7 @@ class Message(BaseMessage):
         else:
             warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")

-        if parse_content_parts:
+        if parse_content_parts and self.content is not None:
             for content in self.content:
                 if isinstance(content, ReasoningContent):
                     openai_message["reasoning_content"] = content.reasoning
@@ -819,7 +823,7 @@ class Message(BaseMessage):
         }
         content = []
         # COT / reasoning / thinking
-        if len(self.content) > 1:
+        if self.content is not None and len(self.content) > 1:
             for content_part in self.content:
                 if isinstance(content_part, ReasoningContent):
                     content.append(
@@ -1154,6 +1158,6 @@ class Message(BaseMessage):

 class ToolReturn(BaseModel):
     status: Literal["success", "error"] = Field(..., description="The status of the tool call")
-    stdout: Optional[List[str]] = Field(None, description="Captured stdout (e.g. prints, logs) from the tool invocation")
-    stderr: Optional[List[str]] = Field(None, description="Captured stderr from the tool invocation")
+    stdout: Optional[List[str]] = Field(default=None, description="Captured stdout (e.g. prints, logs) from the tool invocation")
+    stderr: Optional[List[str]] = Field(default=None, description="Captured stderr from the tool invocation")
     # func_return: Optional[Any] = Field(None, description="The function return object")

letta/schemas/providers.py CHANGED
@@ -324,18 +324,25 @@ class OpenAIProvider(Provider):
             else:
                 handle = self.get_handle(model_name)

-            configs.append(
-                LLMConfig(
-                    model=model_name,
-                    model_endpoint_type="openai",
-                    model_endpoint=self.base_url,
-                    context_window=context_window_size,
-                    handle=handle,
-                    provider_name=self.name,
-                    provider_category=self.provider_category,
-                )
+            llm_config = LLMConfig(
+                model=model_name,
+                model_endpoint_type="openai",
+                model_endpoint=self.base_url,
+                context_window=context_window_size,
+                handle=handle,
+                provider_name=self.name,
+                provider_category=self.provider_category,
             )

+            # gpt-4o-mini has started to regress with pretty bad emoji spam loops
+            # this is to counteract that
+            if "gpt-4o-mini" in model_name:
+                llm_config.frequency_penalty = 1.0
+            if "gpt-4.1-mini" in model_name:
+                llm_config.frequency_penalty = 1.0
+
+            configs.append(llm_config)
+
         # for OpenAI, sort in reverse order
         if self.base_url == "https://api.openai.com/v1":
             # alphnumeric sort
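frequency_penalty is the standard OpenAI sampling parameter (range -2.0 to 2.0); a positive value penalizes tokens in proportion to how often they have already appeared, which is what damps the repetition loops mentioned in the comment. A sketch of where the value set on the LLMConfig ultimately lands, assuming a standard chat completions request:

    # Hypothetical request payload: the key point is that frequency_penalty
    # rides along with the model call rather than changing the prompt.
    payload = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": "hello"}],
        "frequency_penalty": 1.0,  # 0.0 disables the penalty
    }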

letta/server/rest_api/app.py CHANGED
@@ -17,6 +17,7 @@ from letta.__init__ import __version__ as letta_version
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
 from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError
+from letta.helpers.pinecone_utils import get_pinecone_indices, should_use_pinecone, upsert_pinecone_indices
 from letta.jobs.scheduler import start_scheduler_with_leader_election
 from letta.log import get_logger
 from letta.orm.errors import DatabaseTimeoutError, ForeignKeyConstraintViolationError, NoResultFound, UniqueConstraintViolationError
@@ -127,6 +128,16 @@ async def lifespan(app_: FastAPI):
     db_registry.initialize_async()
     logger.info(f"[Worker {worker_id}] Database connections initialized")

+    if should_use_pinecone():
+        if settings.upsert_pinecone_indices:
+            logger.info(f"[Worker {worker_id}] Upserting pinecone indices: {get_pinecone_indices()}")
+            await upsert_pinecone_indices()
+            logger.info(f"[Worker {worker_id}] Upserted pinecone indices")
+        else:
+            logger.info(f"[Worker {worker_id}] Enabled pinecone")
+    else:
+        logger.info(f"[Worker {worker_id}] Disabled pinecone")
+
     logger.info(f"[Worker {worker_id}] Starting scheduler with leader election")
     global server
     try:
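should_use_pinecone and the index helpers live in the new letta/helpers/pinecone_utils.py (+143 lines), which this listing does not expand. A plausible shape for the gate, stated purely as an assumption from how it is called here:

    # Hypothetical sketch (not the released implementation): Pinecone is used
    # only when explicitly enabled and credentialed in settings; otherwise the
    # lifespan hook just logs that it is disabled.
    def should_use_pinecone_sketch(settings) -> bool:
        return bool(getattr(settings, "enable_pinecone", False) and getattr(settings, "pinecone_api_key", None))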

letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -38,6 +38,7 @@ from letta.schemas.user import User
 from letta.serialize_schemas.pydantic_agent_schema import AgentSchema
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
+from letta.services.summarizer.enums import SummarizationMode
 from letta.services.telemetry_manager import NoopTelemetryManager
 from letta.settings import settings
 from letta.utils import safe_create_task
@@ -750,6 +751,12 @@ async def send_message(
         step_manager=server.step_manager,
         telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
         current_run_id=run.id,
+        # summarizer settings to be added here
+        summarizer_mode=(
+            SummarizationMode.STATIC_MESSAGE_BUFFER
+            if agent.agent_type == AgentType.voice_convo_agent
+            else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
+        ),
     )

     result = await agent_loop.step(
@@ -878,6 +885,12 @@ async def send_message_streaming(
         step_manager=server.step_manager,
         telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
         current_run_id=run.id,
+        # summarizer settings to be added here
+        summarizer_mode=(
+            SummarizationMode.STATIC_MESSAGE_BUFFER
+            if agent.agent_type == AgentType.voice_convo_agent
+            else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
+        ),
     )
     from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode

@@ -1014,6 +1027,12 @@ async def _process_message_background(
         actor=actor,
         step_manager=server.step_manager,
         telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
+        # summarizer settings to be added here
+        summarizer_mode=(
+            SummarizationMode.STATIC_MESSAGE_BUFFER
+            if agent.agent_type == AgentType.voice_convo_agent
+            else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
+        ),
     )

     result = await agent_loop.step(
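The same ternary now appears in all three entry points (send_message, send_message_streaming, and _process_message_background): voice conversation agents keep the static message buffer, and every other agent type gets the new partial-evict mode. Factored into a helper, the rule would read (hypothetical refactor, not part of the diff):

    def pick_summarizer_mode(agent_type: AgentType) -> SummarizationMode:
        # Voice conversations keep the old fixed-size buffer behavior.
        if agent_type == AgentType.voice_convo_agent:
            return SummarizationMode.STATIC_MESSAGE_BUFFER
        return SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER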

letta/server/rest_api/routers/v1/sources.py CHANGED
@@ -9,6 +9,12 @@ from fastapi import APIRouter, Depends, Header, HTTPException, Query, UploadFile
 from starlette import status

 import letta.constants as constants
+from letta.helpers.pinecone_utils import (
+    delete_file_records_from_pinecone_index,
+    delete_source_records_from_pinecone_index,
+    list_pinecone_index_for_files,
+    should_use_pinecone,
+)
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
@@ -22,6 +28,7 @@ from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
 from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
 from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
+from letta.services.file_processor.embedder.pinecone_embedder import PineconeEmbedder
 from letta.services.file_processor.file_processor import FileProcessor
 from letta.services.file_processor.file_types import (
     get_allowed_media_types,
@@ -163,6 +170,10 @@ async def delete_source(
     files = await server.file_manager.list_files(source_id, actor)
     file_ids = [f.id for f in files]

+    if should_use_pinecone():
+        logger.info(f"Deleting source {source_id} from pinecone index")
+        await delete_source_records_from_pinecone_index(source_id=source_id, actor=actor)
+
     for agent_state in agent_states:
         await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)
@@ -326,16 +337,24 @@ async def get_file_metadata(
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)

-    # Verify the source exists and user has access
-    source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor)
-    if not source:
-        raise HTTPException(status_code=404, detail=f"Source with id={source_id} not found.")
-
     # Get file metadata using the file manager
     file_metadata = await server.file_manager.get_file_by_id(
         file_id=file_id, actor=actor, include_content=include_content, strip_directory_prefix=True
     )

+    if should_use_pinecone() and not file_metadata.is_processing_terminal():
+        ids = await list_pinecone_index_for_files(file_id=file_id, actor=actor, limit=file_metadata.total_chunks)
+        logger.info(f"Embedded chunks {len(ids)}/{file_metadata.total_chunks} for {file_id} in organization {actor.organization_id}")
+
+        if len(ids) != file_metadata.chunks_embedded or len(ids) == file_metadata.total_chunks:
+            if len(ids) != file_metadata.total_chunks:
+                file_status = file_metadata.processing_status
+            else:
+                file_status = FileProcessingStatus.COMPLETED
+            await server.file_manager.update_file_status(
+                file_id=file_metadata.id, actor=actor, chunks_embedded=len(ids), processing_status=file_status
+            )
+
     if not file_metadata:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")

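Because Pinecone embeds asynchronously, this route doubles as a progress poll: it counts the records already in the index, writes the count back via update_file_status, and flips the file to COMPLETED once every chunk is present. Reduced to a pure function, the update rule looks like this (a sketch; names follow the route code above, not a public API):

    def reconcile(embedded: int, recorded: int, total: int, current_status):
        # Skip the write when nothing changed and the file is not finished.
        if embedded == recorded and embedded != total:
            return None
        # Otherwise persist progress, promoting to COMPLETED at 100%.
        return "COMPLETED" if embedded == total else current_status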
@@ -364,6 +383,10 @@ async def delete_file_from_source(

     await server.remove_file_from_context_windows(source_id=source_id, file_id=deleted_file.id, actor=actor)

+    if should_use_pinecone():
+        logger.info(f"Deleting file {file_id} from pinecone index")
+        await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
+
     asyncio.create_task(sleeptime_document_ingest_async(server, source_id, actor, clear_history=True))
     if deleted_file is None:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")
@@ -402,8 +425,14 @@ async def load_file_to_source_cloud(
 ):
     file_processor = MistralFileParser()
     text_chunker = LlamaIndexChunker(chunk_size=embedding_config.embedding_chunk_size)
-    embedder = OpenAIEmbedder(embedding_config=embedding_config)
-    file_processor = FileProcessor(file_parser=file_processor, text_chunker=text_chunker, embedder=embedder, actor=actor)
+    using_pinecone = should_use_pinecone()
+    if using_pinecone:
+        embedder = PineconeEmbedder()
+    else:
+        embedder = OpenAIEmbedder(embedding_config=embedding_config)
+    file_processor = FileProcessor(
+        file_parser=file_processor, text_chunker=text_chunker, embedder=embedder, actor=actor, using_pinecone=using_pinecone
+    )
     await file_processor.process(
         server=server, agent_states=agent_states, source_id=source_id, content=content, file_metadata=file_metadata
     )

letta/services/file_manager.py CHANGED
@@ -109,15 +109,17 @@ class FileManager:
         actor: PydanticUser,
         processing_status: Optional[FileProcessingStatus] = None,
         error_message: Optional[str] = None,
+        total_chunks: Optional[int] = None,
+        chunks_embedded: Optional[int] = None,
     ) -> PydanticFileMetadata:
         """
-        Update processing_status and/or error_message on a FileMetadata row.
+        Update processing_status, error_message, total_chunks, and/or chunks_embedded on a FileMetadata row.

         * 1st round-trip → UPDATE
         * 2nd round-trip → SELECT fresh row (same as read_async)
         """

-        if processing_status is None and error_message is None:
+        if processing_status is None and error_message is None and total_chunks is None and chunks_embedded is None:
             raise ValueError("Nothing to update")

         values: dict[str, object] = {"updated_at": datetime.utcnow()}
@@ -125,6 +127,10 @@ class FileManager:
             values["processing_status"] = processing_status
         if error_message is not None:
             values["error_message"] = error_message
+        if total_chunks is not None:
+            values["total_chunks"] = total_chunks
+        if chunks_embedded is not None:
+            values["chunks_embedded"] = chunks_embedded

         async with db_registry.async_session() as session:
             # Fast in-place update – no ORM hydration
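With the two new keyword arguments, callers can record embedding progress without touching the processing status, e.g. (usage sketch; the counts are illustrative):

    async def record_progress(file_manager, file, actor):
        # Omitting processing_status leaves the current status untouched.
        await file_manager.update_file_status(
            file_id=file.id, actor=actor, total_chunks=100, chunks_embedded=42
        )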

letta/services/file_processor/embedder/base_embedder.py ADDED
@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import List
+
+from letta.log import get_logger
+from letta.schemas.passage import Passage
+from letta.schemas.user import User
+
+logger = get_logger(__name__)
+
+
+class BaseEmbedder(ABC):
+    """Abstract base class for embedding generation"""
+
+    @abstractmethod
+    async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
+        """Generate embeddings for chunks with batching and concurrent processing"""

letta/services/file_processor/embedder/openai_embedder.py CHANGED
@@ -9,12 +9,13 @@ from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderType
 from letta.schemas.passage import Passage
 from letta.schemas.user import User
+from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
 from letta.settings import model_settings

 logger = get_logger(__name__)


-class OpenAIEmbedder:
+class OpenAIEmbedder(BaseEmbedder):
     """OpenAI-based embedding generation"""

     def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
@@ -24,6 +25,7 @@ class OpenAIEmbedder:
             else EmbeddingConfig.default_config(model_name="letta")
         )
         self.embedding_config = embedding_config or self.default_embedding_config
+        self.max_concurrent_requests = 20

         # TODO: Unify to global OpenAI client
         self.client: OpenAIClient = cast(
@@ -34,7 +36,6 @@ class OpenAIEmbedder:
                 actor=None,  # Not necessary
             ),
         )
-        self.max_concurrent_requests = 20

     @trace_method
     async def _embed_batch(self, batch: List[str], batch_indices: List[int]) -> List[Tuple[int, List[float]]]:

letta/services/file_processor/embedder/pinecone_embedder.py ADDED
@@ -0,0 +1,74 @@
+from typing import List
+
+from letta.helpers.pinecone_utils import upsert_file_records_to_pinecone_index
+from letta.log import get_logger
+from letta.otel.tracing import log_event, trace_method
+from letta.schemas.passage import Passage
+from letta.schemas.user import User
+from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
+
+try:
+    PINECONE_AVAILABLE = True
+except ImportError:
+    PINECONE_AVAILABLE = False
+
+logger = get_logger(__name__)
+
+
+class PineconeEmbedder(BaseEmbedder):
+    """Pinecone-based embedding generation"""
+
+    def __init__(self):
+        if not PINECONE_AVAILABLE:
+            raise ImportError("Pinecone package is not installed. Install it with: pip install pinecone")
+
+        super().__init__()
+
+    @trace_method
+    async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
+        """Generate embeddings and upsert to Pinecone, then return Passage objects"""
+        if not chunks:
+            return []
+
+        logger.info(f"Upserting {len(chunks)} chunks to Pinecone using namespace {source_id}")
+        log_event(
+            "embedder.generation_started",
+            {
+                "total_chunks": len(chunks),
+                "file_id": file_id,
+                "source_id": source_id,
+            },
+        )
+
+        # Upsert records to Pinecone using source_id as namespace
+        try:
+            await upsert_file_records_to_pinecone_index(file_id=file_id, source_id=source_id, chunks=chunks, actor=actor)
+            logger.info(f"Successfully kicked off upserting {len(chunks)} records to Pinecone")
+            log_event(
+                "embedder.upsert_started",
+                {"records_upserted": len(chunks), "namespace": source_id, "file_id": file_id},
+            )
+        except Exception as e:
+            logger.error(f"Failed to upsert records to Pinecone: {str(e)}")
+            log_event("embedder.upsert_failed", {"error": str(e), "error_type": type(e).__name__})
+            raise
+
+        # Create Passage objects (without embeddings since Pinecone handles them)
+        passages = []
+        for i, text in enumerate(chunks):
+            passage = Passage(
+                text=text,
+                file_id=file_id,
+                source_id=source_id,
+                embedding=None,  # Pinecone handles embeddings internally
+                embedding_config=None,  # None
+                organization_id=actor.organization_id,
+            )
+            passages.append(passage)
+
+        logger.info(f"Successfully created {len(passages)} passages")
+        log_event(
+            "embedder.generation_completed",
+            {"passages_created": len(passages), "total_chunks_processed": len(chunks), "file_id": file_id, "source_id": source_id},
+        )
+        return passages

letta/services/file_processor/file_processor.py CHANGED
@@ -11,7 +11,7 @@ from letta.server.server import SyncServer
 from letta.services.file_manager import FileManager
 from letta.services.file_processor.chunker.line_chunker import LineChunker
 from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
-from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
+from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
 from letta.services.file_processor.parser.mistral_parser import MistralFileParser
 from letta.services.job_manager import JobManager
 from letta.services.passage_manager import PassageManager
@@ -27,8 +27,9 @@ class FileProcessor:
         self,
         file_parser: MistralFileParser,
         text_chunker: LlamaIndexChunker,
-        embedder: OpenAIEmbedder,
+        embedder: BaseEmbedder,
         actor: User,
+        using_pinecone: bool,
         max_file_size: int = 50 * 1024 * 1024,  # 50MB default
     ):
         self.file_parser = file_parser
@@ -41,6 +42,7 @@ class FileProcessor:
         self.passage_manager = PassageManager()
         self.job_manager = JobManager()
         self.actor = actor
+        self.using_pinecone = using_pinecone

     # TODO: Factor this function out of SyncServer
     @trace_method
@@ -109,7 +111,7 @@ class FileProcessor:

         logger.info("Chunking extracted text")
         log_event("file_processor.chunking_started", {"filename": filename, "pages_to_process": len(ocr_response.pages)})
-        all_passages = []
+        all_chunks = []

         for page in ocr_response.pages:
             chunks = self.text_chunker.chunk_text(page)
@@ -118,24 +120,17 @@ class FileProcessor:
                 log_event("file_processor.chunking_failed", {"filename": filename, "page_index": ocr_response.pages.index(page)})
                 raise ValueError("No chunks created from text")

-            passages = await self.embedder.generate_embedded_passages(
-                file_id=file_metadata.id, source_id=source_id, chunks=chunks, actor=self.actor
-            )
-            log_event(
-                "file_processor.page_processed",
-                {
-                    "filename": filename,
-                    "page_index": ocr_response.pages.index(page),
-                    "chunks_created": len(chunks),
-                    "passages_generated": len(passages),
-                },
-            )
-            all_passages.extend(passages)
+            all_chunks.extend(self.text_chunker.chunk_text(page))

-        all_passages = await self.passage_manager.create_many_source_passages_async(
-            passages=all_passages, file_metadata=file_metadata, actor=self.actor
+        all_passages = await self.embedder.generate_embedded_passages(
+            file_id=file_metadata.id, source_id=source_id, chunks=all_chunks, actor=self.actor
         )
-        log_event("file_processor.passages_created", {"filename": filename, "total_passages": len(all_passages)})
+
+        if not self.using_pinecone:
+            all_passages = await self.passage_manager.create_many_source_passages_async(
+                passages=all_passages, file_metadata=file_metadata, actor=self.actor
+            )
+            log_event("file_processor.passages_created", {"filename": filename, "total_passages": len(all_passages)})

         logger.info(f"Successfully processed {filename}: {len(all_passages)} passages")
         log_event(
149
144
  )
150
145
 
151
146
  # update job status
152
- await self.file_manager.update_file_status(
153
- file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
154
- )
147
+ if not self.using_pinecone:
148
+ await self.file_manager.update_file_status(
149
+ file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
150
+ )
151
+ else:
152
+ await self.file_manager.update_file_status(
153
+ file_id=file_metadata.id, actor=self.actor, total_chunks=len(all_passages), chunks_embedded=0
154
+ )
155
155
 
156
156
  return all_passages
157
157
 
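The restructuring changes the embedding granularity: the old code embedded and logged page by page, while the new code collects every page's chunks first and makes a single generate_embedded_passages call per file, with persistence and status updates branching on using_pinecone. In sketch form (note that the diff calls chunk_text twice per page, once for the empty check and once to extend the list):

    async def process_pages(pages, chunker, embedder, file_id, source_id, actor):
        all_chunks = []
        for page in pages:
            chunks = chunker.chunk_text(page)
            if not chunks:
                raise ValueError("No chunks created from text")
            all_chunks.extend(chunks)  # the diff re-chunks here instead of reusing `chunks`
        # One embedding pass for the whole file.
        return await embedder.generate_embedded_passages(
            file_id=file_id, source_id=source_id, chunks=all_chunks, actor=actor
        )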

letta/services/job_manager.py CHANGED
@@ -115,10 +115,6 @@ class JobManager:
                 job.completed_at = get_utc_time().replace(tzinfo=None)
                 if job.callback_url:
                     await self._dispatch_callback_async(job)
-                else:
-                    logger.info(f"Job does not contain callback url: {job}")
-            else:
-                logger.info(f"Job update is not terminal {job_update}")

             # Save the updated job to the database
             await job.update_async(db_session=session, actor=actor)

letta/services/source_manager.py CHANGED
@@ -19,7 +19,6 @@ class SourceManager:
     @trace_method
     async def create_source(self, source: PydanticSource, actor: PydanticUser) -> PydanticSource:
         """Create a new source based on the PydanticSource schema."""
-        # Try getting the source first by id
         db_source = await self.get_source_by_id(source.id, actor=actor)
         if db_source:
             return db_source

letta/services/summarizer/enums.py CHANGED
@@ -7,3 +7,4 @@ class SummarizationMode(str, Enum):
     """

     STATIC_MESSAGE_BUFFER = "static_message_buffer_mode"
+    PARTIAL_EVICT_MESSAGE_BUFFER = "partial_evict_message_buffer_mode"