letta-nightly 0.8.9.dev20250705104147__py3-none-any.whl → 0.8.10.dev20250707035305__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Potentially problematic release.
- letta/__init__.py +1 -1
- letta/agents/letta_agent.py +24 -7
- letta/agents/voice_agent.py +1 -1
- letta/agents/voice_sleeptime_agent.py +1 -1
- letta/constants.py +7 -0
- letta/functions/function_sets/files.py +2 -1
- letta/functions/functions.py +0 -1
- letta/helpers/pinecone_utils.py +143 -0
- letta/llm_api/openai_client.py +4 -0
- letta/orm/file.py +4 -0
- letta/prompts/gpt_summarize.py +4 -6
- letta/schemas/file.py +6 -0
- letta/schemas/letta_base.py +4 -4
- letta/schemas/letta_message.py +15 -7
- letta/schemas/letta_message_content.py +15 -15
- letta/schemas/llm_config.py +4 -0
- letta/schemas/message.py +35 -31
- letta/schemas/providers.py +17 -10
- letta/server/rest_api/app.py +11 -0
- letta/server/rest_api/routers/v1/agents.py +19 -0
- letta/server/rest_api/routers/v1/sources.py +36 -7
- letta/services/file_manager.py +8 -2
- letta/services/file_processor/embedder/base_embedder.py +16 -0
- letta/services/file_processor/embedder/openai_embedder.py +3 -2
- letta/services/file_processor/embedder/pinecone_embedder.py +74 -0
- letta/services/file_processor/file_processor.py +22 -22
- letta/services/job_manager.py +0 -4
- letta/services/source_manager.py +0 -1
- letta/services/summarizer/enums.py +1 -0
- letta/services/summarizer/summarizer.py +237 -6
- letta/services/tool_executor/files_tool_executor.py +109 -3
- letta/services/user_manager.py +0 -1
- letta/settings.py +13 -1
- letta/system.py +16 -0
- {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/METADATA +2 -1
- {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/RECORD +39 -36
- {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/entry_points.txt +0 -0
letta/services/summarizer/summarizer.py
CHANGED

@@ -4,13 +4,19 @@ import traceback
 from typing import List, Optional, Tuple, Union
 
 from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
-from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, MESSAGE_SUMMARY_REQUEST_ACK
+from letta.helpers.message_helper import convert_message_creates_to_messages
+from letta.llm_api.llm_client import LLMClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.prompts import gpt_summarize
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import TextContent
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message, MessageCreate
+from letta.schemas.user import User
 from letta.services.summarizer.enums import SummarizationMode
+from letta.system import package_summarize_message_no_counts
 from letta.templates.template_helper import render_template
 
 logger = get_logger(__name__)
@@ -29,18 +35,24 @@ class Summarizer:
         summarizer_agent: Optional[Union[EphemeralSummaryAgent, "VoiceSleeptimeAgent"]] = None,
         message_buffer_limit: int = 10,
         message_buffer_min: int = 3,
+        partial_evict_summarizer_percentage: float = 0.30,
     ):
         self.mode = mode
 
         # Need to do validation on this
+        # TODO: Move this to config
         self.message_buffer_limit = message_buffer_limit
         self.message_buffer_min = message_buffer_min
         self.summarizer_agent = summarizer_agent
-
+        self.partial_evict_summarizer_percentage = partial_evict_summarizer_percentage
 
     @trace_method
-    def summarize(
-        self,
+    async def summarize(
+        self,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        force: bool = False,
+        clear: bool = False,
     ) -> Tuple[List[Message], bool]:
         """
         Summarizes or trims in_context_messages according to the chosen mode,

@@ -58,7 +70,19 @@ class Summarizer:
             (could be appended to the conversation if desired)
         """
         if self.mode == SummarizationMode.STATIC_MESSAGE_BUFFER:
-            return self._static_buffer_summarization(
+            return self._static_buffer_summarization(
+                in_context_messages,
+                new_letta_messages,
+                force=force,
+                clear=clear,
+            )
+        elif self.mode == SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER:
+            return await self._partial_evict_buffer_summarization(
+                in_context_messages,
+                new_letta_messages,
+                force=force,
+                clear=clear,
+            )
         else:
             # Fallback or future logic
             return in_context_messages, False
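The summarize entry point is now a coroutine and dispatches on the new PARTIAL_EVICT_MESSAGE_BUFFER mode. A minimal usage sketch, assuming the constructor takes a mode argument alongside the kwargs shown above and that a summarizer_agent is already initialized (both assumptions, since the full constructor is not visible in this hunk):

    from letta.services.summarizer.enums import SummarizationMode
    from letta.services.summarizer.summarizer import Summarizer

    async def run_partial_evict(summarizer_agent, in_context, new_msgs):
        # Assumed constructor shape: `mode` plus the kwargs from the hunk above.
        summarizer = Summarizer(
            mode=SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER,
            summarizer_agent=summarizer_agent,
            partial_evict_summarizer_percentage=0.30,  # keep roughly the newest 30% of messages
        )
        # summarize() must now be awaited; the partial-evict branch returns
        # early unless force=True (see _partial_evict_buffer_summarization below).
        return await summarizer.summarize(in_context, new_msgs, force=True)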
@@ -75,9 +99,131 @@ class Summarizer:
         task.add_done_callback(callback)
         return task
 
+    async def _partial_evict_buffer_summarization(
+        self,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        force: bool = False,
+        clear: bool = False,
+    ) -> Tuple[List[Message], bool]:
+        """Summarization as implemented in the original MemGPT loop, but using message count instead of token count.
+        Evict a partial amount of messages, and replace message[1] with a recursive summary.
+
+        Note that this can't be made sync, because we're waiting on the summary to inject it into the context window,
+        unlike the version that writes it to a block.
+
+        Unless force is True, don't summarize.
+        Ignore clear, we don't use it.
+        """
+        all_in_context_messages = in_context_messages + new_letta_messages
+
+        if not force:
+            logger.debug("Not forcing summarization, returning in-context messages as is.")
+            return all_in_context_messages, False
+
+        # Very ugly code to pull LLMConfig etc from the SummarizerAgent if we're not using it for anything else
+        assert self.summarizer_agent is not None
+
+        # First step: determine how many messages to retain
+        total_message_count = len(all_in_context_messages)
+        assert self.partial_evict_summarizer_percentage >= 0.0 and self.partial_evict_summarizer_percentage <= 1.0
+        target_message_start = round((1.0 - self.partial_evict_summarizer_percentage) * total_message_count)
+        logger.info(f"Target message count: {total_message_count}->{(total_message_count-target_message_start)}")
+
+        # The summary message we'll insert is role 'user' (vs 'assistant', 'tool', or 'system')
+        # We are going to put it at index 1 (index 0 is the system message)
+        # That means that index 2 needs to be role 'assistant', so walk up the list starting at
+        # the target_message_count and find the first assistant message
+        for i in range(target_message_start, total_message_count):
+            if all_in_context_messages[i].role == MessageRole.assistant:
+                assistant_message_index = i
+                break
+        else:
+            raise ValueError(f"No assistant message found from indices {target_message_start} to {total_message_count}")
+
+        # The sequence to summarize is index 1 -> assistant_message_index
+        messages_to_summarize = all_in_context_messages[1:assistant_message_index]
+        logger.info(f"Eviction indices: {1}->{assistant_message_index}(/{total_message_count})")
+
+        # Dynamically get the LLMConfig from the summarizer agent
+        # Pretty cringe code here that we need the agent for this but we don't use it
+        agent_state = await self.summarizer_agent.agent_manager.get_agent_by_id_async(
+            agent_id=self.summarizer_agent.agent_id, actor=self.summarizer_agent.actor
+        )
+
+        # TODO if we do this via the "agent", then we can more easily allow toggling on the memory block version
+        summary_message_str = await simple_summary(
+            messages=messages_to_summarize,
+            llm_config=agent_state.llm_config,
+            actor=self.summarizer_agent.actor,
+            include_ack=True,
+        )
+
+        # TODO add counts back
+        # Recall message count
+        # num_recall_messages_current = await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+        # num_messages_evicted = len(messages_to_summarize)
+        # num_recall_messages_hidden = num_recall_messages_total - len()
+
+        # Create the summary message
+        summary_message_str_packed = package_summarize_message_no_counts(
+            summary=summary_message_str,
+            timezone=agent_state.timezone,
+        )
+        summary_message_obj = convert_message_creates_to_messages(
+            message_creates=[
+                MessageCreate(
+                    role=MessageRole.user,
+                    content=[TextContent(text=summary_message_str_packed)],
+                )
+            ],
+            agent_id=agent_state.id,
+            timezone=agent_state.timezone,
+            # We already packed, don't pack again
+            wrap_user_message=False,
+            wrap_system_message=False,
+        )[0]
+
+        # Create the message in the DB
+        await self.summarizer_agent.message_manager.create_many_messages_async(
+            pydantic_msgs=[summary_message_obj],
+            actor=self.summarizer_agent.actor,
+        )
+
+        updated_in_context_messages = all_in_context_messages[assistant_message_index:]
+        return [all_in_context_messages[0], summary_message_obj] + updated_in_context_messages, True
+
     def _static_buffer_summarization(
-        self,
+        self,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        force: bool = False,
+        clear: bool = False,
     ) -> Tuple[List[Message], bool]:
+        """
+        Implements static buffer summarization by maintaining a fixed-size message buffer (< N messages).
+
+        Logic:
+        1. Combine existing context messages with new messages
+        2. If total messages <= buffer limit and not forced, return unchanged
+        3. Calculate how many messages to retain (0 if clear=True, otherwise message_buffer_min)
+        4. Find the trim index to keep the most recent messages while preserving user message boundaries
+        5. Evict older messages (everything between system message and trim index)
+        6. If summarizer agent is available, trigger background summarization of evicted messages
+        7. Return updated context with system message + retained recent messages
+
+        Args:
+            in_context_messages: Existing conversation context messages
+            new_letta_messages: Newly added messages to append
+            force: Force summarization even if buffer limit not exceeded
+            clear: Clear all messages except system message (retain_count = 0)
+
+        Returns:
+            Tuple of (updated_messages, was_summarized)
+            - updated_messages: New context after trimming/summarization
+            - was_summarized: True if messages were evicted and summarization triggered
+        """
+
         all_in_context_messages = in_context_messages + new_letta_messages
 
         if len(all_in_context_messages) <= self.message_buffer_limit and not force:
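To make the eviction arithmetic above concrete: with the default percentage of 0.30 and a 100-message context, the retention boundary lands at index 70, so everything between the system message and the first assistant message at or after that index gets summarized, keeping roughly the newest 30% of the buffer. A standalone sketch of the same formula:

    partial_evict_summarizer_percentage = 0.30
    total_message_count = 100

    # Identical to the computation in _partial_evict_buffer_summarization:
    # the retention window starts 70% of the way through the buffer.
    target_message_start = round((1.0 - partial_evict_summarizer_percentage) * total_message_count)
    assert target_message_start == 70

    # The summary then replaces messages[1:assistant_message_index], where
    # assistant_message_index is the first assistant-role message at index >= 70;
    # index 0 (the system message) is always kept.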
@@ -139,6 +285,91 @@ class Summarizer:
         return [all_in_context_messages[0]] + updated_in_context_messages, True
 
 
+def simple_formatter(messages: List[Message], include_system: bool = False) -> str:
+    """Go from an OpenAI-style list of messages to a concatenated string"""
+
+    parsed_messages = [message.to_openai_dict() for message in messages if message.role != MessageRole.system or include_system]
+    return "\n".join(json.dumps(msg) for msg in parsed_messages)
+
+
+def simple_message_wrapper(openai_msg: dict) -> Message:
+    """Extremely simple way to map from role/content to Message object w/ throwaway dummy fields"""
+
+    if "role" not in openai_msg:
+        raise ValueError(f"Missing role in openai_msg: {openai_msg}")
+    if "content" not in openai_msg:
+        raise ValueError(f"Missing content in openai_msg: {openai_msg}")
+
+    if openai_msg["role"] == "user":
+        return Message(
+            role=MessageRole.user,
+            content=[TextContent(text=openai_msg["content"])],
+        )
+    elif openai_msg["role"] == "assistant":
+        return Message(
+            role=MessageRole.assistant,
+            content=[TextContent(text=openai_msg["content"])],
+        )
+    elif openai_msg["role"] == "system":
+        return Message(
+            role=MessageRole.system,
+            content=[TextContent(text=openai_msg["content"])],
+        )
+    else:
+        raise ValueError(f"Unknown role: {openai_msg['role']}")
+
+
+async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor: User, include_ack: bool = True) -> str:
+    """Generate a simple summary from a list of messages.
+
+    Intentionally kept functional due to the simplicity of the prompt.
+    """
+
+    # Create an LLMClient from the config
+    llm_client = LLMClient.create(
+        provider_type=llm_config.model_endpoint_type,
+        put_inner_thoughts_first=True,
+        actor=actor,
+    )
+    assert llm_client is not None
+
+    # Prepare the messages payload to send to the LLM
+    system_prompt = gpt_summarize.SYSTEM
+    summary_transcript = simple_formatter(messages)
+
+    if include_ack:
+        input_messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
+            {"role": "user", "content": summary_transcript},
+        ]
+    else:
+        input_messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": summary_transcript},
+        ]
+    print("messages going to summarizer:", input_messages)
+    input_messages_obj = [simple_message_wrapper(msg) for msg in input_messages]
+    print("messages going to summarizer (objs):", input_messages_obj)
+
+    request_data = llm_client.build_request_data(input_messages_obj, llm_config, tools=[])
+    print("request data:", request_data)
+    # NOTE: we should disable the inner_thoughts_in_kwargs here, because we don't use it
+    # I'm leaving it commented it out for now for safety but is fine assuming the var here is a copy not a reference
+    # llm_config.put_inner_thoughts_in_kwargs = False
+    response_data = await llm_client.request_async(request_data, llm_config)
+    response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, llm_config)
+    if response.choices[0].message.content is None:
+        logger.warning("No content returned from summarizer")
+        # TODO raise an error error instead?
+        # return "[Summary failed to generate]"
+        raise Exception("Summary failed to generate")
+    else:
+        summary = response.choices[0].message.content.strip()
+
+    return summary
+
+
 def format_transcript(messages: List[Message], include_system: bool = False) -> List[str]:
     """
     Turn a list of Message objects into a human-readable transcript.
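With include_ack=True, simple_summary sends three messages: the gpt_summarize system prompt, the canned MESSAGE_SUMMARY_REQUEST_ACK acknowledgment (newly re-exported from letta.constants), and the transcript built by simple_formatter. The transcript is one JSON object per line; a sketch of its shape, assuming to_openai_dict() returns plain role/content dicts:

    import json

    # Stand-ins for Message.to_openai_dict() output (an assumption about its shape).
    msgs = [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hello!"},
    ]

    # Mirrors simple_formatter: one JSON-encoded message per line.
    transcript = "\n".join(json.dumps(m) for m in msgs)
    print(transcript)
    # {"role": "user", "content": "hi"}
    # {"role": "assistant", "content": "hello!"}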
letta/services/tool_executor/files_tool_executor.py
CHANGED

@@ -2,8 +2,9 @@ import asyncio
 import re
 from typing import Any, Dict, List, Optional
 
-from letta.constants import MAX_FILES_OPEN
+from letta.constants import MAX_FILES_OPEN, PINECONE_TEXT_FIELD_NAME
 from letta.functions.types import FileOpenRequest
+from letta.helpers.pinecone_utils import search_pinecone_index, should_use_pinecone
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState

@@ -463,14 +464,15 @@ class LettaFileToolExecutor(ToolExecutor):
         return "\n".join(formatted_results)
 
     @trace_method
-    async def semantic_search_files(self, agent_state: AgentState, query: str, limit: int =
+    async def semantic_search_files(self, agent_state: AgentState, query: str, limit: int = 5) -> str:
         """
         Search for text within attached files using semantic search and return passages with their source filenames.
+        Uses Pinecone if configured, otherwise falls back to traditional search.
 
         Args:
             agent_state: Current agent state
             query: Search query for semantic matching
-            limit: Maximum number of results to return (default:
+            limit: Maximum number of results to return (default: 5)
 
         Returns:
             Formatted string with search results in IDE/terminal style
@@ -485,6 +487,110 @@ class LettaFileToolExecutor(ToolExecutor):
 
         self.logger.info(f"Semantic search started for agent {agent_state.id} with query '{query}' (limit: {limit})")
 
+        # Check if Pinecone is enabled and use it if available
+        if should_use_pinecone():
+            return await self._search_files_pinecone(agent_state, query, limit)
+        else:
+            return await self._search_files_traditional(agent_state, query, limit)
+
+    async def _search_files_pinecone(self, agent_state: AgentState, query: str, limit: int) -> str:
+        """Search files using Pinecone vector database."""
+
+        # Extract unique source_ids
+        # TODO: Inefficient
+        attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
+        source_ids = [source.id for source in attached_sources]
+        if not source_ids:
+            return f"No valid source IDs found for attached files"
+
+        # Get all attached files for this agent
+        file_agents = await self.files_agents_manager.list_files_for_agent(agent_id=agent_state.id, actor=self.actor)
+        if not file_agents:
+            return "No files are currently attached to search"
+
+        results = []
+        total_hits = 0
+        files_with_matches = {}
+
+        try:
+            filter = {"source_id": {"$in": source_ids}}
+            search_results = await search_pinecone_index(query, limit, filter, self.actor)
+
+            # Process search results
+            if "result" in search_results and "hits" in search_results["result"]:
+                for hit in search_results["result"]["hits"]:
+                    if total_hits >= limit:
+                        break
+
+                    total_hits += 1
+
+                    # Extract hit information
+                    hit_id = hit.get("_id", "unknown")
+                    score = hit.get("_score", 0.0)
+                    fields = hit.get("fields", {})
+                    text = fields.get(PINECONE_TEXT_FIELD_NAME, "")
+                    file_id = fields.get("file_id", "")
+
+                    # Find corresponding file name
+                    file_name = "Unknown File"
+                    for fa in file_agents:
+                        if fa.file_id == file_id:
+                            file_name = fa.file_name
+                            break
+
+                    # Group by file name
+                    if file_name not in files_with_matches:
+                        files_with_matches[file_name] = []
+                    files_with_matches[file_name].append({"text": text, "score": score, "hit_id": hit_id})
+
+        except Exception as e:
+            self.logger.error(f"Pinecone search failed: {str(e)}")
+            raise e
+
+        if not files_with_matches:
+            return f"No semantic matches found in Pinecone for query: '{query}'"
+
+        # Format results
+        passage_num = 0
+        for file_name, matches in files_with_matches.items():
+            for match in matches:
+                passage_num += 1
+
+                # Format each passage with terminal-style header
+                score_display = f"(score: {match['score']:.3f})"
+                passage_header = f"\n=== {file_name} (passage #{passage_num}) {score_display} ==="
+
+                # Format the passage text
+                passage_text = match["text"].strip()
+                lines = passage_text.splitlines()
+                formatted_lines = []
+                for line in lines[:20]:  # Limit to first 20 lines per passage
+                    formatted_lines.append(f"  {line}")
+
+                if len(lines) > 20:
+                    formatted_lines.append(f"  ... [truncated {len(lines) - 20} more lines]")
+
+                passage_content = "\n".join(formatted_lines)
+                results.append(f"{passage_header}\n{passage_content}")
+
+        # Mark access for files that had matches
+        if files_with_matches:
+            matched_file_names = [name for name in files_with_matches.keys() if name != "Unknown File"]
+            if matched_file_names:
+                await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=matched_file_names, actor=self.actor)
+
+        # Create summary header
+        file_count = len(files_with_matches)
+        summary = f"Found {total_hits} Pinecone matches in {file_count} file{'s' if file_count != 1 else ''} for query: '{query}'"
+
+        # Combine all results
+        formatted_results = [summary, "=" * len(summary)] + results
+
+        self.logger.info(f"Pinecone search completed: {total_hits} matches across {file_count} files")
+        return "\n".join(formatted_results)
+
+    async def _search_files_traditional(self, agent_state: AgentState, query: str, limit: int) -> str:
+        """Traditional search using existing passage manager."""
         # Get semantic search results
         passages = await self.agent_manager.list_source_passages_async(
             actor=self.actor,
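_search_files_pinecone expects search_pinecone_index to return a dict shaped like {"result": {"hits": [...]}}, each hit carrying _id, _score, and a fields mapping with the passage text and file_id. A hand-built response in that shape, run through the same per-hit extraction (the sample values and the PINECONE_TEXT_FIELD_NAME binding below are illustrative assumptions):

    PINECONE_TEXT_FIELD_NAME = "text"  # assumed value; the real constant lives in letta.constants

    search_results = {
        "result": {
            "hits": [
                {"_id": "p1", "_score": 0.87, "fields": {"text": "First passage ...", "file_id": "file-123"}},
            ]
        }
    }

    # Same extraction logic as the executor above.
    for hit in search_results["result"]["hits"]:
        score = hit.get("_score", 0.0)
        fields = hit.get("fields", {})
        text = fields.get(PINECONE_TEXT_FIELD_NAME, "")
        file_id = fields.get("file_id", "")
        print(f"{file_id} (score: {score:.3f}): {text}")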
letta/services/user_manager.py
CHANGED

@@ -14,7 +14,6 @@ from letta.otel.tracing import trace_method
 from letta.schemas.user import User as PydanticUser
 from letta.schemas.user import UserUpdate
 from letta.server.db import db_registry
-from letta.settings import settings
 from letta.utils import enforce_types
 
 logger = get_logger(__name__)
letta/settings.py
CHANGED

@@ -39,12 +39,17 @@ class ToolSettings(BaseSettings):
 class SummarizerSettings(BaseSettings):
     model_config = SettingsConfigDict(env_prefix="letta_summarizer_", extra="ignore")
 
-    mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER
+    # mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER
+    mode: SummarizationMode = SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
     message_buffer_limit: int = 60
     message_buffer_min: int = 15
     enable_summarization: bool = True
     max_summarization_retries: int = 3
 
+    # partial evict summarizer percentage
+    # eviction based on percentage of message count, not token count
+    partial_evict_summarizer_percentage: float = 0.30
+
     # TODO(cliandy): the below settings are tied to old summarization and should be deprecated or moved
     # Controls if we should evict all messages
     # TODO: Can refactor this into an enum if we have a bunch of different kinds of summarizers

@@ -253,6 +258,13 @@ class Settings(BaseSettings):
     llm_request_timeout_seconds: float = Field(default=60.0, ge=10.0, le=1800.0, description="Timeout for LLM requests in seconds")
     llm_stream_timeout_seconds: float = Field(default=60.0, ge=10.0, le=1800.0, description="Timeout for LLM streaming requests in seconds")
 
+    # For embeddings
+    enable_pinecone: bool = False
+    pinecone_api_key: Optional[str] = None
+    pinecone_source_index: Optional[str] = "sources"
+    pinecone_agent_index: Optional[str] = "recall"
+    upsert_pinecone_indices: bool = False
+
     @property
     def letta_pg_uri(self) -> str:
         if self.pg_uri:
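Since SummarizerSettings declares env_prefix="letta_summarizer_", the new defaults can be overridden through the environment; the Pinecone fields live on the main Settings class, whose env names below assume a plain letta_ prefix (an assumption, as that prefix is not visible in this hunk):

    import os

    # Shown in the diff: SummarizerSettings uses the "letta_summarizer_" prefix.
    os.environ["LETTA_SUMMARIZER_MODE"] = "partial_evict_message_buffer"  # enum value string is an assumption
    os.environ["LETTA_SUMMARIZER_PARTIAL_EVICT_SUMMARIZER_PERCENTAGE"] = "0.5"

    # Assumed env names for the new Settings fields:
    os.environ["LETTA_ENABLE_PINECONE"] = "true"
    os.environ["LETTA_PINECONE_API_KEY"] = "pc-..."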
letta/system.py
CHANGED

@@ -188,6 +188,22 @@ def package_summarize_message(summary, summary_message_count, hidden_message_cou
     return json_dumps(packaged_message)
 
 
+def package_summarize_message_no_counts(summary, timezone):
+    context_message = (
+        f"Note: prior messages have been hidden from view due to conversation memory constraints.\n"
+        + f"The following is a summary of the previous messages:\n {summary}"
+    )
+
+    formatted_time = get_local_time(timezone=timezone)
+    packaged_message = {
+        "type": "system_alert",
+        "message": context_message,
+        "time": formatted_time,
+    }
+
+    return json_dumps(packaged_message)
+
+
 def package_summarize_message_no_summary(hidden_message_count, message=None, timezone=None):
     """Add useful metadata to the summary message"""
 
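For a summary string "User asked about X", the new packager produces a system_alert payload like the following before json_dumps serializes it (the time value is a placeholder, since it comes from get_local_time):

    # Illustrative result of package_summarize_message_no_counts("User asked about X", tz):
    {
        "type": "system_alert",
        "message": (
            "Note: prior messages have been hidden from view due to conversation memory constraints.\n"
            "The following is a summary of the previous messages:\n User asked about X"
        ),
        "time": "2025-07-07 03:53:05 AM PDT-0700",  # placeholder timestamp
    }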
{letta_nightly-0.8.9.dev20250705104147.dist-info → letta_nightly-0.8.10.dev20250707035305.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: letta-nightly
-Version: 0.8.9.dev20250705104147
+Version: 0.8.10.dev20250707035305
 Summary: Create LLM agents with long-term memory and custom tools
 License: Apache License
 Author: Letta Team

@@ -75,6 +75,7 @@ Requires-Dist: pathvalidate (>=3.2.1,<4.0.0)
 Requires-Dist: pexpect (>=4.9.0,<5.0.0) ; extra == "dev" or extra == "all"
 Requires-Dist: pg8000 (>=1.30.3,<2.0.0) ; extra == "postgres" or extra == "desktop" or extra == "all"
 Requires-Dist: pgvector (>=0.2.3,<0.3.0) ; extra == "postgres" or extra == "desktop" or extra == "all"
+Requires-Dist: pinecone[asyncio] (>=7.3.0,<8.0.0)
 Requires-Dist: pre-commit (>=3.5.0,<4.0.0) ; extra == "dev" or extra == "all"
 Requires-Dist: prettytable (>=3.9.0,<4.0.0)
 Requires-Dist: psycopg2 (>=2.9.10,<3.0.0) ; extra == "postgres" or extra == "desktop" or extra == "all"