letta-nightly 0.8.9.dev20250706104157__py3-none-any.whl → 0.8.10.dev20250707104400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (39)
  1. letta/__init__.py +1 -1
  2. letta/agents/letta_agent.py +24 -7
  3. letta/agents/voice_agent.py +1 -1
  4. letta/agents/voice_sleeptime_agent.py +1 -1
  5. letta/constants.py +7 -0
  6. letta/functions/function_sets/files.py +2 -1
  7. letta/functions/functions.py +0 -1
  8. letta/helpers/pinecone_utils.py +143 -0
  9. letta/llm_api/openai_client.py +4 -0
  10. letta/orm/file.py +4 -0
  11. letta/prompts/gpt_summarize.py +4 -6
  12. letta/schemas/file.py +6 -0
  13. letta/schemas/letta_base.py +4 -4
  14. letta/schemas/letta_message.py +15 -7
  15. letta/schemas/letta_message_content.py +15 -15
  16. letta/schemas/llm_config.py +4 -0
  17. letta/schemas/message.py +35 -31
  18. letta/schemas/providers.py +17 -10
  19. letta/server/rest_api/app.py +11 -0
  20. letta/server/rest_api/routers/v1/agents.py +19 -0
  21. letta/server/rest_api/routers/v1/sources.py +36 -7
  22. letta/services/file_manager.py +8 -2
  23. letta/services/file_processor/embedder/base_embedder.py +16 -0
  24. letta/services/file_processor/embedder/openai_embedder.py +3 -2
  25. letta/services/file_processor/embedder/pinecone_embedder.py +74 -0
  26. letta/services/file_processor/file_processor.py +22 -22
  27. letta/services/job_manager.py +0 -4
  28. letta/services/source_manager.py +0 -1
  29. letta/services/summarizer/enums.py +1 -0
  30. letta/services/summarizer/summarizer.py +237 -6
  31. letta/services/tool_executor/files_tool_executor.py +109 -3
  32. letta/services/user_manager.py +0 -1
  33. letta/settings.py +13 -1
  34. letta/system.py +16 -0
  35. {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707104400.dist-info}/METADATA +2 -1
  36. {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707104400.dist-info}/RECORD +39 -36
  37. {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707104400.dist-info}/LICENSE +0 -0
  38. {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707104400.dist-info}/WHEEL +0 -0
  39. {letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707104400.dist-info}/entry_points.txt +0 -0
letta/services/summarizer/summarizer.py CHANGED
@@ -4,13 +4,19 @@ import traceback
 from typing import List, Optional, Tuple, Union
 
 from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
-from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, MESSAGE_SUMMARY_REQUEST_ACK
+from letta.helpers.message_helper import convert_message_creates_to_messages
+from letta.llm_api.llm_client import LLMClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.prompts import gpt_summarize
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import TextContent
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message, MessageCreate
+from letta.schemas.user import User
 from letta.services.summarizer.enums import SummarizationMode
+from letta.system import package_summarize_message_no_counts
 from letta.templates.template_helper import render_template
 
 logger = get_logger(__name__)
@@ -29,18 +35,24 @@ class Summarizer:
         summarizer_agent: Optional[Union[EphemeralSummaryAgent, "VoiceSleeptimeAgent"]] = None,
         message_buffer_limit: int = 10,
         message_buffer_min: int = 3,
+        partial_evict_summarizer_percentage: float = 0.30,
     ):
         self.mode = mode
 
         # Need to do validation on this
+        # TODO: Move this to config
         self.message_buffer_limit = message_buffer_limit
         self.message_buffer_min = message_buffer_min
         self.summarizer_agent = summarizer_agent
-        # TODO: Move this to config
+        self.partial_evict_summarizer_percentage = partial_evict_summarizer_percentage
 
     @trace_method
-    def summarize(
-        self, in_context_messages: List[Message], new_letta_messages: List[Message], force: bool = False, clear: bool = False
+    async def summarize(
+        self,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        force: bool = False,
+        clear: bool = False,
     ) -> Tuple[List[Message], bool]:
         """
         Summarizes or trims in_context_messages according to the chosen mode,
@@ -58,7 +70,19 @@ class Summarizer:
             (could be appended to the conversation if desired)
         """
         if self.mode == SummarizationMode.STATIC_MESSAGE_BUFFER:
-            return self._static_buffer_summarization(in_context_messages, new_letta_messages, force=force, clear=clear)
+            return self._static_buffer_summarization(
+                in_context_messages,
+                new_letta_messages,
+                force=force,
+                clear=clear,
+            )
+        elif self.mode == SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER:
+            return await self._partial_evict_buffer_summarization(
+                in_context_messages,
+                new_letta_messages,
+                force=force,
+                clear=clear,
+            )
         else:
             # Fallback or future logic
             return in_context_messages, False
@@ -75,9 +99,131 @@ class Summarizer:
         task.add_done_callback(callback)
         return task
 
+    async def _partial_evict_buffer_summarization(
+        self,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        force: bool = False,
+        clear: bool = False,
+    ) -> Tuple[List[Message], bool]:
+        """Summarization as implemented in the original MemGPT loop, but using message count instead of token count.
+        Evict a partial amount of messages, and replace message[1] with a recursive summary.
+
+        Note that this can't be made sync, because we're waiting on the summary to inject it into the context window,
+        unlike the version that writes it to a block.
+
+        Unless force is True, don't summarize.
+        Ignore clear, we don't use it.
+        """
+        all_in_context_messages = in_context_messages + new_letta_messages
+
+        if not force:
+            logger.debug("Not forcing summarization, returning in-context messages as is.")
+            return all_in_context_messages, False
+
+        # Very ugly code to pull LLMConfig etc from the SummarizerAgent if we're not using it for anything else
+        assert self.summarizer_agent is not None
+
+        # First step: determine how many messages to retain
+        total_message_count = len(all_in_context_messages)
+        assert self.partial_evict_summarizer_percentage >= 0.0 and self.partial_evict_summarizer_percentage <= 1.0
+        target_message_start = round((1.0 - self.partial_evict_summarizer_percentage) * total_message_count)
+        logger.info(f"Target message count: {total_message_count}->{(total_message_count-target_message_start)}")
+
+        # The summary message we'll insert is role 'user' (vs 'assistant', 'tool', or 'system')
+        # We are going to put it at index 1 (index 0 is the system message)
+        # That means that index 2 needs to be role 'assistant', so walk up the list starting at
+        # the target_message_count and find the first assistant message
+        for i in range(target_message_start, total_message_count):
+            if all_in_context_messages[i].role == MessageRole.assistant:
+                assistant_message_index = i
+                break
+        else:
+            raise ValueError(f"No assistant message found from indices {target_message_start} to {total_message_count}")
+
+        # The sequence to summarize is index 1 -> assistant_message_index
+        messages_to_summarize = all_in_context_messages[1:assistant_message_index]
+        logger.info(f"Eviction indices: {1}->{assistant_message_index}(/{total_message_count})")
+
+        # Dynamically get the LLMConfig from the summarizer agent
+        # Pretty cringe code here that we need the agent for this but we don't use it
+        agent_state = await self.summarizer_agent.agent_manager.get_agent_by_id_async(
+            agent_id=self.summarizer_agent.agent_id, actor=self.summarizer_agent.actor
+        )
+
+        # TODO if we do this via the "agent", then we can more easily allow toggling on the memory block version
+        summary_message_str = await simple_summary(
+            messages=messages_to_summarize,
+            llm_config=agent_state.llm_config,
+            actor=self.summarizer_agent.actor,
+            include_ack=True,
+        )
+
+        # TODO add counts back
+        # Recall message count
+        # num_recall_messages_current = await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+        # num_messages_evicted = len(messages_to_summarize)
+        # num_recall_messages_hidden = num_recall_messages_total - len()
+
+        # Create the summary message
+        summary_message_str_packed = package_summarize_message_no_counts(
+            summary=summary_message_str,
+            timezone=agent_state.timezone,
+        )
+        summary_message_obj = convert_message_creates_to_messages(
+            message_creates=[
+                MessageCreate(
+                    role=MessageRole.user,
+                    content=[TextContent(text=summary_message_str_packed)],
+                )
+            ],
+            agent_id=agent_state.id,
+            timezone=agent_state.timezone,
+            # We already packed, don't pack again
+            wrap_user_message=False,
+            wrap_system_message=False,
+        )[0]
+
+        # Create the message in the DB
+        await self.summarizer_agent.message_manager.create_many_messages_async(
+            pydantic_msgs=[summary_message_obj],
+            actor=self.summarizer_agent.actor,
+        )
+
+        updated_in_context_messages = all_in_context_messages[assistant_message_index:]
+        return [all_in_context_messages[0], summary_message_obj] + updated_in_context_messages, True
+
     def _static_buffer_summarization(
-        self, in_context_messages: List[Message], new_letta_messages: List[Message], force: bool = False, clear: bool = False
+        self,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        force: bool = False,
+        clear: bool = False,
     ) -> Tuple[List[Message], bool]:
+        """
+        Implements static buffer summarization by maintaining a fixed-size message buffer (< N messages).
+
+        Logic:
+        1. Combine existing context messages with new messages
+        2. If total messages <= buffer limit and not forced, return unchanged
+        3. Calculate how many messages to retain (0 if clear=True, otherwise message_buffer_min)
+        4. Find the trim index to keep the most recent messages while preserving user message boundaries
+        5. Evict older messages (everything between system message and trim index)
+        6. If summarizer agent is available, trigger background summarization of evicted messages
+        7. Return updated context with system message + retained recent messages
+
+        Args:
+            in_context_messages: Existing conversation context messages
+            new_letta_messages: Newly added messages to append
+            force: Force summarization even if buffer limit not exceeded
+            clear: Clear all messages except system message (retain_count = 0)
+
+        Returns:
+            Tuple of (updated_messages, was_summarized)
+            - updated_messages: New context after trimming/summarization
+            - was_summarized: True if messages were evicted and summarization triggered
+        """
+
         all_in_context_messages = in_context_messages + new_letta_messages
 
         if len(all_in_context_messages) <= self.message_buffer_limit and not force:
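
Editor's note: to make the eviction arithmetic in _partial_evict_buffer_summarization concrete, here is a minimal standalone sketch using plain-string roles instead of Letta's MessageRole enum (all numbers are illustrative). With the default percentage of 0.30, roughly the newest 30% of messages survive, shifted forward to the first assistant message:

    # Toy model of the index math above.
    roles = ["system"] + ["user", "assistant", "tool"] * 11  # 34 messages

    percentage = 0.30  # stands in for partial_evict_summarizer_percentage
    total = len(roles)
    target_start = round((1.0 - percentage) * total)  # 24: keep ~30% newest

    # Walk forward to the first assistant message, so that after inserting the
    # summary (role 'user') at index 1, the next message has role 'assistant'.
    for i in range(target_start, total):
        if roles[i] == "assistant":
            assistant_index = i  # 26 for this toy list
            break
    else:
        raise ValueError("No assistant message in the retained window")

    evicted = roles[1:assistant_index]   # 25 messages get summarized away
    retained = roles[assistant_index:]   # 8 messages stay in context verbatim
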
@@ -139,6 +285,91 @@ class Summarizer:
         return [all_in_context_messages[0]] + updated_in_context_messages, True
 
 
+def simple_formatter(messages: List[Message], include_system: bool = False) -> str:
+    """Go from an OpenAI-style list of messages to a concatenated string"""
+
+    parsed_messages = [message.to_openai_dict() for message in messages if message.role != MessageRole.system or include_system]
+    return "\n".join(json.dumps(msg) for msg in parsed_messages)
+
+
+def simple_message_wrapper(openai_msg: dict) -> Message:
+    """Extremely simple way to map from role/content to a Message object w/ throwaway dummy fields"""
+
+    if "role" not in openai_msg:
+        raise ValueError(f"Missing role in openai_msg: {openai_msg}")
+    if "content" not in openai_msg:
+        raise ValueError(f"Missing content in openai_msg: {openai_msg}")
+
+    if openai_msg["role"] == "user":
+        return Message(
+            role=MessageRole.user,
+            content=[TextContent(text=openai_msg["content"])],
+        )
+    elif openai_msg["role"] == "assistant":
+        return Message(
+            role=MessageRole.assistant,
+            content=[TextContent(text=openai_msg["content"])],
+        )
+    elif openai_msg["role"] == "system":
+        return Message(
+            role=MessageRole.system,
+            content=[TextContent(text=openai_msg["content"])],
+        )
+    else:
+        raise ValueError(f"Unknown role: {openai_msg['role']}")
+
+
+async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor: User, include_ack: bool = True) -> str:
+    """Generate a simple summary from a list of messages.
+
+    Intentionally kept functional due to the simplicity of the prompt.
+    """
+
+    # Create an LLMClient from the config
+    llm_client = LLMClient.create(
+        provider_type=llm_config.model_endpoint_type,
+        put_inner_thoughts_first=True,
+        actor=actor,
+    )
+    assert llm_client is not None
+
+    # Prepare the messages payload to send to the LLM
+    system_prompt = gpt_summarize.SYSTEM
+    summary_transcript = simple_formatter(messages)
+
+    if include_ack:
+        input_messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
+            {"role": "user", "content": summary_transcript},
+        ]
+    else:
+        input_messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": summary_transcript},
+        ]
+    print("messages going to summarizer:", input_messages)
+    input_messages_obj = [simple_message_wrapper(msg) for msg in input_messages]
+    print("messages going to summarizer (objs):", input_messages_obj)
+
+    request_data = llm_client.build_request_data(input_messages_obj, llm_config, tools=[])
+    print("request data:", request_data)
+    # NOTE: we should disable inner_thoughts_in_kwargs here, because we don't use it.
+    # Leaving it commented out for now for safety, but it's fine assuming the var here is a copy, not a reference.
+    # llm_config.put_inner_thoughts_in_kwargs = False
+    response_data = await llm_client.request_async(request_data, llm_config)
+    response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, llm_config)
+    if response.choices[0].message.content is None:
+        logger.warning("No content returned from summarizer")
+        # TODO: raise an error instead?
+        # return "[Summary failed to generate]"
+        raise Exception("Summary failed to generate")
+    else:
+        summary = response.choices[0].message.content.strip()
+
+    return summary
+
+
 def format_transcript(messages: List[Message], include_system: bool = False) -> List[str]:
     """
     Turn a list of Message objects into a human-readable transcript.
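
Editor's note: the include_ack path mirrors the original MemGPT summarizer prompt that the docstring above references: the system prompt is followed by a pre-seeded assistant acknowledgement, and the entire transcript rides in a single user turn. A rough sketch of the payload shape, with placeholder strings standing in for gpt_summarize.SYSTEM and MESSAGE_SUMMARY_REQUEST_ACK:

    system_prompt = "Summarize the following conversation..."  # placeholder for gpt_summarize.SYSTEM
    ack = "Understood, send the conversation."                 # placeholder for MESSAGE_SUMMARY_REQUEST_ACK
    transcript = '{"role": "user", "content": "hi"}\n{"role": "assistant", "content": "hello"}'

    input_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "assistant", "content": ack},    # pre-seeded ack keeps chat-tuned models on task
        {"role": "user", "content": transcript},  # simple_formatter output: one JSON dict per line
    ]
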
letta/services/tool_executor/files_tool_executor.py CHANGED
@@ -2,8 +2,9 @@ import asyncio
 import re
 from typing import Any, Dict, List, Optional
 
-from letta.constants import MAX_FILES_OPEN
+from letta.constants import MAX_FILES_OPEN, PINECONE_TEXT_FIELD_NAME
 from letta.functions.types import FileOpenRequest
+from letta.helpers.pinecone_utils import search_pinecone_index, should_use_pinecone
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
@@ -463,14 +464,15 @@ class LettaFileToolExecutor(ToolExecutor):
         return "\n".join(formatted_results)
 
     @trace_method
-    async def semantic_search_files(self, agent_state: AgentState, query: str, limit: int = 10) -> str:
+    async def semantic_search_files(self, agent_state: AgentState, query: str, limit: int = 5) -> str:
         """
         Search for text within attached files using semantic search and return passages with their source filenames.
+        Uses Pinecone if configured, otherwise falls back to traditional search.
 
         Args:
             agent_state: Current agent state
             query: Search query for semantic matching
-            limit: Maximum number of results to return (default: 10)
+            limit: Maximum number of results to return (default: 5)
 
         Returns:
             Formatted string with search results in IDE/terminal style
@@ -485,6 +487,110 @@ class LettaFileToolExecutor(ToolExecutor):
 
         self.logger.info(f"Semantic search started for agent {agent_state.id} with query '{query}' (limit: {limit})")
 
+        # Check if Pinecone is enabled and use it if available
+        if should_use_pinecone():
+            return await self._search_files_pinecone(agent_state, query, limit)
+        else:
+            return await self._search_files_traditional(agent_state, query, limit)
+
+    async def _search_files_pinecone(self, agent_state: AgentState, query: str, limit: int) -> str:
+        """Search files using the Pinecone vector database."""
+
+        # Extract unique source_ids
+        # TODO: Inefficient
+        attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
+        source_ids = [source.id for source in attached_sources]
+        if not source_ids:
+            return "No valid source IDs found for attached files"
+
+        # Get all attached files for this agent
+        file_agents = await self.files_agents_manager.list_files_for_agent(agent_id=agent_state.id, actor=self.actor)
+        if not file_agents:
+            return "No files are currently attached to search"
+
+        results = []
+        total_hits = 0
+        files_with_matches = {}
+
+        try:
+            filter = {"source_id": {"$in": source_ids}}
+            search_results = await search_pinecone_index(query, limit, filter, self.actor)
+
+            # Process search results
+            if "result" in search_results and "hits" in search_results["result"]:
+                for hit in search_results["result"]["hits"]:
+                    if total_hits >= limit:
+                        break
+
+                    total_hits += 1
+
+                    # Extract hit information
+                    hit_id = hit.get("_id", "unknown")
+                    score = hit.get("_score", 0.0)
+                    fields = hit.get("fields", {})
+                    text = fields.get(PINECONE_TEXT_FIELD_NAME, "")
+                    file_id = fields.get("file_id", "")
+
+                    # Find the corresponding file name
+                    file_name = "Unknown File"
+                    for fa in file_agents:
+                        if fa.file_id == file_id:
+                            file_name = fa.file_name
+                            break
+
+                    # Group by file name
+                    if file_name not in files_with_matches:
+                        files_with_matches[file_name] = []
+                    files_with_matches[file_name].append({"text": text, "score": score, "hit_id": hit_id})
+
+        except Exception as e:
+            self.logger.error(f"Pinecone search failed: {str(e)}")
+            raise e
+
+        if not files_with_matches:
+            return f"No semantic matches found in Pinecone for query: '{query}'"
+
+        # Format results
+        passage_num = 0
+        for file_name, matches in files_with_matches.items():
+            for match in matches:
+                passage_num += 1
+
+                # Format each passage with a terminal-style header
+                score_display = f"(score: {match['score']:.3f})"
+                passage_header = f"\n=== {file_name} (passage #{passage_num}) {score_display} ==="
+
+                # Format the passage text
+                passage_text = match["text"].strip()
+                lines = passage_text.splitlines()
+                formatted_lines = []
+                for line in lines[:20]:  # Limit to first 20 lines per passage
+                    formatted_lines.append(f" {line}")
+
+                if len(lines) > 20:
+                    formatted_lines.append(f" ... [truncated {len(lines) - 20} more lines]")
+
+                passage_content = "\n".join(formatted_lines)
+                results.append(f"{passage_header}\n{passage_content}")
+
+        # Mark access for files that had matches
+        if files_with_matches:
+            matched_file_names = [name for name in files_with_matches.keys() if name != "Unknown File"]
+            if matched_file_names:
+                await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=matched_file_names, actor=self.actor)
+
+        # Create summary header
+        file_count = len(files_with_matches)
+        summary = f"Found {total_hits} Pinecone matches in {file_count} file{'s' if file_count != 1 else ''} for query: '{query}'"
+
+        # Combine all results
+        formatted_results = [summary, "=" * len(summary)] + results
+
+        self.logger.info(f"Pinecone search completed: {total_hits} matches across {file_count} files")
+        return "\n".join(formatted_results)
+
+    async def _search_files_traditional(self, agent_state: AgentState, query: str, limit: int) -> str:
+        """Traditional search using the existing passage manager."""
         # Get semantic search results
         passages = await self.agent_manager.list_source_passages_async(
             actor=self.actor,
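
Editor's note: for reference, the $in metadata filter and the hit shape that the result-processing loop above consumes look roughly like the following (field values are illustrative; the actual text field key is the PINECONE_TEXT_FIELD_NAME constant, shown here as "text"):

    filter = {"source_id": {"$in": ["source-123", "source-456"]}}  # restrict hits to attached sources

    # Shape assumed by the loop over search_results["result"]["hits"]:
    search_results = {
        "result": {
            "hits": [
                {
                    "_id": "chunk-001",
                    "_score": 0.87,
                    "fields": {"text": "...passage text...", "file_id": "file-789"},
                },
            ]
        }
    }
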
letta/services/user_manager.py CHANGED
@@ -14,7 +14,6 @@ from letta.otel.tracing import trace_method
 from letta.schemas.user import User as PydanticUser
 from letta.schemas.user import UserUpdate
 from letta.server.db import db_registry
-from letta.settings import settings
 from letta.utils import enforce_types
 
 logger = get_logger(__name__)
letta/settings.py CHANGED
@@ -39,12 +39,17 @@ class ToolSettings(BaseSettings):
 class SummarizerSettings(BaseSettings):
     model_config = SettingsConfigDict(env_prefix="letta_summarizer_", extra="ignore")
 
-    mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER
+    # mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER
+    mode: SummarizationMode = SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
     message_buffer_limit: int = 60
     message_buffer_min: int = 15
     enable_summarization: bool = True
     max_summarization_retries: int = 3
 
+    # partial evict summarizer percentage
+    # eviction based on percentage of message count, not token count
+    partial_evict_summarizer_percentage: float = 0.30
+
     # TODO(cliandy): the below settings are tied to old summarization and should be deprecated or moved
     # Controls if we should evict all messages
     # TODO: Can refactor this into an enum if we have a bunch of different kinds of summarizers
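
Editor's note: since SummarizerSettings declares env_prefix="letta_summarizer_", the new knob should be overridable through the environment in the usual pydantic-settings way. A minimal sketch, with variable names derived from that convention rather than verified against Letta's docs:

    import os

    # Hypothetical overrides, following the letta_summarizer_ prefix
    # (pydantic-settings matches env vars case-insensitively):
    os.environ["LETTA_SUMMARIZER_PARTIAL_EVICT_SUMMARIZER_PERCENTAGE"] = "0.50"
    os.environ["LETTA_SUMMARIZER_MESSAGE_BUFFER_LIMIT"] = "80"

    # Re-instantiating the settings class would then pick these values up.
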
@@ -253,6 +258,13 @@ class Settings(BaseSettings):
     llm_request_timeout_seconds: float = Field(default=60.0, ge=10.0, le=1800.0, description="Timeout for LLM requests in seconds")
     llm_stream_timeout_seconds: float = Field(default=60.0, ge=10.0, le=1800.0, description="Timeout for LLM streaming requests in seconds")
 
+    # For embeddings
+    enable_pinecone: bool = False
+    pinecone_api_key: Optional[str] = None
+    pinecone_source_index: Optional[str] = "sources"
+    pinecone_agent_index: Optional[str] = "recall"
+    upsert_pinecone_indices: bool = False
+
     @property
     def letta_pg_uri(self) -> str:
         if self.pg_uri:
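
Editor's note: a plausible reading of how these flags gate the new code path; the actual check lives in letta.helpers.pinecone_utils.should_use_pinecone, which is not shown in this diff, so the sketch below is an assumption:

    from typing import Optional

    def should_use_pinecone_sketch(enable_pinecone: bool, api_key: Optional[str]) -> bool:
        # Presumably both the feature flag and an API key are required.
        return enable_pinecone and api_key is not None

    assert should_use_pinecone_sketch(True, "pc-abc123")
    assert not should_use_pinecone_sketch(True, None)
    assert not should_use_pinecone_sketch(False, "pc-abc123")
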
letta/system.py CHANGED
@@ -188,6 +188,22 @@ def package_summarize_message(summary, summary_message_count, hidden_message_cou
     return json_dumps(packaged_message)
 
 
+def package_summarize_message_no_counts(summary, timezone):
+    context_message = (
+        f"Note: prior messages have been hidden from view due to conversation memory constraints.\n"
+        + f"The following is a summary of the previous messages:\n {summary}"
+    )
+
+    formatted_time = get_local_time(timezone=timezone)
+    packaged_message = {
+        "type": "system_alert",
+        "message": context_message,
+        "time": formatted_time,
+    }
+
+    return json_dumps(packaged_message)
+
+
 def package_summarize_message_no_summary(hidden_message_count, message=None, timezone=None):
     """Add useful metadata to the summary message"""
 
{letta_nightly-0.8.9.dev20250706104157.dist-info → letta_nightly-0.8.10.dev20250707104400.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: letta-nightly
-Version: 0.8.9.dev20250706104157
+Version: 0.8.10.dev20250707104400
 Summary: Create LLM agents with long-term memory and custom tools
 License: Apache License
 Author: Letta Team
@@ -75,6 +75,7 @@ Requires-Dist: pathvalidate (>=3.2.1,<4.0.0)
 Requires-Dist: pexpect (>=4.9.0,<5.0.0) ; extra == "dev" or extra == "all"
 Requires-Dist: pg8000 (>=1.30.3,<2.0.0) ; extra == "postgres" or extra == "desktop" or extra == "all"
 Requires-Dist: pgvector (>=0.2.3,<0.3.0) ; extra == "postgres" or extra == "desktop" or extra == "all"
+Requires-Dist: pinecone[asyncio] (>=7.3.0,<8.0.0)
 Requires-Dist: pre-commit (>=3.5.0,<4.0.0) ; extra == "dev" or extra == "all"
 Requires-Dist: prettytable (>=3.9.0,<4.0.0)
 Requires-Dist: psycopg2 (>=2.9.10,<3.0.0) ; extra == "postgres" or extra == "desktop" or extra == "all"