letta-nightly 0.10.0.dev20250806104523__py3-none-any.whl → 0.11.0.dev20250807000848__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. letta/__init__.py +1 -4
  2. letta/agent.py +1 -2
  3. letta/agents/base_agent.py +4 -7
  4. letta/agents/letta_agent.py +59 -51
  5. letta/agents/letta_agent_batch.py +1 -2
  6. letta/agents/voice_agent.py +1 -2
  7. letta/agents/voice_sleeptime_agent.py +1 -3
  8. letta/constants.py +4 -1
  9. letta/embeddings.py +1 -1
  10. letta/functions/function_sets/base.py +0 -1
  11. letta/functions/mcp_client/types.py +4 -0
  12. letta/groups/supervisor_multi_agent.py +1 -1
  13. letta/interfaces/anthropic_streaming_interface.py +16 -24
  14. letta/interfaces/openai_streaming_interface.py +16 -28
  15. letta/llm_api/llm_api_tools.py +3 -3
  16. letta/local_llm/vllm/api.py +3 -0
  17. letta/orm/__init__.py +3 -1
  18. letta/orm/agent.py +8 -0
  19. letta/orm/archive.py +86 -0
  20. letta/orm/archives_agents.py +27 -0
  21. letta/orm/job.py +5 -1
  22. letta/orm/mixins.py +8 -0
  23. letta/orm/organization.py +7 -8
  24. letta/orm/passage.py +12 -10
  25. letta/orm/sqlite_functions.py +2 -2
  26. letta/orm/tool.py +5 -4
  27. letta/schemas/agent.py +4 -2
  28. letta/schemas/agent_file.py +18 -1
  29. letta/schemas/archive.py +44 -0
  30. letta/schemas/embedding_config.py +2 -16
  31. letta/schemas/enums.py +2 -1
  32. letta/schemas/group.py +28 -3
  33. letta/schemas/job.py +4 -0
  34. letta/schemas/llm_config.py +29 -14
  35. letta/schemas/memory.py +9 -3
  36. letta/schemas/npm_requirement.py +12 -0
  37. letta/schemas/passage.py +3 -3
  38. letta/schemas/providers/letta.py +1 -1
  39. letta/schemas/providers/vllm.py +4 -4
  40. letta/schemas/sandbox_config.py +3 -1
  41. letta/schemas/tool.py +10 -38
  42. letta/schemas/tool_rule.py +2 -2
  43. letta/server/db.py +8 -2
  44. letta/server/rest_api/routers/v1/agents.py +9 -8
  45. letta/server/server.py +6 -40
  46. letta/server/startup.sh +3 -0
  47. letta/services/agent_manager.py +92 -31
  48. letta/services/agent_serialization_manager.py +62 -3
  49. letta/services/archive_manager.py +269 -0
  50. letta/services/helpers/agent_manager_helper.py +111 -37
  51. letta/services/job_manager.py +24 -0
  52. letta/services/passage_manager.py +98 -54
  53. letta/services/tool_executor/core_tool_executor.py +0 -1
  54. letta/services/tool_executor/sandbox_tool_executor.py +2 -2
  55. letta/services/tool_executor/tool_execution_manager.py +1 -1
  56. letta/services/tool_manager.py +70 -26
  57. letta/services/tool_sandbox/base.py +2 -2
  58. letta/services/tool_sandbox/local_sandbox.py +5 -1
  59. letta/templates/template_helper.py +8 -0
  60. {letta_nightly-0.10.0.dev20250806104523.dist-info → letta_nightly-0.11.0.dev20250807000848.dist-info}/METADATA +5 -6
  61. {letta_nightly-0.10.0.dev20250806104523.dist-info → letta_nightly-0.11.0.dev20250807000848.dist-info}/RECORD +64 -61
  62. letta/client/client.py +0 -2207
  63. letta/orm/enums.py +0 -21
  64. {letta_nightly-0.10.0.dev20250806104523.dist-info → letta_nightly-0.11.0.dev20250807000848.dist-info}/LICENSE +0 -0
  65. {letta_nightly-0.10.0.dev20250806104523.dist-info → letta_nightly-0.11.0.dev20250807000848.dist-info}/WHEEL +0 -0
  66. {letta_nightly-0.10.0.dev20250806104523.dist-info → letta_nightly-0.11.0.dev20250807000848.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -5,14 +5,11 @@ try:
     __version__ = version("letta")
 except PackageNotFoundError:
     # Fallback for development installations
-    __version__ = "0.10.0"
+    __version__ = "0.11.0"
 
 if os.environ.get("LETTA_VERSION"):
     __version__ = os.environ["LETTA_VERSION"]
 
-# import clients
-from letta.client.client import RESTClient
-
 # Import sqlite_functions early to ensure event handlers are registered
 from letta.orm import sqlite_functions
 
letta/agent.py CHANGED
@@ -41,12 +41,11 @@ from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.memory import summarize_messages
 from letta.orm import User
-from letta.orm.enums import ToolType
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
 from letta.schemas.block import BlockUpdate
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import MessageRole, ProviderType
+from letta.schemas.enums import MessageRole, ProviderType, ToolType
 from letta.schemas.letta_message_content import ImageContent, TextContent
 from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message, MessageCreate, ToolReturn
letta/agents/base_agent.py CHANGED
@@ -17,7 +17,7 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
-from letta.services.helpers.agent_manager_helper import compile_system_message_async
+from letta.services.helpers.agent_manager_helper import get_system_message_from_compiled_memory
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.utils import united_diff
@@ -122,7 +122,7 @@ class BaseAgent(ABC):
         curr_dynamic_section = extract_dynamic_section(curr_system_message_text)
 
         # generate just the memory string with current state for comparison
-        curr_memory_str = await agent_state.memory.compile_async(
+        curr_memory_str = await agent_state.memory.compile_in_thread_async(
            tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
         )
         new_dynamic_section = extract_dynamic_section(curr_memory_str)
@@ -142,16 +142,13 @@ class BaseAgent(ABC):
         if num_archival_memories is None:
             num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
 
-        new_system_message_str = await compile_system_message_async(
+        new_system_message_str = get_system_message_from_compiled_memory(
             system_prompt=agent_state.system,
-            in_context_memory=agent_state.memory,
+            memory_with_sources=curr_memory_str,
             in_context_memory_last_edit=memory_edit_timestamp,
             timezone=agent_state.timezone,
             previous_message_count=num_messages - len(in_context_messages),
             archival_memory_size=num_archival_memories,
-            tool_rules_solver=tool_rules_solver,
-            sources=agent_state.sources,
-            max_files_open=agent_state.max_files_open,
         )
 
         diff = united_diff(curr_system_message_text, new_system_message_str)
letta/agents/letta_agent.py CHANGED
@@ -30,12 +30,11 @@ from letta.llm_api.llm_client import LLMClient
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
-from letta.orm.enums import ToolType
 from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import log_event, trace_method, tracer
 from letta.schemas.agent import AgentState, UpdateAgent
-from letta.schemas.enums import JobStatus, MessageRole, ProviderType
+from letta.schemas.enums import JobStatus, MessageRole, ProviderType, ToolType
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
@@ -362,8 +361,16 @@ class LettaAgent(BaseAgent):
             if settings.track_stop_reason:
                 if step_progression == StepProgression.FINISHED and should_continue:
                     continue
+
+                self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
                 self.logger.info("Running final update. Step Progression: %s", step_progression)
                 try:
+                    if step_progression == StepProgression.FINISHED and not should_continue:
+                        if stop_reason is None:
+                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+                        await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+                        break
+
                     if step_progression < StepProgression.STEP_LOGGED:
                         await self.step_manager.log_step_async(
                             actor=self.actor,
@@ -392,12 +399,11 @@
                         self.logger.error("Error in step after logging step")
                         stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
                         await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
-                    elif step_progression == StepProgression.FINISHED and not should_continue:
-                        if stop_reason is None:
-                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-                        await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
                     else:
                         self.logger.error("Invalid StepProgression value")
+
+                    await self._log_request(request_start_timestamp_ns, request_span)
+
                 except Exception as e:
                     self.logger.error("Failed to update step: %s", e)
 
@@ -414,17 +420,7 @@
                 force=False,
             )
 
-        # log request time
-        if request_start_timestamp_ns:
-            now = get_utc_timestamp_ns()
-            duration_ms = ns_to_ms(now - request_start_timestamp_ns)
-            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
-
-            # update agent's last run metrics
-            now_datetime = get_utc_time()
-            await self._update_agent_last_run_metrics(now_datetime, duration_ms)
-
-            request_span.end()
+        await self._log_request(request_start_timestamp_ns, request_span)
 
         # Return back usage
         for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
@@ -591,8 +587,16 @@ class LettaAgent(BaseAgent):
             if settings.track_stop_reason:
                 if step_progression == StepProgression.FINISHED and should_continue:
                     continue
+
+                self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
                 self.logger.info("Running final update. Step Progression: %s", step_progression)
                 try:
+                    if step_progression == StepProgression.FINISHED and not should_continue:
+                        if stop_reason is None:
+                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+                        await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+                        break
+
                     if step_progression < StepProgression.STEP_LOGGED:
                         await self.step_manager.log_step_async(
                             actor=self.actor,
@@ -621,30 +625,17 @@
                         self.logger.error("Error in step after logging step")
                         stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
                         await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
-                    elif step_progression == StepProgression.FINISHED and not should_continue:
-                        if stop_reason is None:
-                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-                        await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
                     else:
                         self.logger.error("Invalid StepProgression value")
+
+                    await self._log_request(request_start_timestamp_ns, request_span)
+
                 except Exception as e:
                     self.logger.error("Failed to update step: %s", e)
 
            if not should_continue:
                break
 
-        # log request time
-        if request_start_timestamp_ns:
-            now = get_utc_timestamp_ns()
-            duration_ms = ns_to_ms(now - request_start_timestamp_ns)
-            request_span.add_event(name="request_ms", attributes={"duration_ms": duration_ms})
-
-            # update agent's last run metrics
-            now_datetime = get_utc_time()
-            await self._update_agent_last_run_metrics(now_datetime, duration_ms)
-
-            request_span.end()
-
         # Extend the in context message ids
         if not agent_state.message_buffer_autoclear:
             await self._rebuild_context_window(
@@ -655,6 +646,8 @@
                 force=False,
             )
 
+        await self._log_request(request_start_timestamp_ns, request_span)
+
         return current_in_context_messages, new_in_context_messages, stop_reason, usage
 
     async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
@@ -756,7 +749,6 @@
         elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
             interface = OpenAIStreamingInterface(
                 use_assistant_message=use_assistant_message,
-                put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
                 is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
                 messages=current_in_context_messages + new_in_context_messages,
                 tools=request_data.get("tools", []),
@@ -767,16 +759,20 @@
         async for chunk in interface.process(
             stream,
             ttft_span=request_span,
-            provider_request_start_timestamp_ns=provider_request_start_timestamp_ns,
         ):
-            # Measure time to first token
+            # Measure TTFT (trace, metric, and db). This should be consolidated.
             if first_chunk and request_span is not None:
                 now = get_utc_timestamp_ns()
                 ttft_ns = now - request_start_timestamp_ns
+
                 request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
                 metric_attributes = get_ctx_attributes()
                 metric_attributes["model.name"] = agent_state.llm_config.model
                 MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
+
+                if self.current_run_id and self.job_manager:
+                    await self.job_manager.record_ttft(self.current_run_id, ttft_ns, self.actor)
+
                 first_chunk = False
 
             if include_return_message_types is None or chunk.message_type in include_return_message_types:
@@ -914,8 +910,16 @@ class LettaAgent(BaseAgent):
             if settings.track_stop_reason:
                 if step_progression == StepProgression.FINISHED and should_continue:
                     continue
+
+                self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
                 self.logger.info("Running final update. Step Progression: %s", step_progression)
                 try:
+                    if step_progression == StepProgression.FINISHED and not should_continue:
+                        if stop_reason is None:
+                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+                        await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+                        break
+
                     if step_progression < StepProgression.STEP_LOGGED:
                         await self.step_manager.log_step_async(
                             actor=self.actor,
@@ -943,12 +947,12 @@
                        self.logger.error("Error in step after logging step")
                        stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
                        await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
-                    elif step_progression == StepProgression.FINISHED and not should_continue:
-                        if stop_reason is None:
-                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-                        await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
                     else:
                         self.logger.error("Invalid StepProgression value")
+
+                    # Do tracking for failure cases. Can consolidate with success conditions later.
+                    await self._log_request(request_start_timestamp_ns, request_span)
+
                 except Exception as e:
                     self.logger.error("Failed to update step: %s", e)
 
@@ -964,21 +968,23 @@
                 force=False,
             )
 
-        # log time of entire request
-        if request_start_timestamp_ns:
-            now = get_utc_timestamp_ns()
-            duration_ms = ns_to_ms(now - request_start_timestamp_ns)
-            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
-
-            # update agent's last run metrics
-            completion_time = get_utc_time()
-            await self._update_agent_last_run_metrics(completion_time, duration_ms)
-
-            request_span.end()
+        await self._log_request(request_start_timestamp_ns, request_span)
 
         for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
             yield f"data: {finish_chunk}\n\n"
 
+    async def _log_request(self, request_start_timestamp_ns: int, request_span: "Span | None"):
+        if request_start_timestamp_ns:
+            now_ns, now = get_utc_timestamp_ns(), get_utc_time()
+            duration_ns = now_ns - request_start_timestamp_ns
+            if request_span:
+                request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
+            await self._update_agent_last_run_metrics(now, ns_to_ms(duration_ns))
+            if self.current_run_id:
+                await self.job_manager.record_response_duration(self.current_run_id, duration_ns, self.actor)
+            if request_span:
+                request_span.end()
+
     # noinspection PyInconsistentReturns
     async def _build_and_request_from_llm(
         self,
@@ -1429,6 +1435,8 @@
                 status="error",
             )
 
+        print(target_tool)
+
         # TODO: This temp. Move this logic and code to executors
 
         if agent_step_span:
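
Note: the four near-identical inline timing blocks that previously followed each loop exit in this file are now funneled through the new _log_request helper shown above. As a simplified, standalone sketch of that pattern (not the shipped method, which also updates the agent's last-run metrics and records the duration against the current run via job_manager):

    import time

    def ns_to_ms(ns: int) -> float:
        return ns / 1_000_000

    async def log_request(request_start_timestamp_ns, request_span) -> None:
        # Consolidated exit path: emit one duration event per request, then close the span.
        if not request_start_timestamp_ns:
            return
        duration_ns = time.time_ns() - request_start_timestamp_ns
        if request_span:
            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
            request_span.end()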
letta/agents/letta_agent_batch.py CHANGED
@@ -16,10 +16,9 @@ from letta.jobs.types import RequestStatusUpdateInfo, StepStatusUpdateInfo
 from letta.llm_api.llm_client import LLMClient
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
-from letta.orm.enums import ToolType
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType, SandboxType
+from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType, SandboxType, ToolType
 from letta.schemas.job import JobUpdate
 from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
letta/agents/voice_agent.py CHANGED
@@ -13,9 +13,8 @@ from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
 from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
 from letta.log import get_logger
-from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState, AgentType
-from letta.schemas.enums import MessageRole
+from letta.schemas.enums import MessageRole, ToolType
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_request import (
letta/agents/voice_sleeptime_agent.py CHANGED
@@ -3,11 +3,10 @@ from typing import AsyncGenerator, List, Optional, Tuple, Union
 from letta.agents.helpers import _create_letta_response, serialize_message_history
 from letta.agents.letta_agent import LettaAgent
 from letta.constants import DEFAULT_MAX_STEPS
-from letta.orm.enums import ToolType
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
 from letta.schemas.block import BlockUpdate
-from letta.schemas.enums import MessageStreamStatus
+from letta.schemas.enums import MessageStreamStatus, ToolType
 from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, MessageType
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.message import MessageCreate
@@ -166,7 +165,6 @@ class VoiceSleeptimeAgent(LettaAgent):
         memory = serialize_message_history(messages, context)
         self.agent_manager.passage_manager.insert_passage(
             agent_state=agent_state,
-            agent_id=agent_state.id,
             text=memory,
             actor=self.actor,
         )
letta/constants.py CHANGED
@@ -5,7 +5,7 @@ from logging import CRITICAL, DEBUG, ERROR, INFO, NOTSET, WARN, WARNING
 LETTA_DIR = os.path.join(os.path.expanduser("~"), ".letta")
 LETTA_TOOL_EXECUTION_DIR = os.path.join(LETTA_DIR, "tool_execution_dir")
 
-LETTA_MODEL_ENDPOINT = "https://inference.letta.com"
+LETTA_MODEL_ENDPOINT = "https://inference.letta.com/v1/"
 DEFAULT_TIMEZONE = "UTC"
 
 ADMIN_PREFIX = "/v1/admin"
@@ -385,3 +385,6 @@ PINECONE_THROTTLE_DELAY = 0.75 # seconds base delay between batches
 # builtin web search
 WEB_SEARCH_MODEL_ENV_VAR_NAME = "LETTA_BUILTIN_WEBSEARCH_OPENAI_MODEL_NAME"
 WEB_SEARCH_MODEL_ENV_VAR_DEFAULT_VALUE = "gpt-4.1-mini-2025-04-14"
+
+# Excluded providers from base tool rules
+EXCLUDED_PROVIDERS_FROM_BASE_TOOL_RULES = {"anthropic", "openai", "google_ai", "google_vertex"}
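
A hypothetical illustration of how the new constant might be consulted (the actual call sites are in agent_manager.py and its helpers, which are not part of this excerpt):

    from letta.constants import EXCLUDED_PROVIDERS_FROM_BASE_TOOL_RULES

    def should_apply_base_tool_rules(provider_name: str) -> bool:
        # Hypothetical helper: skip Letta's base tool rules for excluded providers.
        return provider_name not in EXCLUDED_PROVIDERS_FROM_BASE_TOOL_RULES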
letta/embeddings.py CHANGED
@@ -235,7 +235,7 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
         return OpenAIEmbeddings(
             api_key=model_settings.openai_api_key,
             model=config.embedding_model,
-            base_url=model_settings.openai_api_base,
+            base_url=config.embedding_endpoint or model_settings.openai_api_base,
         )
 
     elif endpoint_type == "azure":
letta/functions/function_sets/base.py CHANGED
@@ -75,7 +75,6 @@ def archival_memory_insert(self: "Agent", content: str) -> Optional[str]:
     """
     self.passage_manager.insert_passage(
         agent_state=self.agent_state,
-        agent_id=self.agent_state.id,
         text=content,
         actor=self.user,
     )
letta/functions/mcp_client/types.py CHANGED
@@ -6,6 +6,8 @@ from typing import Dict, List, Optional
 from mcp import Tool
 from pydantic import BaseModel, Field
 
+from letta.utils import get_logger
+
 # MCP Authentication Constants
 MCP_AUTH_HEADER_AUTHORIZATION = "Authorization"
 MCP_AUTH_TOKEN_BEARER_PREFIX = "Bearer"
@@ -13,6 +15,8 @@ TEMPLATED_VARIABLE_REGEX = (
     r"\{\{\s*([A-Z_][A-Z0-9_]*)\s*(?:\|\s*([^}]+?)\s*)?\}\}"  # Allows for optional whitespace around the variable name and default value
 )
 
+logger = get_logger(__name__)
+
 
 class MCPTool(Tool):
     """A simple wrapper around MCP's tool definition (to avoid conflict with our own)"""
letta/groups/supervisor_multi_agent.py CHANGED
@@ -7,7 +7,7 @@ from letta.functions.functions import parse_source_code
 from letta.functions.schema_generator import generate_schema
 from letta.interface import AgentInterface
 from letta.orm import User
-from letta.orm.enums import ToolType
+from letta.schemas.enums import ToolType
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.message import MessageCreate
 from letta.schemas.tool import Tool
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -25,11 +25,8 @@ from anthropic.types.beta import (
 )
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
-from letta.otel.context import get_ctx_attributes
-from letta.otel.metric_registry import MetricRegistry
 from letta.schemas.letta_message import (
     AssistantMessage,
     HiddenReasoningMessage,
@@ -133,28 +130,12 @@ class AnthropicStreamingInterface:
         self,
         stream: AsyncStream[BetaRawMessageStreamEvent],
         ttft_span: Optional["Span"] = None,
-        provider_request_start_timestamp_ns: int | None = None,
     ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
         prev_message_type = None
         message_index = 0
-        first_chunk = True
         try:
             async with stream:
                 async for event in stream:
-                    # TODO (cliandy): reconsider in stream cancellations
-                    # await cancellation_token.check_and_raise_if_cancelled()
-                    if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
-                        now = get_utc_timestamp_ns()
-                        ttft_ns = now - provider_request_start_timestamp_ns
-                        ttft_span.add_event(
-                            name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
-                        )
-                        metric_attributes = get_ctx_attributes()
-                        if isinstance(event, BetaRawMessageStartEvent):
-                            metric_attributes["model.name"] = event.message.model
-                        MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
-                        first_chunk = False
-
                     # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
                     if isinstance(event, BetaRawContentBlockStartEvent):
                         content = event.content_block
@@ -389,13 +370,24 @@
 
             self.anthropic_mode = None
         except asyncio.CancelledError as e:
-            logger.info("Cancelled stream %s", e)
-            yield LettaStopReason(stop_reason=StopReasonType.cancelled)
-            raise
+            import traceback
+
+            logger.error("Cancelled stream %s: %s", e, traceback.format_exc())
+            ttft_span.add_event(
+                name="stop_reason",
+                attributes={"stop_reason": StopReasonType.cancelled.value, "error": str(e), "stacktrace": traceback.format_exc()},
+            )
+            raise e
         except Exception as e:
-            logger.error("Error processing stream: %s", e)
+            import traceback
+
+            logger.error("Error processing stream: %s", e, traceback.format_exc())
+            ttft_span.add_event(
+                name="stop_reason",
+                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+            )
             yield LettaStopReason(stop_reason=StopReasonType.error)
-            raise
+            raise e
         finally:
             logger.info("AnthropicStreamingInterface: Stream processing complete.")
 
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -7,12 +7,9 @@ from openai import AsyncStream
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.llm_api.openai_client import is_openai_reasoning_model
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
-from letta.otel.context import get_ctx_attributes
-from letta.otel.metric_registry import MetricRegistry
 from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, TextContent
 from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
@@ -35,7 +32,6 @@ class OpenAIStreamingInterface:
     def __init__(
         self,
         use_assistant_message: bool = False,
-        put_inner_thoughts_in_kwarg: bool = False,
         is_openai_proxy: bool = False,
         messages: Optional[list] = None,
         tools: Optional[list] = None,
@@ -107,7 +103,6 @@
         self,
         stream: AsyncStream[ChatCompletionChunk],
         ttft_span: Optional["Span"] = None,
-        provider_request_start_timestamp_ns: int | None = None,
     ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
         """
         Iterates over the OpenAI stream, yielding SSE events.
@@ -125,29 +120,11 @@
             tool_dicts = [tool["function"] if isinstance(tool, dict) and "function" in tool else tool for tool in self.tools]
             self.fallback_input_tokens += num_tokens_from_functions(tool_dicts)
 
-        first_chunk = True
         try:
             async with stream:
                 prev_message_type = None
                 message_index = 0
                 async for chunk in stream:
-                    # TODO (cliandy): reconsider in stream cancellations
-                    # await cancellation_token.check_and_raise_if_cancelled()
-                    if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
-                        now = get_utc_timestamp_ns()
-                        ttft_ns = now - provider_request_start_timestamp_ns
-                        ttft_span.add_event(
-                            name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
-                        )
-                        metric_attributes = get_ctx_attributes()
-                        metric_attributes["model.name"] = chunk.model
-                        MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
-
-                        if self.is_openai_proxy:
-                            self.fallback_output_tokens += count_tokens(chunk.model_dump_json())
-
-                        first_chunk = False
-
                     if not self.model or not self.message_id:
                         self.model = chunk.model
                         self.message_id = chunk.id
@@ -389,12 +366,23 @@
                                     yield tool_call_msg
                                 self.function_id_buffer = None
         except asyncio.CancelledError as e:
-            logger.info("Cancelled stream %s", e)
-            yield LettaStopReason(stop_reason=StopReasonType.cancelled)
-            raise
+            import traceback
+
+            logger.error("Cancelled stream %s: %s", e, traceback.format_exc())
+            ttft_span.add_event(
+                name="stop_reason",
+                attributes={"stop_reason": StopReasonType.cancelled.value, "error": str(e), "stacktrace": traceback.format_exc()},
+            )
+            raise e
         except Exception as e:
-            logger.error("Error processing stream: %s", e)
+            import traceback
+
+            logger.error("Error processing stream: %s", e, traceback.format_exc())
+            ttft_span.add_event(
+                name="stop_reason",
+                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+            )
             yield LettaStopReason(stop_reason=StopReasonType.error)
-            raise
+            raise e
         finally:
             logger.info("OpenAIStreamingInterface: Stream processing complete.")
letta/llm_api/llm_api_tools.py CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional, Union
 
 import requests
 
-from letta.constants import CLI_WARNING_PREFIX, LETTA_MODEL_ENDPOINT
+from letta.constants import CLI_WARNING_PREFIX
 from letta.errors import LettaConfigurationError, RateLimitExceededError
 from letta.llm_api.anthropic import (
     anthropic_bedrock_chat_completions_request,
@@ -193,8 +193,8 @@ def create(
         # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
         # TODO(matt) move into LLMConfig
         # TODO: This vllm checking is very brittle and is a patch at most
-        if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT or (llm_config.handle and "vllm" in llm_config.handle):
-            function_call = "auto"  # TODO change to "required" once proxy supports it
+        if llm_config.handle and "vllm" in llm_config.handle:
+            function_call = "auto"
         else:
             function_call = "required"
 
letta/local_llm/vllm/api.py CHANGED
@@ -32,6 +32,9 @@ def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_wi
     if not endpoint.startswith(("http://", "https://")):
         raise ValueError(f"Endpoint ({endpoint}) must begin with http:// or https://")
 
+    if not endpoint.endswith("/v1"):
+        endpoint = endpoint.rstrip("/") + "/v1"
+
     try:
         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
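
The added check guarantees the request targets a "/v1"-suffixed base URL. A minimal standalone mirror of that normalization, assuming the scheme check has already passed:

    def normalize_vllm_endpoint(endpoint: str) -> str:
        # Append "/v1" exactly once, tolerating a trailing slash.
        if not endpoint.endswith("/v1"):
            endpoint = endpoint.rstrip("/") + "/v1"
        return endpoint

    assert normalize_vllm_endpoint("http://localhost:8000/") == "http://localhost:8000/v1"
    assert normalize_vllm_endpoint("http://localhost:8000/v1") == "http://localhost:8000/v1"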
letta/orm/__init__.py CHANGED
@@ -1,5 +1,7 @@
 from letta.orm.agent import Agent
 from letta.orm.agents_tags import AgentsTags
+from letta.orm.archive import Archive
+from letta.orm.archives_agents import ArchivesAgents
 from letta.orm.base import Base
 from letta.orm.block import Block
 from letta.orm.block_history import BlockHistory
@@ -19,7 +21,7 @@ from letta.orm.llm_batch_job import LLMBatchJob
 from letta.orm.mcp_server import MCPServer
 from letta.orm.message import Message
 from letta.orm.organization import Organization
-from letta.orm.passage import AgentPassage, BasePassage, SourcePassage
+from letta.orm.passage import ArchivalPassage, BasePassage, SourcePassage
 from letta.orm.prompt import Prompt
 from letta.orm.provider import Provider
 from letta.orm.provider_trace import ProviderTrace
letta/orm/agent.py CHANGED
@@ -24,6 +24,7 @@ from letta.utils import calculate_file_defaults_based_on_context_window
 
 if TYPE_CHECKING:
     from letta.orm.agents_tags import AgentsTags
+    from letta.orm.archives_agents import ArchivesAgents
     from letta.orm.files_agents import FileAgent
     from letta.orm.identity import Identity
     from letta.orm.organization import Organization
@@ -156,6 +157,13 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, AsyncAttrs):
         cascade="all, delete-orphan",
         lazy="selectin",
     )
+    archives_agents: Mapped[List["ArchivesAgents"]] = relationship(
+        "ArchivesAgents",
+        back_populates="agent",
+        cascade="all, delete-orphan",
+        lazy="noload",
+        doc="Archives accessible by this agent.",
+    )
 
     def _get_per_file_view_window_char_limit(self) -> int:
         """Get the per_file_view_window_char_limit, calculating defaults if None."""