letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.3.dev20250607000559__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
Files changed (105)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +16 -12
  3. letta/agents/base_agent.py +1 -1
  4. letta/agents/helpers.py +13 -2
  5. letta/agents/letta_agent.py +72 -34
  6. letta/agents/letta_agent_batch.py +1 -2
  7. letta/agents/voice_agent.py +19 -13
  8. letta/agents/voice_sleeptime_agent.py +23 -6
  9. letta/constants.py +18 -0
  10. letta/data_sources/__init__.py +0 -0
  11. letta/data_sources/redis_client.py +282 -0
  12. letta/errors.py +0 -4
  13. letta/functions/function_sets/files.py +58 -0
  14. letta/functions/schema_generator.py +18 -1
  15. letta/groups/sleeptime_multi_agent_v2.py +13 -3
  16. letta/helpers/datetime_helpers.py +47 -3
  17. letta/helpers/decorators.py +69 -0
  18. letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
  19. letta/interfaces/anthropic_streaming_interface.py +43 -24
  20. letta/interfaces/openai_streaming_interface.py +21 -19
  21. letta/llm_api/anthropic.py +1 -1
  22. letta/llm_api/anthropic_client.py +30 -16
  23. letta/llm_api/google_vertex_client.py +1 -1
  24. letta/llm_api/helpers.py +36 -30
  25. letta/llm_api/llm_api_tools.py +1 -1
  26. letta/llm_api/llm_client_base.py +29 -1
  27. letta/llm_api/openai.py +1 -1
  28. letta/llm_api/openai_client.py +6 -8
  29. letta/local_llm/chat_completion_proxy.py +1 -1
  30. letta/memory.py +1 -1
  31. letta/orm/enums.py +1 -0
  32. letta/orm/file.py +80 -3
  33. letta/orm/files_agents.py +13 -0
  34. letta/orm/passage.py +2 -0
  35. letta/orm/sqlalchemy_base.py +34 -11
  36. letta/otel/__init__.py +0 -0
  37. letta/otel/context.py +25 -0
  38. letta/otel/events.py +0 -0
  39. letta/otel/metric_registry.py +122 -0
  40. letta/otel/metrics.py +66 -0
  41. letta/otel/resource.py +26 -0
  42. letta/{tracing.py → otel/tracing.py} +55 -78
  43. letta/plugins/README.md +22 -0
  44. letta/plugins/__init__.py +0 -0
  45. letta/plugins/defaults.py +11 -0
  46. letta/plugins/plugins.py +72 -0
  47. letta/schemas/enums.py +8 -0
  48. letta/schemas/file.py +12 -0
  49. letta/schemas/letta_request.py +6 -0
  50. letta/schemas/passage.py +1 -0
  51. letta/schemas/tool.py +4 -0
  52. letta/server/db.py +7 -7
  53. letta/server/rest_api/app.py +8 -6
  54. letta/server/rest_api/routers/v1/agents.py +46 -37
  55. letta/server/rest_api/routers/v1/groups.py +3 -3
  56. letta/server/rest_api/routers/v1/sources.py +26 -3
  57. letta/server/rest_api/routers/v1/tools.py +7 -2
  58. letta/server/rest_api/utils.py +9 -6
  59. letta/server/server.py +25 -13
  60. letta/services/agent_manager.py +186 -194
  61. letta/services/block_manager.py +1 -1
  62. letta/services/context_window_calculator/context_window_calculator.py +1 -1
  63. letta/services/context_window_calculator/token_counter.py +3 -2
  64. letta/services/file_processor/chunker/line_chunker.py +34 -0
  65. letta/services/file_processor/file_processor.py +43 -12
  66. letta/services/file_processor/parser/mistral_parser.py +11 -1
  67. letta/services/files_agents_manager.py +96 -7
  68. letta/services/group_manager.py +6 -6
  69. letta/services/helpers/agent_manager_helper.py +404 -3
  70. letta/services/identity_manager.py +1 -1
  71. letta/services/job_manager.py +1 -1
  72. letta/services/llm_batch_manager.py +1 -1
  73. letta/services/mcp/stdio_client.py +5 -1
  74. letta/services/mcp_manager.py +4 -4
  75. letta/services/message_manager.py +1 -1
  76. letta/services/organization_manager.py +1 -1
  77. letta/services/passage_manager.py +604 -19
  78. letta/services/per_agent_lock_manager.py +1 -1
  79. letta/services/provider_manager.py +1 -1
  80. letta/services/sandbox_config_manager.py +1 -1
  81. letta/services/source_manager.py +178 -19
  82. letta/services/step_manager.py +2 -2
  83. letta/services/summarizer/summarizer.py +1 -1
  84. letta/services/telemetry_manager.py +1 -1
  85. letta/services/tool_executor/builtin_tool_executor.py +117 -0
  86. letta/services/tool_executor/composio_tool_executor.py +53 -0
  87. letta/services/tool_executor/core_tool_executor.py +474 -0
  88. letta/services/tool_executor/files_tool_executor.py +138 -0
  89. letta/services/tool_executor/mcp_tool_executor.py +45 -0
  90. letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
  91. letta/services/tool_executor/tool_execution_manager.py +34 -14
  92. letta/services/tool_executor/tool_execution_sandbox.py +1 -1
  93. letta/services/tool_executor/tool_executor.py +3 -802
  94. letta/services/tool_executor/tool_executor_base.py +43 -0
  95. letta/services/tool_manager.py +55 -59
  96. letta/services/tool_sandbox/e2b_sandbox.py +1 -1
  97. letta/services/tool_sandbox/local_sandbox.py +6 -3
  98. letta/services/user_manager.py +6 -3
  99. letta/settings.py +23 -2
  100. letta/utils.py +7 -2
  101. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/METADATA +4 -2
  102. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/RECORD +105 -83
  103. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/LICENSE +0 -0
  104. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/WHEEL +0 -0
  105. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/app.py CHANGED
@@ -256,13 +256,15 @@ def create_application() -> "FastAPI":
         print(f"▶ Using OTLP tracing with endpoint: {otlp_endpoint}")
         env_name_suffix = os.getenv("ENV_NAME")
         service_name = f"letta-server-{env_name_suffix.lower()}" if env_name_suffix else "letta-server"
-        from letta.tracing import setup_tracing
+        from letta.otel.metrics import setup_metrics
+        from letta.otel.tracing import setup_tracing

         setup_tracing(
             endpoint=otlp_endpoint,
             app=app,
             service_name=service_name,
         )
+        setup_metrics(endpoint=otlp_endpoint, app=app, service_name=service_name)

     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)
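
The new setup_metrics call mirrors setup_tracing but wires up an OTLP metrics pipeline. As a rough sketch of what such a helper can look like using the public opentelemetry-python SDK (the real letta/otel/metrics.py may differ; the app parameter is accepted here only to mirror setup_tracing's signature):

    from opentelemetry import metrics
    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
    from opentelemetry.sdk.resources import Resource

    def setup_metrics(endpoint: str, app=None, service_name: str = "letta-server") -> None:
        # Push metrics to the OTLP endpoint on a periodic schedule.
        reader = PeriodicExportingMetricReader(OTLPMetricExporter(endpoint=endpoint))
        provider = MeterProvider(
            resource=Resource.create({"service.name": service_name}),
            metric_readers=[reader],
        )
        # Instruments created via metrics.get_meter(...) anywhere in the
        # process now report through this provider.
        metrics.set_meter_provider(provider)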
@@ -331,7 +333,7 @@ def start_server(
     if (os.getenv("LOCAL_HTTPS") == "true") or "--localhttps" in sys.argv:
         print(f"▶ Server running at: https://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
         print(f"▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
-        if importlib.util.find_spec("granian") is not None and settings.use_uvloop:
+        if importlib.util.find_spec("granian") is not None and settings.use_granian:
             from granian import Granian

             # Experimental Granian engine
@@ -339,14 +341,14 @@ def start_server(
                 target="letta.server.rest_api.app:app",
                 # factory=True,
                 interface="asgi",
-                address=host or "localhost",
+                address=host or "127.0.0.1",  # Note granian address must be an ip address
                 port=port or REST_DEFAULT_PORT,
                 workers=settings.uvicorn_workers,
                 # threads=
                 reload=reload or settings.uvicorn_reload,
                 reload_ignore_patterns=["openapi_letta.json"],
                 reload_ignore_worker_failure=True,
-                reload_tick=100,
+                reload_tick=4000,  # set to 4s to prevent crashing on weird state
                 # log_level="info"
                 ssl_keyfile="certs/localhost-key.pem",
                 ssl_cert="certs/localhost.pem",
@@ -380,14 +382,14 @@ def start_server(
                 target="letta.server.rest_api.app:app",
                 # factory=True,
                 interface="asgi",
-                address=host or "localhost",
+                address=host or "127.0.0.1",  # Note granian address must be an ip address
                 port=port or REST_DEFAULT_PORT,
                 workers=settings.uvicorn_workers,
                 # threads=
                 reload=reload or settings.uvicorn_reload,
                 reload_ignore_patterns=["openapi_letta.json"],
                 reload_ignore_worker_failure=True,
-                reload_tick=100,
+                reload_tick=4000,  # set to 4s to prevent crashing on weird state
                 # log_level="info"
             ).serve()
         else:
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -12,16 +12,18 @@ from sqlalchemy.exc import IntegrityError, OperationalError
 from starlette.responses import Response, StreamingResponse

 from letta.agents.letta_agent import LettaAgent
-from letta.constants import CORE_MEMORY_SOURCE_CHAR_LIMIT, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.groups.sleeptime_multi_agent_v2 import SleeptimeMultiAgentV2
 from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.otel.context import get_ctx_attributes
+from letta.otel.metric_registry import MetricRegistry
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
 from letta.schemas.block import Block, BlockUpdate
 from letta.schemas.group import Group
 from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
-from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion
+from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion, MessageType
 from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
@@ -149,7 +151,7 @@ def export_agent_serialized(


 @router.post("/import", response_model=AgentState, operation_id="import_agent_serialized")
-async def import_agent_serialized(
+def import_agent_serialized(
     file: UploadFile = File(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -167,10 +169,10 @@ async def import_agent_serialized(
     """
     Import a serialized agent file and recreate the agent in the system.
     """
-    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+    actor = server.user_manager.get_user_or_default(user_id=actor_id)

     try:
-        serialized_data = await file.read()
+        serialized_data = file.file.read()
         agent_json = json.loads(serialized_data)

         # Validate the JSON against AgentSchema before passing it to deserialize
@@ -311,20 +313,21 @@ async def attach_source(
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     agent_state = await server.agent_manager.attach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)

-    files = await server.source_manager.list_files(source_id, actor)
+    # Check if the agent is missing any files tools
+    agent_state = await server.agent_manager.attach_missing_files_tools_async(agent_state=agent_state, actor=actor)
+
+    files = await server.source_manager.list_files(source_id, actor, include_content=True)
     texts = []
     file_ids = []
+    file_names = []
     for f in files:
-        passages = await server.passage_manager.list_passages_by_file_id_async(file_id=f.id, actor=actor)
-        passage_text = ""
-        for p in passages:
-            if len(passage_text) <= CORE_MEMORY_SOURCE_CHAR_LIMIT:
-                passage_text += p.text
-
-        texts.append(passage_text)
+        texts.append(f.content if f.content else "")
         file_ids.append(f.id)
+        file_names.append(f.file_name)

-    await server.insert_files_into_context_window(agent_state=agent_state, texts=texts, file_ids=file_ids, actor=actor)
+    await server.insert_files_into_context_window(
+        agent_state=agent_state, texts=texts, file_ids=file_ids, file_names=file_names, actor=actor
+    )

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=source_id)
@@ -347,6 +350,10 @@ async def detach_source(
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     agent_state = await server.agent_manager.detach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)
+
+    if not agent_state.sources:
+        agent_state = await server.agent_manager.detach_all_files_tools_async(agent_state=agent_state, actor=actor)
+
     files = await server.source_manager.list_files(source_id, actor)
     file_ids = [f.id for f in files]
     await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)
@@ -451,7 +458,7 @@ async def list_blocks(
     """
     Retrieve the core memory blocks of a specific agent.
     """
-    actor = server.user_manager.get_user_or_default(user_id=actor_id)
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     try:
         agent = await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, include_relationships=["memory"], actor=actor)
         return agent.memory.blocks
@@ -658,19 +665,18 @@ async def send_message(
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.
     """
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
+
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     request_start_timestamp_ns = get_utc_timestamp_ns()
-    user_eligible = True
     # TODO: This is redundant, remove soon
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
     agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
     model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]

-    if user_eligible and agent_eligible and feature_enabled and model_compatible:
+    if agent_eligible and model_compatible:
         if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
-            experimental_agent = SleeptimeMultiAgentV2(
+            agent_loop = SleeptimeMultiAgentV2(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -682,7 +688,7 @@ async def send_message(
                 group=agent.multi_agent_group,
             )
         else:
-            experimental_agent = LettaAgent(
+            agent_loop = LettaAgent(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -693,11 +699,12 @@ async def send_message(
                 telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
             )

-        result = await experimental_agent.step(
+        result = await agent_loop.step(
            request.messages,
            max_steps=10,
            use_assistant_message=request.use_assistant_message,
            request_start_timestamp_ns=request_start_timestamp_ns,
+           include_return_message_types=request.include_return_message_types,
        )
     else:
        result = await server.send_message_to_agent(
@@ -710,6 +717,7 @@ async def send_message(
            use_assistant_message=request.use_assistant_message,
            assistant_message_tool_name=request.assistant_message_tool_name,
            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+           include_return_message_types=request.include_return_message_types,
        )
     return result
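
The MetricRegistry used above appears to be a process-wide singleton over OpenTelemetry instruments (this release also adds letta/otel/metric_registry.py and letta/helpers/singleton.py). A minimal sketch of the pattern, with illustrative metric names rather than the real ones:

    from opentelemetry import metrics

    class MetricRegistry:
        # Sketch only: one shared instance per process so instruments are
        # created once. The real letta.otel.metric_registry may differ.
        _instance = None

        def __new__(cls):
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                meter = metrics.get_meter("letta")
                cls._instance.user_message_counter = meter.create_counter(
                    "letta.user_message.count", description="User messages received"
                )
                cls._instance.ttft_ms_histogram = meter.create_histogram(
                    "letta.ttft_ms", unit="ms", description="Time to first token"
                )
            return cls._instance

Calling MetricRegistry().user_message_counter.add(1, attrs) from any handler then increments one shared counter, tagged with whatever attributes get_ctx_attributes() supplies.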
@@ -739,22 +747,20 @@ async def send_message_streaming(
     This endpoint accepts a message from a user and processes it through the agent.
     It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
     """
-    request_start_timestamp_ns = get_utc_timestamp_ns()
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
+
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    user_eligible = actor.organization_id not in ["org-4a3af5dd-4c6a-48cb-ac13-3f73ecaaa4bf", "org-4ab3f6e8-9a44-4bee-aeb6-c681cbbc7bf6"]
     # TODO: This is redundant, remove soon
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
     agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
     model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
     model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
     not_letta_endpoint = not ("inference.letta.com" in agent.llm_config.model_endpoint)
     request_start_timestamp_ns = get_utc_timestamp_ns()

-    if user_eligible and agent_eligible and feature_enabled and model_compatible:
+    if agent_eligible and model_compatible:
         if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
-            experimental_agent = SleeptimeMultiAgentV2(
+            agent_loop = SleeptimeMultiAgentV2(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -768,7 +774,7 @@ async def send_message_streaming(
                 group=agent.multi_agent_group,
             )
         else:
-            experimental_agent = LettaAgent(
+            agent_loop = LettaAgent(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -782,21 +788,23 @@ async def send_message_streaming(

         if request.stream_tokens and model_compatible_token_streaming and not_letta_endpoint:
             result = StreamingResponseWithStatusCode(
-                experimental_agent.step_stream(
+                agent_loop.step_stream(
                     input_messages=request.messages,
                     max_steps=10,
                     use_assistant_message=request.use_assistant_message,
                     request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
                 ),
                 media_type="text/event-stream",
             )
         else:
             result = StreamingResponseWithStatusCode(
-                experimental_agent.step_stream_no_tokens(
+                agent_loop.step_stream_no_tokens(
                     request.messages,
                     max_steps=10,
                     use_assistant_message=request.use_assistant_message,
                     request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
                 ),
                 media_type="text/event-stream",
             )
@@ -812,6 +820,7 @@ async def send_message_streaming(
             assistant_message_tool_name=request.assistant_message_tool_name,
             assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
             request_start_timestamp_ns=request_start_timestamp_ns,
+            include_return_message_types=request.include_return_message_types,
         )

     return result
@@ -826,6 +835,7 @@ async def process_message_background(
     use_assistant_message: bool,
     assistant_message_tool_name: str,
     assistant_message_tool_kwarg: str,
+    include_return_message_types: Optional[List[MessageType]] = None,
 ) -> None:
     """Background task to process the message and update job status."""
     try:
@@ -841,6 +851,7 @@ async def process_message_background(
             assistant_message_tool_kwarg=assistant_message_tool_kwarg,
             metadata={"job_id": job_id},  # Pass job_id through metadata
             request_start_timestamp_ns=request_start_timestamp_ns,
+            include_return_message_types=include_return_message_types,
         )

         # Update job status to completed
@@ -878,6 +889,7 @@ async def send_message_async(
     Asynchronously process a user message and return a run object.
     The actual processing happens in the background, and the status can be checked using the run ID.
     """
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)

     # Create a new job
@@ -907,6 +919,7 @@ async def send_message_async(
         use_assistant_message=request.use_assistant_message,
         assistant_message_tool_name=request.assistant_message_tool_name,
         assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        include_return_message_types=request.include_return_message_types,
     )

     return run
@@ -953,17 +966,13 @@ async def summarize_agent_conversation(
     This endpoint summarizes the current message history for a given agent,
     truncating and compressing it down to the specified `max_message_length`.
     """
-    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)

-    # user_eligible = actor.organization_id not in ["org-4a3af5dd-4c6a-48cb-ac13-3f73ecaaa4bf", "org-4ab3f6e8-9a44-4bee-aeb6-c681cbbc7bf6"]
-    # TODO: This is redundant, remove soon
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
     agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
     model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]

-    if agent_eligible and feature_enabled and model_compatible:
+    if agent_eligible and model_compatible:
         agent = LettaAgent(
             agent_id=agent_id,
             message_manager=server.message_manager,
letta/server/rest_api/routers/v1/groups.py CHANGED
@@ -86,7 +86,7 @@ def create_group(


 @router.patch("/{group_id}", response_model=Group, operation_id="modify_group")
-def modify_group(
+async def modify_group(
     group_id: str,
     group: GroupUpdate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
@@ -97,8 +97,8 @@ def modify_group(
     Create a new multi-agent group with the specified configuration.
     """
     try:
-        actor = server.user_manager.get_user_or_default(user_id=actor_id)
-        return server.group_manager.modify_group(group_id=group_id, group_update=group, actor=actor)
+        actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+        return await server.group_manager.modify_group_async(group_id=group_id, group_update=group, actor=actor)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

letta/server/rest_api/routers/v1/sources.py CHANGED
@@ -27,6 +27,11 @@ from letta.utils import safe_create_task, sanitize_filename

 logger = get_logger(__name__)

+mimetypes.add_type("text/markdown", ".md")
+mimetypes.add_type("text/markdown", ".markdown")
+mimetypes.add_type("application/jsonl", ".jsonl")
+mimetypes.add_type("application/x-jsonlines", ".jsonl")
+

 router = APIRouter(prefix="/sources", tags=["sources"])

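These add_type calls extend the stdlib mimetypes registry at import time, so later guess_type lookups resolve the new extensions. A quick illustration (results shown for an interpreter that does not already map these extensions):

    import mimetypes

    mimetypes.add_type("text/markdown", ".md")
    mimetypes.add_type("application/jsonl", ".jsonl")

    # guess_type now resolves the newly registered extensions.
    print(mimetypes.guess_type("notes.md"))      # ('text/markdown', None)
    print(mimetypes.guess_type("events.jsonl"))  # ('application/jsonl', None)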
@@ -174,7 +179,15 @@ async def upload_file_to_source(
     """
     Upload a file to a data source.
     """
-    allowed_media_types = {"application/pdf", "text/plain", "application/json"}
+    allowed_media_types = {
+        "application/pdf",
+        "text/plain",
+        "text/markdown",
+        "text/x-markdown",
+        "application/json",
+        "application/jsonl",
+        "application/x-jsonlines",
+    }

     # Normalize incoming Content-Type header (strip charset or any parameters).
     raw_ct = file.content_type or ""
@@ -192,6 +205,9 @@ async def upload_file_to_source(
         ".pdf": "application/pdf",
         ".txt": "text/plain",
         ".json": "application/json",
+        ".md": "text/markdown",
+        ".markdown": "text/markdown",
+        ".jsonl": "application/jsonl",
     }
     media_type = ext_map.get(ext, media_type)
@@ -270,14 +286,21 @@ async def list_source_files(
     source_id: str,
     limit: int = Query(1000, description="Number of files to return"),
     after: Optional[str] = Query(None, description="Pagination cursor to fetch the next set of results"),
+    include_content: bool = Query(False, description="Whether to include full file content"),
     server: "SyncServer" = Depends(get_letta_server),
-    actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+    actor_id: Optional[str] = Header(None, alias="user_id"),
 ):
     """
     List paginated files associated with a data source.
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    return await server.source_manager.list_files(source_id=source_id, limit=limit, after=after, actor=actor)
+    return await server.source_manager.list_files(
+        source_id=source_id,
+        limit=limit,
+        after=after,
+        actor=actor,
+        include_content=include_content,
+    )


 # it's redundant to include /delete in the URL path. The HTTP verb DELETE already implies that action.
letta/server/rest_api/routers/v1/tools.py CHANGED
@@ -501,7 +501,8 @@ async def add_mcp_server_to_config(
     if isinstance(request, StdioServerConfig):
         mapped_request = MCPServer(server_name=request.server_name, server_type=request.type, stdio_config=request)
         # don't allow stdio servers
-        raise HTTPException(status_code=400, detail="StdioServerConfig is not supported")
+        if tool_settings.mcp_disable_stdio:  # protected server
+            raise HTTPException(status_code=400, detail="StdioServerConfig is not supported")
     elif isinstance(request, SSEServerConfig):
         mapped_request = MCPServer(server_name=request.server_name, server_type=request.type, server_url=request.server_url)
         # TODO: add HTTP streaming
@@ -530,4 +531,8 @@ async def delete_mcp_server_from_config(
     # log to DB
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     mcp_server_id = await server.mcp_manager.get_mcp_server_id_by_name(mcp_server_name, actor)
-    return server.mcp_manager.delete_mcp_server_by_id(mcp_server_id, actor=actor)
+    await server.mcp_manager.delete_mcp_server_by_id(mcp_server_id, actor=actor)
+
+    # TODO: don't do this in the future (just return MCPServer)
+    all_servers = await server.mcp_manager.list_mcp_servers(actor=actor)
+    return [server.to_config() for server in all_servers]
letta/server/rest_api/utils.py CHANGED
@@ -15,9 +15,12 @@ from pydantic import BaseModel

 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
-from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
+from letta.otel.context import get_ctx_attributes
+from letta.otel.metric_registry import MetricRegistry
+from letta.otel.tracing import tracer
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.llm_config import LLMConfig
@@ -27,7 +30,6 @@ from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.interface import StreamingServerInterface
 from letta.system import get_heartbeat, package_function_response
-from letta.tracing import tracer

 if TYPE_CHECKING:
     from letta.server.server import SyncServer
@@ -81,8 +83,12 @@ async def sse_async_generator(
         if first_chunk and ttft_span is not None:
             now = get_utc_timestamp_ns()
             ttft_ns = now - request_start_timestamp_ns
-            ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
+            ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
             ttft_span.end()
+            metric_attributes = get_ctx_attributes()
+            if llm_config:
+                metric_attributes["model.name"] = llm_config.model
+            MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
             first_chunk = False

         # yield f"data: {json.dumps(chunk)}\n\n"
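
Judging by the inline arithmetic it replaces, ns_to_ms is presumably a thin integer-division helper in letta/helpers/datetime_helpers.py; a sketch under that assumption:

    def ns_to_ms(ns: int) -> int:
        # Convert a nanosecond duration to whole milliseconds,
        # matching the inline `ttft_ns // 1_000_000` this replaces.
        return ns // 1_000_000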
@@ -190,7 +196,6 @@ def create_letta_messages_from_llm_response(
     add_heartbeat_request_system_message: bool = False,
     reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
     pre_computed_assistant_message_id: Optional[str] = None,
-    pre_computed_tool_message_id: Optional[str] = None,
     llm_batch_item_id: Optional[str] = None,
     step_id: str | None = None,
 ) -> List[Message]:
@@ -245,8 +250,6 @@ def create_letta_messages_from_llm_response(
             )
         ],
     )
-    if pre_computed_tool_message_id:
-        tool_message.id = pre_computed_tool_message_id
     messages.append(tool_message)

     if add_heartbeat_request_system_message:
letta/server/server.py CHANGED
@@ -21,7 +21,7 @@ import letta.system as system
 from letta.agent import Agent, save_agent
 from letta.agents.letta_agent import LettaAgent
 from letta.config import LettaConfig
-from letta.constants import CORE_MEMORY_SOURCE_CHAR_LIMIT, LETTA_TOOL_EXECUTION_DIR
+from letta.constants import LETTA_TOOL_EXECUTION_DIR
 from letta.data_sources.connectors import DataConnector, load_data
 from letta.errors import HandleNotFoundError
 from letta.functions.mcp_client.types import MCPServerType, MCPTool, SSEServerConfig, StdioServerConfig
@@ -34,6 +34,7 @@ from letta.interface import AgentInterface  # abstract
 from letta.interface import CLIInterface  # for printing to terminal
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.otel.tracing import log_event, trace_method
 from letta.prompts.gpt_system import get_system_text
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
 from letta.schemas.block import Block, BlockUpdate, CreateBlock
@@ -44,7 +45,7 @@ from letta.schemas.enums import JobStatus, MessageStreamStatus, ProviderCategory
 from letta.schemas.environment_variables import SandboxEnvironmentVariableCreate
 from letta.schemas.group import GroupCreate, ManagerType, SleeptimeManager, VoiceSleeptimeManager
 from letta.schemas.job import Job, JobUpdate
-from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, ToolReturnMessage
+from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, MessageType, ToolReturnMessage
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.llm_config import LLMConfig
@@ -101,7 +102,6 @@ from letta.services.tool_executor.tool_execution_manager import ToolExecutionMan
 from letta.services.tool_manager import ToolManager
 from letta.services.user_manager import UserManager
 from letta.settings import model_settings, settings, tool_settings
-from letta.tracing import log_event, trace_method

 from letta.utils import get_friendly_error_msg, get_persona_text, make_key
 config = LettaConfig.load()
@@ -1108,13 +1108,11 @@ class SyncServer(Server):
         after: Optional[str] = None,
         before: Optional[str] = None,
         limit: Optional[int] = 100,
-        order_by: Optional[str] = "created_at",
-        reverse: Optional[bool] = False,
         query_text: Optional[str] = None,
         ascending: Optional[bool] = True,
     ) -> List[Passage]:
         # iterate over records
-        records = await self.agent_manager.list_passages_async(
+        records = await self.agent_manager.list_agent_passages_async(
             actor=actor,
             agent_id=agent_id,
             after=after,
@@ -1368,12 +1366,13 @@ class SyncServer(Server):
         )
         await self.agent_manager.delete_agent_async(agent_id=sleeptime_agent_state.id, actor=actor)

-    async def _upsert_file_to_agent(self, agent_id: str, text: str, file_id: str, actor: User) -> None:
+    async def _upsert_file_to_agent(self, agent_id: str, text: str, file_id: str, file_name: str, actor: User) -> None:
         """
         Internal method to create or update a file <-> agent association
         """
-        truncated_text = text[:CORE_MEMORY_SOURCE_CHAR_LIMIT]
-        await self.file_agent_manager.attach_file(agent_id=agent_id, file_id=file_id, actor=actor, visible_content=truncated_text)
+        await self.file_agent_manager.attach_file(
+            agent_id=agent_id, file_id=file_id, file_name=file_name, actor=actor, visible_content=text
+        )

     async def _remove_file_from_agent(self, agent_id: str, file_id: str, actor: User) -> None:
         """
@@ -1389,7 +1388,7 @@ class SyncServer(Server):
         logger.info(f"File {file_id} already removed from agent {agent_id}, skipping...")

     async def insert_file_into_context_windows(
-        self, source_id: str, text: str, file_id: str, actor: User, agent_states: Optional[List[AgentState]] = None
+        self, source_id: str, text: str, file_id: str, file_name: str, actor: User, agent_states: Optional[List[AgentState]] = None
     ) -> List[AgentState]:
         """
         Insert the uploaded document into the context window of all agents
@@ -1404,11 +1403,13 @@ class SyncServer(Server):
         logger.info(f"Inserting document into context window for source: {source_id}")
         logger.info(f"Attached agents: {[a.id for a in agent_states]}")

-        await asyncio.gather(*(self._upsert_file_to_agent(agent_state.id, text, file_id, actor) for agent_state in agent_states))
+        await asyncio.gather(*(self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor) for agent_state in agent_states))

         return agent_states

-    async def insert_files_into_context_window(self, agent_state: AgentState, texts: List[str], file_ids: List[str], actor: User) -> None:
+    async def insert_files_into_context_window(
+        self, agent_state: AgentState, texts: List[str], file_ids: List[str], file_names: List[str], actor: User
+    ) -> None:
         """
         Insert the uploaded documents into the context window of an agent
         attached to the given source.
@@ -1418,7 +1419,12 @@ class SyncServer(Server):
         if len(texts) != len(file_ids):
             raise ValueError(f"Mismatch between number of texts ({len(texts)}) and file ids ({len(file_ids)})")

-        await asyncio.gather(*(self._upsert_file_to_agent(agent_state.id, text, file_id, actor) for text, file_id in zip(texts, file_ids)))
+        await asyncio.gather(
+            *(
+                self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor)
+                for text, file_id, file_name in zip(texts, file_ids, file_names)
+            )
+        )

     async def remove_file_from_context_windows(self, source_id: str, file_id: str, actor: User) -> None:
         """
@@ -2231,6 +2237,7 @@ class SyncServer(Server):
         assistant_message_tool_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG,
         metadata: Optional[dict] = None,
         request_start_timestamp_ns: Optional[int] = None,
+        include_return_message_types: Optional[List[MessageType]] = None,
     ) -> Union[StreamingResponse, LettaResponse]:
         """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
         # TODO: @charles is this the correct way to handle?
@@ -2336,6 +2343,11 @@ class SyncServer(Server):

         # Get rid of the stream status messages
         filtered_stream = [d for d in generated_stream if not isinstance(d, MessageStreamStatus)]
+
+        # Apply message type filtering if specified
+        if include_return_message_types is not None:
+            filtered_stream = [msg for msg in filtered_stream if msg.message_type in include_return_message_types]
+
         usage = await task

         # By default the stream will be messages of type LettaMessage or LettaLegacyMessage
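
On the client side, the new include_return_message_types field (added to letta/schemas/letta_request.py in this release) lets callers receive only selected message types. A hedged usage example over the REST API; the port, agent id, and message-type strings below are assumptions for illustration:

    import requests

    resp = requests.post(
        "http://localhost:8283/v1/agents/agent-123/messages",
        headers={"user_id": "user-123"},
        json={
            "messages": [{"role": "user", "content": "hello"}],
            # Only these message types are returned, per the filtering
            # added in SyncServer.send_message_to_agent above.
            "include_return_message_types": ["assistant_message"],
        },
    )
    print(resp.json())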