letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.3.dev20250607000559__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +16 -12
- letta/agents/base_agent.py +1 -1
- letta/agents/helpers.py +13 -2
- letta/agents/letta_agent.py +72 -34
- letta/agents/letta_agent_batch.py +1 -2
- letta/agents/voice_agent.py +19 -13
- letta/agents/voice_sleeptime_agent.py +23 -6
- letta/constants.py +18 -0
- letta/data_sources/__init__.py +0 -0
- letta/data_sources/redis_client.py +282 -0
- letta/errors.py +0 -4
- letta/functions/function_sets/files.py +58 -0
- letta/functions/schema_generator.py +18 -1
- letta/groups/sleeptime_multi_agent_v2.py +13 -3
- letta/helpers/datetime_helpers.py +47 -3
- letta/helpers/decorators.py +69 -0
- letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
- letta/interfaces/anthropic_streaming_interface.py +43 -24
- letta/interfaces/openai_streaming_interface.py +21 -19
- letta/llm_api/anthropic.py +1 -1
- letta/llm_api/anthropic_client.py +30 -16
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/helpers.py +36 -30
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client_base.py +29 -1
- letta/llm_api/openai.py +1 -1
- letta/llm_api/openai_client.py +6 -8
- letta/local_llm/chat_completion_proxy.py +1 -1
- letta/memory.py +1 -1
- letta/orm/enums.py +1 -0
- letta/orm/file.py +80 -3
- letta/orm/files_agents.py +13 -0
- letta/orm/passage.py +2 -0
- letta/orm/sqlalchemy_base.py +34 -11
- letta/otel/__init__.py +0 -0
- letta/otel/context.py +25 -0
- letta/otel/events.py +0 -0
- letta/otel/metric_registry.py +122 -0
- letta/otel/metrics.py +66 -0
- letta/otel/resource.py +26 -0
- letta/{tracing.py → otel/tracing.py} +55 -78
- letta/plugins/README.md +22 -0
- letta/plugins/__init__.py +0 -0
- letta/plugins/defaults.py +11 -0
- letta/plugins/plugins.py +72 -0
- letta/schemas/enums.py +8 -0
- letta/schemas/file.py +12 -0
- letta/schemas/letta_request.py +6 -0
- letta/schemas/passage.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +7 -7
- letta/server/rest_api/app.py +8 -6
- letta/server/rest_api/routers/v1/agents.py +46 -37
- letta/server/rest_api/routers/v1/groups.py +3 -3
- letta/server/rest_api/routers/v1/sources.py +26 -3
- letta/server/rest_api/routers/v1/tools.py +7 -2
- letta/server/rest_api/utils.py +9 -6
- letta/server/server.py +25 -13
- letta/services/agent_manager.py +186 -194
- letta/services/block_manager.py +1 -1
- letta/services/context_window_calculator/context_window_calculator.py +1 -1
- letta/services/context_window_calculator/token_counter.py +3 -2
- letta/services/file_processor/chunker/line_chunker.py +34 -0
- letta/services/file_processor/file_processor.py +43 -12
- letta/services/file_processor/parser/mistral_parser.py +11 -1
- letta/services/files_agents_manager.py +96 -7
- letta/services/group_manager.py +6 -6
- letta/services/helpers/agent_manager_helper.py +404 -3
- letta/services/identity_manager.py +1 -1
- letta/services/job_manager.py +1 -1
- letta/services/llm_batch_manager.py +1 -1
- letta/services/mcp/stdio_client.py +5 -1
- letta/services/mcp_manager.py +4 -4
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +604 -19
- letta/services/per_agent_lock_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +178 -19
- letta/services/step_manager.py +2 -2
- letta/services/summarizer/summarizer.py +1 -1
- letta/services/telemetry_manager.py +1 -1
- letta/services/tool_executor/builtin_tool_executor.py +117 -0
- letta/services/tool_executor/composio_tool_executor.py +53 -0
- letta/services/tool_executor/core_tool_executor.py +474 -0
- letta/services/tool_executor/files_tool_executor.py +138 -0
- letta/services/tool_executor/mcp_tool_executor.py +45 -0
- letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
- letta/services/tool_executor/tool_execution_manager.py +34 -14
- letta/services/tool_executor/tool_execution_sandbox.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -802
- letta/services/tool_executor/tool_executor_base.py +43 -0
- letta/services/tool_manager.py +55 -59
- letta/services/tool_sandbox/e2b_sandbox.py +1 -1
- letta/services/tool_sandbox/local_sandbox.py +6 -3
- letta/services/user_manager.py +6 -3
- letta/settings.py +23 -2
- letta/utils.py +7 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/METADATA +4 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/RECORD +105 -83
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/app.py
CHANGED
```diff
@@ -256,13 +256,15 @@ def create_application() -> "FastAPI":
         print(f"▶ Using OTLP tracing with endpoint: {otlp_endpoint}")
         env_name_suffix = os.getenv("ENV_NAME")
         service_name = f"letta-server-{env_name_suffix.lower()}" if env_name_suffix else "letta-server"
-        from letta.
+        from letta.otel.metrics import setup_metrics
+        from letta.otel.tracing import setup_tracing

         setup_tracing(
            endpoint=otlp_endpoint,
            app=app,
            service_name=service_name,
        )
+        setup_metrics(endpoint=otlp_endpoint, app=app, service_name=service_name)

    for route in v1_routes:
        app.include_router(route, prefix=API_PREFIX)
@@ -331,7 +333,7 @@ def start_server(
    if (os.getenv("LOCAL_HTTPS") == "true") or "--localhttps" in sys.argv:
        print(f"▶ Server running at: https://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
        print(f"▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
-        if importlib.util.find_spec("granian") is not None and settings.
+        if importlib.util.find_spec("granian") is not None and settings.use_granian:
            from granian import Granian

            # Experimental Granian engine
@@ -339,14 +341,14 @@ def start_server(
                target="letta.server.rest_api.app:app",
                # factory=True,
                interface="asgi",
-                address=host or "
+                address=host or "127.0.0.1",  # Note granian address must be an ip address
                port=port or REST_DEFAULT_PORT,
                workers=settings.uvicorn_workers,
                # threads=
                reload=reload or settings.uvicorn_reload,
                reload_ignore_patterns=["openapi_letta.json"],
                reload_ignore_worker_failure=True,
-                reload_tick=
+                reload_tick=4000,  # set to 4s to prevent crashing on weird state
                # log_level="info"
                ssl_keyfile="certs/localhost-key.pem",
                ssl_cert="certs/localhost.pem",
@@ -380,14 +382,14 @@ def start_server(
            target="letta.server.rest_api.app:app",
            # factory=True,
            interface="asgi",
-            address=host or "
+            address=host or "127.0.0.1",  # Note granian address must be an ip address
            port=port or REST_DEFAULT_PORT,
            workers=settings.uvicorn_workers,
            # threads=
            reload=reload or settings.uvicorn_reload,
            reload_ignore_patterns=["openapi_letta.json"],
            reload_ignore_worker_failure=True,
-            reload_tick=
+            reload_tick=4000,  # set to 4s to prevent crashing on weird state
            # log_level="info"
        ).serve()
    else:
```
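The new `setup_metrics` hook is wired up next to `setup_tracing` against the same OTLP endpoint. Its implementation lives in the new `letta/otel/metrics.py`, whose body this diff does not show; the sketch below is a minimal OpenTelemetry-SDK metrics bootstrap of the kind such a helper typically performs (the function name and argument handling here are assumptions, not letta's actual code):

```python
# Hypothetical sketch of an OTLP metrics bootstrap analogous to setup_metrics();
# letta/otel/metrics.py is added in this release but its body is not shown here.
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource


def setup_metrics_sketch(endpoint: str, service_name: str) -> None:
    # Push accumulated metrics to the OTLP collector on a periodic interval
    reader = PeriodicExportingMetricReader(OTLPMetricExporter(endpoint=endpoint))
    provider = MeterProvider(
        resource=Resource.create({"service.name": service_name}),
        metric_readers=[reader],
    )
    # Register globally so meters obtained via metrics.get_meter() use this provider
    metrics.set_meter_provider(provider)
```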
letta/server/rest_api/routers/v1/agents.py
CHANGED
```diff
@@ -12,16 +12,18 @@ from sqlalchemy.exc import IntegrityError, OperationalError
 from starlette.responses import Response, StreamingResponse

 from letta.agents.letta_agent import LettaAgent
-from letta.constants import
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.groups.sleeptime_multi_agent_v2 import SleeptimeMultiAgentV2
 from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.otel.context import get_ctx_attributes
+from letta.otel.metric_registry import MetricRegistry
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
 from letta.schemas.block import Block, BlockUpdate
 from letta.schemas.group import Group
 from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
-from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion
+from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion, MessageType
 from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
@@ -149,7 +151,7 @@ def export_agent_serialized(


 @router.post("/import", response_model=AgentState, operation_id="import_agent_serialized")
-async def import_agent_serialized(
+def import_agent_serialized(
    file: UploadFile = File(...),
    server: "SyncServer" = Depends(get_letta_server),
    actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -167,10 +169,10 @@ async def import_agent_serialized(
    """
    Import a serialized agent file and recreate the agent in the system.
    """
-    actor =
+    actor = server.user_manager.get_user_or_default(user_id=actor_id)

    try:
-        serialized_data =
+        serialized_data = file.file.read()
        agent_json = json.loads(serialized_data)

        # Validate the JSON against AgentSchema before passing it to deserialize
@@ -311,20 +313,21 @@ async def attach_source(
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    agent_state = await server.agent_manager.attach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)

-
+    # Check if the agent is missing any files tools
+    agent_state = await server.agent_manager.attach_missing_files_tools_async(agent_state=agent_state, actor=actor)
+
+    files = await server.source_manager.list_files(source_id, actor, include_content=True)
    texts = []
    file_ids = []
+    file_names = []
    for f in files:
-
-        passage_text = ""
-        for p in passages:
-            if len(passage_text) <= CORE_MEMORY_SOURCE_CHAR_LIMIT:
-                passage_text += p.text
-
-        texts.append(passage_text)
+        texts.append(f.content if f.content else "")
        file_ids.append(f.id)
+        file_names.append(f.file_name)

-    await server.insert_files_into_context_window(
+    await server.insert_files_into_context_window(
+        agent_state=agent_state, texts=texts, file_ids=file_ids, file_names=file_names, actor=actor
+    )

    if agent_state.enable_sleeptime:
        source = await server.source_manager.get_source_by_id(source_id=source_id)
@@ -347,6 +350,10 @@ async def detach_source(
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    agent_state = await server.agent_manager.detach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)
+
+    if not agent_state.sources:
+        agent_state = await server.agent_manager.detach_all_files_tools_async(agent_state=agent_state, actor=actor)
+
    files = await server.source_manager.list_files(source_id, actor)
    file_ids = [f.id for f in files]
    await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)
@@ -451,7 +458,7 @@ async def list_blocks(
    """
    Retrieve the core memory blocks of a specific agent.
    """
-    actor = server.user_manager.
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    try:
        agent = await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, include_relationships=["memory"], actor=actor)
        return agent.memory.blocks
@@ -658,19 +665,18 @@ async def send_message(
    Process a user message and return the agent's response.
    This endpoint accepts a message from a user and processes it through the agent.
    """
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
+
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    request_start_timestamp_ns = get_utc_timestamp_ns()
-    user_eligible = True
    # TODO: This is redundant, remove soon
    agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
    agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]

-    if
+    if agent_eligible and model_compatible:
        if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
-
+            agent_loop = SleeptimeMultiAgentV2(
                agent_id=agent_id,
                message_manager=server.message_manager,
                agent_manager=server.agent_manager,
@@ -682,7 +688,7 @@ async def send_message(
                group=agent.multi_agent_group,
            )
        else:
-
+            agent_loop = LettaAgent(
                agent_id=agent_id,
                message_manager=server.message_manager,
                agent_manager=server.agent_manager,
@@ -693,11 +699,12 @@ async def send_message(
                telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
            )

-        result = await
+        result = await agent_loop.step(
            request.messages,
            max_steps=10,
            use_assistant_message=request.use_assistant_message,
            request_start_timestamp_ns=request_start_timestamp_ns,
+            include_return_message_types=request.include_return_message_types,
        )
    else:
        result = await server.send_message_to_agent(
@@ -710,6 +717,7 @@ async def send_message(
            use_assistant_message=request.use_assistant_message,
            assistant_message_tool_name=request.assistant_message_tool_name,
            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+            include_return_message_types=request.include_return_message_types,
        )
    return result

@@ -739,22 +747,20 @@ async def send_message_streaming(
    This endpoint accepts a message from a user and processes it through the agent.
    It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
    """
-
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
+
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    user_eligible = actor.organization_id not in ["org-4a3af5dd-4c6a-48cb-ac13-3f73ecaaa4bf", "org-4ab3f6e8-9a44-4bee-aeb6-c681cbbc7bf6"]
    # TODO: This is redundant, remove soon
    agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
    agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
    not_letta_endpoint = not ("inference.letta.com" in agent.llm_config.model_endpoint)
    request_start_timestamp_ns = get_utc_timestamp_ns()

-    if
+    if agent_eligible and model_compatible:
        if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
-
+            agent_loop = SleeptimeMultiAgentV2(
                agent_id=agent_id,
                message_manager=server.message_manager,
                agent_manager=server.agent_manager,
@@ -768,7 +774,7 @@ async def send_message_streaming(
                group=agent.multi_agent_group,
            )
        else:
-
+            agent_loop = LettaAgent(
                agent_id=agent_id,
                message_manager=server.message_manager,
                agent_manager=server.agent_manager,
@@ -782,21 +788,23 @@ async def send_message_streaming(

        if request.stream_tokens and model_compatible_token_streaming and not_letta_endpoint:
            result = StreamingResponseWithStatusCode(
-
+                agent_loop.step_stream(
                    input_messages=request.messages,
                    max_steps=10,
                    use_assistant_message=request.use_assistant_message,
                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
                ),
                media_type="text/event-stream",
            )
        else:
            result = StreamingResponseWithStatusCode(
-
+                agent_loop.step_stream_no_tokens(
                    request.messages,
                    max_steps=10,
                    use_assistant_message=request.use_assistant_message,
                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
                ),
                media_type="text/event-stream",
            )
@@ -812,6 +820,7 @@ async def send_message_streaming(
            assistant_message_tool_name=request.assistant_message_tool_name,
            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
            request_start_timestamp_ns=request_start_timestamp_ns,
+            include_return_message_types=request.include_return_message_types,
        )

    return result
@@ -826,6 +835,7 @@ async def process_message_background(
    use_assistant_message: bool,
    assistant_message_tool_name: str,
    assistant_message_tool_kwarg: str,
+    include_return_message_types: Optional[List[MessageType]] = None,
 ) -> None:
    """Background task to process the message and update job status."""
    try:
@@ -841,6 +851,7 @@ async def process_message_background(
            assistant_message_tool_kwarg=assistant_message_tool_kwarg,
            metadata={"job_id": job_id},  # Pass job_id through metadata
            request_start_timestamp_ns=request_start_timestamp_ns,
+            include_return_message_types=include_return_message_types,
        )

        # Update job status to completed
@@ -878,6 +889,7 @@ async def send_message_async(
    Asynchronously process a user message and return a run object.
    The actual processing happens in the background, and the status can be checked using the run ID.
    """
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)

    # Create a new job
@@ -907,6 +919,7 @@ async def send_message_async(
        use_assistant_message=request.use_assistant_message,
        assistant_message_tool_name=request.assistant_message_tool_name,
        assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        include_return_message_types=request.include_return_message_types,
    )

    return run
@@ -953,17 +966,13 @@ async def summarize_agent_conversation(
    This endpoint summarizes the current message history for a given agent,
    truncating and compressing it down to the specified `max_message_length`.
    """
-    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)

-
-    # TODO: This is redundant, remove soon
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
    agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]

-    if agent_eligible and
+    if agent_eligible and model_compatible:
        agent = LettaAgent(
            agent_id=agent_id,
            message_manager=server.message_manager,
```
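The message endpoints above now call `MetricRegistry().user_message_counter.add(1, get_ctx_attributes())` on every request. `letta/otel/metric_registry.py` is new in this release and its body is not shown in this diff; a plausible minimal shape for such a registry, assuming it is a process-wide singleton that lazily creates OpenTelemetry instruments, is:

```python
# Hypothetical sketch of a metric-registry singleton; letta's actual
# letta/otel/metric_registry.py is added in this release but not shown here.
from opentelemetry import metrics


class MetricRegistrySketch:
    _instance = None

    def __new__(cls):
        # Reuse one instance so instruments are created once per process,
        # keeping every call site down to a single add()/record() line
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            meter = metrics.get_meter("letta")
            cls._instance.user_message_counter = meter.create_counter(
                "letta.user_message.count", description="User messages received"
            )
        return cls._instance


# Usage mirroring the router code: count one message with per-request attributes
MetricRegistrySketch().user_message_counter.add(1, {"agent.id": "agent-123"})
```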
letta/server/rest_api/routers/v1/groups.py
CHANGED
```diff
@@ -86,7 +86,7 @@ def create_group(


 @router.patch("/{group_id}", response_model=Group, operation_id="modify_group")
-def modify_group(
+async def modify_group(
    group_id: str,
    group: GroupUpdate = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
@@ -97,8 +97,8 @@ def modify_group(
    Create a new multi-agent group with the specified configuration.
    """
    try:
-        actor = server.user_manager.
-        return server.group_manager.
+        actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+        return await server.group_manager.modify_group_async(group_id=group_id, group_update=group, actor=actor)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

```
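`modify_group` is converted from a sync handler into an `async def` that awaits the new `*_async` manager methods, so the database round-trip no longer occupies a threadpool worker. A generic illustration of the pattern (the app and helper below are hypothetical stand-ins, not letta code):

```python
# Illustrative only: the sync-to-async FastAPI handler conversion pattern.
import asyncio

from fastapi import FastAPI

app = FastAPI()


async def fetch_group_async(group_id: str) -> dict:
    # Stand-in for an awaitable DB call such as group_manager.modify_group_async
    await asyncio.sleep(0)  # yield to the event loop as real async I/O would
    return {"id": group_id}


@app.patch("/groups/{group_id}")
async def modify_group(group_id: str) -> dict:
    # Awaiting keeps the event loop free instead of tying up a threadpool worker
    return await fetch_group_async(group_id)
```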
letta/server/rest_api/routers/v1/sources.py
CHANGED
```diff
@@ -27,6 +27,11 @@ from letta.utils import safe_create_task, sanitize_filename

 logger = get_logger(__name__)

+mimetypes.add_type("text/markdown", ".md")
+mimetypes.add_type("text/markdown", ".markdown")
+mimetypes.add_type("application/jsonl", ".jsonl")
+mimetypes.add_type("application/x-jsonlines", ".jsonl")
+

 router = APIRouter(prefix="/sources", tags=["sources"])

@@ -174,7 +179,15 @@ async def upload_file_to_source(
    """
    Upload a file to a data source.
    """
-    allowed_media_types = {
+    allowed_media_types = {
+        "application/pdf",
+        "text/plain",
+        "text/markdown",
+        "text/x-markdown",
+        "application/json",
+        "application/jsonl",
+        "application/x-jsonlines",
+    }

    # Normalize incoming Content-Type header (strip charset or any parameters).
    raw_ct = file.content_type or ""
@@ -192,6 +205,9 @@ async def upload_file_to_source(
        ".pdf": "application/pdf",
        ".txt": "text/plain",
        ".json": "application/json",
+        ".md": "text/markdown",
+        ".markdown": "text/markdown",
+        ".jsonl": "application/jsonl",
    }
    media_type = ext_map.get(ext, media_type)

@@ -270,14 +286,21 @@ async def list_source_files(
    source_id: str,
    limit: int = Query(1000, description="Number of files to return"),
    after: Optional[str] = Query(None, description="Pagination cursor to fetch the next set of results"),
+    include_content: bool = Query(False, description="Whether to include full file content"),
    server: "SyncServer" = Depends(get_letta_server),
-    actor_id: Optional[str] = Header(None, alias="user_id"),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
 ):
    """
    List paginated files associated with a data source.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    return await server.source_manager.list_files(
+    return await server.source_manager.list_files(
+        source_id=source_id,
+        limit=limit,
+        after=after,
+        actor=actor,
+        include_content=include_content,
+    )


 # it's redundant to include /delete in the URL path. The HTTP verb DELETE already implies that action.
```
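Registering `.md` and `.jsonl` with the stdlib `mimetypes` module at import time keeps extension-based fallback detection consistent with the expanded `allowed_media_types` set, so uploads from clients that omit or mangle the Content-Type header still validate. A small standalone demonstration of the stdlib behavior the router relies on:

```python
# Demonstrates the stdlib behavior used above: after add_type(), guess_type()
# resolves extensions that Python may not map out of the box.
import mimetypes

mimetypes.add_type("text/markdown", ".md")
mimetypes.add_type("application/jsonl", ".jsonl")

print(mimetypes.guess_type("notes.md"))      # ('text/markdown', None)
print(mimetypes.guess_type("events.jsonl"))  # ('application/jsonl', None)
```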
letta/server/rest_api/routers/v1/tools.py
CHANGED
```diff
@@ -501,7 +501,8 @@ async def add_mcp_server_to_config(
    if isinstance(request, StdioServerConfig):
        mapped_request = MCPServer(server_name=request.server_name, server_type=request.type, stdio_config=request)
        # don't allow stdio servers
-
+        if tool_settings.mcp_disable_stdio:  # protected server
+            raise HTTPException(status_code=400, detail="StdioServerConfig is not supported")
    elif isinstance(request, SSEServerConfig):
        mapped_request = MCPServer(server_name=request.server_name, server_type=request.type, server_url=request.server_url)
        # TODO: add HTTP streaming
@@ -530,4 +531,8 @@ async def delete_mcp_server_from_config(
    # log to DB
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    mcp_server_id = await server.mcp_manager.get_mcp_server_id_by_name(mcp_server_name, actor)
-
+    await server.mcp_manager.delete_mcp_server_by_id(mcp_server_id, actor=actor)
+
+    # TODO: don't do this in the future (just return MCPServer)
+    all_servers = await server.mcp_manager.list_mcp_servers(actor=actor)
+    return [server.to_config() for server in all_servers]
```
letta/server/rest_api/utils.py
CHANGED
```diff
@@ -15,9 +15,12 @@ from pydantic import BaseModel

 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
-from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
+from letta.otel.context import get_ctx_attributes
+from letta.otel.metric_registry import MetricRegistry
+from letta.otel.tracing import tracer
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.llm_config import LLMConfig
@@ -27,7 +30,6 @@ from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.interface import StreamingServerInterface
 from letta.system import get_heartbeat, package_function_response
-from letta.tracing import tracer

 if TYPE_CHECKING:
    from letta.server.server import SyncServer
@@ -81,8 +83,12 @@ async def sse_async_generator(
            if first_chunk and ttft_span is not None:
                now = get_utc_timestamp_ns()
                ttft_ns = now - request_start_timestamp_ns
-                ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns
+                ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
                ttft_span.end()
+                metric_attributes = get_ctx_attributes()
+                if llm_config:
+                    metric_attributes["model.name"] = llm_config.model
+                MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
            first_chunk = False

        # yield f"data: {json.dumps(chunk)}\n\n"
@@ -190,7 +196,6 @@ def create_letta_messages_from_llm_response(
    add_heartbeat_request_system_message: bool = False,
    reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
    pre_computed_assistant_message_id: Optional[str] = None,
-    pre_computed_tool_message_id: Optional[str] = None,
    llm_batch_item_id: Optional[str] = None,
    step_id: str | None = None,
 ) -> List[Message]:
@@ -245,8 +250,6 @@ def create_letta_messages_from_llm_response(
            )
        ],
    )
-    if pre_computed_tool_message_id:
-        tool_message.id = pre_computed_tool_message_id
    messages.append(tool_message)

    if add_heartbeat_request_system_message:
```
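The time-to-first-token (TTFT) measurement now goes through the new `ns_to_ms` helper and is recorded to a histogram in addition to the span event. `ns_to_ms` is presumably a plain nanosecond-to-millisecond conversion; a sketch of the measurement pattern, where the conversion and the histogram configuration are assumptions inferred from the call sites above:

```python
# Sketch of the TTFT measurement pattern; ns_to_ms's exact implementation and
# the histogram's name/configuration are assumptions, not letta's actual code.
import time

from opentelemetry import metrics


def ns_to_ms(ns: int) -> float:
    # Assumed conversion: 1 ms == 1_000_000 ns
    return ns / 1_000_000


meter = metrics.get_meter("letta")
ttft_ms_histogram = meter.create_histogram("letta.ttft", unit="ms")

request_start_ns = time.time_ns()
# ... wait for the first streamed chunk to arrive ...
ttft_ns = time.time_ns() - request_start_ns
ttft_ms_histogram.record(ns_to_ms(ttft_ns), {"model.name": "gpt-4o"})
```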
letta/server/server.py
CHANGED
```diff
@@ -21,7 +21,7 @@ import letta.system as system
 from letta.agent import Agent, save_agent
 from letta.agents.letta_agent import LettaAgent
 from letta.config import LettaConfig
-from letta.constants import
+from letta.constants import LETTA_TOOL_EXECUTION_DIR
 from letta.data_sources.connectors import DataConnector, load_data
 from letta.errors import HandleNotFoundError
 from letta.functions.mcp_client.types import MCPServerType, MCPTool, SSEServerConfig, StdioServerConfig
@@ -34,6 +34,7 @@ from letta.interface import AgentInterface  # abstract
 from letta.interface import CLIInterface  # for printing to terminal
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.otel.tracing import log_event, trace_method
 from letta.prompts.gpt_system import get_system_text
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
 from letta.schemas.block import Block, BlockUpdate, CreateBlock
@@ -44,7 +45,7 @@ from letta.schemas.enums import JobStatus, MessageStreamStatus, ProviderCategory
 from letta.schemas.environment_variables import SandboxEnvironmentVariableCreate
 from letta.schemas.group import GroupCreate, ManagerType, SleeptimeManager, VoiceSleeptimeManager
 from letta.schemas.job import Job, JobUpdate
-from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, ToolReturnMessage
+from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, MessageType, ToolReturnMessage
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.llm_config import LLMConfig
@@ -101,7 +102,6 @@ from letta.services.tool_executor.tool_execution_manager import ToolExecutionMan
 from letta.services.tool_manager import ToolManager
 from letta.services.user_manager import UserManager
 from letta.settings import model_settings, settings, tool_settings
-from letta.tracing import log_event, trace_method
 from letta.utils import get_friendly_error_msg, get_persona_text, make_key

 config = LettaConfig.load()
@@ -1108,13 +1108,11 @@ class SyncServer(Server):
        after: Optional[str] = None,
        before: Optional[str] = None,
        limit: Optional[int] = 100,
-        order_by: Optional[str] = "created_at",
-        reverse: Optional[bool] = False,
        query_text: Optional[str] = None,
        ascending: Optional[bool] = True,
    ) -> List[Passage]:
        # iterate over records
-        records = await self.agent_manager.
+        records = await self.agent_manager.list_agent_passages_async(
            actor=actor,
            agent_id=agent_id,
            after=after,
@@ -1368,12 +1366,13 @@ class SyncServer(Server):
        )
        await self.agent_manager.delete_agent_async(agent_id=sleeptime_agent_state.id, actor=actor)

-    async def _upsert_file_to_agent(self, agent_id: str, text: str, file_id: str, actor: User) -> None:
+    async def _upsert_file_to_agent(self, agent_id: str, text: str, file_id: str, file_name: str, actor: User) -> None:
        """
        Internal method to create or update a file <-> agent association
        """
-
-
+        await self.file_agent_manager.attach_file(
+            agent_id=agent_id, file_id=file_id, file_name=file_name, actor=actor, visible_content=text
+        )

    async def _remove_file_from_agent(self, agent_id: str, file_id: str, actor: User) -> None:
        """
@@ -1389,7 +1388,7 @@ class SyncServer(Server):
            logger.info(f"File {file_id} already removed from agent {agent_id}, skipping...")

    async def insert_file_into_context_windows(
-        self, source_id: str, text: str, file_id: str, actor: User, agent_states: Optional[List[AgentState]] = None
+        self, source_id: str, text: str, file_id: str, file_name: str, actor: User, agent_states: Optional[List[AgentState]] = None
    ) -> List[AgentState]:
        """
        Insert the uploaded document into the context window of all agents
@@ -1404,11 +1403,13 @@ class SyncServer(Server):
        logger.info(f"Inserting document into context window for source: {source_id}")
        logger.info(f"Attached agents: {[a.id for a in agent_states]}")

-        await asyncio.gather(*(self._upsert_file_to_agent(agent_state.id, text, file_id, actor) for agent_state in agent_states))
+        await asyncio.gather(*(self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor) for agent_state in agent_states))

        return agent_states

-    async def insert_files_into_context_window(
+    async def insert_files_into_context_window(
+        self, agent_state: AgentState, texts: List[str], file_ids: List[str], file_names: List[str], actor: User
+    ) -> None:
        """
        Insert the uploaded documents into the context window of an agent
        attached to the given source.
@@ -1418,7 +1419,12 @@ class SyncServer(Server):
        if len(texts) != len(file_ids):
            raise ValueError(f"Mismatch between number of texts ({len(texts)}) and file ids ({len(file_ids)})")

-        await asyncio.gather(
+        await asyncio.gather(
+            *(
+                self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor)
+                for text, file_id, file_name in zip(texts, file_ids, file_names)
+            )
+        )

    async def remove_file_from_context_windows(self, source_id: str, file_id: str, actor: User) -> None:
        """
@@ -2231,6 +2237,7 @@ class SyncServer(Server):
        assistant_message_tool_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG,
        metadata: Optional[dict] = None,
        request_start_timestamp_ns: Optional[int] = None,
+        include_return_message_types: Optional[List[MessageType]] = None,
    ) -> Union[StreamingResponse, LettaResponse]:
        """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
        # TODO: @charles is this the correct way to handle?
@@ -2336,6 +2343,11 @@ class SyncServer(Server):

            # Get rid of the stream status messages
            filtered_stream = [d for d in generated_stream if not isinstance(d, MessageStreamStatus)]
+
+            # Apply message type filtering if specified
+            if include_return_message_types is not None:
+                filtered_stream = [msg for msg in filtered_stream if msg.message_type in include_return_message_types]
+
            usage = await task

            # By default the stream will be messages of type LettaMessage or LettaLegacyMessage
```