letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.2.dev20250606215616__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +1 -1
- letta/agents/letta_agent.py +49 -29
- letta/agents/letta_agent_batch.py +1 -2
- letta/agents/voice_agent.py +19 -13
- letta/agents/voice_sleeptime_agent.py +11 -3
- letta/constants.py +18 -0
- letta/data_sources/__init__.py +0 -0
- letta/data_sources/redis_client.py +282 -0
- letta/errors.py +0 -4
- letta/functions/function_sets/files.py +58 -0
- letta/functions/schema_generator.py +18 -1
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/helpers/datetime_helpers.py +47 -3
- letta/helpers/decorators.py +69 -0
- letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
- letta/interfaces/anthropic_streaming_interface.py +43 -24
- letta/interfaces/openai_streaming_interface.py +21 -19
- letta/llm_api/anthropic.py +1 -1
- letta/llm_api/anthropic_client.py +22 -14
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/helpers.py +36 -30
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client_base.py +29 -1
- letta/llm_api/openai.py +1 -1
- letta/llm_api/openai_client.py +6 -8
- letta/local_llm/chat_completion_proxy.py +1 -1
- letta/memory.py +1 -1
- letta/orm/enums.py +1 -0
- letta/orm/file.py +80 -3
- letta/orm/files_agents.py +13 -0
- letta/orm/sqlalchemy_base.py +34 -11
- letta/otel/__init__.py +0 -0
- letta/otel/context.py +25 -0
- letta/otel/events.py +0 -0
- letta/otel/metric_registry.py +122 -0
- letta/otel/metrics.py +66 -0
- letta/otel/resource.py +26 -0
- letta/{tracing.py → otel/tracing.py} +55 -78
- letta/plugins/README.md +22 -0
- letta/plugins/__init__.py +0 -0
- letta/plugins/defaults.py +11 -0
- letta/plugins/plugins.py +72 -0
- letta/schemas/enums.py +8 -0
- letta/schemas/file.py +12 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +7 -7
- letta/server/rest_api/app.py +8 -6
- letta/server/rest_api/routers/v1/agents.py +37 -36
- letta/server/rest_api/routers/v1/groups.py +3 -3
- letta/server/rest_api/routers/v1/sources.py +26 -3
- letta/server/rest_api/utils.py +9 -6
- letta/server/server.py +18 -12
- letta/services/agent_manager.py +185 -193
- letta/services/block_manager.py +1 -1
- letta/services/context_window_calculator/token_counter.py +3 -2
- letta/services/file_processor/chunker/line_chunker.py +34 -0
- letta/services/file_processor/file_processor.py +40 -11
- letta/services/file_processor/parser/mistral_parser.py +11 -1
- letta/services/files_agents_manager.py +96 -7
- letta/services/group_manager.py +6 -6
- letta/services/helpers/agent_manager_helper.py +373 -3
- letta/services/identity_manager.py +1 -1
- letta/services/job_manager.py +1 -1
- letta/services/llm_batch_manager.py +1 -1
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +1 -1
- letta/services/per_agent_lock_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +178 -19
- letta/services/step_manager.py +2 -2
- letta/services/summarizer/summarizer.py +1 -1
- letta/services/telemetry_manager.py +1 -1
- letta/services/tool_executor/builtin_tool_executor.py +117 -0
- letta/services/tool_executor/composio_tool_executor.py +53 -0
- letta/services/tool_executor/core_tool_executor.py +474 -0
- letta/services/tool_executor/files_tool_executor.py +131 -0
- letta/services/tool_executor/mcp_tool_executor.py +45 -0
- letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
- letta/services/tool_executor/tool_execution_manager.py +34 -14
- letta/services/tool_executor/tool_execution_sandbox.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -802
- letta/services/tool_executor/tool_executor_base.py +43 -0
- letta/services/tool_manager.py +55 -59
- letta/services/tool_sandbox/e2b_sandbox.py +1 -1
- letta/services/tool_sandbox/local_sandbox.py +6 -3
- letta/services/user_manager.py +6 -3
- letta/settings.py +21 -1
- letta/utils.py +7 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/METADATA +4 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/RECORD +96 -74
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/routers/v1/agents.py
CHANGED
@@ -12,11 +12,13 @@ from sqlalchemy.exc import IntegrityError, OperationalError
 from starlette.responses import Response, StreamingResponse
 
 from letta.agents.letta_agent import LettaAgent
-from letta.constants import
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.groups.sleeptime_multi_agent_v2 import SleeptimeMultiAgentV2
 from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.otel.context import get_ctx_attributes
+from letta.otel.metric_registry import MetricRegistry
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
 from letta.schemas.block import Block, BlockUpdate
 from letta.schemas.group import Group
@@ -149,7 +151,7 @@ def export_agent_serialized(
 
 
 @router.post("/import", response_model=AgentState, operation_id="import_agent_serialized")
-async def import_agent_serialized(
+def import_agent_serialized(
     file: UploadFile = File(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -167,10 +169,10 @@ async def import_agent_serialized(
     """
     Import a serialized agent file and recreate the agent in the system.
     """
-    actor =
+    actor = server.user_manager.get_user_or_default(user_id=actor_id)
 
     try:
-        serialized_data =
+        serialized_data = file.file.read()
         agent_json = json.loads(serialized_data)
 
         # Validate the JSON against AgentSchema before passing it to deserialize
@@ -311,20 +313,21 @@ async def attach_source(
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     agent_state = await server.agent_manager.attach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)
 
-
+    # Check if the agent is missing any files tools
+    agent_state = await server.agent_manager.attach_missing_files_tools_async(agent_state=agent_state, actor=actor)
+
+    files = await server.source_manager.list_files(source_id, actor, include_content=True)
     texts = []
     file_ids = []
+    file_names = []
     for f in files:
-
-        passage_text = ""
-        for p in passages:
-            if len(passage_text) <= CORE_MEMORY_SOURCE_CHAR_LIMIT:
-                passage_text += p.text
-
-        texts.append(passage_text)
+        texts.append(f.content if f.content else "")
         file_ids.append(f.id)
+        file_names.append(f.file_name)
 
-    await server.insert_files_into_context_window(
+    await server.insert_files_into_context_window(
+        agent_state=agent_state, texts=texts, file_ids=file_ids, file_names=file_names, actor=actor
+    )
 
     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=source_id)
@@ -347,6 +350,10 @@ async def detach_source(
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    agent_state = await server.agent_manager.detach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)
+
+    if not agent_state.sources:
+        agent_state = await server.agent_manager.detach_all_files_tools_async(agent_state=agent_state, actor=actor)
+
    files = await server.source_manager.list_files(source_id, actor)
    file_ids = [f.id for f in files]
    await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)
@@ -451,7 +458,7 @@ async def list_blocks(
     """
     Retrieve the core memory blocks of a specific agent.
     """
-    actor = server.user_manager.
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     try:
         agent = await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, include_relationships=["memory"], actor=actor)
         return agent.memory.blocks
@@ -658,19 +665,18 @@ async def send_message(
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.
     """
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
+
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     request_start_timestamp_ns = get_utc_timestamp_ns()
-    user_eligible = True
     # TODO: This is redundant, remove soon
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
     agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
     model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
 
-    if
+    if agent_eligible and model_compatible:
         if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
-
+            agent_loop = SleeptimeMultiAgentV2(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -682,7 +688,7 @@ async def send_message(
                 group=agent.multi_agent_group,
             )
         else:
-
+            agent_loop = LettaAgent(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -693,7 +699,7 @@ async def send_message(
                 telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
             )
 
-        result = await
+        result = await agent_loop.step(
             request.messages,
             max_steps=10,
             use_assistant_message=request.use_assistant_message,
@@ -739,22 +745,20 @@ async def send_message_streaming(
     This endpoint accepts a message from a user and processes it through the agent.
     It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
     """
-
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
+
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    user_eligible = actor.organization_id not in ["org-4a3af5dd-4c6a-48cb-ac13-3f73ecaaa4bf", "org-4ab3f6e8-9a44-4bee-aeb6-c681cbbc7bf6"]
     # TODO: This is redundant, remove soon
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
     agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
     model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
     model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
     not_letta_endpoint = not ("inference.letta.com" in agent.llm_config.model_endpoint)
     request_start_timestamp_ns = get_utc_timestamp_ns()
 
-    if
+    if agent_eligible and model_compatible:
         if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
-
+            agent_loop = SleeptimeMultiAgentV2(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -768,7 +772,7 @@ async def send_message_streaming(
                 group=agent.multi_agent_group,
             )
         else:
-
+            agent_loop = LettaAgent(
                 agent_id=agent_id,
                 message_manager=server.message_manager,
                 agent_manager=server.agent_manager,
@@ -782,7 +786,7 @@ async def send_message_streaming(
 
         if request.stream_tokens and model_compatible_token_streaming and not_letta_endpoint:
             result = StreamingResponseWithStatusCode(
-
+                agent_loop.step_stream(
                     input_messages=request.messages,
                     max_steps=10,
                     use_assistant_message=request.use_assistant_message,
@@ -792,7 +796,7 @@ async def send_message_streaming(
                 )
         else:
             result = StreamingResponseWithStatusCode(
-
+                agent_loop.step_stream_no_tokens(
                     request.messages,
                     max_steps=10,
                     use_assistant_message=request.use_assistant_message,
@@ -878,6 +882,7 @@ async def send_message_async(
     Asynchronously process a user message and return a run object.
     The actual processing happens in the background, and the status can be checked using the run ID.
     """
+    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
 
     # Create a new job
@@ -953,17 +958,13 @@ async def summarize_agent_conversation(
     This endpoint summarizes the current message history for a given agent,
     truncating and compressing it down to the specified `max_message_length`.
     """
-    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
 
-
-    # TODO: This is redundant, remove soon
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
     agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
-    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
-    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
     model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
 
-    if agent_eligible and
+    if agent_eligible and model_compatible:
         agent = LettaAgent(
             agent_id=agent_id,
             message_manager=server.message_manager,
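
The `MetricRegistry().user_message_counter.add(1, get_ctx_attributes())` calls added to `send_message`, `send_message_streaming`, and `send_message_async` follow the standard OpenTelemetry counter pattern. The registry itself lives in the new `letta/otel/metric_registry.py` (+122 lines), which this diff does not show; the following is only a minimal sketch of what such a registry typically looks like, with all wiring details assumed:

```python
# Hypothetical sketch -- not the actual letta/otel/metric_registry.py.
from opentelemetry import metrics


class MetricRegistry:
    """Process-wide holder for shared OpenTelemetry instruments."""

    _instance = None

    def __new__(cls):
        # Singleton so every call site records into the same instruments
        # (the diff also adds a letta/helpers/singleton.py helper).
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            meter = metrics.get_meter("letta")
            cls._instance.user_message_counter = meter.create_counter(
                "letta.user_message.count",
                description="Number of user messages received",
            )
        return cls._instance
```

Call sites attach per-request attributes via `get_ctx_attributes()`, so the counter can later be sliced by whatever context those attributes carry.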
letta/server/rest_api/routers/v1/groups.py
CHANGED
@@ -86,7 +86,7 @@ def create_group(
 
 
 @router.patch("/{group_id}", response_model=Group, operation_id="modify_group")
-def modify_group(
+async def modify_group(
     group_id: str,
     group: GroupUpdate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
@@ -97,8 +97,8 @@ def modify_group(
     Create a new multi-agent group with the specified configuration.
     """
     try:
-        actor = server.user_manager.
-        return server.group_manager.
+        actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+        return await server.group_manager.modify_group_async(group_id=group_id, group_update=group, actor=actor)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
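
`modify_group` moves from a blocking `def` route to an `async def` route that awaits the new `modify_group_async` manager method. FastAPI runs `def` endpoints in a threadpool but runs `async def` endpoints directly on the event loop, so the awaited database call no longer ties up a worker thread. A self-contained sketch of the pattern (the handler and the stand-in manager call below are illustrative, not taken from the diff):

```python
import asyncio

from fastapi import FastAPI

app = FastAPI()


async def modify_group_async(group_id: str) -> dict:
    # Stand-in for an awaitable manager/ORM call (e.g. SQLAlchemy AsyncSession).
    await asyncio.sleep(0)
    return {"group_id": group_id, "status": "updated"}


@app.patch("/groups/{group_id}")
async def modify_group(group_id: str) -> dict:
    # Awaiting keeps the event loop free while the update is in flight.
    return await modify_group_async(group_id)
```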
letta/server/rest_api/routers/v1/sources.py
CHANGED
@@ -27,6 +27,11 @@ from letta.utils import safe_create_task, sanitize_filename
 
 logger = get_logger(__name__)
 
+mimetypes.add_type("text/markdown", ".md")
+mimetypes.add_type("text/markdown", ".markdown")
+mimetypes.add_type("application/jsonl", ".jsonl")
+mimetypes.add_type("application/x-jsonlines", ".jsonl")
+
 
 router = APIRouter(prefix="/sources", tags=["sources"])
 
@@ -174,7 +179,15 @@ async def upload_file_to_source(
     """
     Upload a file to a data source.
     """
-    allowed_media_types = {
+    allowed_media_types = {
+        "application/pdf",
+        "text/plain",
+        "text/markdown",
+        "text/x-markdown",
+        "application/json",
+        "application/jsonl",
+        "application/x-jsonlines",
+    }
 
     # Normalize incoming Content-Type header (strip charset or any parameters).
     raw_ct = file.content_type or ""
@@ -192,6 +205,9 @@ async def upload_file_to_source(
         ".pdf": "application/pdf",
         ".txt": "text/plain",
         ".json": "application/json",
+        ".md": "text/markdown",
+        ".markdown": "text/markdown",
+        ".jsonl": "application/jsonl",
     }
     media_type = ext_map.get(ext, media_type)
 
@@ -270,14 +286,21 @@ async def list_source_files(
     source_id: str,
     limit: int = Query(1000, description="Number of files to return"),
     after: Optional[str] = Query(None, description="Pagination cursor to fetch the next set of results"),
+    include_content: bool = Query(False, description="Whether to include full file content"),
     server: "SyncServer" = Depends(get_letta_server),
-    actor_id: Optional[str] = Header(None, alias="user_id"),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
 ):
     """
     List paginated files associated with a data source.
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    return await server.source_manager.list_files(
+    return await server.source_manager.list_files(
+        source_id=source_id,
+        limit=limit,
+        after=after,
+        actor=actor,
+        include_content=include_content,
+    )
 
 
 # it's redundant to include /delete in the URL path. The HTTP verb DELETE already implies that action.
letta/server/rest_api/utils.py
CHANGED
@@ -15,9 +15,12 @@ from pydantic import BaseModel
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
-from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
+from letta.otel.context import get_ctx_attributes
+from letta.otel.metric_registry import MetricRegistry
+from letta.otel.tracing import tracer
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.llm_config import LLMConfig
@@ -27,7 +30,6 @@ from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.interface import StreamingServerInterface
 from letta.system import get_heartbeat, package_function_response
-from letta.tracing import tracer
 
 if TYPE_CHECKING:
     from letta.server.server import SyncServer
@@ -81,8 +83,12 @@ async def sse_async_generator(
         if first_chunk and ttft_span is not None:
             now = get_utc_timestamp_ns()
             ttft_ns = now - request_start_timestamp_ns
-            ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns
+            ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
             ttft_span.end()
+            metric_attributes = get_ctx_attributes()
+            if llm_config:
+                metric_attributes["model.name"] = llm_config.model
+            MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
             first_chunk = False
 
         # yield f"data: {json.dumps(chunk)}\n\n"
@@ -190,7 +196,6 @@ def create_letta_messages_from_llm_response(
     add_heartbeat_request_system_message: bool = False,
     reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
     pre_computed_assistant_message_id: Optional[str] = None,
-    pre_computed_tool_message_id: Optional[str] = None,
     llm_batch_item_id: Optional[str] = None,
     step_id: str | None = None,
 ) -> List[Message]:
@@ -245,8 +250,6 @@ def create_letta_messages_from_llm_response(
             )
         ],
     )
-    if pre_computed_tool_message_id:
-        tool_message.id = pre_computed_tool_message_id
     messages.append(tool_message)
 
     if add_heartbeat_request_system_message:
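
The time-to-first-token instrumentation now emits the measured value twice: once as a span event and once into a `ttft_ms_histogram`, which makes TTFT aggregable into percentiles across requests rather than only inspectable trace by trace. `ns_to_ms` comes from the expanded `letta/helpers/datetime_helpers.py`, whose body this diff does not show; it is presumably a plain unit conversion along these lines:

```python
def ns_to_ms(ns: int) -> float:
    """Convert a duration in nanoseconds to milliseconds (assumed implementation)."""
    return ns / 1_000_000
```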
letta/server/server.py
CHANGED
@@ -21,7 +21,7 @@ import letta.system as system
 from letta.agent import Agent, save_agent
 from letta.agents.letta_agent import LettaAgent
 from letta.config import LettaConfig
-from letta.constants import
+from letta.constants import LETTA_TOOL_EXECUTION_DIR
 from letta.data_sources.connectors import DataConnector, load_data
 from letta.errors import HandleNotFoundError
 from letta.functions.mcp_client.types import MCPServerType, MCPTool, SSEServerConfig, StdioServerConfig
@@ -34,6 +34,7 @@ from letta.interface import AgentInterface  # abstract
 from letta.interface import CLIInterface  # for printing to terminal
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.otel.tracing import log_event, trace_method
 from letta.prompts.gpt_system import get_system_text
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
 from letta.schemas.block import Block, BlockUpdate, CreateBlock
@@ -101,7 +102,6 @@ from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
 from letta.services.tool_manager import ToolManager
 from letta.services.user_manager import UserManager
 from letta.settings import model_settings, settings, tool_settings
-from letta.tracing import log_event, trace_method
 from letta.utils import get_friendly_error_msg, get_persona_text, make_key
 
 config = LettaConfig.load()
@@ -1108,13 +1108,11 @@ class SyncServer(Server):
         after: Optional[str] = None,
         before: Optional[str] = None,
         limit: Optional[int] = 100,
-        order_by: Optional[str] = "created_at",
-        reverse: Optional[bool] = False,
         query_text: Optional[str] = None,
         ascending: Optional[bool] = True,
     ) -> List[Passage]:
         # iterate over records
-        records = await self.agent_manager.
+        records = await self.agent_manager.list_agent_passages_async(
             actor=actor,
             agent_id=agent_id,
             after=after,
@@ -1368,12 +1366,13 @@ class SyncServer(Server):
         )
         await self.agent_manager.delete_agent_async(agent_id=sleeptime_agent_state.id, actor=actor)
 
-    async def _upsert_file_to_agent(self, agent_id: str, text: str, file_id: str, actor: User) -> None:
+    async def _upsert_file_to_agent(self, agent_id: str, text: str, file_id: str, file_name: str, actor: User) -> None:
         """
         Internal method to create or update a file <-> agent association
         """
-
-
+        await self.file_agent_manager.attach_file(
+            agent_id=agent_id, file_id=file_id, file_name=file_name, actor=actor, visible_content=text
+        )
 
     async def _remove_file_from_agent(self, agent_id: str, file_id: str, actor: User) -> None:
         """
@@ -1389,7 +1388,7 @@ class SyncServer(Server):
             logger.info(f"File {file_id} already removed from agent {agent_id}, skipping...")
 
     async def insert_file_into_context_windows(
-        self, source_id: str, text: str, file_id: str, actor: User, agent_states: Optional[List[AgentState]] = None
+        self, source_id: str, text: str, file_id: str, file_name: str, actor: User, agent_states: Optional[List[AgentState]] = None
     ) -> List[AgentState]:
         """
         Insert the uploaded document into the context window of all agents
@@ -1404,11 +1403,13 @@ class SyncServer(Server):
         logger.info(f"Inserting document into context window for source: {source_id}")
         logger.info(f"Attached agents: {[a.id for a in agent_states]}")
 
-        await asyncio.gather(*(self._upsert_file_to_agent(agent_state.id, text, file_id, actor) for agent_state in agent_states))
+        await asyncio.gather(*(self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor) for agent_state in agent_states))
 
         return agent_states
 
-    async def insert_files_into_context_window(
+    async def insert_files_into_context_window(
+        self, agent_state: AgentState, texts: List[str], file_ids: List[str], file_names: List[str], actor: User
+    ) -> None:
         """
         Insert the uploaded documents into the context window of an agent
         attached to the given source.
@@ -1418,7 +1419,12 @@ class SyncServer(Server):
         if len(texts) != len(file_ids):
             raise ValueError(f"Mismatch between number of texts ({len(texts)}) and file ids ({len(file_ids)})")
 
-        await asyncio.gather(
+        await asyncio.gather(
+            *(
+                self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor)
+                for text, file_id, file_name in zip(texts, file_ids, file_names)
+            )
+        )
 
     async def remove_file_from_context_windows(self, source_id: str, file_id: str, actor: User) -> None:
         """
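
The rewritten `insert_files_into_context_window` fans the per-file upserts out concurrently by unpacking a generator of coroutines into `asyncio.gather`, zipping each file's text, id, and name together. A runnable sketch of the same pattern (the `upsert` coroutine below stands in for `_upsert_file_to_agent`):

```python
import asyncio


async def upsert(agent_id: str, text: str, file_id: str, file_name: str) -> None:
    # Stand-in for SyncServer._upsert_file_to_agent.
    await asyncio.sleep(0)
    print(f"attached {file_name} ({file_id}) to {agent_id}")


async def main() -> None:
    texts = ["alpha", "beta"]
    file_ids = ["file-1", "file-2"]
    file_names = ["a.md", "b.jsonl"]
    # gather() runs all upserts concurrently and raises the first exception,
    # matching the semantics of the call in the diff.
    await asyncio.gather(
        *(
            upsert("agent-1", text, fid, fname)
            for text, fid, fname in zip(texts, file_ids, file_names)
        )
    )


asyncio.run(main())
```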