letta-nightly 0.6.34.dev20250303104329__py3-none-any.whl → 0.6.34.dev20250303230404__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +40 -15
- letta/agents/__init__.py +0 -0
- letta/agents/base_agent.py +51 -0
- letta/agents/ephemeral_agent.py +72 -0
- letta/agents/low_latency_agent.py +315 -0
- letta/constants.py +3 -1
- letta/functions/ast_parsers.py +50 -1
- letta/functions/helpers.py +79 -2
- letta/functions/schema_generator.py +3 -0
- letta/helpers/converters.py +3 -3
- letta/interfaces/__init__.py +0 -0
- letta/interfaces/openai_chat_completions_streaming_interface.py +109 -0
- letta/interfaces/utils.py +11 -0
- letta/llm_api/anthropic.py +9 -1
- letta/llm_api/azure_openai.py +3 -0
- letta/llm_api/google_ai.py +3 -0
- letta/llm_api/google_vertex.py +4 -0
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/openai.py +6 -0
- letta/local_llm/chat_completion_proxy.py +6 -1
- letta/log.py +2 -2
- letta/orm/step.py +1 -0
- letta/orm/tool.py +1 -1
- letta/prompts/system/memgpt_convo_only.txt +3 -5
- letta/prompts/system/memgpt_memory_only.txt +29 -0
- letta/schemas/agent.py +0 -1
- letta/schemas/step.py +1 -1
- letta/schemas/tool.py +16 -2
- letta/server/rest_api/app.py +5 -1
- letta/server/rest_api/routers/v1/agents.py +32 -21
- letta/server/rest_api/routers/v1/identities.py +9 -1
- letta/server/rest_api/routers/v1/runs.py +49 -0
- letta/server/rest_api/routers/v1/tools.py +1 -0
- letta/server/rest_api/routers/v1/voice.py +19 -255
- letta/server/rest_api/utils.py +3 -2
- letta/server/server.py +15 -7
- letta/services/agent_manager.py +10 -6
- letta/services/helpers/agent_manager_helper.py +0 -2
- letta/services/helpers/tool_execution_helper.py +18 -0
- letta/services/job_manager.py +98 -0
- letta/services/step_manager.py +2 -0
- letta/services/summarizer/__init__.py +0 -0
- letta/services/summarizer/enums.py +9 -0
- letta/services/summarizer/summarizer.py +102 -0
- letta/services/tool_execution_sandbox.py +20 -3
- letta/services/tool_manager.py +1 -1
- letta/settings.py +2 -0
- letta/tracing.py +176 -156
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/METADATA +6 -5
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/RECORD +54 -44
- letta/chat_only_agent.py +0 -101
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.34.dev20250303104329.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/routers/v1/agents.py
CHANGED

@@ -17,14 +17,13 @@ from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
 from letta.schemas.message import Message, MessageUpdate
-from letta.schemas.passage import Passage
+from letta.schemas.passage import Passage, PassageUpdate
 from letta.schemas.run import Run
 from letta.schemas.source import Source
 from letta.schemas.tool import Tool
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
-from letta.tracing import trace_method

 # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally

@@ -273,14 +272,14 @@ def retrieve_agent_memory(


 @router.get("/{agent_id}/core-memory/blocks/{block_label}", response_model=Block, operation_id="retrieve_core_memory_block")
-def retrieve_core_memory_block(
+def retrieve_block(
     agent_id: str,
     block_label: str,
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     """
-    Retrieve a memory block from an agent.
+    Retrieve a core memory block from an agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)

@@ -291,13 +290,13 @@ def retrieve_core_memory_block(


 @router.get("/{agent_id}/core-memory/blocks", response_model=List[Block], operation_id="list_core_memory_blocks")
-def list_core_memory_blocks(
+def list_blocks(
     agent_id: str,
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     """
-    Retrieve the memory blocks of a specific agent.
+    Retrieve the core memory blocks of a specific agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
     try:
@@ -308,7 +307,7 @@ def list_core_memory_blocks(


 @router.patch("/{agent_id}/core-memory/blocks/{block_label}", response_model=Block, operation_id="modify_core_memory_block")
-def modify_core_memory_block(
+def modify_block(
     agent_id: str,
     block_label: str,
     block_update: BlockUpdate = Body(...),
@@ -316,7 +315,7 @@ def modify_core_memory_block(
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     """
-    Updates a memory block of an agent.
+    Updates a core memory block of an agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)

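For reference, a minimal client-side sketch of the renamed core-memory block routes above, using plain requests. The base URL, agent ID, block label, and the "value" field in the BlockUpdate body are placeholders and assumptions, not taken from this diff.

    import requests  # sketch only; assumes the requests package is installed

    BASE_URL = "http://localhost:8283"  # placeholder Letta server address
    HEADERS = {"user_id": "user-123"}   # actor_id is read from this header, as in the routes above
    agent_id = "agent-abc"              # placeholder

    # List the agent's core memory blocks (list_core_memory_blocks)
    blocks = requests.get(f"{BASE_URL}/v1/agents/{agent_id}/core-memory/blocks", headers=HEADERS).json()

    # Update one block by label (modify_core_memory_block); "value" is an assumed BlockUpdate field
    updated = requests.patch(
        f"{BASE_URL}/v1/agents/{agent_id}/core-memory/blocks/human",
        json={"value": "The user's name is Ada."},
        headers=HEADERS,
    ).json()
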
@@ -330,35 +329,35 @@ def modify_core_memory_block(


 @router.patch("/{agent_id}/core-memory/blocks/attach/{block_id}", response_model=AgentState, operation_id="attach_core_memory_block")
-def
+def attach_block(
     agent_id: str,
     block_id: str,
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
 ):
     """
-    Attach a
+    Attach a core memoryblock to an agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
     return server.agent_manager.attach_block(agent_id=agent_id, block_id=block_id, actor=actor)


 @router.patch("/{agent_id}/core-memory/blocks/detach/{block_id}", response_model=AgentState, operation_id="detach_core_memory_block")
-def
+def detach_block(
     agent_id: str,
     block_id: str,
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
 ):
     """
-    Detach a block from an agent.
+    Detach a core memory block from an agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
     return server.agent_manager.detach_block(agent_id=agent_id, block_id=block_id, actor=actor)


-@router.get("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="
-def list_archival_memory(
+@router.get("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="list_passages")
+def list_passages(
     agent_id: str,
     server: "SyncServer" = Depends(get_letta_server),
     after: Optional[int] = Query(None, description="Unique ID of the memory to start the query range at."),
@@ -380,8 +379,8 @@ def list_archival_memory(
     )


-@router.post("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="
-def create_archival_memory(
+@router.post("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="create_passage")
+def create_passage(
     agent_id: str,
     request: CreateArchivalMemory = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
@@ -395,10 +394,25 @@ def create_archival_memory(
     return server.insert_archival_memory(agent_id=agent_id, memory_contents=request.text, actor=actor)


+@router.patch("/{agent_id}/archival-memory/{memory_id}", response_model=List[Passage], operation_id="modify_passage")
+def modify_passage(
+    agent_id: str,
+    memory_id: str,
+    passage: PassageUpdate = Body(...),
+    server: "SyncServer" = Depends(get_letta_server),
+    actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+):
+    """
+    Modify a memory in the agent's archival memory store.
+    """
+    actor = server.user_manager.get_user_or_default(user_id=actor_id)
+    return server.modify_archival_memory(agent_id=agent_id, memory_id=memory_id, passage=passage, actor=actor)
+
+
 # TODO(ethan): query or path parameter for memory_id?
 # @router.delete("/{agent_id}/archival")
-@router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="
-def
+@router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="delete_passage")
+def delete_passage(
     agent_id: str,
     memory_id: str,
     # memory_id: str = Query(..., description="Unique ID of the memory to be deleted."),
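A similar hedged sketch for the archival-memory routes, including the new modify_passage PATCH endpoint added above. The "text" field follows the CreateArchivalMemory usage visible in the handler (request.text); its presence on PassageUpdate is an assumption, and the URL and IDs are placeholders.

    import requests  # sketch only

    BASE_URL = "http://localhost:8283"  # placeholder
    HEADERS = {"user_id": "user-123"}
    agent_id = "agent-abc"              # placeholder

    # Create an archival memory passage (create_passage); the route returns a list of Passage objects
    created = requests.post(
        f"{BASE_URL}/v1/agents/{agent_id}/archival-memory",
        json={"text": "The user prefers morning meetings."},
        headers=HEADERS,
    ).json()
    passage_id = created[0]["id"]

    # Modify it via the new PATCH route (modify_passage)
    requests.patch(
        f"{BASE_URL}/v1/agents/{agent_id}/archival-memory/{passage_id}",
        json={"text": "The user prefers afternoon meetings."},
        headers=HEADERS,
    )

    # Delete it (delete_passage)
    requests.delete(f"{BASE_URL}/v1/agents/{agent_id}/archival-memory/{passage_id}", headers=HEADERS)
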
@@ -471,7 +485,6 @@ def modify_message(
     response_model=LettaResponse,
     operation_id="send_message",
 )
-@trace_method("POST /v1/agents/{agent_id}/messages")
 async def send_message(
     agent_id: str,
     server: SyncServer = Depends(get_letta_server),
@@ -510,7 +523,6 @@ async def send_message(
         }
     },
 )
-@trace_method("POST /v1/agents/{agent_id}/messages/stream")
 async def send_message_streaming(
     agent_id: str,
     server: SyncServer = Depends(get_letta_server),
@@ -586,7 +598,6 @@ async def process_message_background(
     response_model=Run,
     operation_id="create_agent_message_async",
 )
-@trace_method("POST /v1/agents/{agent_id}/messages/async")
 async def send_message_async(
     agent_id: str,
     background_tasks: BackgroundTasks,
letta/server/rest_api/routers/v1/identities.py
CHANGED

@@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, List, Optional

 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query

-from letta.orm.errors import NoResultFound
+from letta.orm.errors import NoResultFound, UniqueConstraintViolationError
 from letta.schemas.identity import Identity, IdentityCreate, IdentityType, IdentityUpdate
 from letta.server.rest_api.utils import get_letta_server

@@ -72,6 +72,14 @@ def create_identity(
         return server.identity_manager.create_identity(identity=identity, actor=actor)
     except HTTPException:
         raise
+    except UniqueConstraintViolationError:
+        if identity.project_id:
+            raise HTTPException(
+                status_code=400,
+                detail=f"An identity with identifier key {identity.identifier_key} already exists for project {identity.project_id}",
+            )
+        else:
+            raise HTTPException(status_code=400, detail=f"An identity with identifier key {identity.identifier_key} already exists")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"{e}")

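With the handler change above, creating an identity whose identifier_key already exists now surfaces as a 400 instead of falling through to the generic 500 branch. A hedged client sketch; the /v1/identities path and the payload fields other than identifier_key are assumptions.

    import requests  # sketch only

    BASE_URL = "http://localhost:8283"  # placeholder
    HEADERS = {"user_id": "user-123"}

    payload = {"identifier_key": "customer-42", "name": "Customer 42"}  # assumed IdentityCreate fields

    resp = requests.post(f"{BASE_URL}/v1/identities/", json=payload, headers=HEADERS)
    if resp.status_code == 400:
        # Duplicate identifier_key (optionally scoped to a project_id)
        print("identity already exists:", resp.json()["detail"])
    else:
        resp.raise_for_status()
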
letta/server/rest_api/routers/v1/runs.py
CHANGED

@@ -9,6 +9,7 @@ from letta.schemas.enums import JobStatus, MessageRole
 from letta.schemas.letta_message import LettaMessageUnion
 from letta.schemas.openai.chat_completion_response import UsageStatistics
 from letta.schemas.run import Run
+from letta.schemas.step import Step
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer

@@ -137,6 +138,54 @@ def retrieve_run_usage(
         raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")


+@router.get(
+    "/{run_id}/steps",
+    response_model=List[Step],
+    operation_id="list_run_steps",
+)
+async def list_run_steps(
+    run_id: str,
+    server: "SyncServer" = Depends(get_letta_server),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+    before: Optional[str] = Query(None, description="Cursor for pagination"),
+    after: Optional[str] = Query(None, description="Cursor for pagination"),
+    limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
+    order: str = Query(
+        "desc", description="Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order."
+    ),
+):
+    """
+    Get messages associated with a run with filtering options.
+
+    Args:
+        run_id: ID of the run
+        before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.
+        after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
+        limit: Maximum number of steps to return
+        order: Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.
+
+    Returns:
+        A list of steps associated with the run.
+    """
+    if order not in ["asc", "desc"]:
+        raise HTTPException(status_code=400, detail="Order must be 'asc' or 'desc'")
+
+    actor = server.user_manager.get_user_or_default(user_id=actor_id)
+
+    try:
+        steps = server.job_manager.get_job_steps(
+            job_id=run_id,
+            actor=actor,
+            limit=limit,
+            before=before,
+            after=after,
+            ascending=(order == "asc"),
+        )
+        return steps
+    except NoResultFound as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+
 @router.delete("/{run_id}", response_model=Run, operation_id="delete_run")
 def delete_run(
     run_id: str,
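A hedged sketch of paging through the new run-steps endpoint. The /v1/runs prefix is inferred from the router file name, the run ID is a placeholder, and each returned step is assumed to carry an "id" field usable as the pagination cursor.

    import requests  # sketch only

    BASE_URL = "http://localhost:8283"  # placeholder
    HEADERS = {"user_id": "user-123"}
    run_id = "run-xyz"                  # placeholder

    # First page, oldest first; 'order' must be 'asc' or 'desc', otherwise the route returns 400
    steps = requests.get(
        f"{BASE_URL}/v1/runs/{run_id}/steps",
        params={"limit": 20, "order": "asc"},
        headers=HEADERS,
    ).json()

    # Cursor pagination: pass the last step's id as 'after' to fetch the next page
    if steps:
        next_page = requests.get(
            f"{BASE_URL}/v1/runs/{run_id}/steps",
            params={"limit": 20, "order": "asc", "after": steps[-1]["id"]},
            headers=HEADERS,
        ).json()
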
letta/server/rest_api/routers/v1/voice.py
CHANGED

@@ -1,42 +1,15 @@
-import json
-import uuid
 from typing import TYPE_CHECKING, Optional

 import httpx
 import openai
 from fastapi import APIRouter, Body, Depends, Header, HTTPException
 from fastapi.responses import StreamingResponse
-from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta
 from openai.types.chat.completion_create_params import CompletionCreateParams
-from starlette.concurrency import run_in_threadpool

-from letta.
-from letta.helpers.tool_execution_helper import (
-    add_pre_execution_message,
-    enable_strict_mode,
-    execute_external_tool,
-    remove_request_heartbeat,
-)
+from letta.agents.low_latency_agent import LowLatencyAgent
 from letta.log import get_logger
-from letta.
-from letta.
-    AssistantMessage,
-    ChatCompletionRequest,
-    Tool,
-    ToolCall,
-    ToolCallFunction,
-    ToolMessage,
-    UserMessage,
-)
-from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
-from letta.server.rest_api.utils import (
-    convert_letta_messages_to_openai,
-    create_assistant_messages_from_openai_response,
-    create_tool_call_messages_from_openai_response,
-    create_user_message,
-    get_letta_server,
-    get_messages_from_completion_request,
-)
+from letta.schemas.openai.chat_completions import UserMessage
+from letta.server.rest_api.utils import get_letta_server, get_messages_from_completion_request
 from letta.settings import model_settings

 if TYPE_CHECKING:
@@ -72,42 +45,10 @@ async def create_voice_chat_completions(
     if agent_id is None:
         raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")

-
-
-        raise HTTPException(status_code=400, detail="Only OpenAI models are supported by this endpoint.")
-
-    # Convert Letta messages to OpenAI messages
-    in_context_messages = server.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=actor)
-    openai_messages = convert_letta_messages_to_openai(in_context_messages)
-
-    # Also parse user input from completion_request and append
-    input_message = get_messages_from_completion_request(completion_request)[-1]
-    openai_messages.append(input_message)
+    # Also parse the user's new input
+    input_message = UserMessage(**get_messages_from_completion_request(completion_request)[-1])

-    #
-    tools = [t for t in agent_state.tools if t.name not in LETTA_TOOL_SET and t.tool_type in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM}]
-
-    # Initial request
-    openai_request = ChatCompletionRequest(
-        model=agent_state.llm_config.model,
-        messages=openai_messages,
-        # TODO: This nested thing here is so ugly, need to refactor
-        tools=(
-            [
-                Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
-                for t in tools
-            ]
-            if tools
-            else None
-        ),
-        tool_choice="auto",
-        user=user_id,
-        max_completion_tokens=agent_state.llm_config.max_tokens,
-        temperature=agent_state.llm_config.temperature,
-        stream=True,
-    )
-
-    # Create the OpenAI async client
+    # Create OpenAI async client
     client = openai.AsyncClient(
         api_key=model_settings.openai_api_key,
         max_retries=0,
@@ -122,194 +63,17 @@ async def create_voice_chat_completions(
         ),
     )

-    #
-
-
-
-
-
-
-
-
-
-
-        (b) a tool message referencing that ID, containing the tool result.
-        - Re-invoke the OpenAI request with updated conversation, streaming again.
-        - End when finish_reason="stop" or no more tool calls.
-        """
-
-        # We'll keep updating this conversation in a loop
-        conversation = openai_messages[:]
-
-        while True:
-            # Make the streaming request to OpenAI
-            stream = await client.chat.completions.create(**openai_request.model_dump(exclude_unset=True))
-
-            content_buffer = []
-            tool_call_name = None
-            tool_call_args_str = ""
-            tool_call_id = None
-            tool_call_happened = False
-            finish_reason_stop = False
-            optimistic_json_parser = OptimisticJSONParser(strict=True)
-            current_parsed_json_result = {}
-
-            async with stream:
-                async for chunk in stream:
-                    choice = chunk.choices[0]
-                    delta = choice.delta
-                    finish_reason = choice.finish_reason  # "tool_calls", "stop", or None
-
-                    if delta.content:
-                        content_buffer.append(delta.content)
-                        yield f"data: {chunk.model_dump_json()}\n\n"
-
-                    # CASE B: Partial tool call info
-                    if delta.tool_calls:
-                        # Typically there's only one in delta.tool_calls
-                        tc = delta.tool_calls[0]
-                        if tc.function.name:
-                            tool_call_name = tc.function.name
-                        if tc.function.arguments:
-                            tool_call_args_str += tc.function.arguments
-
-                            # See if we can stream out the pre-execution message
-                            parsed_args = optimistic_json_parser.parse(tool_call_args_str)
-                            if parsed_args.get(
-                                PRE_EXECUTION_MESSAGE_ARG
-                            ) and current_parsed_json_result.get(  # Ensure key exists and is not None/empty
-                                PRE_EXECUTION_MESSAGE_ARG
-                            ) != parsed_args.get(
-                                PRE_EXECUTION_MESSAGE_ARG
-                            ):
-                                # Only stream if there's something new to stream
-                                # We do this way to avoid hanging JSON at the end of the stream, e.g. '}'
-                                if parsed_args != current_parsed_json_result:
-                                    current_parsed_json_result = parsed_args
-                                    synthetic_chunk = ChatCompletionChunk(
-                                        id=chunk.id,
-                                        object=chunk.object,
-                                        created=chunk.created,
-                                        model=chunk.model,
-                                        choices=[
-                                            Choice(
-                                                index=choice.index,
-                                                delta=ChoiceDelta(content=tc.function.arguments, role="assistant"),
-                                                finish_reason=None,
-                                            )
-                                        ],
-                                    )
-
-                                    yield f"data: {synthetic_chunk.model_dump_json()}\n\n"
-
-                        # We might generate a unique ID for the tool call
-                        if tc.id:
-                            tool_call_id = tc.id
-
-                    # Check finish_reason
-                    if finish_reason == "tool_calls":
-                        tool_call_happened = True
-                        break
-                    elif finish_reason == "stop":
-                        finish_reason_stop = True
-                        break
-
-            if content_buffer:
-                # We treat that partial text as an assistant message
-                content = "".join(content_buffer)
-                conversation.append({"role": "assistant", "content": content})
-
-                # Create an assistant message here to persist later
-                assistant_messages = create_assistant_messages_from_openai_response(
-                    response_text=content, agent_id=agent_id, model=agent_state.llm_config.model, actor=actor
-                )
-                message_db_queue.extend(assistant_messages)
-
-            if tool_call_happened:
-                # Parse the tool call arguments
-                try:
-                    tool_args = json.loads(tool_call_args_str)
-                except json.JSONDecodeError:
-                    tool_args = {}
-
-                if not tool_call_id:
-                    # If no tool_call_id given by the model, generate one
-                    tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
-
-                # 1) Insert the "assistant" message with the tool_calls field
-                # referencing the same tool_call_id
-                assistant_tool_call_msg = AssistantMessage(
-                    content=None,
-                    tool_calls=[ToolCall(id=tool_call_id, function=ToolCallFunction(name=tool_call_name, arguments=tool_call_args_str))],
-                )
-
-                conversation.append(assistant_tool_call_msg.model_dump())
-
-                # 2) Execute the tool
-                target_tool = next((x for x in tools if x.name == tool_call_name), None)
-                if not target_tool:
-                    # Tool not found, handle error
-                    yield f"data: {json.dumps({'error': 'Tool not found', 'tool': tool_call_name})}\n\n"
-                    break
-
-                try:
-                    tool_result, _ = execute_external_tool(
-                        agent_state=agent_state,
-                        function_name=tool_call_name,
-                        function_args=tool_args,
-                        target_letta_tool=target_tool,
-                        actor=actor,
-                        allow_agent_state_modifications=False,
-                    )
-                    function_call_success = True
-                except Exception as e:
-                    tool_result = f"Failed to call tool. Error: {e}"
-                    function_call_success = False
-
-                # 3) Insert the "tool" message referencing the same tool_call_id
-                tool_message = ToolMessage(content=json.dumps({"result": tool_result}), tool_call_id=tool_call_id)
-
-                conversation.append(tool_message.model_dump())
-
-                # 4) Add a user message prompting the tool call result summarization
-                heartbeat_user_message = UserMessage(
-                    content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user.",
-                )
-                conversation.append(heartbeat_user_message.model_dump())
-
-                # Now, re-invoke OpenAI with the updated conversation
-                openai_request.messages = conversation
-
-                # Create a tool call message and append to message_db_queue
-                tool_call_messages = create_tool_call_messages_from_openai_response(
-                    agent_id=agent_state.id,
-                    model=agent_state.llm_config.model,
-                    function_name=tool_call_name,
-                    function_arguments=tool_args,
-                    tool_call_id=tool_call_id,
-                    function_call_success=function_call_success,
-                    function_response=tool_result,
-                    actor=actor,
-                    add_heartbeat_request_system_message=True,
-                )
-                message_db_queue.extend(tool_call_messages)
-
-                continue  # Start the while loop again
-
-            if finish_reason_stop:
-                break
-
-            # If we reach here, no tool call, no "stop", but we've ended streaming
-            # Possibly a model error or some other finish reason. We'll just end.
-            break
-
-        await run_in_threadpool(
-            server.agent_manager.append_to_in_context_messages,
-            message_db_queue,
-            agent_id=agent_id,
-            actor=actor,
-        )
-
-        yield "data: [DONE]\n\n"
+    # Instantiate our LowLatencyAgent
+    agent = LowLatencyAgent(
+        agent_id=agent_id,
+        openai_client=client,
+        message_manager=server.message_manager,
+        agent_manager=server.agent_manager,
+        block_manager=server.block_manager,
+        actor=actor,
+        message_buffer_limit=10,
+        message_buffer_min=4,
+    )

-
+    # Return the streaming generator
+    return StreamingResponse(agent.step_stream(input_message=input_message), media_type="text/event-stream")
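The refactor above moves the inline OpenAI streaming loop into LowLatencyAgent.step_stream() and leaves only the async-generator-plus-StreamingResponse wiring in the route. A minimal, self-contained sketch of that pattern; names such as demo_stream and /demo/stream are illustrative and not part of letta.

    import asyncio

    from fastapi import FastAPI
    from fastapi.responses import StreamingResponse

    app = FastAPI()


    async def demo_stream():
        # step_stream() plays this role in the handler above: yield SSE-framed chunks one by one
        for token in ["Hello", ", ", "world"]:
            yield f"data: {token}\n\n"
            await asyncio.sleep(0)  # let the event loop flush each chunk
        yield "data: [DONE]\n\n"


    @app.post("/demo/stream")
    async def demo_endpoint():
        # FastAPI streams whatever the generator yields as a text/event-stream response
        return StreamingResponse(demo_stream(), media_type="text/event-stream")
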
letta/server/rest_api/utils.py
CHANGED

@@ -13,7 +13,7 @@ from openai.types.chat.chat_completion_message_tool_call import Function as Open
 from openai.types.chat.completion_create_params import CompletionCreateParams
 from pydantic import BaseModel

-from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, REQ_HEARTBEAT_MESSAGE
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.log import get_logger
@@ -216,9 +216,10 @@ def create_tool_call_messages_from_openai_response(
     messages.append(tool_message)

     if add_heartbeat_request_system_message:
+        text_content = REQ_HEARTBEAT_MESSAGE if function_call_success else FUNC_FAILED_HEARTBEAT_MESSAGE
         heartbeat_system_message = Message(
             role=MessageRole.user,
-            content=[TextContent(text=get_heartbeat(
+            content=[TextContent(text=get_heartbeat(text_content))],
             organization_id=actor.organization_id,
             agent_id=agent_id,
             model=model,
letta/server/server.py
CHANGED

@@ -6,7 +6,7 @@ import traceback
 import warnings
 from abc import abstractmethod
 from datetime import datetime
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 from composio.client import Composio
 from composio.client.collections import ActionModel, AppModel
@@ -17,7 +17,6 @@ import letta.constants as constants
 import letta.server.utils as server_utils
 import letta.system as system
 from letta.agent import Agent, save_agent
-from letta.chat_only_agent import ChatOnlyAgent
 from letta.config import LettaConfig
 from letta.data_sources.connectors import DataConnector, load_data
 from letta.helpers.datetime_helpers import get_utc_time
@@ -43,7 +42,7 @@ from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import ArchivalMemorySummary, ContextWindowOverview, Memory, RecallMemorySummary
 from letta.schemas.message import Message, MessageCreate, MessageRole, MessageUpdate, TextContent
 from letta.schemas.organization import Organization
-from letta.schemas.passage import Passage
+from letta.schemas.passage import Passage, PassageUpdate
 from letta.schemas.providers import (
     AnthropicBedrockProvider,
     AnthropicProvider,
@@ -326,8 +325,6 @@ class SyncServer(Server):
             agent = Agent(agent_state=agent_state, interface=interface, user=actor)
         elif agent_state.agent_type == AgentType.offline_memory_agent:
             agent = OfflineMemoryAgent(agent_state=agent_state, interface=interface, user=actor)
-        elif agent_state.agent_type == AgentType.chat_only_agent:
-            agent = ChatOnlyAgent(agent_state=agent_state, interface=interface, user=actor)
         else:
             raise ValueError(f"Invalid agent type {agent_state.agent_type}")

@@ -770,6 +767,11 @@ class SyncServer(Server):

         return passages

+    def modify_archival_memory(self, agent_id: str, memory_id: str, passage: PassageUpdate, actor: User) -> List[Passage]:
+        passage = Passage(**passage.model_dump(exclude_unset=True, exclude_none=True))
+        passages = self.passage_manager.update_passage_by_id(passage_id=memory_id, passage=passage, actor=actor)
+        return passages
+
     def delete_archival_memory(self, memory_id: str, actor: User):
         # TODO check if it exists first, and throw error if not
         # TODO: @mindy make this return the deleted passage instead
@@ -978,6 +980,10 @@ class SyncServer(Server):
                 warnings.warn(f"An error occurred while listing LLM models for provider {provider}: {e}")

         llm_models.extend(self.get_local_llm_configs())
+
+        # respect global maximum
+        for llm_config in llm_models:
+            llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
         return llm_models

     def list_embedding_models(self) -> List[EmbeddingConfig]:
@@ -1023,7 +1029,7 @@ class SyncServer(Server):
                 raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})")
             llm_config.context_window = context_window_limit
         else:
-            llm_config.context_window = min(llm_config.context_window,
+            llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)

         return llm_config

@@ -1098,6 +1104,7 @@ class SyncServer(Server):
         tool_env_vars: Optional[Dict[str, str]] = None,
         tool_source_type: Optional[str] = None,
         tool_name: Optional[str] = None,
+        tool_args_json_schema: Optional[Dict[str, Any]] = None,
     ) -> ToolReturnMessage:
         """Run a tool from source code"""
         if tool_source_type is not None and tool_source_type != "python":
@@ -1107,6 +1114,7 @@ class SyncServer(Server):
         tool = Tool(
             name=tool_name,
             source_code=tool_source,
+            args_json_schema=tool_args_json_schema,
         )
         assert tool.name is not None, "Failed to create tool object"

@@ -1164,7 +1172,7 @@ class SyncServer(Server):
         actions = self.get_composio_client(api_key=api_key).actions.get(apps=[composio_app_name])
         return actions

-    @trace_method
+    @trace_method
     async def send_message_to_agent(
         self,
         agent_id: str,