letta-nightly 0.6.9.dev20250116104035__py3-none-any.whl → 0.6.9.dev20250116195713__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly has been flagged as potentially problematic.
- letta/__init__.py +1 -0
- letta/agent.py +24 -0
- letta/client/client.py +274 -11
- letta/constants.py +5 -0
- letta/functions/function_sets/multi_agent.py +96 -0
- letta/functions/helpers.py +105 -1
- letta/functions/schema_generator.py +8 -0
- letta/llm_api/openai.py +18 -2
- letta/local_llm/utils.py +4 -0
- letta/orm/__init__.py +1 -0
- letta/orm/enums.py +6 -0
- letta/orm/job.py +24 -2
- letta/orm/job_messages.py +33 -0
- letta/orm/job_usage_statistics.py +30 -0
- letta/orm/message.py +10 -0
- letta/orm/sqlalchemy_base.py +28 -4
- letta/orm/tool.py +0 -3
- letta/schemas/agent.py +10 -4
- letta/schemas/job.py +2 -0
- letta/schemas/letta_base.py +6 -1
- letta/schemas/letta_request.py +6 -4
- letta/schemas/llm_config.py +1 -1
- letta/schemas/message.py +2 -4
- letta/schemas/providers.py +1 -1
- letta/schemas/run.py +61 -0
- letta/schemas/tool.py +9 -17
- letta/server/rest_api/interface.py +3 -0
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +6 -12
- letta/server/rest_api/routers/v1/__init__.py +4 -0
- letta/server/rest_api/routers/v1/agents.py +47 -151
- letta/server/rest_api/routers/v1/runs.py +137 -0
- letta/server/rest_api/routers/v1/tags.py +27 -0
- letta/server/rest_api/utils.py +5 -3
- letta/server/server.py +139 -2
- letta/services/agent_manager.py +101 -6
- letta/services/job_manager.py +274 -9
- letta/services/tool_execution_sandbox.py +1 -1
- letta/services/tool_manager.py +30 -25
- letta/utils.py +3 -4
- {letta_nightly-0.6.9.dev20250116104035.dist-info → letta_nightly-0.6.9.dev20250116195713.dist-info}/METADATA +4 -3
- {letta_nightly-0.6.9.dev20250116104035.dist-info → letta_nightly-0.6.9.dev20250116195713.dist-info}/RECORD +44 -38
- {letta_nightly-0.6.9.dev20250116104035.dist-info → letta_nightly-0.6.9.dev20250116195713.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.9.dev20250116104035.dist-info → letta_nightly-0.6.9.dev20250116195713.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.9.dev20250116104035.dist-info → letta_nightly-0.6.9.dev20250116195713.dist-info}/entry_points.txt +0 -0
letta/schemas/tool.py
CHANGED

@@ -2,13 +2,17 @@ from typing import Any, Dict, List, Optional
 
 from pydantic import Field, model_validator
 
-from letta.constants import
+from letta.constants import (
+    COMPOSIO_TOOL_TAG_NAME,
+    FUNCTION_RETURN_CHAR_LIMIT,
+    LETTA_CORE_TOOL_MODULE_NAME,
+    LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
+)
 from letta.functions.functions import derive_openai_json_schema, get_json_schema_from_module
 from letta.functions.helpers import generate_composio_tool_wrapper, generate_langchain_tool_wrapper
 from letta.functions.schema_generator import generate_schema_from_args_schema_v2
 from letta.orm.enums import ToolType
 from letta.schemas.letta_base import LettaBase
-from letta.schemas.openai.chat_completions import ToolCall
 
 
 class BaseTool(LettaBase):

@@ -32,7 +36,6 @@ class Tool(BaseTool):
     tool_type: ToolType = Field(ToolType.CUSTOM, description="The type of the tool.")
     description: Optional[str] = Field(None, description="The description of the tool.")
     source_type: Optional[str] = Field(None, description="The type of the source code.")
-    module: Optional[str] = Field(None, description="The module of the function.")
     organization_id: Optional[str] = Field(None, description="The unique identifier of the organization associated with the tool.")
     name: Optional[str] = Field(None, description="The name of the function.")
     tags: List[str] = Field([], description="Metadata tags.")

@@ -66,6 +69,9 @@ class Tool(BaseTool):
         elif self.tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}:
             # If it's letta core tool, we generate the json_schema on the fly here
             self.json_schema = get_json_schema_from_module(module_name=LETTA_CORE_TOOL_MODULE_NAME, function_name=self.name)
+        elif self.tool_type in {ToolType.LETTA_MULTI_AGENT_CORE}:
+            # If it's letta multi-agent tool, we also generate the json_schema on the fly here
+            self.json_schema = get_json_schema_from_module(module_name=LETTA_MULTI_AGENT_TOOL_MODULE_NAME, function_name=self.name)
 
         # Derive name from the JSON schema if not provided
         if not self.name:

@@ -81,24 +87,11 @@ class Tool(BaseTool):
 
         return self
 
-    def to_dict(self):
-        """
-        Convert tool into OpenAI representation.
-        """
-        return vars(
-            ToolCall(
-                tool_id=self.id,
-                tool_call_type="function",
-                function=self.module,
-            )
-        )
-
 
 class ToolCreate(LettaBase):
     name: Optional[str] = Field(None, description="The name of the function (auto-generated from source_code if not provided).")
     description: Optional[str] = Field(None, description="The description of the tool.")
     tags: List[str] = Field([], description="Metadata tags.")
-    module: Optional[str] = Field(None, description="The source code of the function.")
     source_code: str = Field(..., description="The source code of the function.")
     source_type: str = Field("python", description="The source type of the function.")
     json_schema: Optional[Dict] = Field(

@@ -212,7 +205,6 @@ class ToolUpdate(LettaBase):
     description: Optional[str] = Field(None, description="The description of the tool.")
     name: Optional[str] = Field(None, description="The name of the function.")
     tags: Optional[List[str]] = Field(None, description="Metadata tags.")
-    module: Optional[str] = Field(None, description="The source code of the function.")
     source_code: Optional[str] = Field(None, description="The source code of the function.")
     source_type: Optional[str] = Field(None, description="The type of the source code.")
     json_schema: Optional[Dict] = Field(
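Note on the hunks above: Tool now derives json_schema for multi-agent core tools the same way it already did for core and memory-core tools, by introspecting a module named in letta.constants. A minimal runnable sketch of that dispatch, with a stubbed get_json_schema_from_module (the real helper lives in letta.functions.functions) and module-name constants that are assumptions here, chosen to be consistent with the new letta/functions/function_sets/multi_agent.py in this release:

import importlib
import inspect
from enum import Enum
from typing import Optional


class ToolType(str, Enum):
    CUSTOM = "custom"
    LETTA_CORE = "letta_core"
    LETTA_MEMORY_CORE = "letta_memory_core"
    LETTA_MULTI_AGENT_CORE = "letta_multi_agent_core"


# Assumed values; the real constants live in letta/constants.py.
LETTA_CORE_TOOL_MODULE_NAME = "letta.functions.function_sets.base"
LETTA_MULTI_AGENT_TOOL_MODULE_NAME = "letta.functions.function_sets.multi_agent"


def get_json_schema_from_module(module_name: str, function_name: str) -> dict:
    """Stub of the real helper: resolve the function, build a schema from its signature."""
    fn = getattr(importlib.import_module(module_name), function_name)
    return {"name": function_name, "parameters": list(inspect.signature(fn).parameters)}


def derive_json_schema(tool_type: ToolType, name: str) -> Optional[dict]:
    # Mirrors the elif-chain in the model validator above.
    if tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}:
        return get_json_schema_from_module(module_name=LETTA_CORE_TOOL_MODULE_NAME, function_name=name)
    elif tool_type in {ToolType.LETTA_MULTI_AGENT_CORE}:
        return get_json_schema_from_module(module_name=LETTA_MULTI_AGENT_TOOL_MODULE_NAME, function_name=name)
    return None  # custom tools derive their schema from source_code instead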
letta/server/rest_api/interface.py
CHANGED

@@ -281,6 +281,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # turn function argument to send_message into a normal text stream
         self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
 
+        # Store metadata passed from server
+        self.metadata = {}
+
         self._chunks = deque()
         self._event = asyncio.Event()  # Use an event to notify when chunks are available
         self._active = True  # This should be set to False to stop the generator
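The three added lines give each per-request streaming interface a metadata slot; agents.py (below) fills it indirectly by passing metadata={"job_id": ...} through server.send_message_to_agent. A self-contained toy showing the intent; the push method and tuple format are illustrative, not Letta code:

from collections import deque


class StreamingServerInterface:
    def __init__(self) -> None:
        self.metadata = {}       # store metadata passed from the server
        self._chunks = deque()   # buffered stream chunks, as in the real class

    def push(self, chunk: str) -> None:
        # Tag each chunk with the originating run so downstream job
        # bookkeeping can attribute streamed messages to it.
        self._chunks.append((self.metadata.get("job_id"), chunk))


iface = StreamingServerInterface()
iface.metadata = {"job_id": "run-123"}  # hypothetical run ID set by the server
iface.push("hello")
print(iface._chunks)  # deque([('run-123', 'hello')])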
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
CHANGED

@@ -3,13 +3,11 @@ from typing import TYPE_CHECKING, Optional
 
 from fastapi import APIRouter, Body, Depends, Header, HTTPException
 
-from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import LettaMessage, ToolCall
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, Message, UsageStatistics
 
 # TODO this belongs in a controller!
-from letta.server.rest_api.routers.v1.agents import send_message_to_agent
 from letta.server.rest_api.utils import get_letta_server
 
 if TYPE_CHECKING:

@@ -52,12 +50,10 @@ async def create_chat_completion(
     # TODO(charles) support multimodal parts
     assert isinstance(input_message.content, str)
 
-    return await send_message_to_agent(
-        server=server,
+    return await server.send_message_to_agent(
         agent_id=agent_id,
-
-
-        message=input_message.content,
+        actor=actor,
+        message=input_message.content,  # TODO: This is broken
         # Turn streaming ON
         stream_steps=True,
         stream_tokens=True,

@@ -71,12 +67,10 @@ async def create_chat_completion(
     # TODO(charles) support multimodal parts
     assert isinstance(input_message.content, str)
 
-    response_messages = await send_message_to_agent(
-        server=server,
+    response_messages = await server.send_message_to_agent(
         agent_id=agent_id,
-
-
-        message=input_message.content,
+        actor=actor,
+        message=input_message.content,  # TODO: This is broken
         # Turn streaming OFF
         stream_steps=False,
         stream_tokens=False,
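Both call sites above now go through a method on SyncServer rather than a helper imported from the agents router (the old helper is deleted in agents.py below, and server.py grows by +139 lines in this release, which is where it moved). The sketch below reconstructs the method's rough shape purely from the call sites visible in this diff; parameter names beyond those call sites, defaults, and ordering are assumptions, not the actual server.py code:

from typing import List, Optional, Union


class SyncServer:
    async def send_message_to_agent(
        self,
        agent_id: str,
        actor: "User",                        # resolved via user_manager at the route layer
        messages: Optional[Union[List["Message"], List["MessageCreate"]]] = None,
        message: Optional[str] = None,        # single-string path used by this proxy ("TODO: This is broken")
        stream_steps: bool = False,
        stream_tokens: bool = False,
        use_assistant_message: bool = True,
        assistant_message_tool_name: str = "send_message",
        assistant_message_tool_kwarg: str = "message",
        metadata: Optional[dict] = None,      # e.g. {"job_id": run.id}, stored on the streaming interface
    ):
        ...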
letta/server/rest_api/routers/v1/__init__.py
CHANGED

@@ -4,8 +4,10 @@ from letta.server.rest_api.routers.v1.health import router as health_router
 from letta.server.rest_api.routers.v1.jobs import router as jobs_router
 from letta.server.rest_api.routers.v1.llms import router as llm_router
 from letta.server.rest_api.routers.v1.providers import router as providers_router
+from letta.server.rest_api.routers.v1.runs import router as runs_router
 from letta.server.rest_api.routers.v1.sandbox_configs import router as sandbox_configs_router
 from letta.server.rest_api.routers.v1.sources import router as sources_router
+from letta.server.rest_api.routers.v1.tags import router as tags_router
 from letta.server.rest_api.routers.v1.tools import router as tools_router
 
 ROUTERS = [

@@ -18,4 +20,6 @@ ROUTERS = [
     health_router,
     sandbox_configs_router,
     providers_router,
+    runs_router,
+    tags_router,
 ]
letta/server/rest_api/routers/v1/agents.py
CHANGED

@@ -1,10 +1,8 @@
-import asyncio
-import warnings
 from datetime import datetime
 from typing import List, Optional, Union
 
 from fastapi import APIRouter, BackgroundTasks, Body, Depends, Header, HTTPException, Query, status
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse
 from pydantic import Field
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG

@@ -12,19 +10,18 @@ from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.schemas.agent import AgentState, CreateAgent, UpdateAgent
 from letta.schemas.block import Block, BlockUpdate, CreateBlock  # , BlockLabelUpdate, BlockLimitUpdate
-from letta.schemas.
-from letta.schemas.
-from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, LettaMessageUnion
+from letta.schemas.job import JobStatus, JobUpdate
+from letta.schemas.letta_message import LettaMessageUnion
 from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.memory import ArchivalMemorySummary, ContextWindowOverview, CreateArchivalMemory, Memory, RecallMemorySummary
-from letta.schemas.message import Message,
+from letta.schemas.message import Message, MessageUpdate
 from letta.schemas.passage import Passage
+from letta.schemas.run import Run
 from letta.schemas.source import Source
 from letta.schemas.tool import Tool
 from letta.schemas.user import User
-from letta.server.rest_api.
-from letta.server.rest_api.utils import get_letta_server, sse_async_generator
+from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
 
 # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally

@@ -46,9 +43,9 @@ def list_agents(
     ),
     server: "SyncServer" = Depends(get_letta_server),
     user_id: Optional[str] = Header(None, alias="user_id"),
-    cursor: Optional[
+    cursor: Optional[str] = Query(None, description="Cursor for pagination"),
     limit: Optional[int] = Query(None, description="Limit for pagination"),
-
+    query_text: Optional[str] = Query(None, description="Search agents by name"),
 ):
     """
     List all agents associated with a given user.

@@ -63,6 +60,7 @@ def list_agents(
             "tags": tags,
             "match_all_tags": match_all_tags,
             "name": name,
+            "query_text": query_text,
         }.items()
         if value is not None
     }

@@ -155,6 +153,18 @@ def remove_tool_from_agent(
     return server.agent_manager.detach_tool(agent_id=agent_id, tool_id=tool_id, actor=actor)
 
 
+@router.patch("/{agent_id}/reset-messages", response_model=AgentState, operation_id="reset_messages")
+def reset_messages(
+    agent_id: str,
+    add_default_initial_messages: bool = Query(default=False, description="If true, adds the default initial messages after resetting."),
+    server: "SyncServer" = Depends(get_letta_server),
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+):
+    """Resets the messages for an agent"""
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+    return server.agent_manager.reset_messages(agent_id=agent_id, actor=actor, add_default_initial_messages=add_default_initial_messages)
+
+
 @router.get("/{agent_id}", response_model=AgentState, operation_id="get_agent")
 def get_agent_state(
     agent_id: str,
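For reference, exercising the new reset-messages route is a single PATCH. A hedged example with the requests library; the base URL, port, agent ID, and user ID are placeholders, and the /v1/agents mount point is assumed from the standard Letta REST layout:

import requests

resp = requests.patch(
    "http://localhost:8283/v1/agents/agent-123/reset-messages",  # placeholder host/agent
    params={"add_default_initial_messages": True},
    headers={"user_id": "user-abc"},  # optional; the server falls back to the default user
)
resp.raise_for_status()
agent_state = resp.json()  # the endpoint returns the updated AgentState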
@@ -485,17 +495,16 @@ async def send_message(
     This endpoint accepts a message from a user and processes it through the agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=user_id)
-    result = await send_message_to_agent(
-        server=server,
+    result = await server.send_message_to_agent(
         agent_id=agent_id,
         actor=actor,
         messages=request.messages,
         stream_steps=False,
         stream_tokens=False,
         # Support for AssistantMessage
-        use_assistant_message=request.use_assistant_message,
-        assistant_message_tool_name=request.assistant_message_tool_name,
-        assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        use_assistant_message=request.config.use_assistant_message,
+        assistant_message_tool_name=request.config.assistant_message_tool_name,
+        assistant_message_tool_kwarg=request.config.assistant_message_tool_kwarg,
     )
     return result
 
@@ -526,16 +535,16 @@ async def send_message_streaming(
     """
 
     actor = server.user_manager.get_user_or_default(user_id=user_id)
-    result = await send_message_to_agent(
-        server=server,
+    result = await server.send_message_to_agent(
         agent_id=agent_id,
         actor=actor,
         messages=request.messages,
         stream_steps=True,
         stream_tokens=request.stream_tokens,
         # Support for AssistantMessage
-
-
+        use_assistant_message=request.config.use_assistant_message,
+        assistant_message_tool_name=request.config.assistant_message_tool_name,
+        assistant_message_tool_kwarg=request.config.assistant_message_tool_kwarg,
     )
     return result
 
@@ -546,21 +555,23 @@ async def process_message_background(
     actor: User,
     agent_id: str,
     messages: list,
+    use_assistant_message: bool,
     assistant_message_tool_name: str,
     assistant_message_tool_kwarg: str,
 ) -> None:
     """Background task to process the message and update job status."""
     try:
         # TODO(matt) we should probably make this stream_steps and log each step as it progresses, so the job update GET can see the total steps so far + partial usage?
-        result = await send_message_to_agent(
-            server=server,
+        result = await server.send_message_to_agent(
             agent_id=agent_id,
             actor=actor,
             messages=messages,
             stream_steps=False,  # NOTE(matt)
             stream_tokens=False,
+            use_assistant_message=use_assistant_message,
             assistant_message_tool_name=assistant_message_tool_name,
             assistant_message_tool_kwarg=assistant_message_tool_kwarg,
+            metadata={"job_id": job_id},  # Pass job_id through metadata
         )
 
         # Update job status to completed

@@ -571,6 +582,9 @@ async def process_message_background(
         )
         server.job_manager.update_job_by_id(job_id=job_id, job_update=job_update, actor=actor)
 
+        # Add job usage statistics
+        server.job_manager.add_job_usage(job_id=job_id, usage=result.usage, actor=actor)
+
     except Exception as e:
         # Update job status to failed
         job_update = JobUpdate(

@@ -584,7 +598,7 @@ async def process_message_background(
 
 @router.post(
     "/{agent_id}/messages/async",
-    response_model=
+    response_model=Run,
     operation_id="create_agent_message_async",
 )
 async def send_message_async(

@@ -595,152 +609,34 @@ async def send_message_async(
     user_id: Optional[str] = Header(None, alias="user_id"),
 ):
     """
-    Asynchronously process a user message and return a
-    The actual processing happens in the background, and the status can be checked using the
+    Asynchronously process a user message and return a run object.
+    The actual processing happens in the background, and the status can be checked using the run ID.
     """
     actor = server.user_manager.get_user_or_default(user_id=user_id)
 
     # Create a new job
-
+    run = Run(
         user_id=actor.id,
         status=JobStatus.created,
         metadata_={
             "job_type": "send_message_async",
             "agent_id": agent_id,
         },
+        request_config=request.config,
     )
-
+    run = server.job_manager.create_job(pydantic_job=run, actor=actor)
 
     # Add the background task
     background_tasks.add_task(
         process_message_background,
-        job_id=
+        job_id=run.id,
         server=server,
         actor=actor,
         agent_id=agent_id,
         messages=request.messages,
-
-
+        use_assistant_message=request.config.use_assistant_message,
+        assistant_message_tool_name=request.config.assistant_message_tool_name,
+        assistant_message_tool_kwarg=request.config.assistant_message_tool_kwarg,
     )
 
-    return
-
-
-# TODO: move this into server.py?
-async def send_message_to_agent(
-    server: SyncServer,
-    agent_id: str,
-    actor: User,
-    # role: MessageRole,
-    messages: Union[List[Message], List[MessageCreate]],
-    stream_steps: bool,
-    stream_tokens: bool,
-    # related to whether or not we return `LettaMessage`s or `Message`s
-    chat_completion_mode: bool = False,
-    timestamp: Optional[datetime] = None,
-    # Support for AssistantMessage
-    use_assistant_message: bool = True,
-    assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
-    assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
-) -> Union[StreamingResponse, LettaResponse]:
-    """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
-
-    # TODO: @charles is this the correct way to handle?
-    include_final_message = True
-
-    if not stream_steps and stream_tokens:
-        raise HTTPException(status_code=400, detail="stream_steps must be 'true' if stream_tokens is 'true'")
-
-    # For streaming response
-    try:
-
-        # TODO: move this logic into server.py
-
-        # Get the generator object off of the agent's streaming interface
-        # This will be attached to the POST SSE request used under-the-hood
-        letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
-
-        # Disable token streaming if not OpenAI
-        # TODO: cleanup this logic
-        llm_config = letta_agent.agent_state.llm_config
-        if stream_tokens and (llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint):
-            warnings.warn(
-                "Token streaming is only supported for models with type 'openai' or `inference.memgpt.ai` in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
-            )
-            stream_tokens = False
-
-        # Create a new interface per request
-        letta_agent.interface = StreamingServerInterface(use_assistant_message)
-        streaming_interface = letta_agent.interface
-        if not isinstance(streaming_interface, StreamingServerInterface):
-            raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
-
-        # Enable token-streaming within the request if desired
-        streaming_interface.streaming_mode = stream_tokens
-        # "chatcompletion mode" does some remapping and ignores inner thoughts
-        streaming_interface.streaming_chat_completion_mode = chat_completion_mode
-
-        # streaming_interface.allow_assistant_message = stream
-        # streaming_interface.function_call_legacy_mode = stream
-
-        # Allow AssistantMessage is desired by client
-        streaming_interface.assistant_message_tool_name = assistant_message_tool_name
-        streaming_interface.assistant_message_tool_kwarg = assistant_message_tool_kwarg
-
-        # Related to JSON buffer reader
-        streaming_interface.inner_thoughts_in_kwargs = (
-            llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
-        )
-
-        # Offload the synchronous message_func to a separate thread
-        streaming_interface.stream_start()
-        task = asyncio.create_task(
-            asyncio.to_thread(
-                server.send_messages,
-                actor=actor,
-                agent_id=agent_id,
-                messages=messages,
-                interface=streaming_interface,
-            )
-        )
-
-        if stream_steps:
-            # return a stream
-            return StreamingResponse(
-                sse_async_generator(
-                    streaming_interface.get_generator(),
-                    usage_task=task,
-                    finish_message=include_final_message,
-                ),
-                media_type="text/event-stream",
-            )
-
-        else:
-            # buffer the stream, then return the list
-            generated_stream = []
-            async for message in streaming_interface.get_generator():
-                assert (
-                    isinstance(message, LettaMessage) or isinstance(message, LegacyLettaMessage) or isinstance(message, MessageStreamStatus)
-                ), type(message)
-                generated_stream.append(message)
-                if message == MessageStreamStatus.done:
-                    break
-
-            # Get rid of the stream status messages
-            filtered_stream = [d for d in generated_stream if not isinstance(d, MessageStreamStatus)]
-            usage = await task
-
-            # By default the stream will be messages of type LettaMessage or LettaLegacyMessage
-            # If we want to convert these to Message, we can use the attached IDs
-            # NOTE: we will need to de-duplicate the Messsage IDs though (since Assistant->Inner+Func_Call)
-            # TODO: eventually update the interface to use `Message` and `MessageChunk` (new) inside the deque instead
-            return LettaResponse(messages=filtered_stream, usage=usage)
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        print(e)
-        import traceback
-
-        traceback.print_exc()
-        raise HTTPException(status_code=500, detail=f"{e}")
+    return run
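Putting the pieces of this file together with the new runs router (next section): POST /messages/async now returns a Run immediately, the background task does the work and records usage, and the client polls the run. A sketch under the same placeholder assumptions as above; the request-body field names are an approximation of LettaRequest with its new nested config and may differ in detail:

import time

import requests

BASE = "http://localhost:8283/v1"  # placeholder host and mount point

# Kick off async processing; the response is a Run, not a LettaResponse.
run = requests.post(
    f"{BASE}/agents/agent-123/messages/async",
    json={
        "messages": [{"role": "user", "text": "hello"}],
        "config": {"use_assistant_message": True},
    },
).json()

# Poll until the background task flips the status past created/running.
while run["status"] in ("created", "running"):
    time.sleep(1)
    run = requests.get(f"{BASE}/runs/{run['id']}").json()

usage = requests.get(f"{BASE}/runs/{run['id']}/usage").json()
messages = requests.get(f"{BASE}/runs/{run['id']}/messages").json()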
letta/server/rest_api/routers/v1/runs.py
ADDED

@@ -0,0 +1,137 @@
+from typing import List, Optional
+
+from fastapi import APIRouter, Depends, Header, HTTPException, Query
+
+from letta.orm.enums import JobType
+from letta.orm.errors import NoResultFound
+from letta.schemas.enums import JobStatus, MessageRole
+from letta.schemas.letta_message import LettaMessageUnion
+from letta.schemas.openai.chat_completion_response import UsageStatistics
+from letta.schemas.run import Run
+from letta.server.rest_api.utils import get_letta_server
+from letta.server.server import SyncServer
+
+router = APIRouter(prefix="/runs", tags=["runs"])
+
+
+@router.get("/", response_model=List[Run], operation_id="list_runs")
+def list_runs(
+    server: "SyncServer" = Depends(get_letta_server),
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+):
+    """
+    List all runs.
+    """
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
+    return [Run.from_job(job) for job in server.job_manager.list_jobs(actor=actor, job_type=JobType.RUN)]
+
+
+@router.get("/active", response_model=List[Run], operation_id="list_active_runs")
+def list_active_runs(
+    server: "SyncServer" = Depends(get_letta_server),
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+):
+    """
+    List all active runs.
+    """
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
+    active_runs = server.job_manager.list_jobs(actor=actor, statuses=[JobStatus.created, JobStatus.running], job_type=JobType.RUN)
+
+    return [Run.from_job(job) for job in active_runs]
+
+
+@router.get("/{run_id}", response_model=Run, operation_id="get_run")
+def get_run(
+    run_id: str,
+    user_id: Optional[str] = Header(None, alias="user_id"),
+    server: "SyncServer" = Depends(get_letta_server),
+):
+    """
+    Get the status of a run.
+    """
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
+    try:
+        job = server.job_manager.get_job_by_id(job_id=run_id, actor=actor)
+        return Run.from_job(job)
+    except NoResultFound:
+        raise HTTPException(status_code=404, detail="Run not found")
+
+
+@router.get("/{run_id}/messages", response_model=List[LettaMessageUnion], operation_id="get_run_messages")
+async def get_run_messages(
+    run_id: str,
+    server: "SyncServer" = Depends(get_letta_server),
+    user_id: Optional[str] = Header(None, alias="user_id"),
+    cursor: Optional[str] = Query(None, description="Cursor for pagination"),
+    limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
+    ascending: bool = Query(True, description="Sort order by creation time"),
+    role: Optional[MessageRole] = Query(None, description="Filter by role"),
+):
+    """
+    Get messages associated with a run with filtering options.
+
+    Args:
+        run_id: ID of the run
+        cursor: Cursor for pagination
+        limit: Maximum number of messages to return
+        ascending: Sort order by creation time
+        role: Filter by role (user/assistant/system/tool)
+        return_message_object: Whether to return Message objects or LettaMessage objects
+        user_id: ID of the user making the request
+
+    Returns:
+        A list of messages associated with the run. Default is List[LettaMessage].
+    """
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
+    try:
+        messages = server.job_manager.get_run_messages_cursor(
+            run_id=run_id,
+            actor=actor,
+            limit=limit,
+            cursor=cursor,
+            ascending=ascending,
+            role=role,
+        )
+        return messages
+    except NoResultFound as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+
+@router.get("/{run_id}/usage", response_model=UsageStatistics, operation_id="get_run_usage")
+def get_run_usage(
+    run_id: str,
+    user_id: Optional[str] = Header(None, alias="user_id"),
+    server: "SyncServer" = Depends(get_letta_server),
+):
+    """
+    Get usage statistics for a run.
+    """
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
+    try:
+        usage = server.job_manager.get_job_usage(job_id=run_id, actor=actor)
+        return usage
+    except NoResultFound:
+        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
+
+
+@router.delete("/{run_id}", response_model=Run, operation_id="delete_run")
+def delete_run(
+    run_id: str,
+    user_id: Optional[str] = Header(None, alias="user_id"),
+    server: "SyncServer" = Depends(get_letta_server),
+):
+    """
+    Delete a run by its run_id.
+    """
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
+    try:
+        job = server.job_manager.delete_job_by_id(job_id=run_id, actor=actor)
+        return Run.from_job(job)
+    except NoResultFound:
+        raise HTTPException(status_code=404, detail="Run not found")
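Every handler above delegates to job_manager and converts the result with Run.from_job, i.e. a run is a thin view over a job row tagged JobType.RUN rather than a new table. The actual Run schema is added in letta/schemas/run.py (+61 lines) but its body is not shown in this diff, so the conversion below is a speculative dataclass sketch of what from_job plausibly does:

from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Job:
    id: str
    user_id: str
    status: str
    metadata_: dict = field(default_factory=dict)
    request_config: Optional[dict] = None


@dataclass
class Run:
    id: str
    user_id: str
    status: str
    metadata_: dict
    request_config: Optional[dict]

    @classmethod
    def from_job(cls, job: Job) -> "Run":
        # A run borrows the job's identity and state wholesale; no extra storage.
        return cls(
            id=job.id,
            user_id=job.user_id,
            status=job.status,
            metadata_=job.metadata_,
            request_config=job.request_config,
        )


run = Run.from_job(Job(id="run-123", user_id="user-abc", status="created"))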
letta/server/rest_api/routers/v1/tags.py
ADDED

@@ -0,0 +1,27 @@
+from typing import TYPE_CHECKING, List, Optional
+
+from fastapi import APIRouter, Depends, Header, Query
+
+from letta.server.rest_api.utils import get_letta_server
+
+if TYPE_CHECKING:
+    from letta.server.server import SyncServer
+
+
+router = APIRouter(prefix="/tags", tags=["tag", "admin"])
+
+
+@router.get("/", tags=["admin"], response_model=List[str], operation_id="list_tags")
+def get_tags(
+    cursor: Optional[str] = Query(None),
+    limit: Optional[int] = Query(50),
+    server: "SyncServer" = Depends(get_letta_server),
+    query_text: Optional[str] = Query(None),
+    user_id: Optional[str] = Header(None, alias="user_id"),
+):
+    """
+    Get a list of all tags in the database
+    """
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+    tags = server.agent_manager.list_tags(actor=actor, cursor=cursor, limit=limit, query_text=query_text)
+    return tags
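Calling the new route is a single paginated GET. An illustrative request; the host, user ID, and the /v1 mount point are placeholders/assumptions:

import requests

tags = requests.get(
    "http://localhost:8283/v1/tags/",
    params={"query_text": "prod", "limit": 10},
    headers={"user_id": "user-abc"},
).json()
print(tags)  # a plain list of tag strings, e.g. ["prod", "prod-eu"]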
letta/server/rest_api/utils.py
CHANGED

@@ -3,7 +3,7 @@ import json
 import os
 import warnings
 from enum import Enum
-from typing import AsyncGenerator, Optional, Union
+from typing import TYPE_CHECKING, AsyncGenerator, Optional, Union
 
 from fastapi import Header
 from pydantic import BaseModel

@@ -11,7 +11,9 @@ from pydantic import BaseModel
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
 from letta.schemas.usage import LettaUsageStatistics
 from letta.server.rest_api.interface import StreamingServerInterface
-
+
+if TYPE_CHECKING:
+    from letta.server.server import SyncServer
 
 # from letta.orm.user import User
 # from letta.orm.utilities import get_db_session

@@ -86,7 +88,7 @@ async def sse_async_generator(
 
 
 # TODO: why does this double up the interface?
-def get_letta_server() -> SyncServer:
+def get_letta_server() -> "SyncServer":
     # Check if a global server is already instantiated
     from letta.server.rest_api.app import server
 
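The change above is the standard recipe for breaking a runtime import cycle while keeping type hints: import the type only under TYPE_CHECKING and quote the annotation (utils.py is imported by the routers, which server.py in turn pulls in). A generic, self-contained version of the pattern, with hypothetical modules a/b standing in for utils.py and server.py:

# a.py -- needs B only as a type; b.py imports a.py at runtime.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from b import B  # evaluated by type checkers only, never at runtime


def get_b() -> "B":
    from b import B  # deferred to call time, after both modules are loaded

    return B()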