letta-nightly 0.11.7.dev20251007104119__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/letta_llm_request_adapter.py +0 -1
- letta/adapters/letta_llm_stream_adapter.py +7 -2
- letta/adapters/simple_llm_request_adapter.py +88 -0
- letta/adapters/simple_llm_stream_adapter.py +192 -0
- letta/agents/agent_loop.py +6 -0
- letta/agents/ephemeral_summary_agent.py +2 -1
- letta/agents/helpers.py +142 -6
- letta/agents/letta_agent.py +13 -33
- letta/agents/letta_agent_batch.py +2 -4
- letta/agents/letta_agent_v2.py +87 -77
- letta/agents/letta_agent_v3.py +899 -0
- letta/agents/voice_agent.py +2 -6
- letta/constants.py +8 -4
- letta/errors.py +40 -0
- letta/functions/function_sets/base.py +84 -4
- letta/functions/function_sets/multi_agent.py +0 -3
- letta/functions/schema_generator.py +113 -71
- letta/groups/dynamic_multi_agent.py +3 -2
- letta/groups/helpers.py +1 -2
- letta/groups/round_robin_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +17 -17
- letta/groups/supervisor_multi_agent.py +84 -80
- letta/helpers/converters.py +3 -0
- letta/helpers/message_helper.py +4 -0
- letta/helpers/tool_rule_solver.py +92 -5
- letta/interfaces/anthropic_streaming_interface.py +409 -0
- letta/interfaces/gemini_streaming_interface.py +296 -0
- letta/interfaces/openai_streaming_interface.py +752 -1
- letta/llm_api/anthropic_client.py +126 -16
- letta/llm_api/bedrock_client.py +4 -2
- letta/llm_api/deepseek_client.py +4 -1
- letta/llm_api/google_vertex_client.py +123 -42
- letta/llm_api/groq_client.py +4 -1
- letta/llm_api/llm_api_tools.py +11 -4
- letta/llm_api/llm_client_base.py +6 -2
- letta/llm_api/openai.py +32 -2
- letta/llm_api/openai_client.py +423 -18
- letta/llm_api/xai_client.py +4 -1
- letta/main.py +9 -5
- letta/memory.py +1 -0
- letta/orm/__init__.py +1 -1
- letta/orm/agent.py +10 -0
- letta/orm/block.py +7 -16
- letta/orm/blocks_agents.py +8 -2
- letta/orm/files_agents.py +2 -0
- letta/orm/job.py +7 -5
- letta/orm/mcp_oauth.py +1 -0
- letta/orm/message.py +21 -6
- letta/orm/organization.py +2 -0
- letta/orm/provider.py +6 -2
- letta/orm/run.py +71 -0
- letta/orm/sandbox_config.py +7 -1
- letta/orm/sqlalchemy_base.py +0 -306
- letta/orm/step.py +6 -5
- letta/orm/step_metrics.py +5 -5
- letta/otel/tracing.py +28 -3
- letta/plugins/defaults.py +4 -4
- letta/prompts/system_prompts/__init__.py +2 -0
- letta/prompts/system_prompts/letta_v1.py +25 -0
- letta/schemas/agent.py +3 -2
- letta/schemas/agent_file.py +9 -3
- letta/schemas/block.py +23 -10
- letta/schemas/enums.py +21 -2
- letta/schemas/job.py +17 -4
- letta/schemas/letta_message_content.py +71 -2
- letta/schemas/letta_stop_reason.py +5 -5
- letta/schemas/llm_config.py +53 -3
- letta/schemas/memory.py +1 -1
- letta/schemas/message.py +504 -117
- letta/schemas/openai/responses_request.py +64 -0
- letta/schemas/providers/__init__.py +2 -0
- letta/schemas/providers/anthropic.py +16 -0
- letta/schemas/providers/ollama.py +115 -33
- letta/schemas/providers/openrouter.py +52 -0
- letta/schemas/providers/vllm.py +2 -1
- letta/schemas/run.py +48 -42
- letta/schemas/step.py +2 -2
- letta/schemas/step_metrics.py +1 -1
- letta/schemas/tool.py +15 -107
- letta/schemas/tool_rule.py +88 -5
- letta/serialize_schemas/marshmallow_agent.py +1 -0
- letta/server/db.py +86 -408
- letta/server/rest_api/app.py +61 -10
- letta/server/rest_api/dependencies.py +14 -0
- letta/server/rest_api/redis_stream_manager.py +19 -8
- letta/server/rest_api/routers/v1/agents.py +364 -292
- letta/server/rest_api/routers/v1/blocks.py +14 -20
- letta/server/rest_api/routers/v1/identities.py +45 -110
- letta/server/rest_api/routers/v1/internal_templates.py +21 -0
- letta/server/rest_api/routers/v1/jobs.py +23 -6
- letta/server/rest_api/routers/v1/messages.py +1 -1
- letta/server/rest_api/routers/v1/runs.py +126 -85
- letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
- letta/server/rest_api/routers/v1/tools.py +281 -594
- letta/server/rest_api/routers/v1/voice.py +1 -1
- letta/server/rest_api/streaming_response.py +29 -29
- letta/server/rest_api/utils.py +122 -64
- letta/server/server.py +160 -887
- letta/services/agent_manager.py +236 -919
- letta/services/agent_serialization_manager.py +16 -0
- letta/services/archive_manager.py +0 -100
- letta/services/block_manager.py +211 -168
- letta/services/file_manager.py +1 -1
- letta/services/files_agents_manager.py +24 -33
- letta/services/group_manager.py +0 -142
- letta/services/helpers/agent_manager_helper.py +7 -2
- letta/services/helpers/run_manager_helper.py +85 -0
- letta/services/job_manager.py +96 -411
- letta/services/lettuce/__init__.py +6 -0
- letta/services/lettuce/lettuce_client_base.py +86 -0
- letta/services/mcp_manager.py +38 -6
- letta/services/message_manager.py +165 -362
- letta/services/organization_manager.py +0 -36
- letta/services/passage_manager.py +0 -345
- letta/services/provider_manager.py +0 -80
- letta/services/run_manager.py +301 -0
- letta/services/sandbox_config_manager.py +0 -234
- letta/services/step_manager.py +62 -39
- letta/services/summarizer/summarizer.py +9 -7
- letta/services/telemetry_manager.py +0 -16
- letta/services/tool_executor/builtin_tool_executor.py +35 -0
- letta/services/tool_executor/core_tool_executor.py +397 -2
- letta/services/tool_executor/files_tool_executor.py +3 -3
- letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
- letta/services/tool_executor/tool_execution_manager.py +6 -8
- letta/services/tool_executor/tool_executor_base.py +3 -3
- letta/services/tool_manager.py +85 -339
- letta/services/tool_sandbox/base.py +24 -13
- letta/services/tool_sandbox/e2b_sandbox.py +16 -1
- letta/services/tool_schema_generator.py +123 -0
- letta/services/user_manager.py +0 -99
- letta/settings.py +20 -4
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
- letta/agents/temporal/activities/__init__.py +0 -4
- letta/agents/temporal/activities/example_activity.py +0 -7
- letta/agents/temporal/activities/prepare_messages.py +0 -10
- letta/agents/temporal/temporal_agent_workflow.py +0 -56
- letta/agents/temporal/types.py +0 -25
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
letta/schemas/openai/responses_request.py
ADDED
@@ -0,0 +1,64 @@
+from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+
+from openai import NOT_GIVEN
+from openai.types import Metadata, Reasoning, ResponsesModel
+
+# from openai._types import Headers, Query, Body
+from openai.types.responses import (
+    ResponseIncludable,
+    ResponseInputParam,
+    ResponsePromptParam,
+    ResponseTextConfigParam,
+    ToolParam,
+    response_create_params,
+)
+
+# import httpx
+from pydantic import BaseModel, Field
+
+
+class ResponsesRequest(BaseModel):
+    background: Optional[bool] = Field(default=NOT_GIVEN)
+    include: Optional[List[ResponseIncludable]] = Field(default=NOT_GIVEN)
+    input: Optional[Union[str, ResponseInputParam]] = Field(default=NOT_GIVEN)
+    instructions: Optional[str] = Field(default=NOT_GIVEN)
+    max_output_tokens: Optional[int] = Field(default=NOT_GIVEN)
+    max_tool_calls: Optional[int] = Field(default=NOT_GIVEN)
+    metadata: Optional[Metadata] = Field(default=NOT_GIVEN)
+    model: Optional[ResponsesModel] = Field(default=NOT_GIVEN)
+    parallel_tool_calls: Optional[bool] = Field(default=NOT_GIVEN)
+    previous_response_id: Optional[str] = Field(default=NOT_GIVEN)
+    prompt: Optional[ResponsePromptParam] = Field(default=NOT_GIVEN)
+    prompt_cache_key: Optional[str] = Field(default=NOT_GIVEN)
+    reasoning: Optional[Reasoning] = Field(default=NOT_GIVEN)
+    safety_identifier: Optional[str] = Field(default=NOT_GIVEN)
+    service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = Field(default=NOT_GIVEN)
+    store: Optional[bool] = Field(default=NOT_GIVEN)
+    stream: Optional[Literal[False]] = Field(default=NOT_GIVEN)
+    stream_options: Optional[response_create_params.StreamOptions] = Field(default=NOT_GIVEN)
+    temperature: Optional[float] = Field(default=NOT_GIVEN)
+    text: Optional[ResponseTextConfigParam] = Field(default=NOT_GIVEN)
+    tool_choice: Optional[response_create_params.ToolChoice] = Field(default=NOT_GIVEN)
+    tools: Optional[Iterable[ToolParam]] = Field(default=NOT_GIVEN)
+    top_logprobs: Optional[int] = Field(default=NOT_GIVEN)
+    top_p: Optional[float] = Field(default=NOT_GIVEN)
+    truncation: Optional[Literal["auto", "disabled"]] = Field(default=NOT_GIVEN)
+    user: Optional[str] = Field(default=NOT_GIVEN)
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    # extra_headers: Headers | None = (None,)
+    # extra_query: Query | None = (None,)
+    # extra_body: Body | None = (None,)
+    # timeout: float | httpx.Timeout | None | NotGiven = (NOT_GIVEN,)
+
+    def model_dump(self, **kwargs) -> Dict[str, Any]:
+        """Custom model_dump that properly serializes complex OpenAI types for JSON compatibility."""
+        # Force JSON mode to ensure full serialization of complex OpenAI types
+        # This prevents SerializationIterator objects from being created
+        kwargs["mode"] = "json"
+
+        # Get the JSON-serialized dump
+        data = super().model_dump(**kwargs)
+
+        # The API expects dicts, which JSON mode provides
+        return data
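For reference, a minimal sketch of how a request model like this might be fed to the OpenAI Responses API. The exclude_unset filtering and the client call are assumptions for illustration, not code from the package:

from openai import OpenAI

from letta.schemas.openai.responses_request import ResponsesRequest

# Build a request; fields left at their NOT_GIVEN defaults stay unset.
req = ResponsesRequest(model="gpt-4.1", input="Hello", temperature=0.2)

# Drop unset fields so NOT_GIVEN sentinels never reach the wire (assumed usage).
payload = req.model_dump(exclude_unset=True)

client = OpenAI()
response = client.responses.create(**payload)  # illustrative Responses API call
print(response.output_text)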
letta/schemas/providers/__init__.py
CHANGED
@@ -14,6 +14,7 @@ from .lmstudio import LMStudioOpenAIProvider
 from .mistral import MistralProvider
 from .ollama import OllamaProvider
 from .openai import OpenAIProvider
+from .openrouter import OpenRouterProvider
 from .together import TogetherProvider
 from .vllm import VLLMProvider
 from .xai import XAIProvider
@@ -42,4 +43,5 @@ __all__ = [
     "TogetherProvider",
     "VLLMProvider",  # Replaces ChatCompletions and Completions
     "XAIProvider",
+    "OpenRouterProvider",
 ]
letta/schemas/providers/anthropic.py
CHANGED
@@ -67,6 +67,11 @@ MODEL_LIST = [
         "name": "claude-sonnet-4-20250514",
         "context_window": 200000,
     },
+    # 4.5
+    {
+        "name": "claude-sonnet-4-5-20250929",
+        "context_window": 200000,
+    },
     ## Haiku
     # 3.0
     {
@@ -143,6 +148,17 @@ class AnthropicProvider(Provider):
                 warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                 model["context_window"] = 200000
 
+            # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
+            try:
+                from letta.settings import model_settings
+
+                if model_settings.anthropic_sonnet_1m and (
+                    model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
+                ):
+                    model["context_window"] = 1_000_000
+            except Exception:
+                pass
+
             max_tokens = 8192
             if "claude-3-opus" in model["id"]:
                 max_tokens = 4096
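The override above reduces to a small decision rule. A distilled, self-contained sketch (the anthropic_sonnet_1m flag lives in letta.settings per the diff; the standalone function below is illustrative and not part of the package):

def effective_context_window(model_id: str, reported: int, sonnet_1m_enabled: bool) -> int:
    # Mirror of the override: Sonnet 4.x models get a 1M-token window when the flag is on.
    # Note that startswith("claude-sonnet-4") already matches the 4.5 IDs as well.
    if sonnet_1m_enabled and model_id.startswith("claude-sonnet-4"):
        return 1_000_000
    return reported

assert effective_context_window("claude-sonnet-4-5-20250929", 200_000, True) == 1_000_000
assert effective_context_window("claude-sonnet-4-20250514", 200_000, False) == 200_000
assert effective_context_window("claude-3-opus-20240229", 200_000, True) == 200_000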
letta/schemas/providers/ollama.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
 import aiohttp
 from pydantic import Field
 
-from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE
+from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE
 from letta.log import get_logger
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
@@ -27,82 +27,163 @@ class OllamaProvider(OpenAIProvider):
         ..., description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
     )
 
+    @property
+    def raw_base_url(self) -> str:
+        """Base URL for native Ollama /api endpoints (no trailing /v1)."""
+        if self.base_url.endswith("/v1"):
+            return self.base_url[: -len("/v1")]
+        return self.base_url
+
+    @property
+    def openai_compat_base_url(self) -> str:
+        """Base URL with /v1 appended for OpenAI-compatible clients if ever needed.
+
+        Note: We do not use OpenAI chat completions for Ollama, but expose this
+        helper to clarify intent and avoid duplicating logic elsewhere.
+        """
+        return self.base_url if self.base_url.endswith("/v1") else f"{self.base_url.rstrip('/')}" + "/v1"
+
     async def list_llm_models_async(self) -> list[LLMConfig]:
-        """List available LLM Models from Ollama
+        """List available LLM Models from Ollama.
+
+        Note: Older Ollama versions do not expose a "capabilities" field on /api/show.
+        We therefore avoid filtering on capabilities and instead infer support from
+        /api/show model_info (falling back to safe defaults).
 
-        https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
-
+        https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+        """
+        endpoint = f"{self.raw_base_url}/api/tags"
         async with aiohttp.ClientSession() as session:
             async with session.get(endpoint) as response:
                 if response.status != 200:
-
+                    # aiohttp: .text() is async
+                    error_text = await response.text()
+                    raise Exception(f"Failed to list Ollama models: {response.status} - {error_text}")
                 response_json = await response.json()
 
-        configs = []
-        for
-            model_name =
-
-            if not model_details or "completion" not in model_details.get("capabilities", []):
+        configs: list[LLMConfig] = []
+        for m in response_json.get("models", []):
+            model_name = m.get("name")
+            if not model_name:
                 continue
 
-
-
-            if
-
-
+            # Use /api/show to check capabilities, specifically tools support
+            details = await self._get_model_details_async(model_name)
+            if not details:
+                # If details cannot be fetched, skip to avoid tool errors later
+                continue
+            caps = details.get("capabilities") or []
+            if not isinstance(caps, list):
+                caps = []
+            if "tools" not in [str(c).lower() for c in caps]:
+                # Only include models that declare tools support
                continue
 
+            # Derive context window from /api/show model_info if available
+            context_window = None
+            model_info = details.get("model_info", {}) if isinstance(details, dict) else {}
+            architecture = model_info.get("general.architecture") if isinstance(model_info, dict) else None
+            if architecture:
+                ctx_len = model_info.get(f"{architecture}.context_length")
+                if ctx_len is not None:
+                    try:
+                        context_window = int(ctx_len)
+                    except Exception:
+                        context_window = None
             if context_window is None:
-                logger.warning(f"Ollama model {model_name} has no context window, using default {DEFAULT_CONTEXT_WINDOW}")
+                logger.warning(f"Ollama model {model_name} has no context window in /api/show, using default {DEFAULT_CONTEXT_WINDOW}")
                 context_window = DEFAULT_CONTEXT_WINDOW
 
+            # === Capability stubs ===
+            # Compute support flags from /api/show capabilities. These are not
+            # yet plumbed through LLMConfig, but are captured here for later use.
+            caps_lower = [str(c).lower() for c in caps]
+            supports_tools = "tools" in caps_lower
+            supports_thinking = "thinking" in caps_lower
+            supports_vision = "vision" in caps_lower
+            supports_completion = "completion" in caps_lower
+            _ = (supports_tools, supports_thinking, supports_vision, supports_completion)
+
             configs.append(
+                # Legacy Ollama using raw generate
+                # LLMConfig(
+                #     model=model_name,
+                #     model_endpoint_type="ollama",
+                #     model_endpoint=self.openai_compat_base_url,
+                #     model_wrapper=self.default_prompt_formatter,
+                #     context_window=context_window,
+                #     # Ollama specific
+                #     handle=self.get_handle(model_name),
+                #     provider_name=self.name,
+                #     provider_category=self.provider_category,
+                # )
+                # New "trust Ollama" version w/ pure OpenAI proxy
                 LLMConfig(
                     model=model_name,
-                    model_endpoint_type=
-                    model_endpoint=
-                    model_wrapper=self.default_prompt_formatter,
+                    model_endpoint_type="openai",
+                    model_endpoint=self.openai_compat_base_url,
+                    # model_wrapper=self.default_prompt_formatter,
                     context_window=context_window,
                     handle=self.get_handle(model_name),
                     provider_name=self.name,
                     provider_category=self.provider_category,
+                    # put_inner_thoughts_in_kwargs=True,
+                    # enable_reasoner=supports_thinking,
                 )
             )
         return configs
 
     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
-        """List available embedding models from Ollama
+        """List available embedding models from Ollama.
+
+        We infer embedding support via model_info.*.embedding_length when available.
 
         https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
         """
-        endpoint = f"{self.
+        endpoint = f"{self.raw_base_url}/api/tags"
         async with aiohttp.ClientSession() as session:
            async with session.get(endpoint) as response:
                 if response.status != 200:
-
+                    error_text = await response.text()
+                    raise Exception(f"Failed to list Ollama models: {response.status} - {error_text}")
                 response_json = await response.json()
 
-        configs = []
+        configs: list[EmbeddingConfig] = []
         for model in response_json.get("models", []):
             model_name = model["name"]
             model_details = await self._get_model_details_async(model_name)
-
+
+            if not model_details:
+                continue
+
+            # Filter to true embedding models via capabilities
+            caps = model_details.get("capabilities") or []
+            if not isinstance(caps, list):
+                caps = []
+            if "embedding" not in [str(c).lower() for c in caps]:
                 continue
 
             embedding_dim = None
             model_info = model_details.get("model_info", {})
-
-
-
+            architecture = model_info.get("general.architecture")
+            if architecture:
+                embedding_length = model_info.get(f"{architecture}.embedding_length")
+                if embedding_length is not None:
+                    try:
+                        embedding_dim = int(embedding_length)
+                    except Exception:
+                        pass
 
             if not embedding_dim:
-
-
+                # Skip models without a reported embedding dimension to avoid DB dimension mismatches
+                continue
 
             configs.append(
                 EmbeddingConfig(
                     embedding_model=model_name,
-
-
+                    # Use OpenAI-compatible proxy for embeddings
+                    embedding_endpoint_type=ProviderType.openai,
+                    embedding_endpoint=self.openai_compat_base_url,
                     embedding_dim=embedding_dim,
                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
                     handle=self.get_handle(model_name, is_embedding=True),
@@ -112,11 +193,12 @@ class OllamaProvider(OpenAIProvider):
 
     async def _get_model_details_async(self, model_name: str) -> dict | None:
         """Get detailed information for a specific model from /api/show."""
-        endpoint = f"{self.
+        endpoint = f"{self.raw_base_url}/api/show"
         payload = {"name": model_name}
 
         try:
-
+            timeout = aiohttp.ClientTimeout(total=2.0)
+            async with aiohttp.ClientSession(timeout=timeout) as session:
                 async with session.post(endpoint, json=payload) as response:
                     if response.status != 200:
                         error_text = await response.text()
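The capability and context-window checks above boil down to inspecting the /api/show response. A self-contained sketch with an illustrative payload (field names follow the Ollama API; the helper itself is not part of the package):

def parse_show_response(details: dict) -> tuple[bool, int | None]:
    # Lower-cased capability list, as in the provider code above.
    caps = [str(c).lower() for c in (details.get("capabilities") or [])]
    supports_tools = "tools" in caps

    # Context window lives under model_info["<architecture>.context_length"].
    model_info = details.get("model_info") or {}
    arch = model_info.get("general.architecture")
    ctx = model_info.get(f"{arch}.context_length") if arch else None
    return supports_tools, int(ctx) if ctx is not None else None

sample = {
    "capabilities": ["completion", "tools"],
    "model_info": {"general.architecture": "llama", "llama.context_length": 131072},
}
print(parse_show_response(sample))  # (True, 131072)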
letta/schemas/providers/openrouter.py
ADDED
@@ -0,0 +1,52 @@
+from typing import Literal
+
+from pydantic import Field
+
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+from letta.log import get_logger
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.openai import OpenAIProvider
+
+logger = get_logger(__name__)
+
+# ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
+# DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
+# DEFAULT_EMBEDDING_BATCH_SIZE = 1024
+
+
+class OpenRouterProvider(OpenAIProvider):
+    provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    api_key: str = Field(..., description="API key for the OpenRouter API.")
+    base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.")
+    handle_base: str | None = Field(None, description="Custom handle base name for model handles (e.g., 'custom' instead of 'openrouter').")
+
+    def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
+        """
+        This handles filtering out LLM Models by provider that meet Letta's requirements.
+        """
+        configs = []
+        for model in data:
+            check = self._do_model_checks_for_name_and_context_size(model)
+            if check is None:
+                continue
+            model_name, context_window_size = check
+
+            handle = self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name)
+
+            config = LLMConfig(
+                model=model_name,
+                model_endpoint_type="openai",
+                model_endpoint=self.base_url,
+                context_window=context_window_size,
+                handle=handle,
+                provider_name=self.name,
+                provider_category=self.provider_category,
+            )
+
+            config = self._set_model_parameter_tuned_defaults(model_name, config)
+            configs.append(config)
+
+        return configs
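A minimal instantiation sketch for the new provider. The values are placeholders, and the assumption that the Provider base class requires a name is inferred from provider_name=self.name above rather than stated in this diff:

from letta.schemas.providers import OpenRouterProvider

provider = OpenRouterProvider(
    name="openrouter",            # assumed required by the Provider base class
    api_key="sk-or-placeholder",  # placeholder key
    handle_base="custom",         # optional: handles read "custom/<model>" instead of "openrouter/<model>"
)
# Models are discovered through the inherited OpenAI-style /models listing;
# _list_llm_models() then filters them and applies handle_base as shown above.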
letta/schemas/providers/vllm.py
CHANGED
@@ -23,6 +23,7 @@ class VLLMProvider(Provider):
     default_prompt_formatter: str | None = Field(
         default=None, description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
     )
+    handle_base: str | None = Field(None, description="Custom handle base name for model handles (e.g., 'custom' instead of 'vllm').")
 
     async def list_llm_models_async(self) -> list[LLMConfig]:
         from letta.llm_api.openai import openai_get_model_list_async
@@ -43,7 +44,7 @@ class VLLMProvider(Provider):
                 model_endpoint=base_url,
                 model_wrapper=self.default_prompt_formatter,
                 context_window=model["max_model_len"],
-                handle=self.get_handle(model_name),
+                handle=self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name),
                 provider_name=self.name,
                 provider_category=self.provider_category,
             )
letta/schemas/run.py
CHANGED
@@ -1,62 +1,68 @@
+from datetime import datetime
 from typing import Optional
 
-from pydantic import Field
+from pydantic import ConfigDict, Field
 
-from letta.
-from letta.schemas.
+from letta.helpers.datetime_helpers import get_utc_time
+from letta.schemas.enums import RunStatus
+from letta.schemas.job import LettaRequestConfig
+from letta.schemas.letta_base import LettaBase
 from letta.schemas.letta_stop_reason import StopReasonType
 
 
-class RunBase(
-    """Base class for Run schemas that inherits from JobBase but uses 'run' prefix for IDs"""
-
+class RunBase(LettaBase):
     __id_prefix__ = "run"
-    job_type: JobType = JobType.RUN
 
 
 class Run(RunBase):
     """
-    Representation of a run
-
+    Representation of a run - a conversation or processing session for an agent.
+    Runs track when agents process messages and maintain the relationship between agents, steps, and messages.
 
     Parameters:
         id (str): The unique identifier of the run (prefixed with 'run-').
-        status (JobStatus): The status of the run.
-        created_at (datetime): The
-        completed_at (datetime): The
-
+        status (JobStatus): The current status of the run.
+        created_at (datetime): The timestamp when the run was created.
+        completed_at (datetime): The timestamp when the run was completed.
+        agent_id (str): The unique identifier of the agent associated with the run.
+        stop_reason (StopReasonType): The reason why the run was stopped.
+        background (bool): Whether the run was created in background mode.
+        metadata (dict): Additional metadata for the run.
+        request_config (LettaRequestConfig): The request configuration for the run.
     """
 
     id: str = RunBase.generate_id_field()
-
+
+    # Core run fields
+    status: RunStatus = Field(default=RunStatus.created, description="The current status of the run.")
+    created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the run was created.")
+    completed_at: Optional[datetime] = Field(None, description="The timestamp when the run was completed.")
+
+    # Agent relationship
+    agent_id: str = Field(..., description="The unique identifier of the agent associated with the run.")
+
+    # Run configuration
+    background: Optional[bool] = Field(None, description="Whether the run was created in background mode.")
+    metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Additional metadata for the run.")
     request_config: Optional[LettaRequestConfig] = Field(None, description="The request configuration for the run.")
     stop_reason: Optional[StopReasonType] = Field(None, description="The reason why the run was stopped.")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        """
-        Convert this Run instance to a Job instance by replacing the ID prefix.
-        All other fields are copied as-is.
-
-        Returns:
-            A new Job instance with the same data but 'job-' prefix in ID
-        """
-        run_data = self.model_dump(exclude_none=True)
-        return Job(**run_data)
+    # Callback configuration
+    callback_url: Optional[str] = Field(None, description="If set, POST to this URL when the run completes.")
+    callback_sent_at: Optional[datetime] = Field(None, description="Timestamp when the callback was last attempted.")
+    callback_status_code: Optional[int] = Field(None, description="HTTP status code returned by the callback endpoint.")
+    callback_error: Optional[str] = Field(None, description="Optional error message from attempting to POST the callback endpoint.")
+
+    # Timing metrics (in nanoseconds for precision)
+    ttft_ns: Optional[int] = Field(None, description="Time to first token for a run in nanoseconds")
+    total_duration_ns: Optional[int] = Field(None, description="Total run duration in nanoseconds")
+
+
+class RunUpdate(RunBase):
+    """Update model for Run."""
+
+    status: Optional[RunStatus] = Field(None, description="The status of the run.")
+    completed_at: Optional[datetime] = Field(None, description="The timestamp when the run was completed.")
+    stop_reason: Optional[StopReasonType] = Field(None, description="The reason why the run was stopped.")
+    metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Additional metadata for the run.")
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
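A short usage sketch of the reworked Run schema. Values are illustrative; ID assignment comes from the base class and persistence is handled by the run manager elsewhere in the package:

from letta.schemas.run import Run, RunUpdate

run = Run(agent_id="agent-123")   # status defaults to RunStatus.created, created_at to now (UTC)
print(run.id, run.status)         # the id is generated with the "run-" prefix

# RunUpdate accepts metadata through its "metadata_" validation alias and ignores extra fields.
update = RunUpdate(metadata_={"source": "api"})
print(update.metadata)            # {'source': 'api'}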
letta/schemas/step.py
CHANGED
@@ -18,8 +18,8 @@ class Step(StepBase):
     origin: Optional[str] = Field(None, description="The surface that this agent step was initiated from.")
     organization_id: Optional[str] = Field(None, description="The unique identifier of the organization associated with the step.")
     provider_id: Optional[str] = Field(None, description="The unique identifier of the provider that was configured for this step")
-
-        None, description="The unique identifier of the
+    run_id: Optional[str] = Field(
+        None, description="The unique identifier of the run that this step belongs to. Only included for async calls."
     )
     agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
     provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
letta/schemas/step_metrics.py
CHANGED
@@ -13,7 +13,7 @@ class StepMetrics(StepMetricsBase):
     id: str = Field(..., description="The id of the step this metric belongs to (matches steps.id).")
     organization_id: Optional[str] = Field(None, description="The unique identifier of the organization.")
     provider_id: Optional[str] = Field(None, description="The unique identifier of the provider.")
-
+    run_id: Optional[str] = Field(None, description="The unique identifier of the run.")
     agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
     step_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the step in nanoseconds.")
     llm_request_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the llm request in nanoseconds.")