letta-nightly 0.11.7.dev20251006104136__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
Files changed (145)
  1. letta/adapters/letta_llm_adapter.py +1 -0
  2. letta/adapters/letta_llm_request_adapter.py +0 -1
  3. letta/adapters/letta_llm_stream_adapter.py +7 -2
  4. letta/adapters/simple_llm_request_adapter.py +88 -0
  5. letta/adapters/simple_llm_stream_adapter.py +192 -0
  6. letta/agents/agent_loop.py +6 -0
  7. letta/agents/ephemeral_summary_agent.py +2 -1
  8. letta/agents/helpers.py +142 -6
  9. letta/agents/letta_agent.py +13 -33
  10. letta/agents/letta_agent_batch.py +2 -4
  11. letta/agents/letta_agent_v2.py +87 -77
  12. letta/agents/letta_agent_v3.py +899 -0
  13. letta/agents/voice_agent.py +2 -6
  14. letta/constants.py +8 -4
  15. letta/errors.py +40 -0
  16. letta/functions/function_sets/base.py +84 -4
  17. letta/functions/function_sets/multi_agent.py +0 -3
  18. letta/functions/schema_generator.py +113 -71
  19. letta/groups/dynamic_multi_agent.py +3 -2
  20. letta/groups/helpers.py +1 -2
  21. letta/groups/round_robin_multi_agent.py +3 -2
  22. letta/groups/sleeptime_multi_agent.py +3 -2
  23. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  24. letta/groups/sleeptime_multi_agent_v3.py +17 -17
  25. letta/groups/supervisor_multi_agent.py +84 -80
  26. letta/helpers/converters.py +3 -0
  27. letta/helpers/message_helper.py +4 -0
  28. letta/helpers/tool_rule_solver.py +92 -5
  29. letta/interfaces/anthropic_streaming_interface.py +409 -0
  30. letta/interfaces/gemini_streaming_interface.py +296 -0
  31. letta/interfaces/openai_streaming_interface.py +752 -1
  32. letta/llm_api/anthropic_client.py +126 -16
  33. letta/llm_api/bedrock_client.py +4 -2
  34. letta/llm_api/deepseek_client.py +4 -1
  35. letta/llm_api/google_vertex_client.py +123 -42
  36. letta/llm_api/groq_client.py +4 -1
  37. letta/llm_api/llm_api_tools.py +11 -4
  38. letta/llm_api/llm_client_base.py +6 -2
  39. letta/llm_api/openai.py +32 -2
  40. letta/llm_api/openai_client.py +423 -18
  41. letta/llm_api/xai_client.py +4 -1
  42. letta/main.py +9 -5
  43. letta/memory.py +1 -0
  44. letta/orm/__init__.py +1 -1
  45. letta/orm/agent.py +10 -0
  46. letta/orm/block.py +7 -16
  47. letta/orm/blocks_agents.py +8 -2
  48. letta/orm/files_agents.py +2 -0
  49. letta/orm/job.py +7 -5
  50. letta/orm/mcp_oauth.py +1 -0
  51. letta/orm/message.py +21 -6
  52. letta/orm/organization.py +2 -0
  53. letta/orm/provider.py +6 -2
  54. letta/orm/run.py +71 -0
  55. letta/orm/sandbox_config.py +7 -1
  56. letta/orm/sqlalchemy_base.py +0 -306
  57. letta/orm/step.py +6 -5
  58. letta/orm/step_metrics.py +5 -5
  59. letta/otel/tracing.py +28 -3
  60. letta/plugins/defaults.py +4 -4
  61. letta/prompts/system_prompts/__init__.py +2 -0
  62. letta/prompts/system_prompts/letta_v1.py +25 -0
  63. letta/schemas/agent.py +3 -2
  64. letta/schemas/agent_file.py +9 -3
  65. letta/schemas/block.py +23 -10
  66. letta/schemas/enums.py +21 -2
  67. letta/schemas/job.py +17 -4
  68. letta/schemas/letta_message_content.py +71 -2
  69. letta/schemas/letta_stop_reason.py +5 -5
  70. letta/schemas/llm_config.py +53 -3
  71. letta/schemas/memory.py +1 -1
  72. letta/schemas/message.py +504 -117
  73. letta/schemas/openai/responses_request.py +64 -0
  74. letta/schemas/providers/__init__.py +2 -0
  75. letta/schemas/providers/anthropic.py +16 -0
  76. letta/schemas/providers/ollama.py +115 -33
  77. letta/schemas/providers/openrouter.py +52 -0
  78. letta/schemas/providers/vllm.py +2 -1
  79. letta/schemas/run.py +48 -42
  80. letta/schemas/step.py +2 -2
  81. letta/schemas/step_metrics.py +1 -1
  82. letta/schemas/tool.py +15 -107
  83. letta/schemas/tool_rule.py +88 -5
  84. letta/serialize_schemas/marshmallow_agent.py +1 -0
  85. letta/server/db.py +86 -408
  86. letta/server/rest_api/app.py +61 -10
  87. letta/server/rest_api/dependencies.py +14 -0
  88. letta/server/rest_api/redis_stream_manager.py +19 -8
  89. letta/server/rest_api/routers/v1/agents.py +364 -292
  90. letta/server/rest_api/routers/v1/blocks.py +14 -20
  91. letta/server/rest_api/routers/v1/identities.py +45 -110
  92. letta/server/rest_api/routers/v1/internal_templates.py +21 -0
  93. letta/server/rest_api/routers/v1/jobs.py +23 -6
  94. letta/server/rest_api/routers/v1/messages.py +1 -1
  95. letta/server/rest_api/routers/v1/runs.py +126 -85
  96. letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
  97. letta/server/rest_api/routers/v1/tools.py +281 -594
  98. letta/server/rest_api/routers/v1/voice.py +1 -1
  99. letta/server/rest_api/streaming_response.py +29 -29
  100. letta/server/rest_api/utils.py +122 -64
  101. letta/server/server.py +160 -887
  102. letta/services/agent_manager.py +236 -919
  103. letta/services/agent_serialization_manager.py +16 -0
  104. letta/services/archive_manager.py +0 -100
  105. letta/services/block_manager.py +211 -168
  106. letta/services/file_manager.py +1 -1
  107. letta/services/files_agents_manager.py +24 -33
  108. letta/services/group_manager.py +0 -142
  109. letta/services/helpers/agent_manager_helper.py +7 -2
  110. letta/services/helpers/run_manager_helper.py +85 -0
  111. letta/services/job_manager.py +96 -411
  112. letta/services/lettuce/__init__.py +6 -0
  113. letta/services/lettuce/lettuce_client_base.py +86 -0
  114. letta/services/mcp_manager.py +38 -6
  115. letta/services/message_manager.py +165 -362
  116. letta/services/organization_manager.py +0 -36
  117. letta/services/passage_manager.py +0 -345
  118. letta/services/provider_manager.py +0 -80
  119. letta/services/run_manager.py +301 -0
  120. letta/services/sandbox_config_manager.py +0 -234
  121. letta/services/step_manager.py +62 -39
  122. letta/services/summarizer/summarizer.py +9 -7
  123. letta/services/telemetry_manager.py +0 -16
  124. letta/services/tool_executor/builtin_tool_executor.py +35 -0
  125. letta/services/tool_executor/core_tool_executor.py +397 -2
  126. letta/services/tool_executor/files_tool_executor.py +3 -3
  127. letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
  128. letta/services/tool_executor/tool_execution_manager.py +6 -8
  129. letta/services/tool_executor/tool_executor_base.py +3 -3
  130. letta/services/tool_manager.py +85 -339
  131. letta/services/tool_sandbox/base.py +24 -13
  132. letta/services/tool_sandbox/e2b_sandbox.py +16 -1
  133. letta/services/tool_schema_generator.py +123 -0
  134. letta/services/user_manager.py +0 -99
  135. letta/settings.py +20 -4
  136. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
  137. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
  138. letta/agents/temporal/activities/__init__.py +0 -4
  139. letta/agents/temporal/activities/example_activity.py +0 -7
  140. letta/agents/temporal/activities/prepare_messages.py +0 -10
  141. letta/agents/temporal/temporal_agent_workflow.py +0 -56
  142. letta/agents/temporal/types.py +0 -25
  143. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
  144. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
  145. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
letta/schemas/openai/responses_request.py ADDED
@@ -0,0 +1,64 @@
+ from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+
+ from openai import NOT_GIVEN
+ from openai.types import Metadata, Reasoning, ResponsesModel
+
+ # from openai._types import Headers, Query, Body
+ from openai.types.responses import (
+     ResponseIncludable,
+     ResponseInputParam,
+     ResponsePromptParam,
+     ResponseTextConfigParam,
+     ToolParam,
+     response_create_params,
+ )
+
+ # import httpx
+ from pydantic import BaseModel, Field
+
+
+ class ResponsesRequest(BaseModel):
+     background: Optional[bool] = Field(default=NOT_GIVEN)
+     include: Optional[List[ResponseIncludable]] = Field(default=NOT_GIVEN)
+     input: Optional[Union[str, ResponseInputParam]] = Field(default=NOT_GIVEN)
+     instructions: Optional[str] = Field(default=NOT_GIVEN)
+     max_output_tokens: Optional[int] = Field(default=NOT_GIVEN)
+     max_tool_calls: Optional[int] = Field(default=NOT_GIVEN)
+     metadata: Optional[Metadata] = Field(default=NOT_GIVEN)
+     model: Optional[ResponsesModel] = Field(default=NOT_GIVEN)
+     parallel_tool_calls: Optional[bool] = Field(default=NOT_GIVEN)
+     previous_response_id: Optional[str] = Field(default=NOT_GIVEN)
+     prompt: Optional[ResponsePromptParam] = Field(default=NOT_GIVEN)
+     prompt_cache_key: Optional[str] = Field(default=NOT_GIVEN)
+     reasoning: Optional[Reasoning] = Field(default=NOT_GIVEN)
+     safety_identifier: Optional[str] = Field(default=NOT_GIVEN)
+     service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = Field(default=NOT_GIVEN)
+     store: Optional[bool] = Field(default=NOT_GIVEN)
+     stream: Optional[Literal[False]] = Field(default=NOT_GIVEN)
+     stream_options: Optional[response_create_params.StreamOptions] = Field(default=NOT_GIVEN)
+     temperature: Optional[float] = Field(default=NOT_GIVEN)
+     text: Optional[ResponseTextConfigParam] = Field(default=NOT_GIVEN)
+     tool_choice: Optional[response_create_params.ToolChoice] = Field(default=NOT_GIVEN)
+     tools: Optional[Iterable[ToolParam]] = Field(default=NOT_GIVEN)
+     top_logprobs: Optional[int] = Field(default=NOT_GIVEN)
+     top_p: Optional[float] = Field(default=NOT_GIVEN)
+     truncation: Optional[Literal["auto", "disabled"]] = Field(default=NOT_GIVEN)
+     user: Optional[str] = Field(default=NOT_GIVEN)
+     # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+     # The extra values given here take precedence over values defined on the client or passed to this method.
+     # extra_headers: Headers | None = (None,)
+     # extra_query: Query | None = (None,)
+     # extra_body: Body | None = (None,)
+     # timeout: float | httpx.Timeout | None | NotGiven = (NOT_GIVEN,)
+
+     def model_dump(self, **kwargs) -> Dict[str, Any]:
+         """Custom model_dump that properly serializes complex OpenAI types for JSON compatibility."""
+         # Force JSON mode to ensure full serialization of complex OpenAI types
+         # This prevents SerializationIterator objects from being created
+         kwargs["mode"] = "json"
+
+         # Get the JSON-serialized dump
+         data = super().model_dump(**kwargs)
+
+         # The API expects dicts, which JSON mode provides
+         return data
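
A minimal usage sketch of the new ResponsesRequest (the model id and the use of exclude_unset are illustrative assumptions, not part of the diff):

    # Hypothetical usage: build a request, then dump it to JSON-safe dicts.
    request = ResponsesRequest(
        model="gpt-4.1",          # illustrative model id
        input="Hello, world",
        temperature=0.2,
    )
    payload = request.model_dump(exclude_unset=True)  # mode="json" is forced internally
    # payload now holds only plain dicts/lists/scalars, safe for json.dumps or an HTTP body

Forcing mode="json" makes Pydantic fully materialize nested OpenAI param types rather than leaving lazy serializer objects in the output.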
letta/schemas/providers/__init__.py CHANGED
@@ -14,6 +14,7 @@ from .lmstudio import LMStudioOpenAIProvider
  from .mistral import MistralProvider
  from .ollama import OllamaProvider
  from .openai import OpenAIProvider
+ from .openrouter import OpenRouterProvider
  from .together import TogetherProvider
  from .vllm import VLLMProvider
  from .xai import XAIProvider
@@ -42,4 +43,5 @@ __all__ = [
      "TogetherProvider",
      "VLLMProvider",  # Replaces ChatCompletions and Completions
      "XAIProvider",
+     "OpenRouterProvider",
  ]
letta/schemas/providers/anthropic.py CHANGED
@@ -67,6 +67,11 @@ MODEL_LIST = [
          "name": "claude-sonnet-4-20250514",
          "context_window": 200000,
      },
+     # 4.5
+     {
+         "name": "claude-sonnet-4-5-20250929",
+         "context_window": 200000,
+     },
      ## Haiku
      # 3.0
      {
@@ -143,6 +148,17 @@ class AnthropicProvider(Provider):
                  warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                  model["context_window"] = 200000

+             # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
+             try:
+                 from letta.settings import model_settings
+
+                 if model_settings.anthropic_sonnet_1m and (
+                     model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
+                 ):
+                     model["context_window"] = 1_000_000
+             except Exception:
+                 pass
+
              max_tokens = 8192
              if "claude-3-opus" in model["id"]:
                  max_tokens = 4096
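
Read in isolation, the override reduces to this hedged sketch (the model dict is illustrative; anthropic_sonnet_1m is the settings flag the diff reads):

    from letta.settings import model_settings

    model = {"id": "claude-sonnet-4-5-20250929", "context_window": 200000}
    if model_settings.anthropic_sonnet_1m and model["id"].startswith("claude-sonnet-4"):
        model["context_window"] = 1_000_000  # opt-in 1M-token window for Sonnet 4/4.5

Note that the startswith("claude-sonnet-4-5") branch in the diff is already subsumed by startswith("claude-sonnet-4"), so the condensed condition above is equivalent.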
letta/schemas/providers/ollama.py CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
  import aiohttp
  from pydantic import Field

- from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
+ from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE
  from letta.log import get_logger
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderCategory, ProviderType
@@ -27,82 +27,163 @@ class OllamaProvider(OpenAIProvider):
          ..., description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
      )

+     @property
+     def raw_base_url(self) -> str:
+         """Base URL for native Ollama /api endpoints (no trailing /v1)."""
+         if self.base_url.endswith("/v1"):
+             return self.base_url[: -len("/v1")]
+         return self.base_url
+
+     @property
+     def openai_compat_base_url(self) -> str:
+         """Base URL with /v1 appended for OpenAI-compatible clients if ever needed.
+
+         Note: We do not use OpenAI chat completions for Ollama, but expose this
+         helper to clarify intent and avoid duplicating logic elsewhere.
+         """
+         return self.base_url if self.base_url.endswith("/v1") else f"{self.base_url.rstrip('/')}" + "/v1"
+
      async def list_llm_models_async(self) -> list[LLMConfig]:
-         """List available LLM Models from Ollama
+         """List available LLM Models from Ollama.
+
+         Note: Older Ollama versions do not expose a "capabilities" field on /api/show.
+         We therefore avoid filtering on capabilities and instead infer support from
+         /api/show model_info (falling back to safe defaults).

-         https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models"""
-         endpoint = f"{self.base_url}/api/tags"
+         https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+         """
+         endpoint = f"{self.raw_base_url}/api/tags"
          async with aiohttp.ClientSession() as session:
              async with session.get(endpoint) as response:
                  if response.status != 200:
-                     raise Exception(f"Failed to list Ollama models: {response.text}")
+                     # aiohttp: .text() is async
+                     error_text = await response.text()
+                     raise Exception(f"Failed to list Ollama models: {response.status} - {error_text}")
                  response_json = await response.json()

-         configs = []
-         for model in response_json.get("models", []):
-             model_name = model["name"]
-             model_details = await self._get_model_details_async(model_name)
-             if not model_details or "completion" not in model_details.get("capabilities", []):
+         configs: list[LLMConfig] = []
+         for m in response_json.get("models", []):
+             model_name = m.get("name")
+             if not model_name:
                  continue

-             context_window = None
-             model_info = model_details.get("model_info", {})
-             if architecture := model_info.get("general.architecture"):
-                 if context_length := model_info.get(f"{architecture}.context_length"):
-                     context_window = int(context_length)
+             # Use /api/show to check capabilities, specifically tools support
+             details = await self._get_model_details_async(model_name)
+             if not details:
+                 # If details cannot be fetched, skip to avoid tool errors later
+                 continue
+             caps = details.get("capabilities") or []
+             if not isinstance(caps, list):
+                 caps = []
+             if "tools" not in [str(c).lower() for c in caps]:
+                 # Only include models that declare tools support
+                 continue

+             # Derive context window from /api/show model_info if available
+             context_window = None
+             model_info = details.get("model_info", {}) if isinstance(details, dict) else {}
+             architecture = model_info.get("general.architecture") if isinstance(model_info, dict) else None
+             if architecture:
+                 ctx_len = model_info.get(f"{architecture}.context_length")
+                 if ctx_len is not None:
+                     try:
+                         context_window = int(ctx_len)
+                     except Exception:
+                         context_window = None
              if context_window is None:
-                 logger.warning(f"Ollama model {model_name} has no context window, using default {DEFAULT_CONTEXT_WINDOW}")
+                 logger.warning(f"Ollama model {model_name} has no context window in /api/show, using default {DEFAULT_CONTEXT_WINDOW}")
                  context_window = DEFAULT_CONTEXT_WINDOW

+             # === Capability stubs ===
+             # Compute support flags from /api/show capabilities. These are not
+             # yet plumbed through LLMConfig, but are captured here for later use.
+             caps_lower = [str(c).lower() for c in caps]
+             supports_tools = "tools" in caps_lower
+             supports_thinking = "thinking" in caps_lower
+             supports_vision = "vision" in caps_lower
+             supports_completion = "completion" in caps_lower
+             _ = (supports_tools, supports_thinking, supports_vision, supports_completion)
+
              configs.append(
+                 # Legacy Ollama using raw generate
+                 # LLMConfig(
+                 #     model=model_name,
+                 #     model_endpoint_type="ollama",
+                 #     model_endpoint=self.openai_compat_base_url,
+                 #     model_wrapper=self.default_prompt_formatter,
+                 #     context_window=context_window,
+                 #     # Ollama specific
+                 #     handle=self.get_handle(model_name),
+                 #     provider_name=self.name,
+                 #     provider_category=self.provider_category,
+                 # )
+                 # New "trust Ollama" version w/ pure OpenAI proxy
                  LLMConfig(
                      model=model_name,
-                     model_endpoint_type=ProviderType.ollama,
-                     model_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
-                     model_wrapper=self.default_prompt_formatter,
+                     model_endpoint_type="openai",
+                     model_endpoint=self.openai_compat_base_url,
+                     # model_wrapper=self.default_prompt_formatter,
                      context_window=context_window,
                      handle=self.get_handle(model_name),
                      provider_name=self.name,
                      provider_category=self.provider_category,
+                     # put_inner_thoughts_in_kwargs=True,
+                     # enable_reasoner=supports_thinking,
                  )
              )
          return configs

      async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
-         """List available embedding models from Ollama
+         """List available embedding models from Ollama.
+
+         We infer embedding support via model_info.*.embedding_length when available.

          https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
          """
-         endpoint = f"{self.base_url}/api/tags"
+         endpoint = f"{self.raw_base_url}/api/tags"
          async with aiohttp.ClientSession() as session:
              async with session.get(endpoint) as response:
                  if response.status != 200:
-                     raise Exception(f"Failed to list Ollama models: {response.text}")
+                     error_text = await response.text()
+                     raise Exception(f"Failed to list Ollama models: {response.status} - {error_text}")
                  response_json = await response.json()

-         configs = []
+         configs: list[EmbeddingConfig] = []
          for model in response_json.get("models", []):
              model_name = model["name"]
              model_details = await self._get_model_details_async(model_name)
-             if not model_details or "embedding" not in model_details.get("capabilities", []):
+
+             if not model_details:
+                 continue
+
+             # Filter to true embedding models via capabilities
+             caps = model_details.get("capabilities") or []
+             if not isinstance(caps, list):
+                 caps = []
+             if "embedding" not in [str(c).lower() for c in caps]:
                  continue

              embedding_dim = None
              model_info = model_details.get("model_info", {})
-             if architecture := model_info.get("general.architecture"):
-                 if embedding_length := model_info.get(f"{architecture}.embedding_length"):
-                     embedding_dim = int(embedding_length)
+             architecture = model_info.get("general.architecture")
+             if architecture:
+                 embedding_length = model_info.get(f"{architecture}.embedding_length")
+                 if embedding_length is not None:
+                     try:
+                         embedding_dim = int(embedding_length)
+                     except Exception:
+                         pass

              if not embedding_dim:
-                 logger.warning(f"Ollama model {model_name} has no embedding dimension, using default {DEFAULT_EMBEDDING_DIM}")
-                 embedding_dim = DEFAULT_EMBEDDING_DIM
+                 # Skip models without a reported embedding dimension to avoid DB dimension mismatches
+                 continue

              configs.append(
                  EmbeddingConfig(
                      embedding_model=model_name,
-                     embedding_endpoint_type=ProviderType.ollama,
-                     embedding_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
+                     # Use OpenAI-compatible proxy for embeddings
+                     embedding_endpoint_type=ProviderType.openai,
+                     embedding_endpoint=self.openai_compat_base_url,
                      embedding_dim=embedding_dim,
                      embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
                      handle=self.get_handle(model_name, is_embedding=True),
@@ -112,11 +193,12 @@ class OllamaProvider(OpenAIProvider):

      async def _get_model_details_async(self, model_name: str) -> dict | None:
          """Get detailed information for a specific model from /api/show."""
-         endpoint = f"{self.base_url}/api/show"
+         endpoint = f"{self.raw_base_url}/api/show"
          payload = {"name": model_name}

          try:
-             async with aiohttp.ClientSession() as session:
+             timeout = aiohttp.ClientTimeout(total=2.0)
+             async with aiohttp.ClientSession(timeout=timeout) as session:
                  async with session.post(endpoint, json=payload) as response:
                      if response.status != 200:
                          error_text = await response.text()
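
A self-contained sketch of the URL normalization the new properties implement (stub class for illustration; the real properties live on OllamaProvider):

    class _UrlDemo:
        def __init__(self, base_url: str):
            self.base_url = base_url

        @property
        def raw_base_url(self) -> str:
            # strip a trailing /v1 so native Ollama /api endpoints resolve
            return self.base_url[: -len("/v1")] if self.base_url.endswith("/v1") else self.base_url

        @property
        def openai_compat_base_url(self) -> str:
            # ensure exactly one /v1 suffix for the OpenAI-compatible proxy
            return self.base_url if self.base_url.endswith("/v1") else self.base_url.rstrip("/") + "/v1"

    assert _UrlDemo("http://localhost:11434").raw_base_url == "http://localhost:11434"
    assert _UrlDemo("http://localhost:11434/v1").raw_base_url == "http://localhost:11434"
    assert _UrlDemo("http://localhost:11434").openai_compat_base_url == "http://localhost:11434/v1"

Either form of base_url therefore works: /api/tags and /api/show always hit the native API, while the emitted LLM and embedding configs always point at the /v1 proxy.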
letta/schemas/providers/openrouter.py ADDED
@@ -0,0 +1,52 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+ from letta.log import get_logger
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+ logger = get_logger(__name__)
+
+ # ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
+ # DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
+ # DEFAULT_EMBEDDING_BATCH_SIZE = 1024
+
+
+ class OpenRouterProvider(OpenAIProvider):
+     provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     api_key: str = Field(..., description="API key for the OpenRouter API.")
+     base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.")
+     handle_base: str | None = Field(None, description="Custom handle base name for model handles (e.g., 'custom' instead of 'openrouter').")
+
+     def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
+         """
+         This handles filtering out LLM Models by provider that meet Letta's requirements.
+         """
+         configs = []
+         for model in data:
+             check = self._do_model_checks_for_name_and_context_size(model)
+             if check is None:
+                 continue
+             model_name, context_window_size = check
+
+             handle = self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name)
+
+             config = LLMConfig(
+                 model=model_name,
+                 model_endpoint_type="openai",
+                 model_endpoint=self.base_url,
+                 context_window=context_window_size,
+                 handle=handle,
+                 provider_name=self.name,
+                 provider_category=self.provider_category,
+             )
+
+             config = self._set_model_parameter_tuned_defaults(model_name, config)
+             configs.append(config)
+
+         return configs
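
A hedged construction sketch (field values are placeholders; name is inherited from the Provider base class):

    provider = OpenRouterProvider(
        name="openrouter",     # illustrative
        api_key="sk-or-...",   # placeholder key
        handle_base=None,      # None keeps default "openrouter/<model>" handles
    )
    # _list_llm_models() filters the /models payload to entries that pass the
    # name/context-window checks, then applies per-model tuned defaults.

handle_base mirrors the field added to VLLMProvider below, letting deployments rebrand model handles without subclassing.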
letta/schemas/providers/vllm.py CHANGED
@@ -23,6 +23,7 @@ class VLLMProvider(Provider):
      default_prompt_formatter: str | None = Field(
          default=None, description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
      )
+     handle_base: str | None = Field(None, description="Custom handle base name for model handles (e.g., 'custom' instead of 'vllm').")

      async def list_llm_models_async(self) -> list[LLMConfig]:
          from letta.llm_api.openai import openai_get_model_list_async
@@ -43,7 +44,7 @@ class VLLMProvider(Provider):
                  model_endpoint=base_url,
                  model_wrapper=self.default_prompt_formatter,
                  context_window=model["max_model_len"],
-                 handle=self.get_handle(model_name),
+                 handle=self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name),
                  provider_name=self.name,
                  provider_category=self.provider_category,
              )
letta/schemas/run.py CHANGED
@@ -1,62 +1,68 @@
+ from datetime import datetime
  from typing import Optional

- from pydantic import Field
+ from pydantic import ConfigDict, Field

- from letta.schemas.enums import JobType
- from letta.schemas.job import Job, JobBase, LettaRequestConfig
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.schemas.enums import RunStatus
+ from letta.schemas.job import LettaRequestConfig
+ from letta.schemas.letta_base import LettaBase
  from letta.schemas.letta_stop_reason import StopReasonType


- class RunBase(JobBase):
-     """Base class for Run schemas that inherits from JobBase but uses 'run' prefix for IDs"""
-
+ class RunBase(LettaBase):
      __id_prefix__ = "run"
-     job_type: JobType = JobType.RUN


  class Run(RunBase):
      """
-     Representation of a run, which is a job with a 'run' prefix in its ID.
-     Inherits all fields and behavior from Job except for the ID prefix.
+     Representation of a run - a conversation or processing session for an agent.
+     Runs track when agents process messages and maintain the relationship between agents, steps, and messages.

      Parameters:
          id (str): The unique identifier of the run (prefixed with 'run-').
-         status (JobStatus): The status of the run.
-         created_at (datetime): The unix timestamp of when the run was created.
-         completed_at (datetime): The unix timestamp of when the run was completed.
-         user_id (str): The unique identifier of the user associated with the run.
+         status (JobStatus): The current status of the run.
+         created_at (datetime): The timestamp when the run was created.
+         completed_at (datetime): The timestamp when the run was completed.
+         agent_id (str): The unique identifier of the agent associated with the run.
+         stop_reason (StopReasonType): The reason why the run was stopped.
+         background (bool): Whether the run was created in background mode.
+         metadata (dict): Additional metadata for the run.
+         request_config (LettaRequestConfig): The request configuration for the run.
      """

      id: str = RunBase.generate_id_field()
-     user_id: Optional[str] = Field(None, description="The unique identifier of the user associated with the run.")
+
+     # Core run fields
+     status: RunStatus = Field(default=RunStatus.created, description="The current status of the run.")
+     created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the run was created.")
+     completed_at: Optional[datetime] = Field(None, description="The timestamp when the run was completed.")
+
+     # Agent relationship
+     agent_id: str = Field(..., description="The unique identifier of the agent associated with the run.")
+
+     # Run configuration
+     background: Optional[bool] = Field(None, description="Whether the run was created in background mode.")
+     metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Additional metadata for the run.")
      request_config: Optional[LettaRequestConfig] = Field(None, description="The request configuration for the run.")
      stop_reason: Optional[StopReasonType] = Field(None, description="The reason why the run was stopped.")

-     @classmethod
-     def from_job(cls, job: Job) -> "Run":
-         """
-         Convert a Job instance to a Run instance by replacing the ID prefix.
-         All other fields are copied as-is.
-
-         Args:
-             job: The Job instance to convert
-
-         Returns:
-             A new Run instance with the same data but 'run-' prefix in ID
-         """
-         # Convert job dict to exclude None values
-         job_data = job.model_dump(exclude_none=True)
-
-         # Create new Run instance with converted data
-         return cls(**job_data)
-
-     def to_job(self) -> Job:
-         """
-         Convert this Run instance to a Job instance by replacing the ID prefix.
-         All other fields are copied as-is.
-
-         Returns:
-             A new Job instance with the same data but 'job-' prefix in ID
-         """
-         run_data = self.model_dump(exclude_none=True)
-         return Job(**run_data)
+     # Callback configuration
+     callback_url: Optional[str] = Field(None, description="If set, POST to this URL when the run completes.")
+     callback_sent_at: Optional[datetime] = Field(None, description="Timestamp when the callback was last attempted.")
+     callback_status_code: Optional[int] = Field(None, description="HTTP status code returned by the callback endpoint.")
+     callback_error: Optional[str] = Field(None, description="Optional error message from attempting to POST the callback endpoint.")
+
+     # Timing metrics (in nanoseconds for precision)
+     ttft_ns: Optional[int] = Field(None, description="Time to first token for a run in nanoseconds")
+     total_duration_ns: Optional[int] = Field(None, description="Total run duration in nanoseconds")
+
+
+ class RunUpdate(RunBase):
+     """Update model for Run."""
+
+     status: Optional[RunStatus] = Field(None, description="The status of the run.")
+     completed_at: Optional[datetime] = Field(None, description="The timestamp when the run was completed.")
+     stop_reason: Optional[StopReasonType] = Field(None, description="The reason why the run was stopped.")
+     metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Additional metadata for the run.")
+     model_config = ConfigDict(extra="ignore")  # Ignores extra fields
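
A minimal construction sketch for the reworked schema (the agent id is a placeholder):

    run = Run(
        agent_id="agent-123",  # placeholder id
        background=True,
    )
    # id is generated with the "run-" prefix; status defaults to RunStatus.created
    # and created_at to the current UTC time via get_utc_time.

Because RunUpdate sets extra="ignore", partial update payloads carrying unrelated keys validate cleanly instead of raising.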
letta/schemas/step.py CHANGED
@@ -18,8 +18,8 @@ class Step(StepBase):
      origin: Optional[str] = Field(None, description="The surface that this agent step was initiated from.")
      organization_id: Optional[str] = Field(None, description="The unique identifier of the organization associated with the step.")
      provider_id: Optional[str] = Field(None, description="The unique identifier of the provider that was configured for this step")
-     job_id: Optional[str] = Field(
-         None, description="The unique identifier of the job that this step belongs to. Only included for async calls."
+     run_id: Optional[str] = Field(
+         None, description="The unique identifier of the run that this step belongs to. Only included for async calls."
      )
      agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
      provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")

letta/schemas/step_metrics.py CHANGED
@@ -13,7 +13,7 @@ class StepMetrics(StepMetricsBase):
      id: str = Field(..., description="The id of the step this metric belongs to (matches steps.id).")
      organization_id: Optional[str] = Field(None, description="The unique identifier of the organization.")
      provider_id: Optional[str] = Field(None, description="The unique identifier of the provider.")
-     job_id: Optional[str] = Field(None, description="The unique identifier of the job.")
+     run_id: Optional[str] = Field(None, description="The unique identifier of the run.")
      agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
      step_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the step in nanoseconds.")
      llm_request_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the llm request in nanoseconds.")