letta-nightly 0.11.0.dev20250807104511__py3-none-any.whl → 0.11.0.dev20250808104456__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. letta/agent.py +2 -1
  2. letta/agents/letta_agent.py +215 -143
  3. letta/constants.py +4 -1
  4. letta/embeddings.py +6 -5
  5. letta/functions/function_sets/base.py +2 -2
  6. letta/functions/function_sets/files.py +22 -9
  7. letta/interfaces/anthropic_streaming_interface.py +291 -265
  8. letta/interfaces/openai_streaming_interface.py +270 -250
  9. letta/llm_api/anthropic.py +3 -10
  10. letta/llm_api/openai_client.py +6 -1
  11. letta/orm/__init__.py +1 -0
  12. letta/orm/step.py +14 -0
  13. letta/orm/step_metrics.py +71 -0
  14. letta/schemas/enums.py +9 -0
  15. letta/schemas/llm_config.py +8 -6
  16. letta/schemas/providers/lmstudio.py +2 -2
  17. letta/schemas/providers/ollama.py +42 -54
  18. letta/schemas/providers/openai.py +1 -1
  19. letta/schemas/step.py +6 -0
  20. letta/schemas/step_metrics.py +23 -0
  21. letta/schemas/tool_rule.py +10 -29
  22. letta/services/step_manager.py +179 -1
  23. letta/services/tool_executor/builtin_tool_executor.py +4 -1
  24. letta/services/tool_executor/core_tool_executor.py +2 -10
  25. letta/services/tool_executor/files_tool_executor.py +89 -40
  26. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/METADATA +1 -1
  27. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/RECORD +30 -28
  28. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/LICENSE +0 -0
  29. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/WHEEL +0 -0
  30. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/entry_points.txt +0 -0
letta/orm/step_metrics.py ADDED
@@ -0,0 +1,71 @@
+ from typing import TYPE_CHECKING, Optional
+
+ from sqlalchemy import BigInteger, ForeignKey, String
+ from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+ from letta.orm.mixins import AgentMixin, ProjectMixin
+ from letta.orm.sqlalchemy_base import SqlalchemyBase
+ from letta.schemas.step_metrics import StepMetrics as PydanticStepMetrics
+
+ if TYPE_CHECKING:
+     from letta.orm.agent import Agent
+     from letta.orm.job import Job
+     from letta.orm.step import Step
+
+
+ class StepMetrics(SqlalchemyBase, ProjectMixin, AgentMixin):
+     """Tracks performance metrics for agent steps."""
+
+     __tablename__ = "step_metrics"
+     __pydantic_model__ = PydanticStepMetrics
+
+     id: Mapped[str] = mapped_column(
+         ForeignKey("steps.id", ondelete="CASCADE"),
+         primary_key=True,
+         doc="The unique identifier of the step this metric belongs to (also serves as PK)",
+     )
+     organization_id: Mapped[str] = mapped_column(
+         ForeignKey("organizations.id", ondelete="RESTRICT"),
+         nullable=True,
+         doc="The unique identifier of the organization",
+     )
+     provider_id: Mapped[Optional[str]] = mapped_column(
+         ForeignKey("providers.id", ondelete="RESTRICT"),
+         nullable=True,
+         doc="The unique identifier of the provider",
+     )
+     job_id: Mapped[Optional[str]] = mapped_column(
+         ForeignKey("jobs.id", ondelete="SET NULL"),
+         nullable=True,
+         doc="The unique identifier of the job",
+     )
+     llm_request_ns: Mapped[Optional[int]] = mapped_column(
+         BigInteger,
+         nullable=True,
+         doc="Time spent on the LLM request in nanoseconds",
+     )
+     tool_execution_ns: Mapped[Optional[int]] = mapped_column(
+         BigInteger,
+         nullable=True,
+         doc="Time spent on tool execution in nanoseconds",
+     )
+     step_ns: Mapped[Optional[int]] = mapped_column(
+         BigInteger,
+         nullable=True,
+         doc="Total time for the step in nanoseconds",
+     )
+     base_template_id: Mapped[Optional[str]] = mapped_column(
+         String,
+         nullable=True,
+         doc="The base template ID for the step",
+     )
+     template_id: Mapped[Optional[str]] = mapped_column(
+         String,
+         nullable=True,
+         doc="The template ID for the step",
+     )
+
+     # Relationships (foreign keys)
+     step: Mapped["Step"] = relationship("Step", back_populates="metrics", uselist=False)
+     job: Mapped[Optional["Job"]] = relationship("Job")
+     agent: Mapped[Optional["Agent"]] = relationship("Agent")
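For orientation, a minimal read-path sketch against the new table; the engine URL and step id are placeholders, and a configured Letta database is assumed:

from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

from letta.orm.step_metrics import StepMetrics

engine = create_engine("sqlite:///letta.db")  # placeholder DSN (assumption)
with Session(engine) as session:
    # id doubles as the primary key and the FK to steps.id
    metrics = session.scalar(select(StepMetrics).where(StepMetrics.id == "step-123"))
    if metrics is not None and metrics.step_ns is not None:
        print(f"step {metrics.id}: {metrics.step_ns / 1e6:.1f} ms total")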
letta/schemas/enums.py CHANGED
@@ -160,3 +160,12 @@ class SandboxType(str, Enum):
      E2B = "e2b"
      MODAL = "modal"
      LOCAL = "local"
+
+
+ class StepStatus(str, Enum):
+     """Status of a step execution"""
+
+     PENDING = "pending"
+     SUCCESS = "success"
+     FAILED = "failed"
+     CANCELLED = "cancelled"
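Because StepStatus subclasses str, members compare directly against their raw values; a quick sketch:

from letta.schemas.enums import StepStatus

status = StepStatus.PENDING
assert status == "pending"  # str-backed enum: equal to its value
terminal = {StepStatus.SUCCESS, StepStatus.FAILED, StepStatus.CANCELLED}
print(status in terminal)  # False while the step is still in flight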
letta/schemas/llm_config.py CHANGED
@@ -58,7 +58,7 @@ class LLMConfig(BaseModel):
      enable_reasoner: bool = Field(
          False, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
      )
-     reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+     reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
          None,
          description="The reasoning effort to use when generating text reasoning models",
      )
@@ -188,6 +188,8 @@ class LLMConfig(BaseModel):
      @classmethod
      def apply_reasoning_setting_to_config(cls, config: "LLMConfig", reasoning: bool):
          if reasoning:
+             config.enable_reasoner = True
+
              if (
                  config.model_endpoint_type == "anthropic"
                  and ("claude-opus-4" in config.model or "claude-sonnet-4" in config.model or "claude-3-7-sonnet" in config.model)
@@ -195,19 +197,19 @@ class LLMConfig(BaseModel):
                  config.model_endpoint_type == "google_vertex" and ("gemini-2.5-flash" in config.model or "gemini-2.0-pro" in config.model)
              ):
                  config.put_inner_thoughts_in_kwargs = False
-                 config.enable_reasoner = True
                  if config.max_reasoning_tokens == 0:
                      config.max_reasoning_tokens = 1024
              elif config.model_endpoint_type == "openai" and (
                  config.model.startswith("o1") or config.model.startswith("o3") or config.model.startswith("o4")
              ):
-                 config.put_inner_thoughts_in_kwargs = True
-                 config.enable_reasoner = True
+                 config.put_inner_thoughts_in_kwargs = False
                  if config.reasoning_effort is None:
                      config.reasoning_effort = "medium"
              else:
                  config.put_inner_thoughts_in_kwargs = True
-                 config.enable_reasoner = False
+
          else:
-             config.put_inner_thoughts_in_kwargs = False
              config.enable_reasoner = False
+             config.put_inner_thoughts_in_kwargs = False
+
+         return config
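A sketch of the adjusted behavior for an OpenAI o-series model; the constructor arguments below are placeholder values, not a recommended config:

from letta.schemas.llm_config import LLMConfig

config = LLMConfig(
    model="o3-mini",
    model_endpoint_type="openai",
    model_endpoint="https://api.openai.com/v1",
    context_window=200_000,  # placeholder value
)
config = LLMConfig.apply_reasoning_setting_to_config(config, reasoning=True)
assert config.enable_reasoner is True                # now set once, up front
assert config.put_inner_thoughts_in_kwargs is False  # flipped by this change for o-series
assert config.reasoning_effort == "medium"           # defaulted when unset
config.reasoning_effort = "minimal"                  # "minimal" is a newly valid literal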
letta/schemas/providers/lmstudio.py CHANGED
@@ -55,7 +55,7 @@ class LMStudioOpenAIProvider(OpenAIProvider):
              LLMConfig(
                  model=model_name,
                  model_endpoint_type="openai",
-                 model_endpoint=self.base_url,
+                 model_endpoint=self.model_endpoint_url,
                  context_window=context_window_size,
                  handle=self.get_handle(model_name),
                  compatibility_type=compatibility_type,
@@ -94,7 +94,7 @@ class LMStudioOpenAIProvider(OpenAIProvider):
              EmbeddingConfig(
                  embedding_model=model_name,
                  embedding_endpoint_type="openai",
-                 embedding_endpoint=self.base_url,
+                 embedding_endpoint=self.model_endpoint_url,
                  embedding_dim=768,  # Default embedding dimension, not context window
                  embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
                  handle=self.get_handle(model_name),
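The swap presumably points configs at the OpenAI-compatible API path rather than the raw base_url; a hypothetical illustration of that distinction (the real property is defined elsewhere in the provider and may differ):

class _LMStudioEndpointSketch:
    """Illustration only, not the real provider class."""

    def __init__(self, base_url: str):
        self.base_url = base_url  # e.g. "http://localhost:1234" (assumed default)

    @property
    def model_endpoint_url(self) -> str:
        # assumption: LM Studio serves its OpenAI-compatible API under /v1
        return f"{self.base_url.rstrip('/')}/v1"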
letta/schemas/providers/ollama.py CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
  import aiohttp
  from pydantic import Field
 
- from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
  from letta.log import get_logger
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderCategory, ProviderType
@@ -12,8 +12,6 @@ from letta.schemas.providers.openai import OpenAIProvider
 
  logger = get_logger(__name__)
 
- ollama_prefix = "/v1"
-
 
  class OllamaProvider(OpenAIProvider):
      """Ollama provider that uses the native /api/generate endpoint
@@ -41,19 +39,30 @@ class OllamaProvider(OpenAIProvider):
              response_json = await response.json()
 
          configs = []
-         for model in response_json["models"]:
-             context_window = await self._get_model_context_window(model["name"])
+         for model in response_json.get("models", []):
+             model_name = model["name"]
+             model_details = await self._get_model_details_async(model_name)
+             if not model_details or "completion" not in model_details.get("capabilities", []):
+                 continue
+
+             context_window = None
+             model_info = model_details.get("model_info", {})
+             if architecture := model_info.get("general.architecture"):
+                 if context_length := model_info.get(f"{architecture}.context_length"):
+                     context_window = int(context_length)
+
              if context_window is None:
-                 print(f"Ollama model {model['name']} has no context window, using default 32000")
-                 context_window = 32000
+                 logger.warning(f"Ollama model {model_name} has no context window, using default {DEFAULT_CONTEXT_WINDOW}")
+                 context_window = DEFAULT_CONTEXT_WINDOW
+
              configs.append(
                  LLMConfig(
-                     model=model["name"],
+                     model=model_name,
                      model_endpoint_type=ProviderType.ollama,
-                     model_endpoint=f"{self.base_url}{ollama_prefix}",
+                     model_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
                      model_wrapper=self.default_prompt_formatter,
                      context_window=context_window,
-                     handle=self.get_handle(model["name"]),
+                     handle=self.get_handle(model_name),
                      provider_name=self.name,
                      provider_category=self.provider_category,
                  )
@@ -73,25 +82,36 @@ class OllamaProvider(OpenAIProvider):
              response_json = await response.json()
 
          configs = []
-         for model in response_json["models"]:
-             embedding_dim = await self._get_model_embedding_dim(model["name"])
+         for model in response_json.get("models", []):
+             model_name = model["name"]
+             model_details = await self._get_model_details_async(model_name)
+             if not model_details or "embedding" not in model_details.get("capabilities", []):
+                 continue
+
+             embedding_dim = None
+             model_info = model_details.get("model_info", {})
+             if architecture := model_info.get("general.architecture"):
+                 if embedding_length := model_info.get(f"{architecture}.embedding_length"):
+                     embedding_dim = int(embedding_length)
+
              if not embedding_dim:
-                 print(f"Ollama model {model['name']} has no embedding dimension, using default 1024")
-                 # continue
-                 embedding_dim = 1024
+                 logger.warning(f"Ollama model {model_name} has no embedding dimension, using default {DEFAULT_EMBEDDING_DIM}")
+                 embedding_dim = DEFAULT_EMBEDDING_DIM
+
              configs.append(
                  EmbeddingConfig(
-                     embedding_model=model["name"],
+                     embedding_model=model_name,
                      embedding_endpoint_type=ProviderType.ollama,
-                     embedding_endpoint=f"{self.base_url}{ollama_prefix}",
+                     embedding_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
                      embedding_dim=embedding_dim,
                      embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
-                     handle=self.get_handle(model["name"], is_embedding=True),
+                     handle=self.get_handle(model_name, is_embedding=True),
                  )
              )
          return configs
 
-     async def _get_model_context_window(self, model_name: str) -> int | None:
+     async def _get_model_details_async(self, model_name: str) -> dict | None:
+         """Get detailed information for a specific model from /api/show."""
          endpoint = f"{self.base_url}/api/show"
          payload = {"name": model_name}
 
@@ -102,39 +122,7 @@ class OllamaProvider(OpenAIProvider):
                      error_text = await response.text()
                      logger.warning(f"Failed to get model info for {model_name}: {response.status} - {error_text}")
                      return None
-
-                 response_json = await response.json()
-                 model_info = response_json.get("model_info", {})
-
-                 if architecture := model_info.get("general.architecture"):
-                     if context_length := model_info.get(f"{architecture}.context_length"):
-                         return int(context_length)
-
+                 return await response.json()
          except Exception as e:
-             logger.warning(f"Failed to get model context window for {model_name} with error: {e}")
-
-         return None
-
-     async def _get_model_embedding_dim(self, model_name: str) -> int | None:
-         endpoint = f"{self.base_url}/api/show"
-         payload = {"name": model_name}
-
-         try:
-             async with aiohttp.ClientSession() as session:
-                 async with session.post(endpoint, json=payload) as response:
-                     if response.status != 200:
-                         error_text = await response.text()
-                         logger.warning(f"Failed to get model info for {model_name}: {response.status} - {error_text}")
-                         return None
-
-                     response_json = await response.json()
-                     model_info = response_json.get("model_info", {})
-
-                     if architecture := model_info.get("general.architecture"):
-                         if embedding_length := model_info.get(f"{architecture}.embedding_length"):
-                             return int(embedding_length)
-
-         except Exception as e:
-             logger.warning(f"Failed to get model embedding dimension for {model_name} with error: {e}")
-
-         return None
+             logger.warning(f"Failed to get model details for {model_name} with error: {e}")
+             return None
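Both listing paths now funnel through a single /api/show lookup and filter on the reported capabilities; a rough usage sketch (the constructor arguments and model tag are assumptions):

import asyncio

from letta.schemas.providers.ollama import OllamaProvider

async def main():
    provider = OllamaProvider(name="ollama", base_url="http://localhost:11434")  # assumed args
    details = await provider._get_model_details_async("llama3.1:8b")  # assumed model tag
    if details and "completion" in details.get("capabilities", []):
        info = details.get("model_info", {})
        arch = info.get("general.architecture")
        print(arch, info.get(f"{arch}.context_length"))

asyncio.run(main())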
letta/schemas/providers/openai.py CHANGED
@@ -11,7 +11,7 @@ from letta.schemas.providers.base import Provider
 
  logger = get_logger(__name__)
 
- ALLOWED_PREFIXES = {"gpt-4", "o1", "o3", "o4"}
+ ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
  DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}
  DEFAULT_EMBEDDING_BATCH_SIZE = 1024
 
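A sketch of how these sets are presumably applied when filtering the model list (the helper name is hypothetical):

ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}

def is_supported_chat_model(model_name: str) -> bool:  # hypothetical helper
    # keep allowed prefixes, drop known non-chat variants
    return any(model_name.startswith(p) for p in ALLOWED_PREFIXES) and not any(
        kw in model_name for kw in DISALLOWED_KEYWORDS
    )

assert is_supported_chat_model("gpt-5-mini")                # newly admitted by this change
assert not is_supported_chat_model("gpt-4o-audio-preview")  # blocked by "audio"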
letta/schemas/step.py CHANGED
@@ -3,6 +3,7 @@ from typing import Dict, List, Literal, Optional
 
  from pydantic import Field
 
+ from letta.schemas.enums import StepStatus
  from letta.schemas.letta_base import LettaBase
  from letta.schemas.letta_stop_reason import StopReasonType
  from letta.schemas.message import Message
@@ -40,6 +41,11 @@ class Step(StepBase):
      )
      project_id: Optional[str] = Field(None, description="The project that the agent that executed this step belongs to (cloud only).")
 
+     # error tracking fields
+     error_type: Optional[str] = Field(None, description="The type/class of the error that occurred")
+     error_data: Optional[Dict] = Field(None, description="Error details including message, traceback, and additional context")
+     status: Optional[StepStatus] = Field(StepStatus.PENDING, description="Step status: pending, success, or failed")
+
 
  class StepProgression(int, Enum):
      START = auto()
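A sketch of recording a failed step with the new fields; the values are illustrative, and the other Step fields are assumed optional here:

from letta.schemas.enums import StepStatus
from letta.schemas.step import Step

failed = Step(
    status=StepStatus.FAILED,
    error_type="TimeoutError",  # illustrative error class name
    error_data={"message": "LLM request timed out", "request_id": "req-abc"},
)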
letta/schemas/step_metrics.py ADDED
@@ -0,0 +1,23 @@
+ from typing import Optional
+
+ from pydantic import Field
+
+ from letta.schemas.letta_base import LettaBase
+
+
+ class StepMetricsBase(LettaBase):
+     __id_prefix__ = "step"
+
+
+ class StepMetrics(StepMetricsBase):
+     id: str = Field(..., description="The id of the step this metric belongs to (matches steps.id).")
+     organization_id: Optional[str] = Field(None, description="The unique identifier of the organization.")
+     provider_id: Optional[str] = Field(None, description="The unique identifier of the provider.")
+     job_id: Optional[str] = Field(None, description="The unique identifier of the job.")
+     agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
+     llm_request_ns: Optional[int] = Field(None, description="Time spent on LLM requests in nanoseconds.")
+     tool_execution_ns: Optional[int] = Field(None, description="Time spent on tool execution in nanoseconds.")
+     step_ns: Optional[int] = Field(None, description="Total time for the step in nanoseconds.")
+     base_template_id: Optional[str] = Field(None, description="The base template ID that the step belongs to (cloud only).")
+     template_id: Optional[str] = Field(None, description="The template ID that the step belongs to (cloud only).")
+     project_id: Optional[str] = Field(None, description="The project that the step belongs to (cloud only).")
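Every field except id is optional, so a partially populated record serializes cleanly; for example (the id value is illustrative):

from letta.schemas.step_metrics import StepMetrics

metrics = StepMetrics(
    id="step-123",  # illustrative step id
    llm_request_ns=1_200_000_000,
    tool_execution_ns=350_000_000,
    step_ns=1_650_000_000,
)
print(metrics.model_dump(exclude_none=True))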
letta/schemas/tool_rule.py CHANGED
@@ -23,26 +23,24 @@ class BaseToolRule(LettaBase):
      def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> set[str]:
          raise NotImplementedError
 
-     def render_prompt(self) -> Optional[str]:
+     def render_prompt(self) -> str | None:
          """Render the prompt template with this rule's attributes."""
-         template_to_use = self.prompt_template or self._get_default_template()
-         if not template_to_use:
+         if not self.prompt_template:
              return None
 
          try:
-             template = Template(template_to_use)
+             template = Template(self.prompt_template)
              return template.render(**self.model_dump())
          except Exception as e:
              logger.warning(
-                 f"Failed to render prompt template for tool rule '{self.tool_name}' (type: {self.type}). "
-                 f"Template: '{template_to_use}'. Error: {e}"
+                 "Failed to render prompt template for tool rule '%s' (type: %s). Template: '%s'. Error: %s",
+                 self.tool_name,
+                 self.type,
+                 self.prompt_template,
+                 e,
              )
              return None
 
-     def _get_default_template(self) -> Optional[str]:
-         """Get the default template for this rule type. Override in subclasses."""
-         return None
-
 
  class ChildToolRule(BaseToolRule):
      """
@@ -60,9 +58,6 @@ class ChildToolRule(BaseToolRule):
          last_tool = tool_call_history[-1] if tool_call_history else None
          return set(self.children) if last_tool == self.tool_name else available_tools
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\nAfter using {{ tool_name }}, you must use one of these tools: {{ children | join(', ') }}\n</tool_rule>"
-
 
  class ParentToolRule(BaseToolRule):
      """
@@ -80,9 +75,6 @@ class ParentToolRule(BaseToolRule):
          last_tool = tool_call_history[-1] if tool_call_history else None
          return set(self.children) if last_tool == self.tool_name else available_tools - set(self.children)
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ children | join(', ') }} can only be used after {{ tool_name }}\n</tool_rule>"
-
 
  class ConditionalToolRule(BaseToolRule):
      """
@@ -125,7 +117,8 @@ class ConditionalToolRule(BaseToolRule):
 
          return {self.default_child} if self.default_child else available_tools
 
-     def _matches_key(self, function_output: str, key: Any) -> bool:
+     @staticmethod
+     def _matches_key(function_output: str, key: Any) -> bool:
          """Helper function to determine if function output matches a mapping key."""
          if isinstance(key, bool):
              return function_output.lower() == "true" if key else function_output.lower() == "false"
@@ -142,9 +135,6 @@ class ConditionalToolRule(BaseToolRule):
          else:  # Assume string
              return str(function_output) == str(key)
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ tool_name }} will determine which tool to use next based on its output\n</tool_rule>"
-
 
  class InitToolRule(BaseToolRule):
      """
@@ -165,9 +155,6 @@ class TerminalToolRule(BaseToolRule):
          description="Optional Jinja2 template for generating agent prompt about this tool rule.",
      )
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ tool_name }} ends your response (yields control) when called\n</tool_rule>"
-
 
  class ContinueToolRule(BaseToolRule):
      """
@@ -196,9 +183,6 @@ class RequiredBeforeExitToolRule(BaseToolRule):
          """Returns all available tools - the logic for preventing exit is handled elsewhere."""
          return available_tools
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>{{ tool_name }} must be called before ending the conversation</tool_rule>"
-
 
  class MaxCountPerStepToolRule(BaseToolRule):
      """
@@ -222,9 +206,6 @@ class MaxCountPerStepToolRule(BaseToolRule):
 
          return available_tools
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ tool_name }}: at most {{ max_count_limit }} use(s) per response\n</tool_rule>"
-
 
  ToolRule = Annotated[
      Union[
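With the per-subclass default templates removed, render_prompt returns None unless the caller supplies a template explicitly; a sketch (field values are illustrative):

from letta.schemas.tool_rule import ChildToolRule

rule = ChildToolRule(tool_name="plan", children=["execute", "reflect"])
assert rule.render_prompt() is None  # no built-in fallback template anymore

rule.prompt_template = "<tool_rule>After {{ tool_name }}, use: {{ children | join(', ') }}</tool_rule>"
print(rule.render_prompt())
# <tool_rule>After plan, use: execute, reflect</tool_rule>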