letta-nightly 0.11.7.dev20251006104136__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
Files changed (145)
  1. letta/adapters/letta_llm_adapter.py +1 -0
  2. letta/adapters/letta_llm_request_adapter.py +0 -1
  3. letta/adapters/letta_llm_stream_adapter.py +7 -2
  4. letta/adapters/simple_llm_request_adapter.py +88 -0
  5. letta/adapters/simple_llm_stream_adapter.py +192 -0
  6. letta/agents/agent_loop.py +6 -0
  7. letta/agents/ephemeral_summary_agent.py +2 -1
  8. letta/agents/helpers.py +142 -6
  9. letta/agents/letta_agent.py +13 -33
  10. letta/agents/letta_agent_batch.py +2 -4
  11. letta/agents/letta_agent_v2.py +87 -77
  12. letta/agents/letta_agent_v3.py +899 -0
  13. letta/agents/voice_agent.py +2 -6
  14. letta/constants.py +8 -4
  15. letta/errors.py +40 -0
  16. letta/functions/function_sets/base.py +84 -4
  17. letta/functions/function_sets/multi_agent.py +0 -3
  18. letta/functions/schema_generator.py +113 -71
  19. letta/groups/dynamic_multi_agent.py +3 -2
  20. letta/groups/helpers.py +1 -2
  21. letta/groups/round_robin_multi_agent.py +3 -2
  22. letta/groups/sleeptime_multi_agent.py +3 -2
  23. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  24. letta/groups/sleeptime_multi_agent_v3.py +17 -17
  25. letta/groups/supervisor_multi_agent.py +84 -80
  26. letta/helpers/converters.py +3 -0
  27. letta/helpers/message_helper.py +4 -0
  28. letta/helpers/tool_rule_solver.py +92 -5
  29. letta/interfaces/anthropic_streaming_interface.py +409 -0
  30. letta/interfaces/gemini_streaming_interface.py +296 -0
  31. letta/interfaces/openai_streaming_interface.py +752 -1
  32. letta/llm_api/anthropic_client.py +126 -16
  33. letta/llm_api/bedrock_client.py +4 -2
  34. letta/llm_api/deepseek_client.py +4 -1
  35. letta/llm_api/google_vertex_client.py +123 -42
  36. letta/llm_api/groq_client.py +4 -1
  37. letta/llm_api/llm_api_tools.py +11 -4
  38. letta/llm_api/llm_client_base.py +6 -2
  39. letta/llm_api/openai.py +32 -2
  40. letta/llm_api/openai_client.py +423 -18
  41. letta/llm_api/xai_client.py +4 -1
  42. letta/main.py +9 -5
  43. letta/memory.py +1 -0
  44. letta/orm/__init__.py +1 -1
  45. letta/orm/agent.py +10 -0
  46. letta/orm/block.py +7 -16
  47. letta/orm/blocks_agents.py +8 -2
  48. letta/orm/files_agents.py +2 -0
  49. letta/orm/job.py +7 -5
  50. letta/orm/mcp_oauth.py +1 -0
  51. letta/orm/message.py +21 -6
  52. letta/orm/organization.py +2 -0
  53. letta/orm/provider.py +6 -2
  54. letta/orm/run.py +71 -0
  55. letta/orm/sandbox_config.py +7 -1
  56. letta/orm/sqlalchemy_base.py +0 -306
  57. letta/orm/step.py +6 -5
  58. letta/orm/step_metrics.py +5 -5
  59. letta/otel/tracing.py +28 -3
  60. letta/plugins/defaults.py +4 -4
  61. letta/prompts/system_prompts/__init__.py +2 -0
  62. letta/prompts/system_prompts/letta_v1.py +25 -0
  63. letta/schemas/agent.py +3 -2
  64. letta/schemas/agent_file.py +9 -3
  65. letta/schemas/block.py +23 -10
  66. letta/schemas/enums.py +21 -2
  67. letta/schemas/job.py +17 -4
  68. letta/schemas/letta_message_content.py +71 -2
  69. letta/schemas/letta_stop_reason.py +5 -5
  70. letta/schemas/llm_config.py +53 -3
  71. letta/schemas/memory.py +1 -1
  72. letta/schemas/message.py +504 -117
  73. letta/schemas/openai/responses_request.py +64 -0
  74. letta/schemas/providers/__init__.py +2 -0
  75. letta/schemas/providers/anthropic.py +16 -0
  76. letta/schemas/providers/ollama.py +115 -33
  77. letta/schemas/providers/openrouter.py +52 -0
  78. letta/schemas/providers/vllm.py +2 -1
  79. letta/schemas/run.py +48 -42
  80. letta/schemas/step.py +2 -2
  81. letta/schemas/step_metrics.py +1 -1
  82. letta/schemas/tool.py +15 -107
  83. letta/schemas/tool_rule.py +88 -5
  84. letta/serialize_schemas/marshmallow_agent.py +1 -0
  85. letta/server/db.py +86 -408
  86. letta/server/rest_api/app.py +61 -10
  87. letta/server/rest_api/dependencies.py +14 -0
  88. letta/server/rest_api/redis_stream_manager.py +19 -8
  89. letta/server/rest_api/routers/v1/agents.py +364 -292
  90. letta/server/rest_api/routers/v1/blocks.py +14 -20
  91. letta/server/rest_api/routers/v1/identities.py +45 -110
  92. letta/server/rest_api/routers/v1/internal_templates.py +21 -0
  93. letta/server/rest_api/routers/v1/jobs.py +23 -6
  94. letta/server/rest_api/routers/v1/messages.py +1 -1
  95. letta/server/rest_api/routers/v1/runs.py +126 -85
  96. letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
  97. letta/server/rest_api/routers/v1/tools.py +281 -594
  98. letta/server/rest_api/routers/v1/voice.py +1 -1
  99. letta/server/rest_api/streaming_response.py +29 -29
  100. letta/server/rest_api/utils.py +122 -64
  101. letta/server/server.py +160 -887
  102. letta/services/agent_manager.py +236 -919
  103. letta/services/agent_serialization_manager.py +16 -0
  104. letta/services/archive_manager.py +0 -100
  105. letta/services/block_manager.py +211 -168
  106. letta/services/file_manager.py +1 -1
  107. letta/services/files_agents_manager.py +24 -33
  108. letta/services/group_manager.py +0 -142
  109. letta/services/helpers/agent_manager_helper.py +7 -2
  110. letta/services/helpers/run_manager_helper.py +85 -0
  111. letta/services/job_manager.py +96 -411
  112. letta/services/lettuce/__init__.py +6 -0
  113. letta/services/lettuce/lettuce_client_base.py +86 -0
  114. letta/services/mcp_manager.py +38 -6
  115. letta/services/message_manager.py +165 -362
  116. letta/services/organization_manager.py +0 -36
  117. letta/services/passage_manager.py +0 -345
  118. letta/services/provider_manager.py +0 -80
  119. letta/services/run_manager.py +301 -0
  120. letta/services/sandbox_config_manager.py +0 -234
  121. letta/services/step_manager.py +62 -39
  122. letta/services/summarizer/summarizer.py +9 -7
  123. letta/services/telemetry_manager.py +0 -16
  124. letta/services/tool_executor/builtin_tool_executor.py +35 -0
  125. letta/services/tool_executor/core_tool_executor.py +397 -2
  126. letta/services/tool_executor/files_tool_executor.py +3 -3
  127. letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
  128. letta/services/tool_executor/tool_execution_manager.py +6 -8
  129. letta/services/tool_executor/tool_executor_base.py +3 -3
  130. letta/services/tool_manager.py +85 -339
  131. letta/services/tool_sandbox/base.py +24 -13
  132. letta/services/tool_sandbox/e2b_sandbox.py +16 -1
  133. letta/services/tool_schema_generator.py +123 -0
  134. letta/services/user_manager.py +0 -99
  135. letta/settings.py +20 -4
  136. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
  137. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
  138. letta/agents/temporal/activities/__init__.py +0 -4
  139. letta/agents/temporal/activities/example_activity.py +0 -7
  140. letta/agents/temporal/activities/prepare_messages.py +0 -10
  141. letta/agents/temporal/temporal_agent_workflow.py +0 -56
  142. letta/agents/temporal/types.py +0 -25
  143. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
  144. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
  145. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
letta/schemas/openai/responses_request.py ADDED
@@ -0,0 +1,64 @@
+ from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+
+ from openai import NOT_GIVEN
+ from openai.types import Metadata, Reasoning, ResponsesModel
+
+ # from openai._types import Headers, Query, Body
+ from openai.types.responses import (
+     ResponseIncludable,
+     ResponseInputParam,
+     ResponsePromptParam,
+     ResponseTextConfigParam,
+     ToolParam,
+     response_create_params,
+ )
+
+ # import httpx
+ from pydantic import BaseModel, Field
+
+
+ class ResponsesRequest(BaseModel):
+     background: Optional[bool] = Field(default=NOT_GIVEN)
+     include: Optional[List[ResponseIncludable]] = Field(default=NOT_GIVEN)
+     input: Optional[Union[str, ResponseInputParam]] = Field(default=NOT_GIVEN)
+     instructions: Optional[str] = Field(default=NOT_GIVEN)
+     max_output_tokens: Optional[int] = Field(default=NOT_GIVEN)
+     max_tool_calls: Optional[int] = Field(default=NOT_GIVEN)
+     metadata: Optional[Metadata] = Field(default=NOT_GIVEN)
+     model: Optional[ResponsesModel] = Field(default=NOT_GIVEN)
+     parallel_tool_calls: Optional[bool] = Field(default=NOT_GIVEN)
+     previous_response_id: Optional[str] = Field(default=NOT_GIVEN)
+     prompt: Optional[ResponsePromptParam] = Field(default=NOT_GIVEN)
+     prompt_cache_key: Optional[str] = Field(default=NOT_GIVEN)
+     reasoning: Optional[Reasoning] = Field(default=NOT_GIVEN)
+     safety_identifier: Optional[str] = Field(default=NOT_GIVEN)
+     service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = Field(default=NOT_GIVEN)
+     store: Optional[bool] = Field(default=NOT_GIVEN)
+     stream: Optional[Literal[False]] = Field(default=NOT_GIVEN)
+     stream_options: Optional[response_create_params.StreamOptions] = Field(default=NOT_GIVEN)
+     temperature: Optional[float] = Field(default=NOT_GIVEN)
+     text: Optional[ResponseTextConfigParam] = Field(default=NOT_GIVEN)
+     tool_choice: Optional[response_create_params.ToolChoice] = Field(default=NOT_GIVEN)
+     tools: Optional[Iterable[ToolParam]] = Field(default=NOT_GIVEN)
+     top_logprobs: Optional[int] = Field(default=NOT_GIVEN)
+     top_p: Optional[float] = Field(default=NOT_GIVEN)
+     truncation: Optional[Literal["auto", "disabled"]] = Field(default=NOT_GIVEN)
+     user: Optional[str] = Field(default=NOT_GIVEN)
+     # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+     # The extra values given here take precedence over values defined on the client or passed to this method.
+     # extra_headers: Headers | None = (None,)
+     # extra_query: Query | None = (None,)
+     # extra_body: Body | None = (None,)
+     # timeout: float | httpx.Timeout | None | NotGiven = (NOT_GIVEN,)
+
+     def model_dump(self, **kwargs) -> Dict[str, Any]:
+         """Custom model_dump that properly serializes complex OpenAI types for JSON compatibility."""
+         # Force JSON mode to ensure full serialization of complex OpenAI types
+         # This prevents SerializationIterator objects from being created
+         kwargs["mode"] = "json"
+
+         # Get the JSON-serialized dump
+         data = super().model_dump(**kwargs)
+
+         # The API expects dicts, which JSON mode provides
+         return data
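
A minimal usage sketch of the new ResponsesRequest (the model id and the use of exclude_unset are illustrative assumptions, not part of the diff):

    # Hypothetical usage: build a request, then dump it to JSON-safe dicts.
    request = ResponsesRequest(
        model="gpt-4.1",          # illustrative model id
        input="Hello, world",
        temperature=0.2,
    )
    payload = request.model_dump(exclude_unset=True)  # mode="json" is forced internally
    # payload now holds only plain dicts/lists/scalars, safe for json.dumps or an HTTP body

Forcing mode="json" makes Pydantic fully materialize nested OpenAI param types rather than leaving lazy serializer objects in the output.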
letta/schemas/providers/__init__.py CHANGED
@@ -14,6 +14,7 @@ from .lmstudio import LMStudioOpenAIProvider
  from .mistral import MistralProvider
  from .ollama import OllamaProvider
  from .openai import OpenAIProvider
+ from .openrouter import OpenRouterProvider
  from .together import TogetherProvider
  from .vllm import VLLMProvider
  from .xai import XAIProvider
@@ -42,4 +43,5 @@ __all__ = [
      "TogetherProvider",
      "VLLMProvider",  # Replaces ChatCompletions and Completions
      "XAIProvider",
+     "OpenRouterProvider",
  ]
letta/schemas/providers/anthropic.py CHANGED
@@ -67,6 +67,11 @@ MODEL_LIST = [
          "name": "claude-sonnet-4-20250514",
          "context_window": 200000,
      },
+     # 4.5
+     {
+         "name": "claude-sonnet-4-5-20250929",
+         "context_window": 200000,
+     },
      ## Haiku
      # 3.0
      {
@@ -143,6 +148,17 @@ class AnthropicProvider(Provider):
                  warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                  model["context_window"] = 200000

+             # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
+             try:
+                 from letta.settings import model_settings
+
+                 if model_settings.anthropic_sonnet_1m and (
+                     model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
+                 ):
+                     model["context_window"] = 1_000_000
+             except Exception:
+                 pass
+
              max_tokens = 8192
              if "claude-3-opus" in model["id"]:
                  max_tokens = 4096
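
Read in isolation, the override reduces to this hedged sketch (the model dict is illustrative; anthropic_sonnet_1m is the settings flag the diff reads):

    from letta.settings import model_settings

    model = {"id": "claude-sonnet-4-5-20250929", "context_window": 200000}
    if model_settings.anthropic_sonnet_1m and model["id"].startswith("claude-sonnet-4"):
        model["context_window"] = 1_000_000  # opt-in 1M-token window for Sonnet 4/4.5

Note that the startswith("claude-sonnet-4-5") branch in the diff is already subsumed by startswith("claude-sonnet-4"), so the condensed condition above is equivalent.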
letta/schemas/providers/ollama.py CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
  import aiohttp
  from pydantic import Field

- from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
+ from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE
  from letta.log import get_logger
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderCategory, ProviderType
@@ -27,82 +27,163 @@ class OllamaProvider(OpenAIProvider):
          ..., description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
      )

+     @property
+     def raw_base_url(self) -> str:
+         """Base URL for native Ollama /api endpoints (no trailing /v1)."""
+         if self.base_url.endswith("/v1"):
+             return self.base_url[: -len("/v1")]
+         return self.base_url
+
+     @property
+     def openai_compat_base_url(self) -> str:
+         """Base URL with /v1 appended for OpenAI-compatible clients if ever needed.
+
+         Note: We do not use OpenAI chat completions for Ollama, but expose this
+         helper to clarify intent and avoid duplicating logic elsewhere.
+         """
+         return self.base_url if self.base_url.endswith("/v1") else f"{self.base_url.rstrip('/')}" + "/v1"
+
      async def list_llm_models_async(self) -> list[LLMConfig]:
-         """List available LLM Models from Ollama
+         """List available LLM Models from Ollama.
+
+         Note: Older Ollama versions do not expose a "capabilities" field on /api/show.
+         We therefore avoid filtering on capabilities and instead infer support from
+         /api/show model_info (falling back to safe defaults).

-         https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models"""
-         endpoint = f"{self.base_url}/api/tags"
+         https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+         """
+         endpoint = f"{self.raw_base_url}/api/tags"
          async with aiohttp.ClientSession() as session:
              async with session.get(endpoint) as response:
                  if response.status != 200:
-                     raise Exception(f"Failed to list Ollama models: {response.text}")
+                     # aiohttp: .text() is async
+                     error_text = await response.text()
+                     raise Exception(f"Failed to list Ollama models: {response.status} - {error_text}")
                  response_json = await response.json()

-         configs = []
-         for model in response_json.get("models", []):
-             model_name = model["name"]
-             model_details = await self._get_model_details_async(model_name)
-             if not model_details or "completion" not in model_details.get("capabilities", []):
+         configs: list[LLMConfig] = []
+         for m in response_json.get("models", []):
+             model_name = m.get("name")
+             if not model_name:
                  continue

-             context_window = None
-             model_info = model_details.get("model_info", {})
-             if architecture := model_info.get("general.architecture"):
-                 if context_length := model_info.get(f"{architecture}.context_length"):
-                     context_window = int(context_length)
+             # Use /api/show to check capabilities, specifically tools support
+             details = await self._get_model_details_async(model_name)
+             if not details:
+                 # If details cannot be fetched, skip to avoid tool errors later
+                 continue
+             caps = details.get("capabilities") or []
+             if not isinstance(caps, list):
+                 caps = []
+             if "tools" not in [str(c).lower() for c in caps]:
+                 # Only include models that declare tools support
+                 continue

+             # Derive context window from /api/show model_info if available
+             context_window = None
+             model_info = details.get("model_info", {}) if isinstance(details, dict) else {}
+             architecture = model_info.get("general.architecture") if isinstance(model_info, dict) else None
+             if architecture:
+                 ctx_len = model_info.get(f"{architecture}.context_length")
+                 if ctx_len is not None:
+                     try:
+                         context_window = int(ctx_len)
+                     except Exception:
+                         context_window = None
              if context_window is None:
-                 logger.warning(f"Ollama model {model_name} has no context window, using default {DEFAULT_CONTEXT_WINDOW}")
+                 logger.warning(f"Ollama model {model_name} has no context window in /api/show, using default {DEFAULT_CONTEXT_WINDOW}")
                  context_window = DEFAULT_CONTEXT_WINDOW

+             # === Capability stubs ===
+             # Compute support flags from /api/show capabilities. These are not
+             # yet plumbed through LLMConfig, but are captured here for later use.
+             caps_lower = [str(c).lower() for c in caps]
+             supports_tools = "tools" in caps_lower
+             supports_thinking = "thinking" in caps_lower
+             supports_vision = "vision" in caps_lower
+             supports_completion = "completion" in caps_lower
+             _ = (supports_tools, supports_thinking, supports_vision, supports_completion)
+
              configs.append(
+                 # Legacy Ollama using raw generate
+                 # LLMConfig(
+                 #     model=model_name,
+                 #     model_endpoint_type="ollama",
+                 #     model_endpoint=self.openai_compat_base_url,
+                 #     model_wrapper=self.default_prompt_formatter,
+                 #     context_window=context_window,
+                 #     # Ollama specific
+                 #     handle=self.get_handle(model_name),
+                 #     provider_name=self.name,
+                 #     provider_category=self.provider_category,
+                 # )
+                 # New "trust Ollama" version w/ pure OpenAI proxy
                  LLMConfig(
                      model=model_name,
-                     model_endpoint_type=ProviderType.ollama,
-                     model_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
-                     model_wrapper=self.default_prompt_formatter,
+                     model_endpoint_type="openai",
+                     model_endpoint=self.openai_compat_base_url,
+                     # model_wrapper=self.default_prompt_formatter,
                      context_window=context_window,
                      handle=self.get_handle(model_name),
                      provider_name=self.name,
                      provider_category=self.provider_category,
+                     # put_inner_thoughts_in_kwargs=True,
+                     # enable_reasoner=supports_thinking,
                  )
              )
          return configs

      async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
-         """List available embedding models from Ollama
+         """List available embedding models from Ollama.
+
+         We infer embedding support via model_info.*.embedding_length when available.

          https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
          """
-         endpoint = f"{self.base_url}/api/tags"
+         endpoint = f"{self.raw_base_url}/api/tags"
          async with aiohttp.ClientSession() as session:
              async with session.get(endpoint) as response:
                  if response.status != 200:
-                     raise Exception(f"Failed to list Ollama models: {response.text}")
+                     error_text = await response.text()
+                     raise Exception(f"Failed to list Ollama models: {response.status} - {error_text}")
                  response_json = await response.json()

-         configs = []
+         configs: list[EmbeddingConfig] = []
          for model in response_json.get("models", []):
              model_name = model["name"]
              model_details = await self._get_model_details_async(model_name)
-             if not model_details or "embedding" not in model_details.get("capabilities", []):
+
+             if not model_details:
+                 continue
+
+             # Filter to true embedding models via capabilities
+             caps = model_details.get("capabilities") or []
+             if not isinstance(caps, list):
+                 caps = []
+             if "embedding" not in [str(c).lower() for c in caps]:
                  continue

              embedding_dim = None
              model_info = model_details.get("model_info", {})
-             if architecture := model_info.get("general.architecture"):
-                 if embedding_length := model_info.get(f"{architecture}.embedding_length"):
-                     embedding_dim = int(embedding_length)
+             architecture = model_info.get("general.architecture")
+             if architecture:
+                 embedding_length = model_info.get(f"{architecture}.embedding_length")
+                 if embedding_length is not None:
+                     try:
+                         embedding_dim = int(embedding_length)
+                     except Exception:
+                         pass

              if not embedding_dim:
-                 logger.warning(f"Ollama model {model_name} has no embedding dimension, using default {DEFAULT_EMBEDDING_DIM}")
-                 embedding_dim = DEFAULT_EMBEDDING_DIM
+                 # Skip models without a reported embedding dimension to avoid DB dimension mismatches
+                 continue

              configs.append(
                  EmbeddingConfig(
                      embedding_model=model_name,
-                     embedding_endpoint_type=ProviderType.ollama,
-                     embedding_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
+                     # Use OpenAI-compatible proxy for embeddings
+                     embedding_endpoint_type=ProviderType.openai,
+                     embedding_endpoint=self.openai_compat_base_url,
                      embedding_dim=embedding_dim,
                      embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
                      handle=self.get_handle(model_name, is_embedding=True),
@@ -112,11 +193,12 @@ class OllamaProvider(OpenAIProvider):

      async def _get_model_details_async(self, model_name: str) -> dict | None:
          """Get detailed information for a specific model from /api/show."""
-         endpoint = f"{self.base_url}/api/show"
+         endpoint = f"{self.raw_base_url}/api/show"
          payload = {"name": model_name}

          try:
-             async with aiohttp.ClientSession() as session:
+             timeout = aiohttp.ClientTimeout(total=2.0)
+             async with aiohttp.ClientSession(timeout=timeout) as session:
                  async with session.post(endpoint, json=payload) as response:
                      if response.status != 200:
                          error_text = await response.text()
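
A self-contained sketch of the URL normalization the new properties implement (stub class for illustration; the real properties live on OllamaProvider):

    class _UrlDemo:
        def __init__(self, base_url: str):
            self.base_url = base_url

        @property
        def raw_base_url(self) -> str:
            # strip a trailing /v1 so native Ollama /api endpoints resolve
            return self.base_url[: -len("/v1")] if self.base_url.endswith("/v1") else self.base_url

        @property
        def openai_compat_base_url(self) -> str:
            # ensure exactly one /v1 suffix for the OpenAI-compatible proxy
            return self.base_url if self.base_url.endswith("/v1") else self.base_url.rstrip("/") + "/v1"

    assert _UrlDemo("http://localhost:11434").raw_base_url == "http://localhost:11434"
    assert _UrlDemo("http://localhost:11434/v1").raw_base_url == "http://localhost:11434"
    assert _UrlDemo("http://localhost:11434").openai_compat_base_url == "http://localhost:11434/v1"

Either form of base_url therefore works: /api/tags and /api/show always hit the native API, while the emitted LLM and embedding configs always point at the /v1 proxy.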
letta/schemas/providers/openrouter.py ADDED
@@ -0,0 +1,52 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+ from letta.log import get_logger
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+ logger = get_logger(__name__)
+
+ # ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
+ # DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
+ # DEFAULT_EMBEDDING_BATCH_SIZE = 1024
+
+
+ class OpenRouterProvider(OpenAIProvider):
+     provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     api_key: str = Field(..., description="API key for the OpenRouter API.")
+     base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.")
+     handle_base: str | None = Field(None, description="Custom handle base name for model handles (e.g., 'custom' instead of 'openrouter').")
+
+     def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
+         """
+         This handles filtering out LLM Models by provider that meet Letta's requirements.
+         """
+         configs = []
+         for model in data:
+             check = self._do_model_checks_for_name_and_context_size(model)
+             if check is None:
+                 continue
+             model_name, context_window_size = check
+
+             handle = self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name)
+
+             config = LLMConfig(
+                 model=model_name,
+                 model_endpoint_type="openai",
+                 model_endpoint=self.base_url,
+                 context_window=context_window_size,
+                 handle=handle,
+                 provider_name=self.name,
+                 provider_category=self.provider_category,
+             )
+
+             config = self._set_model_parameter_tuned_defaults(model_name, config)
+             configs.append(config)
+
+         return configs
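
A hedged construction sketch (field values are placeholders; name is inherited from the Provider base class):

    provider = OpenRouterProvider(
        name="openrouter",     # illustrative
        api_key="sk-or-...",   # placeholder key
        handle_base=None,      # None keeps default "openrouter/<model>" handles
    )
    # _list_llm_models() filters the /models payload to entries that pass the
    # name/context-window checks, then applies per-model tuned defaults.

handle_base mirrors the field added to VLLMProvider below, letting deployments rebrand model handles without subclassing.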
letta/schemas/providers/vllm.py CHANGED
@@ -23,6 +23,7 @@ class VLLMProvider(Provider):
      default_prompt_formatter: str | None = Field(
          default=None, description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
      )
+     handle_base: str | None = Field(None, description="Custom handle base name for model handles (e.g., 'custom' instead of 'vllm').")

      async def list_llm_models_async(self) -> list[LLMConfig]:
          from letta.llm_api.openai import openai_get_model_list_async
@@ -43,7 +44,7 @@ class VLLMProvider(Provider):
                  model_endpoint=base_url,
                  model_wrapper=self.default_prompt_formatter,
                  context_window=model["max_model_len"],
-                 handle=self.get_handle(model_name),
+                 handle=self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name),
                  provider_name=self.name,
                  provider_category=self.provider_category,
              )
letta/schemas/run.py CHANGED
@@ -1,62 +1,68 @@
+ from datetime import datetime
  from typing import Optional

- from pydantic import Field
+ from pydantic import ConfigDict, Field

- from letta.schemas.enums import JobType
- from letta.schemas.job import Job, JobBase, LettaRequestConfig
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.schemas.enums import RunStatus
+ from letta.schemas.job import LettaRequestConfig
+ from letta.schemas.letta_base import LettaBase
  from letta.schemas.letta_stop_reason import StopReasonType


- class RunBase(JobBase):
-     """Base class for Run schemas that inherits from JobBase but uses 'run' prefix for IDs"""
-
+ class RunBase(LettaBase):
      __id_prefix__ = "run"
-     job_type: JobType = JobType.RUN


  class Run(RunBase):
      """
-     Representation of a run, which is a job with a 'run' prefix in its ID.
-     Inherits all fields and behavior from Job except for the ID prefix.
+     Representation of a run - a conversation or processing session for an agent.
+     Runs track when agents process messages and maintain the relationship between agents, steps, and messages.

      Parameters:
          id (str): The unique identifier of the run (prefixed with 'run-').
-         status (JobStatus): The status of the run.
-         created_at (datetime): The unix timestamp of when the run was created.
-         completed_at (datetime): The unix timestamp of when the run was completed.
-         user_id (str): The unique identifier of the user associated with the run.
+         status (JobStatus): The current status of the run.
+         created_at (datetime): The timestamp when the run was created.
+         completed_at (datetime): The timestamp when the run was completed.
+         agent_id (str): The unique identifier of the agent associated with the run.
+         stop_reason (StopReasonType): The reason why the run was stopped.
+         background (bool): Whether the run was created in background mode.
+         metadata (dict): Additional metadata for the run.
+         request_config (LettaRequestConfig): The request configuration for the run.
      """

      id: str = RunBase.generate_id_field()
-     user_id: Optional[str] = Field(None, description="The unique identifier of the user associated with the run.")
+
+     # Core run fields
+     status: RunStatus = Field(default=RunStatus.created, description="The current status of the run.")
+     created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the run was created.")
+     completed_at: Optional[datetime] = Field(None, description="The timestamp when the run was completed.")
+
+     # Agent relationship
+     agent_id: str = Field(..., description="The unique identifier of the agent associated with the run.")
+
+     # Run configuration
+     background: Optional[bool] = Field(None, description="Whether the run was created in background mode.")
+     metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Additional metadata for the run.")
      request_config: Optional[LettaRequestConfig] = Field(None, description="The request configuration for the run.")
      stop_reason: Optional[StopReasonType] = Field(None, description="The reason why the run was stopped.")

-     @classmethod
-     def from_job(cls, job: Job) -> "Run":
-         """
-         Convert a Job instance to a Run instance by replacing the ID prefix.
-         All other fields are copied as-is.
-
-         Args:
-             job: The Job instance to convert
-
-         Returns:
-             A new Run instance with the same data but 'run-' prefix in ID
-         """
-         # Convert job dict to exclude None values
-         job_data = job.model_dump(exclude_none=True)
-
-         # Create new Run instance with converted data
-         return cls(**job_data)
-
-     def to_job(self) -> Job:
-         """
-         Convert this Run instance to a Job instance by replacing the ID prefix.
-         All other fields are copied as-is.
-
-         Returns:
-             A new Job instance with the same data but 'job-' prefix in ID
-         """
-         run_data = self.model_dump(exclude_none=True)
-         return Job(**run_data)
+     # Callback configuration
+     callback_url: Optional[str] = Field(None, description="If set, POST to this URL when the run completes.")
+     callback_sent_at: Optional[datetime] = Field(None, description="Timestamp when the callback was last attempted.")
+     callback_status_code: Optional[int] = Field(None, description="HTTP status code returned by the callback endpoint.")
+     callback_error: Optional[str] = Field(None, description="Optional error message from attempting to POST the callback endpoint.")
+
+     # Timing metrics (in nanoseconds for precision)
+     ttft_ns: Optional[int] = Field(None, description="Time to first token for a run in nanoseconds")
+     total_duration_ns: Optional[int] = Field(None, description="Total run duration in nanoseconds")
+
+
+ class RunUpdate(RunBase):
+     """Update model for Run."""
+
+     status: Optional[RunStatus] = Field(None, description="The status of the run.")
+     completed_at: Optional[datetime] = Field(None, description="The timestamp when the run was completed.")
+     stop_reason: Optional[StopReasonType] = Field(None, description="The reason why the run was stopped.")
+     metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Additional metadata for the run.")
+     model_config = ConfigDict(extra="ignore")  # Ignores extra fields
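
A minimal construction sketch for the reworked schema (the agent id is a placeholder):

    run = Run(
        agent_id="agent-123",  # placeholder id
        background=True,
    )
    # id is generated with the "run-" prefix; status defaults to RunStatus.created
    # and created_at to the current UTC time via get_utc_time.

Because RunUpdate sets extra="ignore", partial update payloads carrying unrelated keys validate cleanly instead of raising.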
letta/schemas/step.py CHANGED
@@ -18,8 +18,8 @@ class Step(StepBase):
      origin: Optional[str] = Field(None, description="The surface that this agent step was initiated from.")
      organization_id: Optional[str] = Field(None, description="The unique identifier of the organization associated with the step.")
      provider_id: Optional[str] = Field(None, description="The unique identifier of the provider that was configured for this step")
-     job_id: Optional[str] = Field(
-         None, description="The unique identifier of the job that this step belongs to. Only included for async calls."
+     run_id: Optional[str] = Field(
+         None, description="The unique identifier of the run that this step belongs to. Only included for async calls."
      )
      agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
      provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")

letta/schemas/step_metrics.py CHANGED
@@ -13,7 +13,7 @@ class StepMetrics(StepMetricsBase):
      id: str = Field(..., description="The id of the step this metric belongs to (matches steps.id).")
      organization_id: Optional[str] = Field(None, description="The unique identifier of the organization.")
      provider_id: Optional[str] = Field(None, description="The unique identifier of the provider.")
-     job_id: Optional[str] = Field(None, description="The unique identifier of the job.")
+     run_id: Optional[str] = Field(None, description="The unique identifier of the run.")
      agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
      step_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the step in nanoseconds.")
      llm_request_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the llm request in nanoseconds.")