PyPI - llama-stack - Versions diffs - 0.2.20__tar.gz → 0.2.22__tar.gz - Mend

llama-stack 0.2.20__tar.gz → 0.2.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (661) hide show

{llama_stack-0.2.20/llama_stack.egg-info → llama_stack-0.2.22}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.2.20
+Version: 0.2.22
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -23,13 +23,12 @@ Requires-Dist: httpx
 Requires-Dist: huggingface-hub<1.0,>=0.34.0
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client>=0.2.20
-Requires-Dist: llama-api-client>=0.1.2
-Requires-Dist: openai<1.100.0,>=1.99.6
+Requires-Dist: llama-stack-client>=0.2.22
+Requires-Dist: openai>=1.100.0
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
 Requires-Dist: python-jose[cryptography]
-Requires-Dist: pydantic>=2
+Requires-Dist: pydantic>=2.11.9
 Requires-Dist: rich
 Requires-Dist: starlette
 Requires-Dist: termcolor
@@ -45,7 +44,7 @@ Requires-Dist: asyncpg
 Provides-Extra: ui
 Requires-Dist: streamlit; extra == "ui"
 Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client>=0.2.20; extra == "ui"
+Requires-Dist: llama-stack-client>=0.2.22; extra == "ui"
 Requires-Dist: streamlit-option-menu; extra == "ui"
 Dynamic: license-file

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/apis/benchmarks/benchmarks.py RENAMED Viewed

@@ -93,3 +93,11 @@ class Benchmarks(Protocol):
         :param metadata: The metadata to use for the benchmark.
         """
         ...
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark.
+        :param benchmark_id: The ID of the benchmark to unregister.
+        """
+        ...

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/apis/common/errors.py RENAMED Viewed

@@ -79,3 +79,10 @@ class ConflictError(ValueError):
     def __init__(self, message: str) -> None:
         super().__init__(message)
+class TokenValidationError(ValueError):
+    """raised when token validation fails during authentication"""
+    def __init__(self, message: str) -> None:
+        super().__init__(message)

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/apis/datatypes.py RENAMED Viewed

@@ -102,6 +102,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     :cvar benchmarks: Benchmark suite management
     :cvar tool_groups: Tool group organization
     :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
     :cvar inspect: Built-in system inspection and introspection
     """
@@ -127,6 +128,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     benchmarks = "benchmarks"
     tool_groups = "tool_groups"
     files = "files"
+    prompts = "prompts"
     # built-in API
     inspect = "inspect"

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/apis/files/files.py RENAMED Viewed

@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 from enum import StrEnum
-from typing import Annotated, Literal, Protocol, runtime_checkable
+from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
 from fastapi import File, Form, Response, UploadFile
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from llama_stack.apis.common.responses import Order
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
@@ -49,6 +49,23 @@ class OpenAIFileObject(BaseModel):
     purpose: OpenAIFilePurpose
+@json_schema_type
+class ExpiresAfter(BaseModel):
+    """
+    Control expiration of uploaded files.
+    Params:
+     - anchor, must be "created_at"
+     - seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
+    """
+    MIN: ClassVar[int] = 3600  # 1 hour
+    MAX: ClassVar[int] = 2592000  # 30 days
+    anchor: Literal["created_at"]
+    seconds: int = Field(..., ge=3600, le=2592000)
 @json_schema_type
 class ListOpenAIFileResponse(BaseModel):
     """
@@ -92,6 +109,9 @@ class Files(Protocol):
         self,
         file: Annotated[UploadFile, File()],
         purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
+        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
+        # TODO: expires_after is producing strange openapi spec, params are showing up as a required w/ oneOf being null
     ) -> OpenAIFileObject:
         """
         Upload a file that can be used across various endpoints.
@@ -99,6 +119,7 @@ class Files(Protocol):
         The file upload should be a multipart form request with:
         - file: The File object (not file name) to be uploaded.
         - purpose: The intended purpose of the uploaded file.
+        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
         :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
         :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").

llama_stack-0.2.22/llama_stack/apis/prompts/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from .prompts import ListPromptsResponse, Prompt, Prompts
+__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]

llama_stack-0.2.22/llama_stack/apis/prompts/prompts.py ADDED Viewed

@@ -0,0 +1,189 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import re
+import secrets
+from typing import Protocol, runtime_checkable
+from pydantic import BaseModel, Field, field_validator, model_validator
+from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.schema_utils import json_schema_type, webmethod
+@json_schema_type
+class Prompt(BaseModel):
+    """A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack.
+    :param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API.
+    :param version: Version (integer starting at 1, incremented on save)
+    :param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>'
+    :param variables: List of prompt variable names that can be used in the prompt template
+    :param is_default: Boolean indicating whether this version is the default version for this prompt
+    """
+    prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
+    version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1)
+    prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
+    variables: list[str] = Field(
+        default_factory=list, description="List of variable names that can be used in the prompt template"
+    )
+    is_default: bool = Field(
+        default=False, description="Boolean indicating whether this version is the default version"
+    )
+    @field_validator("prompt_id")
+    @classmethod
+    def validate_prompt_id(cls, prompt_id: str) -> str:
+        if not isinstance(prompt_id, str):
+            raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")
+        if not prompt_id.startswith("pmpt_"):
+            raise ValueError("prompt_id must start with 'pmpt_' prefix")
+        hex_part = prompt_id[5:]
+        if len(hex_part) != 48:
+            raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")
+        for char in hex_part:
+            if char not in "0123456789abcdef":
+                raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")
+        return prompt_id
+    @field_validator("version")
+    @classmethod
+    def validate_version(cls, prompt_version: int) -> int:
+        if prompt_version < 1:
+            raise ValueError("version must be >= 1")
+        return prompt_version
+    @model_validator(mode="after")
+    def validate_prompt_variables(self):
+        """Validate that all variables used in the prompt are declared in the variables list."""
+        if not self.prompt:
+            return self
+        prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt))
+        declared_variables = set(self.variables)
+        undeclared = prompt_variables - declared_variables
+        if undeclared:
+            raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}")
+        return self
+    @classmethod
+    def generate_prompt_id(cls) -> str:
+        # Generate 48 hex characters (24 bytes)
+        random_bytes = secrets.token_bytes(24)
+        hex_string = random_bytes.hex()
+        return f"pmpt_{hex_string}"
+class ListPromptsResponse(BaseModel):
+    """Response model to list prompts."""
+    data: list[Prompt]
+@runtime_checkable
+@trace_protocol
+class Prompts(Protocol):
+    """Protocol for prompt management operations."""
+    @webmethod(route="/prompts", method="GET")
+    async def list_prompts(self) -> ListPromptsResponse:
+        """List all prompts.
+        :returns: A ListPromptsResponse containing all prompts.
+        """
+        ...
+    @webmethod(route="/prompts/{prompt_id}/versions", method="GET")
+    async def list_prompt_versions(
+        self,
+        prompt_id: str,
+    ) -> ListPromptsResponse:
+        """List all versions of a specific prompt.
+        :param prompt_id: The identifier of the prompt to list versions for.
+        :returns: A ListPromptsResponse containing all versions of the prompt.
+        """
+        ...
+    @webmethod(route="/prompts/{prompt_id}", method="GET")
+    async def get_prompt(
+        self,
+        prompt_id: str,
+        version: int | None = None,
+    ) -> Prompt:
+        """Get a prompt by its identifier and optional version.
+        :param prompt_id: The identifier of the prompt to get.
+        :param version: The version of the prompt to get (defaults to latest).
+        :returns: A Prompt resource.
+        """
+        ...
+    @webmethod(route="/prompts", method="POST")
+    async def create_prompt(
+        self,
+        prompt: str,
+        variables: list[str] | None = None,
+    ) -> Prompt:
+        """Create a new prompt.
+        :param prompt: The prompt text content with variable placeholders.
+        :param variables: List of variable names that can be used in the prompt template.
+        :returns: The created Prompt resource.
+        """
+        ...
+    @webmethod(route="/prompts/{prompt_id}", method="PUT")
+    async def update_prompt(
+        self,
+        prompt_id: str,
+        prompt: str,
+        version: int,
+        variables: list[str] | None = None,
+        set_as_default: bool = True,
+    ) -> Prompt:
+        """Update an existing prompt (increments version).
+        :param prompt_id: The identifier of the prompt to update.
+        :param prompt: The updated prompt text content.
+        :param version: The current version of the prompt being updated.
+        :param variables: Updated list of variable names that can be used in the prompt template.
+        :param set_as_default: Set the new version as the default (default=True).
+        :returns: The updated Prompt resource with incremented version.
+        """
+        ...
+    @webmethod(route="/prompts/{prompt_id}", method="DELETE")
+    async def delete_prompt(
+        self,
+        prompt_id: str,
+    ) -> None:
+        """Delete a prompt.
+        :param prompt_id: The identifier of the prompt to delete.
+        """
+        ...
+    @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT")
+    async def set_default_version(
+        self,
+        prompt_id: str,
+        version: int,
+    ) -> Prompt:
+        """Set which version of a prompt should be the default in get_prompt (latest).
+        :param prompt_id: The identifier of the prompt.
+        :param version: The version to set as default.
+        :returns: The prompt with the specified version now set as default.
+        """
+        ...

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/apis/resource.py RENAMED Viewed

@@ -19,6 +19,7 @@ class ResourceType(StrEnum):
     benchmark = "benchmark"
     tool = "tool"
     tool_group = "tool_group"
+    prompt = "prompt"
 class Resource(BaseModel):

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/apis/scoring_functions/scoring_functions.py RENAMED Viewed

@@ -197,3 +197,11 @@ class ScoringFunctions(Protocol):
         :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
         """
         ...
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        """Unregister a scoring function.
+        :param scoring_fn_id: The ID of the scoring function to unregister.
+        """
+        ...

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/cli/stack/_build.py RENAMED Viewed

@@ -45,6 +45,7 @@ from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.exec import formulate_run_args, run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
@@ -294,6 +295,12 @@ def _generate_run_config(
         if build_config.external_providers_dir
         else EXTERNAL_PROVIDERS_DIR,
     )
+    if not run_config.inference_store:
+        run_config.inference_store = SqliteSqlStoreConfig(
+            **SqliteSqlStoreConfig.sample_run_config(
+                __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
+            )
+        )
     # build providers dict
     provider_registry = get_provider_registry(build_config)
     for api in apis:

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/cli/verify_download.py RENAMED Viewed

@@ -48,15 +48,12 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
     parser.set_defaults(func=partial(run_verify_cmd, parser=parser))
-def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
-    # NOTE: MD5 is used here only for download integrity verification,
-    # not for security purposes
-    # TODO: switch to SHA256
-    md5_hash = hashlib.md5(usedforsecurity=False)
+def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
+    sha256_hash = hashlib.sha256()
     with open(filepath, "rb") as f:
         for chunk in iter(lambda: f.read(chunk_size), b""):
-            md5_hash.update(chunk)
-    return md5_hash.hexdigest()
+            sha256_hash.update(chunk)
+    return sha256_hash.hexdigest()
 def load_checksums(checklist_path: Path) -> dict[str, str]:
@@ -64,10 +61,10 @@ def load_checksums(checklist_path: Path) -> dict[str, str]:
     with open(checklist_path) as f:
         for line in f:
             if line.strip():
-                md5sum, filepath = line.strip().split("  ", 1)
+                sha256sum, filepath = line.strip().split("  ", 1)
                 # Remove leading './' if present
                 filepath = filepath.lstrip("./")
-                checksums[filepath] = md5sum
+                checksums[filepath] = sha256sum
     return checksums
@@ -88,7 +85,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
             matches = False
             if exists:
-                actual_hash = calculate_md5(full_path)
+                actual_hash = calculate_sha256(full_path)
                 matches = actual_hash == expected_hash
             results.append(

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/core/datatypes.py RENAMED Viewed

@@ -7,6 +7,7 @@
 from enum import StrEnum
 from pathlib import Path
 from typing import Annotated, Any, Literal, Self
+from urllib.parse import urlparse
 from pydantic import BaseModel, Field, field_validator, model_validator
@@ -212,6 +213,7 @@ class AuthProviderType(StrEnum):
     OAUTH2_TOKEN = "oauth2_token"
     GITHUB_TOKEN = "github_token"
     CUSTOM = "custom"
+    KUBERNETES = "kubernetes"
 class OAuth2TokenAuthConfig(BaseModel):
@@ -282,8 +284,45 @@ class GitHubTokenAuthConfig(BaseModel):
     )
+class KubernetesAuthProviderConfig(BaseModel):
+    """Configuration for Kubernetes authentication provider."""
+    type: Literal[AuthProviderType.KUBERNETES] = AuthProviderType.KUBERNETES
+    api_server_url: str = Field(
+        default="https://kubernetes.default.svc",
+        description="Kubernetes API server URL (e.g., https://api.cluster.domain:6443)",
+    )
+    verify_tls: bool = Field(default=True, description="Whether to verify TLS certificates")
+    tls_cafile: Path | None = Field(default=None, description="Path to CA certificate file for TLS verification")
+    claims_mapping: dict[str, str] = Field(
+        default_factory=lambda: {
+            "username": "roles",
+            "groups": "roles",
+        },
+        description="Mapping of Kubernetes user claims to access attributes",
+    )
+    @field_validator("api_server_url")
+    @classmethod
+    def validate_api_server_url(cls, v):
+        parsed = urlparse(v)
+        if not parsed.scheme or not parsed.netloc:
+            raise ValueError(f"api_server_url must be a valid URL with scheme and host: {v}")
+        if parsed.scheme not in ["http", "https"]:
+            raise ValueError(f"api_server_url scheme must be http or https: {v}")
+        return v
+    @field_validator("claims_mapping")
+    @classmethod
+    def validate_claims_mapping(cls, v):
+        for key, value in v.items():
+            if not value:
+                raise ValueError(f"claims_mapping value cannot be empty: {key}")
+        return v
 AuthProviderConfig = Annotated[
-    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig,
+    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig | KubernetesAuthProviderConfig,
     Field(discriminator="type"),
 ]
@@ -392,6 +431,12 @@ class ServerConfig(BaseModel):
     )
+class InferenceStoreConfig(BaseModel):
+    sql_store_config: SqlStoreConfig
+    max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
+    num_writers: int = Field(default=4, description="Number of concurrent background writers")
 class StackRunConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION
@@ -425,11 +470,12 @@ Configuration for the persistence store used by the distribution registry. If no
 a default SQLite store will be used.""",
     )
-    inference_store: SqlStoreConfig | None = Field(
+    inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
         default=None,
         description="""
-Configuration for the persistence store used by the inference API. If not specified,
-a default SQLite store will be used.""",
+Configuration for the persistence store used by the inference API. Can be either a
+InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
+If not specified, a default SQLite store will be used.""",
     )
     # registry of "resources" in the distribution

{llama_stack-0.2.20 → llama_stack-0.2.22}/llama_stack/core/library_client.py RENAMED Viewed

@@ -10,7 +10,6 @@ import json
 import logging  # allow-direct-logging
 import os
 import sys
-from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
@@ -148,7 +147,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
         self.async_client = AsyncLlamaStackAsLibraryClient(
             config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal
         )
-        self.pool_executor = ThreadPoolExecutor(max_workers=4)
         self.provider_data = provider_data
         self.loop = asyncio.new_event_loop()

llama-stack 0.2.20__tar.gz → 0.2.22__tar.gz

llama-stack 0.2.20__tar.gz → 0.2.22__tar.gz