PyPI - llama-stack - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (458) hide show

llama_stack/core/routing_tables/models.py CHANGED Viewed

@@ -7,13 +7,23 @@
 import time
 from typing import Any
-from llama_stack.apis.common.errors import ModelNotFoundError
-from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
+from llama_stack.core.access_control.access_control import is_action_allowed
 from llama_stack.core.datatypes import (
     ModelWithOwner,
     RegistryEntrySource,
 )
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData, get_authenticated_user
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
+from llama_stack_api import (
+    ListModelsResponse,
+    Model,
+    ModelNotFoundError,
+    Models,
+    ModelType,
+    OpenAIListModelsResponse,
+    OpenAIModel,
+)
 from .common import CommonRoutingTableImpl, lookup_model
@@ -42,19 +52,122 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
             await self.update_registered_models(provider_id, models)
+    async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
+        """
+        Fetch models from providers that have credentials in the current request's provider_data.
+        This allows users to see models available to them from providers that require
+        per-request API keys (via X-LlamaStack-Provider-Data header).
+        Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
+        cache them in the registry since they are user-specific.
+        """
+        provider_data = PROVIDER_DATA_VAR.get()
+        if not provider_data:
+            return []
+        dynamic_models = []
+        user = get_authenticated_user()
+        for provider_id, provider in self.impls_by_provider_id.items():
+            # Check if this provider supports provider_data
+            if not isinstance(provider, NeedsRequestProviderData):
+                continue
+            # Check if provider has a validator (some providers like ollama don't need per-request credentials)
+            spec = getattr(provider, "__provider_spec__", None)
+            if not spec or not getattr(spec, "provider_data_validator", None):
+                continue
+            # Validate provider_data silently - we're speculatively checking all providers
+            # so validation failures are expected when user didn't provide keys for this provider
+            try:
+                validator = instantiate_class_type(spec.provider_data_validator)
+                validator(**provider_data)
+            except Exception:
+                # User didn't provide credentials for this provider - skip silently
+                continue
+            # Validation succeeded! User has credentials for this provider
+            # Now try to list models
+            try:
+                models = await provider.list_models()
+                if not models:
+                    continue
+                # Ensure models have fully qualified identifiers and apply RBAC filtering
+                for model in models:
+                    # Only add prefix if model identifier doesn't already have it
+                    if not model.identifier.startswith(f"{provider_id}/"):
+                        model.identifier = f"{provider_id}/{model.provider_resource_id}"
+                    # Convert to ModelWithOwner for RBAC check
+                    temp_model = ModelWithOwner(
+                        identifier=model.identifier,
+                        provider_id=provider_id,
+                        provider_resource_id=model.provider_resource_id,
+                        model_type=model.model_type,
+                        metadata=model.metadata,
+                    )
+                    # Apply RBAC check - only include models user has read permission for
+                    if is_action_allowed(self.policy, "read", temp_model, user):
+                        dynamic_models.append(model)
+                    else:
+                        logger.debug(
+                            f"Access denied to dynamic model '{model.identifier}' for user {user.principal if user else 'anonymous'}"
+                        )
+                logger.debug(
+                    f"Fetched {len(dynamic_models)} accessible models from provider {provider_id} using provider_data"
+                )
+            except Exception as e:
+                logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
+                continue
+        return dynamic_models
     async def list_models(self) -> ListModelsResponse:
-        return ListModelsResponse(data=await self.get_all_with_type("model"))
+        # Get models from registry
+        registry_models = await self.get_all_with_type("model")
+        # Get additional models available via provider_data (user-specific, not cached)
+        dynamic_models = await self._get_dynamic_models_from_provider_data()
+        # Combine, avoiding duplicates (registry takes precedence)
+        registry_identifiers = {m.identifier for m in registry_models}
+        unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
+        return ListModelsResponse(data=registry_models + unique_dynamic_models)
     async def openai_list_models(self) -> OpenAIListModelsResponse:
-        models = await self.get_all_with_type("model")
+        # Get models from registry
+        registry_models = await self.get_all_with_type("model")
+        # Get additional models available via provider_data (user-specific, not cached)
+        dynamic_models = await self._get_dynamic_models_from_provider_data()
+        # Combine, avoiding duplicates (registry takes precedence)
+        registry_identifiers = {m.identifier for m in registry_models}
+        unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
+        all_models = registry_models + unique_dynamic_models
         openai_models = [
             OpenAIModel(
                 id=model.identifier,
                 object="model",
                 created=int(time.time()),
                 owned_by="llama_stack",
+                custom_metadata={
+                    "model_type": model.model_type,
+                    "provider_id": model.provider_id,
+                    "provider_resource_id": model.provider_resource_id,
+                    **model.metadata,
+                },
             )
-            for model in models
+            for model in all_models
         ]
         return OpenAIListModelsResponse(data=openai_models)
@@ -130,7 +243,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
         existing_models = await self.get_all_with_type("model")
         # we may have an alias for the model registered by the user (or during initialization
-        # from run.yaml) that we need to keep track of
+        # from config.yaml) that we need to keep track of
         model_ids = {}
         for model in existing_models:
             if model.provider_id != provider_id:

llama_stack/core/routing_tables/scoring_functions.py CHANGED Viewed

@@ -4,18 +4,18 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.resource import ResourceType
-from llama_stack.apis.scoring_functions import (
+from llama_stack.core.datatypes import (
+    ScoringFnWithOwner,
+)
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListScoringFunctionsResponse,
+    ParamType,
+    ResourceType,
     ScoringFn,
     ScoringFnParams,
     ScoringFunctions,
 )
-from llama_stack.core.datatypes import (
-    ScoringFnWithOwner,
-)
-from llama_stack.log import get_logger
 from .common import CommonRoutingTableImpl

llama_stack/core/routing_tables/shields.py CHANGED Viewed

@@ -6,12 +6,11 @@
 from typing import Any
-from llama_stack.apis.resource import ResourceType
-from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
 from .common import CommonRoutingTableImpl

llama_stack/core/routing_tables/toolgroups.py CHANGED Viewed

@@ -6,11 +6,17 @@
 from typing import Any
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.errors import ToolGroupNotFoundError
-from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
 from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
 from llama_stack.log import get_logger
+from llama_stack_api import (
+    URL,
+    ListToolDefsResponse,
+    ListToolGroupsResponse,
+    ToolDef,
+    ToolGroup,
+    ToolGroupNotFoundError,
+    ToolGroups,
+)
 from .common import CommonRoutingTableImpl
@@ -43,7 +49,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
             routing_key = self.tool_to_toolgroup[routing_key]
         return await super().get_provider_impl(routing_key, provider_id)
-    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
+    async def list_tools(
+        self, toolgroup_id: str | None = None, authorization: str | None = None
+    ) -> ListToolDefsResponse:
         if toolgroup_id:
             if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id):
                 toolgroup_id = group_id
@@ -55,7 +63,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         for toolgroup in toolgroups:
             if toolgroup.identifier not in self.toolgroups_to_tools:
                 try:
-                    await self._index_tools(toolgroup)
+                    await self._index_tools(toolgroup, authorization=authorization)
                 except AuthenticationRequiredError:
                     # Send authentication errors back to the client so it knows
                     # that it needs to supply credentials for remote MCP servers.
@@ -70,9 +78,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         return ListToolDefsResponse(data=all_tools)
-    async def _index_tools(self, toolgroup: ToolGroup):
+    async def _index_tools(self, toolgroup: ToolGroup, authorization: str | None = None):
         provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id)
-        tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint)
+        tooldefs_response = await provider_impl.list_runtime_tools(
+            toolgroup.identifier, toolgroup.mcp_endpoint, authorization=authorization
+        )
         tooldefs = tooldefs_response.data
         for t in tooldefs:

llama_stack/core/routing_tables/vector_stores.py CHANGED Viewed

@@ -6,26 +6,31 @@
 from typing import Any
-from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack.apis.models import ModelType
-from llama_stack.apis.resource import ResourceType
+from llama_stack.core.datatypes import (
+    VectorStoreWithOwner,
+)
+from llama_stack.log import get_logger
 # Removed VectorStores import to avoid exposing public API
-from llama_stack.apis.vector_io.vector_io import (
+from llama_stack_api import (
+    EmbeddedChunk,
+    InterleavedContent,
+    ModelNotFoundError,
+    ModelType,
+    ModelTypeError,
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
+    QueryChunksResponse,
+    ResourceType,
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFileStatus,
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
-from llama_stack.core.datatypes import (
-    VectorStoreWithOwner,
-)
-from llama_stack.log import get_logger
 from .common import CommonRoutingTableImpl, lookup_model
@@ -39,6 +44,15 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
     Only provides internal routing functionality for VectorIORouter.
     """
+    def __init__(
+        self,
+        impls_by_provider_id: dict[str, Any],
+        dist_registry: Any,
+        policy: list[Any],
+    ) -> None:
+        super().__init__(impls_by_provider_id, dist_registry, policy)
+        self.vector_io_router = None  # Will be set post-instantiation
     # Internal methods only - no public API exposure
     async def register_vector_store(
@@ -77,6 +91,26 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         await self.register_object(vector_store)
         return vector_store
+    async def insert_chunks(
+        self,
+        vector_store_id: str,
+        chunks: list[EmbeddedChunk],
+        ttl_seconds: int | None = None,
+    ) -> None:
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.insert_chunks(vector_store_id, chunks, ttl_seconds)
+    async def query_chunks(
+        self,
+        vector_store_id: str,
+        query: InterleavedContent,
+        params: dict[str, Any] | None = None,
+    ) -> QueryChunksResponse:
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.query_chunks(vector_store_id, query, params)
     async def openai_retrieve_vector_store(
         self,
         vector_store_id: str,
@@ -195,12 +229,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
+    ) -> VectorStoreFileContentResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
             vector_store_id=vector_store_id,
             file_id=file_id,
+            include_embeddings=include_embeddings,
+            include_metadata=include_metadata,
         )
     async def openai_update_vector_store_file(
@@ -232,17 +271,13 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
     async def openai_create_vector_store_file_batch(
         self,
         vector_store_id: str,
-        file_ids: list[str],
-        attributes: dict[str, Any] | None = None,
-        chunking_strategy: Any | None = None,
+        params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     ):
         await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_create_vector_store_file_batch(
             vector_store_id=vector_store_id,
-            file_ids=file_ids,
-            attributes=attributes,
-            chunking_strategy=chunking_strategy,
+            params=params,
         )
     async def openai_retrieve_vector_store_file_batch(

llama_stack/core/server/auth.py CHANGED Viewed

@@ -28,9 +28,11 @@ class AuthenticationMiddleware:
     4. Makes these attributes available to the route handlers for access control
     Unauthenticated Access:
-    Endpoints can opt out of authentication by setting require_authentication=False
-    in their @webmethod decorator. This is typically used for operational endpoints
-    like /health and /version to support monitoring, load balancers, and observability tools.
+    Endpoints can opt out of authentication by:
+    - For legacy @webmethod routes: setting require_authentication=False in the decorator
+    - For FastAPI router routes: setting openapi_extra={PUBLIC_ROUTE_KEY: True}
+    This is typically used for operational endpoints like /health and /version to support
+    monitoring, load balancers, and observability tools.
     The middleware supports multiple authentication providers through the AuthProvider interface:
     - Kubernetes: Validates tokens against the Kubernetes API server

llama_stack/core/server/auth_providers.py CHANGED Viewed

@@ -6,13 +6,13 @@
 import ssl
 from abc import ABC, abstractmethod
+from typing import Any
 from urllib.parse import parse_qs, urljoin, urlparse
 import httpx
 import jwt
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.errors import TokenValidationError
 from llama_stack.core.datatypes import (
     AuthenticationConfig,
     CustomAuthConfig,
@@ -22,6 +22,7 @@ from llama_stack.core.datatypes import (
     User,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import TokenValidationError
 logger = get_logger(name=__name__, category="core::auth")
@@ -143,14 +144,21 @@ class OAuth2TokenAuthProvider(AuthProvider):
             if self.config.jwks and self.config.jwks.token:
                 headers["Authorization"] = f"Bearer {self.config.jwks.token}"
-            self._jwks_client = jwt.PyJWKClient(
-                self.config.jwks.uri if self.config.jwks else None,
-                cache_keys=True,
-                max_cached_keys=10,
-                lifespan=self.config.jwks.key_recheck_period if self.config.jwks else None,
-                headers=headers,
-                ssl_context=ssl_context,
-            )
+            # Ensure uri is not None for PyJWKClient
+            if not self.config.jwks or not self.config.jwks.uri:
+                raise ValueError("JWKS configuration requires a valid URI")
+            # Build kwargs conditionally to avoid passing None values
+            jwks_kwargs: dict[str, Any] = {
+                "cache_keys": True,
+                "max_cached_keys": 10,
+                "headers": headers,
+                "ssl_context": ssl_context,
+            }
+            if self.config.jwks.key_recheck_period is not None:
+                jwks_kwargs["lifespan"] = self.config.jwks.key_recheck_period
+            self._jwks_client = jwt.PyJWKClient(self.config.jwks.uri, **jwks_kwargs)
         return self._jwks_client
     async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User:
@@ -197,23 +205,31 @@ class OAuth2TokenAuthProvider(AuthProvider):
         if self.config.introspection is None:
             raise ValueError("Introspection is not configured")
+        # ssl_ctxt can be None, bool, str, or SSLContext - httpx accepts all
+        ssl_ctxt: ssl.SSLContext | bool = False  # Default to no verification if no cafile
+        if self.config.tls_cafile:
+            ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
+        # Build post kwargs conditionally based on auth method
+        post_kwargs: dict[str, Any] = {
+            "url": self.config.introspection.url,
+            "data": form,
+            "timeout": 10.0,
+        }
         if self.config.introspection.send_secret_in_body:
             form["client_id"] = self.config.introspection.client_id
             form["client_secret"] = self.config.introspection.client_secret
-            auth = None
         else:
-            auth = (self.config.introspection.client_id, self.config.introspection.client_secret)
-        ssl_ctxt = None
-        if self.config.tls_cafile:
-            ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
+            # httpx auth parameter expects tuple[str | bytes, str | bytes]
+            post_kwargs["auth"] = (
+                self.config.introspection.client_id,
+                self.config.introspection.client_secret,
+            )
         try:
             async with httpx.AsyncClient(verify=ssl_ctxt) as client:
-                response = await client.post(
-                    self.config.introspection.url,
-                    data=form,
-                    auth=auth,
-                    timeout=10.0,  # Add a reasonable timeout
-                )
+                response = await client.post(**post_kwargs)
                 if response.status_code != httpx.codes.OK:
                     logger.warning(f"Token introspection failed with status code: {response.status_code}")
                     raise ValueError(f"Token introspection failed: {response.status_code}")

llama_stack/core/server/fastapi_router_registry.py ADDED Viewed

@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+"""Router utilities for FastAPI routers.
+This module provides utilities to create FastAPI routers from API packages.
+APIs with routers are explicitly listed here.
+"""
+from collections.abc import Callable
+from typing import Any, cast
+from fastapi import APIRouter
+from fastapi.routing import APIRoute
+from llama_stack_api import admin, batches, benchmarks, datasets, files, inspect_api, providers
+# Router factories for APIs that have FastAPI routers
+# Add new APIs here as they are migrated to the router system
+from llama_stack_api.datatypes import Api
+_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
+    "admin": admin.fastapi_routes.create_router,
+    "batches": batches.fastapi_routes.create_router,
+    "benchmarks": benchmarks.fastapi_routes.create_router,
+    "datasets": datasets.fastapi_routes.create_router,
+    "providers": providers.fastapi_routes.create_router,
+    "inspect": inspect_api.fastapi_routes.create_router,
+    "files": files.fastapi_routes.create_router,
+}
+def has_router(api: "Api") -> bool:
+    """Check if an API has a router factory.
+    Args:
+        api: The API enum value
+    Returns:
+        True if the API has a router factory, False otherwise
+    """
+    return api.value in _ROUTER_FACTORIES
+def build_fastapi_router(api: "Api", impl: Any) -> APIRouter | None:
+    """Build a router for an API by combining its router factory with the implementation.
+    Args:
+        api: The API enum value
+        impl: The implementation instance for the API
+    Returns:
+        APIRouter if the API has a router factory, None otherwise
+    """
+    router_factory = _ROUTER_FACTORIES.get(api.value)
+    if router_factory is None:
+        return None
+    # cast is safe here: all router factories in API packages are required to return APIRouter.
+    # If a router factory returns the wrong type, it will fail at runtime when
+    # app.include_router(router) is called
+    return cast(APIRouter, router_factory(impl))
+def get_router_routes(router: APIRouter) -> list[APIRoute]:
+    """Extract APIRoute objects from a FastAPI router.
+    Args:
+        router: The FastAPI router to extract routes from
+    Returns:
+        List of APIRoute objects from the router (preserves tags and other metadata)
+    """
+    routes = []
+    for route in router.routes:
+        # FastAPI routers use APIRoute objects, which have path, methods, tags, etc.
+        if isinstance(route, APIRoute):
+            routes.append(route)
+    return routes

llama_stack/core/server/quota.py CHANGED Viewed

@@ -11,9 +11,9 @@ from datetime import UTC, datetime, timedelta
 from starlette.types import ASGIApp, Receive, Scope, Send
 from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
+from llama_stack.core.storage.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
+from llama_stack_api.internal.kvstore import KVStore
 logger = get_logger(name=__name__, category="core::server")

llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl