PyPI - llama-stack - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (458) hide show

llama_stack/core/inspect.py CHANGED Viewed

@@ -8,21 +8,28 @@ from importlib.metadata import version
 from pydantic import BaseModel
-from llama_stack.apis.inspect import (
+from llama_stack.core.datatypes import StackConfig
+from llama_stack.core.distribution import builtin_automatically_routed_apis
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.server.fastapi_router_registry import (
+    _ROUTER_FACTORIES,
+    build_fastapi_router,
+    get_router_routes,
+)
+from llama_stack.core.server.routes import get_all_api_routes
+from llama_stack_api import (
+    Api,
     HealthInfo,
+    HealthStatus,
     Inspect,
     ListRoutesResponse,
     RouteInfo,
     VersionInfo,
 )
-from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.external import load_external_apis
-from llama_stack.core.server.routes import get_all_api_routes
-from llama_stack.providers.datatypes import HealthStatus
 class DistributionInspectConfig(BaseModel):
-    run_config: StackRunConfig
+    config: StackConfig
 async def get_provider_impl(config, deps):
@@ -33,19 +40,95 @@ async def get_provider_impl(config, deps):
 class DistributionInspectImpl(Inspect):
     def __init__(self, config: DistributionInspectConfig, deps):
-        self.config = config
+        self.stack_config = config.config
         self.deps = deps
     async def initialize(self) -> None:
         pass
-    async def list_routes(self) -> ListRoutesResponse:
-        run_config: StackRunConfig = self.config.run_config
+    async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
+        config: StackConfig = self.stack_config
+        # Helper function to determine if a route should be included based on api_filter
+        # TODO: remove this once we've migrated all APIs to FastAPI routers
+        def should_include_route(webmethod) -> bool:
+            if api_filter is None:
+                # Default: only non-deprecated APIs
+                return not webmethod.deprecated
+            elif api_filter == "deprecated":
+                # Special filter: show deprecated routes regardless of their actual level
+                return bool(webmethod.deprecated)
+            else:
+                # Filter by API level (non-deprecated routes only)
+                return not webmethod.deprecated and webmethod.level == api_filter
+        # Helper function to get provider types for an API
+        def _get_provider_types(api: Api) -> list[str]:
+            if api.value in ["providers", "inspect"]:
+                return []  # These APIs don't have "real" providers - they're internal to the stack
+            # For routing table APIs, look up providers from their router API
+            # (e.g., benchmarks -> eval, models -> inference, etc.)
+            auto_routed_apis = builtin_automatically_routed_apis()
+            for auto_routed in auto_routed_apis:
+                if auto_routed.routing_table_api == api:
+                    # This is a routing table API, use its router API for providers
+                    providers = config.providers.get(auto_routed.router_api.value, [])
+                    return [p.provider_type for p in providers] if providers else []
+            # Regular API, look up providers directly
+            providers = config.providers.get(api.value, [])
+            return [p.provider_type for p in providers] if providers else []
+        # Helper function to determine if a router route should be included based on api_filter
+        def _should_include_router_route(route, router_prefix: str | None) -> bool:
+            """Check if a router-based route should be included based on api_filter."""
+            # Check deprecated status
+            route_deprecated = getattr(route, "deprecated", False) or False
+            if api_filter is None:
+                # Default: only non-deprecated routes
+                return not route_deprecated
+            elif api_filter == "deprecated":
+                # Special filter: show deprecated routes regardless of their actual level
+                return route_deprecated
+            else:
+                # Filter by API level (non-deprecated routes only)
+                # Extract level from router prefix (e.g., "/v1" -> "v1")
+                if router_prefix:
+                    prefix_level = router_prefix.lstrip("/")
+                    return not route_deprecated and prefix_level == api_filter
+                return not route_deprecated
         ret = []
-        external_apis = load_external_apis(run_config)
+        external_apis = load_external_apis(config)
         all_endpoints = get_all_api_routes(external_apis)
+        # Process routes from APIs with FastAPI routers
+        for api_name in _ROUTER_FACTORIES.keys():
+            api = Api(api_name)
+            router = build_fastapi_router(api, None)  # we don't need the impl here, just the routes
+            if router:
+                router_routes = get_router_routes(router)
+                for route in router_routes:
+                    if _should_include_router_route(route, router.prefix):
+                        if route.methods is not None:
+                            available_methods = [m for m in route.methods if m != "HEAD"]
+                            if available_methods:
+                                ret.append(
+                                    RouteInfo(
+                                        route=route.path,
+                                        method=available_methods[0],
+                                        provider_types=_get_provider_types(api),
+                                    )
+                                )
+        # Process routes from legacy webmethod-based APIs
         for api, endpoints in all_endpoints.items():
+            # Skip APIs that have routers (already processed above)
+            if api.value in _ROUTER_FACTORIES:
+                continue
             # Always include provider and inspect APIs, filter others based on run config
             if api.value in ["providers", "inspect"]:
                 ret.extend(
@@ -55,12 +138,12 @@ class DistributionInspectImpl(Inspect):
                             method=next(iter([m for m in e.methods if m != "HEAD"])),
                             provider_types=[],  # These APIs don't have "real" providers - they're internal to the stack
                         )
-                        for e, _ in endpoints
-                        if e.methods is not None
+                        for e, webmethod in endpoints
+                        if e.methods is not None and should_include_route(webmethod)
                     ]
                 )
             else:
-                providers = run_config.providers.get(api.value, [])
+                providers = config.providers.get(api.value, [])
                 if providers:  # Only process if there are providers for this API
                     ret.extend(
                         [
@@ -69,8 +152,8 @@ class DistributionInspectImpl(Inspect):
                                 method=next(iter([m for m in e.methods if m != "HEAD"])),
                                 provider_types=[p.provider_type for p in providers],
                             )
-                            for e, _ in endpoints
-                            if e.methods is not None
+                            for e, webmethod in endpoints
+                            if e.methods is not None and should_include_route(webmethod)
                         ]
                     )

llama_stack/core/library_client.py CHANGED Viewed

@@ -10,6 +10,7 @@ import json
 import logging  # allow-direct-logging
 import os
 import sys
+import typing
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
@@ -18,38 +19,37 @@ from typing import Any, TypeVar, Union, get_args, get_origin
 import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
-from llama_stack_client import (
-    NOT_GIVEN,
-    APIResponse,
-    AsyncAPIResponse,
-    AsyncLlamaStackClient,
-    AsyncStream,
-    LlamaStackClient,
-)
+from llama_stack.core.utils.type_inspection import is_unwrapped_body_param
+try:
+    from llama_stack_client import (
+        NOT_GIVEN,
+        APIResponse,
+        AsyncAPIResponse,
+        AsyncLlamaStackClient,
+        AsyncStream,
+        LlamaStackClient,
+    )
+except ImportError as e:
+    raise ImportError(
+        "llama-stack-client is not installed. Please install it with `uv pip install llama-stack[client]`."
+    ) from e
 from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from termcolor import cprint
 from llama_stack.core.build import print_pip_install_help
 from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
-from llama_stack.core.request_headers import (
-    PROVIDER_DATA_VAR,
-    request_provider_data_context,
-)
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context
 from llama_stack.core.resolver import ProviderRegistry
 from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
-from llama_stack.core.stack import (
-    Stack,
-    get_stack_run_config_from_distro,
-    replace_env_vars,
-)
+from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
 from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.core.utils.exec import in_notebook
 from llama_stack.log import get_logger, setup_logging
-from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
-from llama_stack.strong_typing.inspection import is_unwrapped_body_param
 logger = get_logger(name=__name__, category="core")
@@ -202,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         super().__init__()
         # Initialize logging from environment variables first
         setup_logging()
-        # when using the library client, we should not log to console since many
-        # of our logs are intended for server-side usage
-        if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
-            current_sinks = sinks_from_env.strip().lower().split(",")
-            os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
         if in_notebook():
             import nest_asyncio
@@ -264,20 +257,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 file=sys.stderr,
             )
             if self.config_path_or_distro_name.endswith(".yaml"):
-                providers: dict[str, list[BuildProvider]] = {}
-                for api, run_providers in self.config.providers.items():
-                    for provider in run_providers:
-                        providers.setdefault(api, []).append(
-                            BuildProvider(provider_type=provider.provider_type, module=provider.module)
-                        )
-                providers = dict(providers)
-                build_config = BuildConfig(
-                    distribution_spec=DistributionSpec(
-                        providers=providers,
-                    ),
-                    external_providers_dir=self.config.external_providers_dir,
-                )
-                print_pip_install_help(build_config)
+                print_pip_install_help(self.config)
             else:
                 prefix = "!" if in_notebook() else ""
                 cprint(
@@ -293,8 +273,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             raise _e
         assert self.impls is not None
-        if Api.telemetry in self.impls:
-            setup_logger(self.impls[Api.telemetry])
         if not os.environ.get("PYTEST_CURRENT_TEST"):
             console = Console()
@@ -381,16 +359,16 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
         body |= path_params
+        # Pass through params that aren't already handled as path params
+        if options.params:
+            extra_query_params = {k: v for k, v in options.params.items() if k not in path_params}
+            if extra_query_params:
+                body["extra_query"] = extra_query_params
         body, field_names = self._handle_file_uploads(options, body)
         body = self._convert_body(matched_func, body, exclude_params=set(field_names))
-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-        try:
-            result = await matched_func(**body)
-        finally:
-            await end_trace()
+        result = await matched_func(**body)
         # Handle FastAPI Response objects (e.g., from file content retrieval)
         if isinstance(result, FastAPIResponse):
@@ -449,19 +427,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         # Prepare body for the function call (handles both Pydantic and traditional params)
         body = self._convert_body(func, body)
-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
         async def gen():
-            try:
-                async for chunk in await func(**body):
-                    data = json.dumps(convert_pydantic_to_json_value(chunk))
-                    sse_event = f"data: {data}\n\n"
-                    yield sse_event.encode("utf-8")
-            finally:
-                await end_trace()
+            async for chunk in await func(**body):
+                data = json.dumps(convert_pydantic_to_json_value(chunk))
+                sse_event = f"data: {data}\n\n"
+                yield sse_event.encode("utf-8")
-        wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
+        wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
         mock_response = httpx.Response(
             status_code=httpx.codes.OK,
@@ -519,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 unwrapped_body_param = param
                 break
+        # Check for parameters with Depends() annotation (FastAPI router endpoints)
+        # These need special handling: construct the request model from body
+        depends_param = None
+        for param in params_list:
+            param_type = param.annotation
+            if get_origin(param_type) is typing.Annotated:
+                args = get_args(param_type)
+                if len(args) > 1:
+                    # Check if any metadata is Depends
+                    metadata = args[1:]
+                    for item in metadata:
+                        # Check if it's a Depends object (has dependency attribute or is a callable)
+                        # Depends objects typically have a 'dependency' attribute or are callable functions
+                        if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)):
+                            depends_param = param
+                            break
+                if depends_param:
+                    break
         # Convert parameters to Pydantic models where needed
         converted_body = {}
         for param_name, param in sig.parameters.items():
@@ -529,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 else:
                     converted_body[param_name] = convert_to_pydantic(param.annotation, value)
+        # Handle Depends parameter: construct request model from body
+        if depends_param and depends_param.name not in converted_body:
+            param_type = depends_param.annotation
+            if get_origin(param_type) is typing.Annotated:
+                base_type = get_args(param_type)[0]
+                # Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
+                # In Python 3.10+, Union types created with | syntax are still typing.Union
+                origin = get_origin(base_type)
+                if origin is Union:
+                    # Get the first non-None type from the Union
+                    union_args = get_args(base_type)
+                    base_type = next(
+                        (t for t in union_args if t is not type(None) and t is not None),
+                        union_args[0] if union_args else None,
+                    )
+                # Only try to instantiate if it's a class (not a Union or other non-callable type)
+                if base_type is not None and inspect.isclass(base_type) and callable(base_type):
+                    # Construct the request model from all body parameters
+                    converted_body[depends_param.name] = base_type(**body)
         # handle unwrapped body parameter after processing all named parameters
         if unwrapped_body_param:
             base_type = get_args(unwrapped_body_param.annotation)[0]

llama_stack/core/prompts/prompts.py CHANGED Viewed

@@ -9,10 +9,9 @@ from typing import Any
 from pydantic import BaseModel
-from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
-from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
+from llama_stack.core.datatypes import StackConfig
+from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
+from llama_stack_api import ListPromptsResponse, Prompt, Prompts
 class PromptServiceConfig(BaseModel):
@@ -21,7 +20,7 @@ class PromptServiceConfig(BaseModel):
     :param run_config: Stack run configuration containing distribution info
     """
-    run_config: StackRunConfig
+    config: StackConfig
 async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
@@ -35,16 +34,15 @@ class PromptServiceImpl(Prompts):
     """Built-in prompt service implementation using KVStore."""
     def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
-        self.config = config
+        self.stack_config = config.config
         self.deps = deps
         self.kvstore: KVStore
     async def initialize(self) -> None:
-        # Use metadata store backend with prompts-specific namespace
-        metadata_ref = self.config.run_config.storage.stores.metadata
-        if not metadata_ref:
-            raise ValueError("storage.stores.metadata must be configured in run config")
-        prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
+        # Use prompts store reference from run config
+        prompts_ref = self.stack_config.storage.stores.prompts
+        if not prompts_ref:
+            raise ValueError("storage.stores.prompts must be configured in run config")
         self.kvstore = await kvstore_impl(prompts_ref)
     def _get_default_key(self, prompt_id: str) -> str:
@@ -232,3 +230,6 @@ class PromptServiceImpl(Prompts):
         await self.kvstore.set(default_key, str(version))
         return self._deserialize_prompt(data)
+    async def shutdown(self) -> None:
+        pass

llama_stack/core/providers.py CHANGED Viewed

@@ -9,18 +9,24 @@ from typing import Any
 from pydantic import BaseModel
-from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus
-from .datatypes import StackRunConfig
+from llama_stack_api import (
+    HealthResponse,
+    HealthStatus,
+    InspectProviderRequest,
+    ListProvidersResponse,
+    ProviderInfo,
+    Providers,
+)
+from .datatypes import StackConfig
 from .utils.config import redact_sensitive_fields
 logger = get_logger(name=__name__, category="core")
 class ProviderImplConfig(BaseModel):
-    run_config: StackRunConfig
+    config: StackConfig
 async def get_provider_impl(config, deps):
@@ -31,7 +37,7 @@ async def get_provider_impl(config, deps):
 class ProviderImpl(Providers):
     def __init__(self, config, deps):
-        self.config = config
+        self.stack_config = config.config
         self.deps = deps
     async def initialize(self) -> None:
@@ -42,8 +48,8 @@ class ProviderImpl(Providers):
         pass
     async def list_providers(self) -> ListProvidersResponse:
-        run_config = self.config.run_config
-        safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
+        run_config = self.stack_config
+        safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump()))
         providers_health = await self.get_providers_health()
         ret = []
         for api, providers in safe_config.providers.items():
@@ -68,13 +74,13 @@ class ProviderImpl(Providers):
         return ListProvidersResponse(data=ret)
-    async def inspect_provider(self, provider_id: str) -> ProviderInfo:
+    async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
         all_providers = await self.list_providers()
         for p in all_providers.data:
-            if p.provider_id == provider_id:
+            if p.provider_id == request.provider_id:
                 return p
-        raise ValueError(f"Provider {provider_id} not found")
+        raise ValueError(f"Provider {request.provider_id} not found")
     async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]:
         """Get health status for all providers.

llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl