llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/server/routes.py

@@ -12,10 +12,14 @@ from typing import Any
 from aiohttp import hdrs
 from starlette.routing import Route
 
-from llama_stack.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
 from llama_stack.core.resolver import api_protocol_map
-from llama_stack.schema_utils import WebMethod
+from llama_stack.core.server.fastapi_router_registry import (
+    _ROUTER_FACTORIES,
+    build_fastapi_router,
+    get_router_routes,
+)
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
+from llama_stack_api.router_utils import PUBLIC_ROUTE_KEY
 
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
@@ -25,33 +29,28 @@ RouteImpls = dict[str, PathImpl]
 RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod]
 
 
-def toolgroup_protocol_map():
-    return {
-        SpecialToolGroup.rag_tool: RAGToolRuntime,
-    }
-
-
 def get_all_api_routes(
     external_apis: dict[Api, ExternalApiSpec] | None = None,
 ) -> dict[Api, list[tuple[Route, WebMethod]]]:
+    """Get all API routes from webmethod-based protocols.
+
+    This function only returns routes from APIs that use the legacy @webmethod
+    decorator system. For APIs that have been migrated to FastAPI routers,
+    use the router registry (fastapi_router_registry.has_router() and fastapi_router_registry.build_fastapi_router()).
+
+    Args:
+        external_apis: Optional dictionary of external API specifications
+
+    Returns:
+        Dictionary mapping API to list of (Route, WebMethod) tuples
+    """
     apis = {}
 
     protocols = api_protocol_map(external_apis)
-    toolgroup_protocols = toolgroup_protocol_map()
     for api, protocol in protocols.items():
         routes = []
         protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction)
 
-        # HACK ALERT
-        if api == Api.tool_runtime:
-            for tool_group in SpecialToolGroup:
-                sub_protocol = toolgroup_protocols[tool_group]
-                sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction)
-                for name, method in sub_protocol_methods:
-                    if not hasattr(method, "__webmethod__"):
-                        continue
-                    protocol_methods.append((f"{tool_group.value}.{name}", method))
-
         for name, method in protocol_methods:
             # Get all webmethods for this method (supports multiple decorators)
             webmethods = getattr(method, "__webmethods__", [])
@@ -68,8 +67,9 @@ def get_all_api_routes(
                 else:
                     http_method = hdrs.METH_POST
                 routes.append(
-                    (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
-                )  # setting endpoint to None since don't use a Router object
+                    # setting endpoint to None since don't use a Router object
+                    (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)  # type: ignore[arg-type]
+                )
 
         apis[api] = routes
 
@@ -91,22 +91,74 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No
 
         return f"^{pattern}$"
 
+    # Process routes from FastAPI routers
+    for api_name in _ROUTER_FACTORIES.keys():
+        api = Api(api_name)
+        if api not in impls:
+            continue
+        impl = impls[api]
+        router = build_fastapi_router(api, impl)
+        if router:
+            router_routes = get_router_routes(router)
+            for route in router_routes:
+                # Get the endpoint function from the route
+                # For FastAPI routes, the endpoint is the actual function
+                func = route.endpoint
+                if func is None:
+                    continue
+
+                # Get the first (and typically only) method from the set, filtering out HEAD
+                available_methods = [m for m in (route.methods or []) if m != "HEAD"]
+                if not available_methods:
+                    continue  # Skip if only HEAD method is available
+                method = available_methods[0].lower()
+
+                if method not in route_impls:
+                    route_impls[method] = {}
+
+                # Create a minimal WebMethod for router routes (needed for RouteMatch tuple)
+                # We don't have webmethod metadata for router routes, so create a minimal one
+                # that has the attributes used by the library client (descriptive_name for tracing)
+                #
+                # TODO: Long-term migration plan (once all APIs are migrated to FastAPI routers):
+                # - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects)
+                # - Pass summary directly in RouteMatch instead of WebMethod
+                # - Remove this WebMethod() instantiation entirely
+                # - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name
+
+                # Routes with openapi_extra[PUBLIC_ROUTE_KEY]=True don't require authentication
+                is_public = (route.openapi_extra or {}).get(PUBLIC_ROUTE_KEY, False)
+                webmethod = WebMethod(
+                    descriptive_name=None,
+                    require_authentication=not is_public,
+                )
+                route_impls[method][_convert_path_to_regex(route.path)] = (
+                    func,
+                    route.path,
+                    webmethod,
+                )
+
+    # Process routes from legacy webmethod-based APIs
     for api, api_routes in api_to_routes.items():
+        # Skip APIs that have routers (already processed above)
+        if api.value in _ROUTER_FACTORIES:
+            continue
+
         if api not in impls:
             continue
-        for route, webmethod in api_routes:
+        for legacy_route, webmethod in api_routes:
             impl = impls[api]
-            func = getattr(impl, route.name)
+            func = getattr(impl, legacy_route.name)
             # Get the first (and typically only) method from the set, filtering out HEAD
-            available_methods = [m for m in route.methods if m != "HEAD"]
+            available_methods = [m for m in (legacy_route.methods or []) if m != "HEAD"]
             if not available_methods:
                 continue  # Skip if only HEAD method is available
             method = available_methods[0].lower()
             if method not in route_impls:
                 route_impls[method] = {}
-            route_impls[method][_convert_path_to_regex(route.path)] = (
+            route_impls[method][_convert_path_to_regex(legacy_route.path)] = (
                 func,
-                route.path,
+                legacy_route.path,
                 webmethod,
             )
 
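The fastapi_router_registry module referenced above is new in 0.4.0 (llama_stack/core/server/fastapi_router_registry.py, +84 -0 in the file list) but its body is not shown in this diff. A rough, illustrative sketch of the registry pattern the route code relies on; the names and signatures below are assumptions, not the package's actual definitions:

# Hypothetical minimal router registry, for illustration only.
from collections.abc import Callable
from typing import Any

from fastapi import APIRouter

_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {}


def register_router_factory(api_name: str, factory: Callable[[Any], APIRouter]) -> None:
    # Each migrated API package registers a factory that wraps its implementation object.
    _ROUTER_FACTORIES[api_name] = factory


def has_router(api_name: str) -> bool:
    return api_name in _ROUTER_FACTORIES


def build_router(api_name: str, impl: Any) -> APIRouter | None:
    # Returns a ready-to-mount APIRouter, or None for APIs still on @webmethod.
    factory = _ROUTER_FACTORIES.get(api_name)
    return factory(impl) if factory is not None else None

With a registry of this shape, initialize_route_impls can enumerate router-backed APIs first and fall back to the legacy webmethod scan for everything else, which is the split the hunk above implements.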
llama_stack/core/server/server.py

@@ -31,13 +31,10 @@ from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
-from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
     AuthenticationRequiredError,
-    LoggingConfig,
-    StackRunConfig,
+    StackConfig,
     process_cors_config,
 )
 from llama_stack.core.distribution import builtin_automatically_routed_apis
@@ -47,6 +44,7 @@ from llama_stack.core.request_headers import (
     request_provider_data_context,
     user_from_scope,
 )
+from llama_stack.core.server.fastapi_router_registry import build_fastapi_router
 from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.core.stack import (
     Stack,
@@ -54,22 +52,13 @@ from llama_stack.core.stack import (
     replace_env_vars,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
+from llama_stack.core.utils.config_resolution import resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
-from llama_stack.log import get_logger, setup_logging
-from llama_stack.providers.datatypes import Api
-from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
-from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
-    TelemetryAdapter,
-)
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    setup_logger,
-)
+from llama_stack.log import LoggingConfig, get_logger
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
-from .tracing import TracingMiddleware
 
 REPO_ROOT = Path(__file__).parent.parent.parent.parent
 
@@ -96,7 +85,7 @@ def create_sse_event(data: Any) -> str:
 
 
 async def global_exception_handler(request: Request, exc: Exception):
-    traceback.print_exception(exc)
+    traceback.print_exception(type(exc), exc, exc.__traceback__)
     http_exc = translate_exception(exc)
 
     return JSONResponse(status_code=http_exc.status_code, content={"error": {"detail": http_exc.detail}})
@@ -158,7 +147,7 @@ class StackApp(FastAPI):
    start background tasks (e.g. refresh model registry periodically) from the lifespan context manager.
    """
 
-    def __init__(self, config: StackRunConfig, *args, **kwargs):
+    def __init__(self, config: StackConfig, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.stack: Stack = Stack(config)
 
@@ -174,7 +163,9 @@ class StackApp(FastAPI):
 
 @asynccontextmanager
 async def lifespan(app: StackApp):
-    logger.info("Starting up")
+    server_version = parse_version("llama-stack")
+
+    logger.info(f"Starting up Llama Stack server (version: {server_version})")
     assert app.stack is not None
     app.stack.create_registry_refresh_task()
     yield
@@ -244,56 +235,36 @@ async def log_request_pre_validation(request: Request):
 def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
     @functools.wraps(func)
     async def route_handler(request: Request, **kwargs):
-        # Get auth attributes from the request scope
-        user = user_from_scope(request.scope)
-
         await log_request_pre_validation(request)
 
-        test_context_token = None
-        test_context_var = None
-        reset_test_context_fn = None
-
-        # Use context manager with both provider data and auth attributes
-        with request_provider_data_context(request.headers, user):
-            if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
-                from llama_stack.core.testing_context import (
-                    TEST_CONTEXT,
-                    reset_test_context,
-                    sync_test_context_from_provider_data,
-                )
+        is_streaming = is_streaming_request(func.__name__, request, **kwargs)
 
-                test_context_token = sync_test_context_from_provider_data()
-                test_context_var = TEST_CONTEXT
-                reset_test_context_fn = reset_test_context
-
-            is_streaming = is_streaming_request(func.__name__, request, **kwargs)
-
-            try:
-                if is_streaming:
-                    context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
-                    if test_context_var is not None:
-                        context_vars.append(test_context_var)
-                    gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
-                    return StreamingResponse(gen, media_type="text/event-stream")
-                else:
-                    value = func(**kwargs)
-                    result = await maybe_await(value)
-                    if isinstance(result, PaginatedResponse) and result.url is None:
-                        result.url = route
-
-                    if method.upper() == "DELETE" and result is None:
-                        return Response(status_code=httpx.codes.NO_CONTENT)
-
-                    return result
-            except Exception as e:
-                if logger.isEnabledFor(logging.INFO):
-                    logger.exception(f"Error executing endpoint {route=} {method=}")
-                else:
-                    logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
-                raise translate_exception(e) from e
-            finally:
-                if test_context_token is not None and reset_test_context_fn is not None:
-                    reset_test_context_fn(test_context_token)
+        try:
+            if is_streaming:
+                # Preserve context vars across async generator boundaries
+                context_vars = [PROVIDER_DATA_VAR]
+                if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
+                    from llama_stack.core.testing_context import TEST_CONTEXT
+
+                    context_vars.append(TEST_CONTEXT)
+                gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
+                return StreamingResponse(gen, media_type="text/event-stream")
+            else:
+                value = func(**kwargs)
+                result = await maybe_await(value)
+                if isinstance(result, PaginatedResponse) and result.url is None:
+                    result.url = route
+
+                if method.upper() == "DELETE" and result is None:
+                    return Response(status_code=httpx.codes.NO_CONTENT)
+
+                return result
+        except Exception as e:
+            if logger.isEnabledFor(logging.INFO):
+                logger.exception(f"Error executing endpoint {route=} {method=}")
+            else:
+                logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
+            raise translate_exception(e) from e
 
     sig = inspect.signature(func)
 
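In the streaming branch above, StreamingResponse drives the SSE generator outside the original request scope, so preserve_contexts_async_generator has to capture the selected context variables and re-apply them around each item. A self-contained sketch of that idea (illustrative only; the actual helper lives in llama_stack.core.utils.context and may differ):

import contextvars
from collections.abc import AsyncGenerator
from typing import Any


async def preserve_contexts(
    gen: AsyncGenerator[Any, None],
    context_vars: list[contextvars.ContextVar],
) -> AsyncGenerator[Any, None]:
    # Capture the values visible where the wrapper is created (inside the request scope).
    captured = {var: var.get(None) for var in context_vars}
    while True:
        # Re-apply the captured values before pulling each item, because the event loop
        # resumes the generator from the response-sending context, not the request's.
        for var, value in captured.items():
            var.set(value)
        try:
            item = await gen.__anext__()
        except StopAsyncIteration:
            break
        yield item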
@@ -365,6 +336,42 @@ class ClientVersionMiddleware:
         return await self.app(scope, receive, send)
 
 
+class ProviderDataMiddleware:
+    """Middleware to set up request context for all routes.
+
+    Sets up provider data context from X-LlamaStack-Provider-Data header
+    and auth attributes. Also handles test context propagation when
+    running in test mode for deterministic ID generation.
+    """
+
+    def __init__(self, app):
+        self.app = app
+
+    async def __call__(self, scope, receive, send):
+        if scope["type"] == "http":
+            headers = {k.decode(): v.decode() for k, v in scope.get("headers", [])}
+            user = user_from_scope(scope)
+
+            with request_provider_data_context(headers, user):
+                test_context_token = None
+                reset_fn = None
+                if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
+                    from llama_stack.core.testing_context import (
+                        reset_test_context,
+                        sync_test_context_from_provider_data,
+                    )
+
+                    test_context_token = sync_test_context_from_provider_data()
+                    reset_fn = reset_test_context
+                try:
+                    return await self.app(scope, receive, send)
+                finally:
+                    if test_context_token and reset_fn:
+                        reset_fn(test_context_token)
+
+        return await self.app(scope, receive, send)
+
+
 def create_app() -> StackApp:
     """Create and configure the FastAPI application.
 
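ProviderDataMiddleware is a plain ASGI middleware, so the provider-data and auth context is established once per request, before routing, rather than inside each generated route handler. A stripped-down sketch of the same pattern, keyed off the X-LlamaStack-Provider-Data header named in the docstring (the contextvar and JSON handling here are illustrative assumptions):

import contextvars
import json

# Illustrative stand-in for the server's provider-data context variable.
PROVIDER_DATA_VAR: contextvars.ContextVar[dict | None] = contextvars.ContextVar("provider_data", default=None)


class HeaderContextMiddleware:
    """Parse a request header into a contextvar for the duration of one request."""

    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            return await self.app(scope, receive, send)
        headers = {k.decode().lower(): v.decode() for k, v in scope.get("headers", [])}
        raw = headers.get("x-llamastack-provider-data")
        # Assumes the header carries a JSON object; real code should validate it.
        token = PROVIDER_DATA_VAR.set(json.loads(raw) if raw else None)
        try:
            return await self.app(scope, receive, send)
        finally:
            PROVIDER_DATA_VAR.reset(token)

Because such a middleware wraps the whole app (see app.add_middleware(ProviderDataMiddleware) below), the context also covers FastAPI-router routes that never pass through create_dynamic_typed_route.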
@@ -374,14 +381,11 @@ def create_app() -> StackApp:
     Returns:
         Configured StackApp instance.
     """
-    # Initialize logging from environment variables first
-    setup_logging()
-
     config_file = os.getenv("LLAMA_STACK_CONFIG")
     if config_file is None:
         raise ValueError("LLAMA_STACK_CONFIG environment variable is required")
 
-    config_file = resolve_config_or_distro(config_file, Mode.RUN)
+    config_file = resolve_config_or_distro(config_file)
 
     # Load and process configuration
     logger_config = None
@@ -392,7 +396,7 @@ def create_app() -> StackApp:
     logger = get_logger(name=__name__, category="core::server", config=logger_config)
 
     config = replace_env_vars(config_contents)
-    config = StackRunConfig(**cast_image_name_to_string(config))
+    config = StackConfig(**cast_image_name_to_string(config))
 
     _log_run_config(run_config=config)
 
@@ -407,6 +411,8 @@
     if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"):
         app.add_middleware(ClientVersionMiddleware)
 
+    app.add_middleware(ProviderDataMiddleware)
+
     impls = app.stack.impls
 
     if config.server.auth:
@@ -448,11 +454,6 @@
     if cors_config:
         app.add_middleware(CORSMiddleware, **cors_config.model_dump())
 
-    if config.telemetry.enabled:
-        setup_logger(impls[Api.telemetry])
-    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
-
     # Load external APIs if configured
     external_apis = load_external_apis(config)
     all_routes = get_all_api_routes(external_apis)
@@ -468,19 +469,27 @@
             continue
         apis_to_serve.add(inf.routing_table_api.value)
 
+    apis_to_serve.add("admin")
     apis_to_serve.add("inspect")
     apis_to_serve.add("providers")
     apis_to_serve.add("prompts")
     apis_to_serve.add("conversations")
+
 
     for api_str in apis_to_serve:
         api = Api(api_str)
 
-        routes = all_routes[api]
-        try:
-            impl = impls[api]
-        except KeyError as e:
-            raise ValueError(f"Could not find provider implementation for {api} API") from e
+        # Try to discover and use a router factory from the API package
+        impl = impls[api]
+        router = build_fastapi_router(api, impl)
+        if router:
+            app.include_router(router)
+            logger.debug(f"Registered FastAPIrouter for {api} API")
+            continue
+
+        # Fall back to old webmethod-based route discovery until the migration is complete
+        impl = impls[api]
 
+        routes = all_routes[api]
         for route, _ in routes:
             if not hasattr(impl, route.name):
@@ -506,17 +515,23 @@
 
     logger.debug(f"serving APIs: {apis_to_serve}")
 
+    # Register specific exception handlers before the generic Exception handler
+    # This prevents the re-raising behavior that causes connection resets
     app.exception_handler(RequestValidationError)(global_exception_handler)
+    app.exception_handler(ConflictError)(global_exception_handler)
+    app.exception_handler(ResourceNotFoundError)(global_exception_handler)
+    app.exception_handler(AuthenticationRequiredError)(global_exception_handler)
+    app.exception_handler(AccessDeniedError)(global_exception_handler)
+    app.exception_handler(BadRequestError)(global_exception_handler)
+    # Generic Exception handler should be last
     app.exception_handler(Exception)(global_exception_handler)
 
-    app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
-
     return app
 
 
-def _log_run_config(run_config: StackRunConfig):
+def _log_run_config(run_config: StackConfig):
     """Logs the run config with redacted fields and disabled providers removed."""
-    logger.info("Run configuration:")
+    logger.info("Stack Configuration:")
     safe_config = redact_sensitive_fields(run_config.model_dump(mode="json"))
     clean_config = remove_disabled_providers(safe_config)
     logger.info(yaml.dump(clean_config, indent=2))
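Registering the concrete error classes alongside the catch-all means Starlette dispatches them straight to the JSON handler rather than surfacing them as unhandled errors. A minimal illustration of the registration style with a stand-in exception type (not the actual llama_stack_api classes):

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

app = FastAPI()


class ResourceMissingError(Exception):
    """Stand-in for a typed API error such as ResourceNotFoundError."""


async def error_handler(request: Request, exc: Exception) -> JSONResponse:
    status = 404 if isinstance(exc, ResourceMissingError) else 500
    return JSONResponse(status_code=status, content={"error": {"detail": str(exc)}})


# Specific classes first, then the generic Exception hook last.
app.exception_handler(ResourceMissingError)(error_handler)
app.exception_handler(Exception)(error_handler)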
@@ -532,8 +547,8 @@ def extract_path_params(route: str) -> list[str]:
 
 def remove_disabled_providers(obj):
     if isinstance(obj, dict):
-        keys = ["provider_id", "shield_id", "provider_model_id", "model_id"]
-        if any(k in obj and obj[k] in ("__disabled__", "", None) for k in keys):
+        # Filter out items where provider_id is explicitly disabled or empty
+        if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None):
             return None
         return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None}
     elif isinstance(obj, list):