llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack_api/files/fastapi_routes.py
@@ -0,0 +1,124 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Annotated
+
+ from fastapi import APIRouter, Depends, UploadFile
+ from fastapi.param_functions import File, Form
+ from fastapi.responses import Response
+
+ from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+ from llama_stack_api.version import LLAMA_STACK_API_V1
+
+ from .api import Files
+ from .models import (
+     DeleteFileRequest,
+     ExpiresAfter,
+     ListFilesRequest,
+     ListOpenAIFileResponse,
+     OpenAIFileDeleteResponse,
+     OpenAIFileObject,
+     OpenAIFilePurpose,
+     RetrieveFileContentRequest,
+     RetrieveFileRequest,
+     UploadFileRequest,
+ )
+
+ # Automatically generate dependency functions from Pydantic models
+ # This ensures the models are the single source of truth for descriptions
+ get_list_files_request = create_query_dependency(ListFilesRequest)
+ get_get_files_request = create_path_dependency(RetrieveFileRequest)
+ get_delete_files_request = create_path_dependency(DeleteFileRequest)
+ get_retrieve_file_content_request = create_path_dependency(RetrieveFileContentRequest)
+
+
+ def create_router(impl: Files) -> APIRouter:
+     router = APIRouter(
+         prefix=f"/{LLAMA_STACK_API_V1}",
+         tags=["Files"],
+         responses=standard_responses,
+     )
+
+     @router.get(
+         "/files",
+         response_model=ListOpenAIFileResponse,
+         summary="List files",
+         description="List files",
+         responses={
+             200: {"description": "The list of files."},
+         },
+     )
+     async def list_files(
+         request: Annotated[ListFilesRequest, Depends(get_list_files_request)],
+     ) -> ListOpenAIFileResponse:
+         return await impl.openai_list_files(request)
+
+     @router.get(
+         "/files/{file_id}",
+         response_model=OpenAIFileObject,
+         summary="Get file",
+         description="Get file",
+         responses={
+             200: {"description": "The file."},
+         },
+     )
+     async def get_file(
+         request: Annotated[RetrieveFileRequest, Depends(get_get_files_request)],
+     ) -> OpenAIFileObject:
+         return await impl.openai_retrieve_file(request)
+
+     @router.delete(
+         "/files/{file_id}",
+         response_model=OpenAIFileDeleteResponse,
+         summary="Delete file",
+         description="Delete file",
+         responses={
+             200: {"description": "The file was deleted."},
+         },
+     )
+     async def delete_file(
+         request: Annotated[DeleteFileRequest, Depends(get_delete_files_request)],
+     ) -> OpenAIFileDeleteResponse:
+         return await impl.openai_delete_file(request)
+
+     @router.get(
+         "/files/{file_id}/content",
+         status_code=200,
+         summary="Retrieve file content",
+         description="Retrieve file content",
+         responses={
+             200: {
+                 "description": "The raw file content as a binary response.",
+                 "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Response"}}},
+             },
+         },
+     )
+     async def retrieve_file_content(
+         request: Annotated[RetrieveFileContentRequest, Depends(get_retrieve_file_content_request)],
+     ) -> Response:
+         return await impl.openai_retrieve_file_content(request)
+
+     @router.post(
+         "/files",
+         response_model=OpenAIFileObject,
+         summary="Upload file",
+         description="Upload a file.",
+         responses={
+             200: {"description": "The uploaded file."},
+         },
+     )
+     async def upload_file(
+         file: Annotated[UploadFile, File(description="The file to upload.")],
+         purpose: Annotated[OpenAIFilePurpose, Form(description="The intended purpose of the uploaded file.")],
+         expires_after: Annotated[ExpiresAfter | None, Form(description="Optional expiration settings.")] = None,
+     ) -> OpenAIFileObject:
+         request = UploadFileRequest(
+             purpose=purpose,
+             expires_after=expires_after,
+         )
+         return await impl.openai_upload_file(request, file)
+
+     return router
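Note: the hunk above registers routes through a factory rather than at module level, so a concrete Files implementation must be supplied when the router is created. A minimal wiring sketch follows; StubFiles is a hypothetical stand-in (not part of the package) whose methods mirror the routes registered above.

# Hypothetical wiring sketch -- StubFiles is illustrative only.
from fastapi import FastAPI

from llama_stack_api.files.fastapi_routes import create_router


class StubFiles:
    # One method per route registered by create_router() above.
    async def openai_list_files(self, request):
        raise NotImplementedError

    async def openai_retrieve_file(self, request):
        raise NotImplementedError

    async def openai_delete_file(self, request):
        raise NotImplementedError

    async def openai_retrieve_file_content(self, request):
        raise NotImplementedError

    async def openai_upload_file(self, request, file):
        raise NotImplementedError


app = FastAPI()
app.include_router(create_router(StubFiles()))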

llama_stack_api/files/models.py
@@ -0,0 +1,107 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from enum import StrEnum
+ from typing import ClassVar, Literal
+
+ from pydantic import BaseModel, Field
+
+ from llama_stack_api.common.responses import Order
+ from llama_stack_api.schema_utils import json_schema_type
+
+
+ class OpenAIFilePurpose(StrEnum):
+     """
+     Valid purpose values for OpenAI Files API.
+     """
+
+     ASSISTANTS = "assistants"
+     BATCH = "batch"
+
+
+ @json_schema_type
+ class OpenAIFileObject(BaseModel):
+     """OpenAI File object as defined in the OpenAI Files API."""
+
+     object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+     id: str = Field(..., description="The file identifier, which can be referenced in the API endpoints.")
+     bytes: int = Field(..., description="The size of the file, in bytes.")
+     created_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file was created.")
+     expires_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file expires.")
+     filename: str = Field(..., description="The name of the file.")
+     purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the file.")
+
+
+ @json_schema_type
+ class ExpiresAfter(BaseModel):
+     """Control expiration of uploaded files."""
+
+     MIN: ClassVar[int] = 3600  # 1 hour
+     MAX: ClassVar[int] = 2592000  # 30 days
+
+     anchor: Literal["created_at"] = Field(..., description="The anchor point for expiration, must be 'created_at'.")
+     seconds: int = Field(
+         ..., ge=MIN, le=MAX, description="Seconds until expiration, between 3600 (1 hour) and 2592000 (30 days)."
+     )
+
+
+ @json_schema_type
+ class ListOpenAIFileResponse(BaseModel):
+     """Response for listing files in OpenAI Files API."""
+
+     data: list[OpenAIFileObject] = Field(..., description="The list of files.")
+     has_more: bool = Field(..., description="Whether there are more files available beyond this page.")
+     first_id: str = Field(..., description="The ID of the first file in the list for pagination.")
+     last_id: str = Field(..., description="The ID of the last file in the list for pagination.")
+     object: Literal["list"] = Field(default="list", description="The object type, which is always 'list'.")
+
+
+ @json_schema_type
+ class OpenAIFileDeleteResponse(BaseModel):
+     """Response for deleting a file in OpenAI Files API."""
+
+     id: str = Field(..., description="The file identifier that was deleted.")
+     object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+     deleted: bool = Field(..., description="Whether the file was successfully deleted.")
+
+
+ @json_schema_type
+ class ListFilesRequest(BaseModel):
+     """Request model for listing files."""
+
+     after: str | None = Field(default=None, description="A cursor for pagination. Returns files after this ID.")
+     limit: int | None = Field(default=10000, description="Maximum number of files to return (1-10,000).")
+     order: Order | None = Field(default=Order.desc, description="Sort order by created_at timestamp ('asc' or 'desc').")
+     purpose: OpenAIFilePurpose | None = Field(default=None, description="Filter files by purpose.")
+
+
+ @json_schema_type
+ class RetrieveFileRequest(BaseModel):
+     """Request model for retrieving a file."""
+
+     file_id: str = Field(..., description="The ID of the file to retrieve.")
+
+
+ @json_schema_type
+ class DeleteFileRequest(BaseModel):
+     """Request model for deleting a file."""
+
+     file_id: str = Field(..., description="The ID of the file to delete.")
+
+
+ @json_schema_type
+ class RetrieveFileContentRequest(BaseModel):
+     """Request model for retrieving file content."""
+
+     file_id: str = Field(..., description="The ID of the file to retrieve content from.")
+
+
+ @json_schema_type
+ class UploadFileRequest(BaseModel):
+     """Request model for uploading a file."""
+
+     purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the uploaded file.")
+     expires_after: ExpiresAfter | None = Field(default=None, description="Optional expiration settings for the file.")
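Note: ExpiresAfter encodes its bounds as ge/le constraints on the seconds field, so Pydantic rejects out-of-range values at construction time. A small usage sketch, assuming the models above are importable from llama_stack_api.files.models:

from pydantic import ValidationError

from llama_stack_api.files.models import ExpiresAfter, OpenAIFilePurpose, UploadFileRequest

# In-range value validates (1 day is between MIN=3600 and MAX=2592000 seconds).
req = UploadFileRequest(
    purpose=OpenAIFilePurpose.ASSISTANTS,
    expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
)

# Out-of-range value raises at construction time.
try:
    ExpiresAfter(anchor="created_at", seconds=60)  # below the 3600-second minimum
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # greater_than_equal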

{llama_stack/apis/inference → llama_stack_api}/inference.py
@@ -5,7 +5,7 @@
  # the root directory of this source tree.

  from collections.abc import AsyncIterator
- from enum import Enum
+ from enum import Enum, StrEnum
  from typing import (
      Annotated,
      Any,
@@ -15,28 +15,16 @@ from typing import (
  )

  from fastapi import Body
- from pydantic import BaseModel, Field, field_validator
+ from pydantic import BaseModel, Field
  from typing_extensions import TypedDict

- from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
- from llama_stack.apis.common.responses import Order
- from llama_stack.apis.models import Model
- from llama_stack.apis.telemetry import MetricResponseMixin
- from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
- from llama_stack.models.llama.datatypes import (
-     BuiltinTool,
-     StopReason,
-     ToolCall,
-     ToolDefinition,
-     ToolPromptFormat,
+ from llama_stack_api.common.content_types import InterleavedContent
+ from llama_stack_api.common.responses import (
+     Order,
  )
- from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
- from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
-
- register_schema(ToolCall)
- register_schema(ToolDefinition)
-
- from enum import StrEnum
+ from llama_stack_api.models import Model
+ from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+ from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA


  @json_schema_type
@@ -97,7 +85,7 @@ class SamplingParams(BaseModel):

      strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)

-     max_tokens: int | None = 0
+     max_tokens: int | None = None
      repetition_penalty: float | None = 1.0
      stop: list[str] | None = None

@@ -202,58 +190,6 @@ class ToolResponseMessage(BaseModel):
      content: InterleavedContent


- @json_schema_type
- class CompletionMessage(BaseModel):
-     """A message containing the model's (assistant) response in a chat conversation.
-
-     :param role: Must be "assistant" to identify this as the model's response
-     :param content: The content of the model's response
-     :param stop_reason: Reason why the model stopped generating. Options are:
-         - `StopReason.end_of_turn`: The model finished generating the entire response.
-         - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response.
-         - `StopReason.out_of_tokens`: The model ran out of token budget.
-     :param tool_calls: List of tool calls. Each tool call is a ToolCall object.
-     """
-
-     role: Literal["assistant"] = "assistant"
-     content: InterleavedContent
-     stop_reason: StopReason
-     tool_calls: list[ToolCall] | None = Field(default_factory=lambda: [])
-
-
- Message = Annotated[
-     UserMessage | SystemMessage | ToolResponseMessage | CompletionMessage,
-     Field(discriminator="role"),
- ]
- register_schema(Message, name="Message")
-
-
- @json_schema_type
- class ToolResponse(BaseModel):
-     """Response from a tool invocation.
-
-     :param call_id: Unique identifier for the tool call this response is for
-     :param tool_name: Name of the tool that was invoked
-     :param content: The response content from the tool
-     :param metadata: (Optional) Additional metadata about the tool response
-     """
-
-     call_id: str
-     tool_name: BuiltinTool | str
-     content: InterleavedContent
-     metadata: dict[str, Any] | None = None
-
-     @field_validator("tool_name", mode="before")
-     @classmethod
-     def validate_field(cls, v):
-         if isinstance(v, str):
-             try:
-                 return BuiltinTool(v)
-             except ValueError:
-                 return v
-         return v
-
-
  class ToolChoice(Enum):
      """Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.

@@ -290,22 +226,6 @@ class ChatCompletionResponseEventType(Enum):
      progress = "progress"


- @json_schema_type
- class ChatCompletionResponseEvent(BaseModel):
-     """An event during chat completion generation.
-
-     :param event_type: Type of the event
-     :param delta: Content generated since last event. This can be one or more tokens, or a tool call.
-     :param logprobs: Optional log probabilities for generated tokens
-     :param stop_reason: Optional reason why generation stopped, if complete
-     """
-
-     event_type: ChatCompletionResponseEventType
-     delta: ContentDelta
-     logprobs: list[TokenLogProbs] | None = None
-     stop_reason: StopReason | None = None
-
-
  class ResponseFormatType(StrEnum):
      """Types of formats for structured (guided) decoding.

@@ -358,34 +278,6 @@ class CompletionRequest(BaseModel):
      logprobs: LogProbConfig | None = None


- @json_schema_type
- class CompletionResponse(MetricResponseMixin):
-     """Response from a completion request.
-
-     :param content: The generated completion text
-     :param stop_reason: Reason why generation stopped
-     :param logprobs: Optional log probabilities for generated tokens
-     """
-
-     content: str
-     stop_reason: StopReason
-     logprobs: list[TokenLogProbs] | None = None
-
-
- @json_schema_type
- class CompletionResponseStreamChunk(MetricResponseMixin):
-     """A chunk of a streamed completion response.
-
-     :param delta: New content generated since last chunk. This can be one or more tokens.
-     :param stop_reason: Optional reason why generation stopped, if complete
-     :param logprobs: Optional log probabilities for generated tokens
-     """
-
-     delta: str
-     stop_reason: StopReason | None = None
-     logprobs: list[TokenLogProbs] | None = None
-
-
  class SystemMessageBehavior(Enum):
      """Config for how to override the default system prompt.

@@ -399,70 +291,6 @@ class SystemMessageBehavior(Enum):
      replace = "replace"


- @json_schema_type
- class ToolConfig(BaseModel):
-     """Configuration for tool use.
-
-     :param tool_choice: (Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
-     :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
-         - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
-         - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
-         - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
-     :param system_message_behavior: (Optional) Config for how to override the default system prompt.
-         - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt.
-         - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string
-         '{{function_definitions}}' to indicate where the function definitions should be inserted.
-     """
-
-     tool_choice: ToolChoice | str | None = Field(default=ToolChoice.auto)
-     tool_prompt_format: ToolPromptFormat | None = Field(default=None)
-     system_message_behavior: SystemMessageBehavior | None = Field(default=SystemMessageBehavior.append)
-
-     def model_post_init(self, __context: Any) -> None:
-         if isinstance(self.tool_choice, str):
-             try:
-                 self.tool_choice = ToolChoice[self.tool_choice]
-             except KeyError:
-                 pass
-
-
- # This is an internally used class
- @json_schema_type
- class ChatCompletionRequest(BaseModel):
-     model: str
-     messages: list[Message]
-     sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
-
-     tools: list[ToolDefinition] | None = Field(default_factory=lambda: [])
-     tool_config: ToolConfig | None = Field(default_factory=ToolConfig)
-
-     response_format: ResponseFormat | None = None
-     stream: bool | None = False
-     logprobs: LogProbConfig | None = None
-
-
- @json_schema_type
- class ChatCompletionResponseStreamChunk(MetricResponseMixin):
-     """A chunk of a streamed chat completion response.
-
-     :param event: The event containing the new content
-     """
-
-     event: ChatCompletionResponseEvent
-
-
- @json_schema_type
- class ChatCompletionResponse(MetricResponseMixin):
-     """Response from a chat completion request.
-
-     :param completion_message: The complete response message
-     :param logprobs: Optional log probabilities for generated tokens
-     """
-
-     completion_message: CompletionMessage
-     logprobs: list[TokenLogProbs] | None = None
-
-
  @json_schema_type
  class EmbeddingsResponse(BaseModel):
      """Response containing generated embeddings.
@@ -727,6 +555,81 @@ OpenAIResponseFormatParam = Annotated[
  register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam")


+ @json_schema_type
+ class FunctionToolConfig(BaseModel):
+     name: str
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceFunctionTool(BaseModel):
+     """Function tool choice for OpenAI-compatible chat completion requests.
+
+     :param type: Must be "function" to indicate function tool choice
+     :param function: The function tool configuration
+     """
+
+     type: Literal["function"] = "function"
+     function: FunctionToolConfig
+
+     def __init__(self, name: str):
+         super().__init__(type="function", function=FunctionToolConfig(name=name))
+
+
+ @json_schema_type
+ class CustomToolConfig(BaseModel):
+     """Custom tool configuration for OpenAI-compatible chat completion requests.
+
+     :param name: Name of the custom tool
+     """
+
+     name: str
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceCustomTool(BaseModel):
+     """Custom tool choice for OpenAI-compatible chat completion requests.
+
+     :param type: Must be "custom" to indicate custom tool choice
+     """
+
+     type: Literal["custom"] = "custom"
+     custom: CustomToolConfig
+
+     def __init__(self, name: str):
+         super().__init__(type="custom", custom=CustomToolConfig(name=name))
+
+
+ @json_schema_type
+ class AllowedToolsConfig(BaseModel):
+     tools: list[dict[str, Any]]
+     mode: Literal["auto", "required"]
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceAllowedTools(BaseModel):
+     """Allowed tools response format for OpenAI-compatible chat completion requests.
+
+     :param type: Must be "allowed_tools" to indicate allowed tools response format
+     """
+
+     type: Literal["allowed_tools"] = "allowed_tools"
+     allowed_tools: AllowedToolsConfig
+
+     def __init__(self, tools: list[dict[str, Any]], mode: Literal["auto", "required"]):
+         super().__init__(type="allowed_tools", allowed_tools=AllowedToolsConfig(tools=tools, mode=mode))
+
+
+ # Define the object-level union with discriminator
+ OpenAIChatCompletionToolChoice = Annotated[
+     OpenAIChatCompletionToolChoiceAllowedTools
+     | OpenAIChatCompletionToolChoiceFunctionTool
+     | OpenAIChatCompletionToolChoiceCustomTool,
+     Field(discriminator="type"),
+ ]
+
+ register_schema(OpenAIChatCompletionToolChoice, name="OpenAIChatCompletionToolChoice")
+
+
  @json_schema_type
  class OpenAITopLogProb(BaseModel):
      """The top log probability for a token from an OpenAI-compatible chat completion response.
@@ -754,7 +657,7 @@ class OpenAITokenLogProb(BaseModel):
      token: str
      bytes: list[int] | None = None
      logprob: float
-     top_logprobs: list[OpenAITopLogProb]
+     top_logprobs: list[OpenAITopLogProb] | None = None


  @json_schema_type
@@ -1160,7 +1063,6 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):


  @runtime_checkable
- @trace_protocol
  class InferenceProvider(Protocol):
      """
      This protocol defines the interface that should be implemented by all inference providers.
@@ -1189,20 +1091,18 @@ class InferenceProvider(Protocol):
          raise NotImplementedError("Reranking is not implemented")
          return  # this is so mypy's safe-super rule will consider the method concrete

-     @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
      async def openai_completion(
          self,
          params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
-     ) -> OpenAICompletion:
+     ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
          """Create completion.

          Generate an OpenAI-compatible completion for the given prompt using the specified model.
-         :returns: An OpenAICompletion.
+         :returns: An OpenAICompletion. When streaming, returns Server-Sent Events (SSE) with OpenAICompletion chunks.
          """
          ...

-     @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
      async def openai_chat_completion(
          self,
@@ -1211,11 +1111,10 @@ class InferenceProvider(Protocol):
          """Create chat completions.

          Generate an OpenAI-compatible chat completion for the given messages using the specified model.
-         :returns: An OpenAIChatCompletion.
+         :returns: An OpenAIChatCompletion. When streaming, returns Server-Sent Events (SSE) with OpenAIChatCompletionChunk objects.
          """
          ...

-     @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
      async def openai_embeddings(
          self,
@@ -1234,12 +1133,12 @@ class Inference(InferenceProvider):

      Llama Stack Inference API for generating completions, chat completions, and embeddings.

-     This API provides the raw interface to the underlying models. Two kinds of models are supported:
+     This API provides the raw interface to the underlying models. Three kinds of models are supported:
      - LLM models: these models generate "raw" and "chat" (conversational) completions.
      - Embedding models: these models generate embeddings to be used for semantic search.
+     - Rerank models: these models reorder the documents based on their relevance to a query.
      """

-     @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
      async def list_chat_completions(
          self,
@@ -1258,9 +1157,6 @@ class Inference(InferenceProvider):
          """
          raise NotImplementedError("List chat completions is not implemented")

-     @webmethod(
-         route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-     )
      @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
      async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
          """Get chat completion.

llama_stack_api/inspect_api/__init__.py
@@ -0,0 +1,37 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """Inspect API protocol and models.
+
+ This module contains the Inspect protocol definition.
+ Pydantic models are defined in llama_stack_api.inspect.models.
+ The FastAPI router is defined in llama_stack_api.inspect.fastapi_routes.
+ """
+
+ # Import fastapi_routes for router factory access
+ from . import fastapi_routes
+
+ # Import protocol for re-export
+ from .api import Inspect
+
+ # Import models for re-export
+ from .models import (
+     ApiFilter,
+     HealthInfo,
+     ListRoutesResponse,
+     RouteInfo,
+     VersionInfo,
+ )
+
+ __all__ = [
+     "Inspect",
+     "ApiFilter",
+     "HealthInfo",
+     "ListRoutesResponse",
+     "RouteInfo",
+     "VersionInfo",
+     "fastapi_routes",
+ ]

llama_stack_api/inspect_api/api.py
@@ -0,0 +1,25 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Protocol, runtime_checkable
+
+ from .models import (
+     ApiFilter,
+     HealthInfo,
+     ListRoutesResponse,
+     VersionInfo,
+ )
+
+
+ @runtime_checkable
+ class Inspect(Protocol):
+     """APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers."""
+
+     async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse: ...
+
+     async def health(self) -> HealthInfo: ...
+
+     async def version(self) -> VersionInfo: ...
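Note: Inspect is declared @runtime_checkable, so isinstance() only checks that matching method names exist; signatures and return types are not verified at runtime. An illustrative sketch (the conforming class and import path are assumptions based on the file list above):

from llama_stack_api.inspect_api.api import Inspect


class MyInspect:
    async def list_routes(self, api_filter=None):
        raise NotImplementedError

    async def health(self):
        raise NotImplementedError

    async def version(self):
        raise NotImplementedError


assert isinstance(MyInspect(), Inspect)  # structural check on method names only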