PyPI - llama-stack-api - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl - Mend

llama-stack-api 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

llama_stack_api/__init__.py +175 -20
llama_stack_api/agents/__init__.py +38 -0
llama_stack_api/agents/api.py +52 -0
llama_stack_api/agents/fastapi_routes.py +268 -0
llama_stack_api/agents/models.py +181 -0
llama_stack_api/common/errors.py +15 -0
llama_stack_api/connectors/__init__.py +38 -0
llama_stack_api/connectors/api.py +50 -0
llama_stack_api/connectors/fastapi_routes.py +103 -0
llama_stack_api/connectors/models.py +103 -0
llama_stack_api/conversations/__init__.py +61 -0
llama_stack_api/conversations/api.py +44 -0
llama_stack_api/conversations/fastapi_routes.py +177 -0
llama_stack_api/conversations/models.py +245 -0
llama_stack_api/datasetio/__init__.py +34 -0
llama_stack_api/datasetio/api.py +42 -0
llama_stack_api/datasetio/fastapi_routes.py +94 -0
llama_stack_api/datasetio/models.py +48 -0
llama_stack_api/eval/__init__.py +55 -0
llama_stack_api/eval/api.py +51 -0
llama_stack_api/eval/compat.py +300 -0
llama_stack_api/eval/fastapi_routes.py +126 -0
llama_stack_api/eval/models.py +141 -0
llama_stack_api/inference/__init__.py +207 -0
llama_stack_api/inference/api.py +93 -0
llama_stack_api/inference/fastapi_routes.py +243 -0
llama_stack_api/inference/models.py +1035 -0
llama_stack_api/models/__init__.py +47 -0
llama_stack_api/models/api.py +38 -0
llama_stack_api/models/fastapi_routes.py +104 -0
llama_stack_api/{models.py → models/models.py} +65 -79
llama_stack_api/openai_responses.py +32 -6
llama_stack_api/post_training/__init__.py +73 -0
llama_stack_api/post_training/api.py +36 -0
llama_stack_api/post_training/fastapi_routes.py +116 -0
llama_stack_api/{post_training.py → post_training/models.py} +55 -86
llama_stack_api/prompts/__init__.py +47 -0
llama_stack_api/prompts/api.py +44 -0
llama_stack_api/prompts/fastapi_routes.py +163 -0
llama_stack_api/prompts/models.py +177 -0
llama_stack_api/resource.py +0 -1
llama_stack_api/safety/__init__.py +37 -0
llama_stack_api/safety/api.py +29 -0
llama_stack_api/safety/datatypes.py +83 -0
llama_stack_api/safety/fastapi_routes.py +55 -0
llama_stack_api/safety/models.py +38 -0
llama_stack_api/schema_utils.py +47 -4
llama_stack_api/scoring/__init__.py +66 -0
llama_stack_api/scoring/api.py +35 -0
llama_stack_api/scoring/fastapi_routes.py +67 -0
llama_stack_api/scoring/models.py +81 -0
llama_stack_api/scoring_functions/__init__.py +50 -0
llama_stack_api/scoring_functions/api.py +39 -0
llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
llama_stack_api/{scoring_functions.py → scoring_functions/models.py} +67 -64
llama_stack_api/shields/__init__.py +41 -0
llama_stack_api/shields/api.py +39 -0
llama_stack_api/shields/fastapi_routes.py +104 -0
llama_stack_api/shields/models.py +74 -0
llama_stack_api/validators.py +46 -0
llama_stack_api/vector_io/__init__.py +88 -0
llama_stack_api/vector_io/api.py +234 -0
llama_stack_api/vector_io/fastapi_routes.py +447 -0
llama_stack_api/{vector_io.py → vector_io/models.py} +99 -377
{llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
llama_stack_api/agents.py +0 -173
llama_stack_api/connectors.py +0 -146
llama_stack_api/conversations.py +0 -270
llama_stack_api/datasetio.py +0 -55
llama_stack_api/eval.py +0 -137
llama_stack_api/inference.py +0 -1169
llama_stack_api/prompts.py +0 -203
llama_stack_api/safety.py +0 -132
llama_stack_api/scoring.py +0 -93
llama_stack_api/shields.py +0 -93
llama_stack_api-0.4.4.dist-info/RECORD +0 -70
{llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0
{llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/top_level.txt +0 -0

llama_stack_api/models/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+"""Models API protocol and models.
+This module contains the Models protocol definition.
+Pydantic models are defined in llama_stack_api.models.models.
+The FastAPI router is defined in llama_stack_api.models.fastapi_routes.
+"""
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+# Import new protocol for FastAPI router
+from .api import Models
+# Import models for re-export
+from .models import (
+    CommonModelFields,
+    GetModelRequest,
+    ListModelsResponse,
+    Model,
+    ModelInput,
+    ModelType,
+    OpenAIListModelsResponse,
+    OpenAIModel,
+    RegisterModelRequest,
+    UnregisterModelRequest,
+)
+__all__ = [
+    "CommonModelFields",
+    "fastapi_routes",
+    "GetModelRequest",
+    "ListModelsResponse",
+    "Model",
+    "ModelInput",
+    "Models",
+    "ModelType",
+    "OpenAIListModelsResponse",
+    "OpenAIModel",
+    "RegisterModelRequest",
+    "UnregisterModelRequest",
+]

llama_stack_api/models/api.py ADDED Viewed

@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+"""Models API protocol definition.
+This module contains the Models protocol definition.
+Pydantic models are defined in llama_stack_api.models.models.
+The FastAPI router is defined in llama_stack_api.models.fastapi_routes.
+"""
+from typing import Protocol, runtime_checkable
+from .models import (
+    GetModelRequest,
+    ListModelsResponse,
+    Model,
+    OpenAIListModelsResponse,
+    RegisterModelRequest,
+    UnregisterModelRequest,
+)
+@runtime_checkable
+class Models(Protocol):
+    """Protocol for model management operations."""
+    async def list_models(self) -> ListModelsResponse: ...
+    async def openai_list_models(self) -> OpenAIListModelsResponse: ...
+    async def get_model(self, request: GetModelRequest) -> Model: ...
+    async def register_model(self, request: RegisterModelRequest) -> Model: ...
+    async def unregister_model(self, request: UnregisterModelRequest) -> None: ...

llama_stack_api/models/fastapi_routes.py ADDED Viewed

@@ -0,0 +1,104 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+"""FastAPI router for the Models API.
+This module defines the FastAPI router for the Models API using standard
+FastAPI route decorators.
+"""
+from typing import Annotated
+from fastapi import APIRouter, Body, Depends
+from llama_stack_api.router_utils import create_path_dependency, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1
+from .api import Models
+from .models import (
+    GetModelRequest,
+    Model,
+    OpenAIListModelsResponse,
+    RegisterModelRequest,
+    UnregisterModelRequest,
+)
+# Path parameter dependencies for single-field models
+get_model_request = create_path_dependency(GetModelRequest)
+unregister_model_request = create_path_dependency(UnregisterModelRequest)
+def create_router(impl: Models) -> APIRouter:
+    """Create a FastAPI router for the Models API.
+    Args:
+        impl: The Models implementation instance
+    Returns:
+        APIRouter configured for the Models API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Models"],
+        responses=standard_responses,
+    )
+    @router.get(
+        "/models",
+        response_model=OpenAIListModelsResponse,
+        summary="List models using the OpenAI API.",
+        description="List models using the OpenAI API.",
+        responses={
+            200: {"description": "A list of OpenAI model objects."},
+        },
+    )
+    async def openai_list_models() -> OpenAIListModelsResponse:
+        return await impl.openai_list_models()
+    @router.get(
+        "/models/{model_id:path}",
+        response_model=Model,
+        summary="Get a model by its identifier.",
+        description="Get a model by its identifier.",
+        responses={
+            200: {"description": "The model object."},
+        },
+    )
+    async def get_model(
+        request: Annotated[GetModelRequest, Depends(get_model_request)],
+    ) -> Model:
+        return await impl.get_model(request)
+    @router.post(
+        "/models",
+        response_model=Model,
+        summary="Register a model.",
+        description="Register a model.",
+        responses={
+            200: {"description": "The registered model object."},
+        },
+        deprecated=True,
+    )
+    async def register_model(
+        request: Annotated[RegisterModelRequest, Body(...)],
+    ) -> Model:
+        return await impl.register_model(request)
+    @router.delete(
+        "/models/{model_id:path}",
+        summary="Unregister a model.",
+        description="Unregister a model.",
+        responses={
+            200: {"description": "The model was successfully unregistered."},
+        },
+        deprecated=True,
+    )
+    async def unregister_model(
+        request: Annotated[UnregisterModelRequest, Depends(unregister_model_request)],
+    ) -> None:
+        return await impl.unregister_model(request)
+    return router

llama_stack_api/{models.py → models/models.py} RENAMED Viewed

@@ -4,26 +4,25 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+"""Pydantic models for Models API requests and responses.
+This module defines the request and response models for the Models API
+using Pydantic with Field descriptions for OpenAPI schema generation.
+"""
 from enum import StrEnum
-from typing import Any, Literal, Protocol, runtime_checkable
+from typing import Any, Literal
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 from llama_stack_api.resource import Resource, ResourceType
-from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack_api.version import LLAMA_STACK_API_V1
-class CommonModelFields(BaseModel):
-    metadata: dict[str, Any] = Field(
-        default_factory=dict,
-        description="Any additional metadata for this model",
-    )
+from llama_stack_api.schema_utils import json_schema_type
 @json_schema_type
 class ModelType(StrEnum):
     """Enumeration of supported model types in Llama Stack.
     :cvar llm: Large language model for text generation and completion
     :cvar embedding: Embedding model for converting text to vector representations
     :cvar rerank: Reranking model for reordering documents based on their relevance to a query
@@ -34,6 +33,13 @@ class ModelType(StrEnum):
     rerank = "rerank"
+class CommonModelFields(BaseModel):
+    metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Any additional metadata for this model",
+    )
 @json_schema_type
 class Model(CommonModelFields, Resource):
     """A model resource representing an AI model registered in Llama Stack.
@@ -77,8 +83,11 @@ class ModelInput(CommonModelFields):
     model_config = ConfigDict(protected_namespaces=())
+@json_schema_type
 class ListModelsResponse(BaseModel):
-    data: list[Model]
+    """Response containing a list of model objects."""
+    data: list[Model] = Field(..., description="List of model objects.")
 @json_schema_type
@@ -101,71 +110,48 @@ class OpenAIModel(BaseModel):
 @json_schema_type
 class OpenAIListModelsResponse(BaseModel):
-    data: list[OpenAIModel]
-@runtime_checkable
-class Models(Protocol):
-    async def list_models(self) -> ListModelsResponse:
-        """List all models.
-        :returns: A ListModelsResponse.
-        """
-        ...
-    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
-    async def openai_list_models(self) -> OpenAIListModelsResponse:
-        """List models using the OpenAI API.
-        :returns: A OpenAIListModelsResponse.
-        """
-        ...
-    @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
-    async def get_model(
-        self,
-        model_id: str,
-    ) -> Model:
-        """Get model.
-        Get a model by its identifier.
-        :param model_id: The identifier of the model to get.
-        :returns: A Model.
-        """
-        ...
-    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
-    async def register_model(
-        self,
-        model_id: str,
-        provider_model_id: str | None = None,
-        provider_id: str | None = None,
-        metadata: dict[str, Any] | None = None,
-        model_type: ModelType | None = None,
-    ) -> Model:
-        """Register model.
-        Register a model.
-        :param model_id: The identifier of the model to register.
-        :param provider_model_id: The identifier of the model in the provider.
-        :param provider_id: The identifier of the provider.
-        :param metadata: Any additional metadata for this model.
-        :param model_type: The type of model to register.
-        :returns: A Model.
-        """
-        ...
-    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
-    async def unregister_model(
-        self,
-        model_id: str,
-    ) -> None:
-        """Unregister model.
-        Unregister a model.
-        :param model_id: The identifier of the model to unregister.
-        """
-        ...
+    """Response containing a list of OpenAI model objects."""
+    data: list[OpenAIModel] = Field(..., description="List of OpenAI model objects.")
+# Request models for each endpoint
+@json_schema_type
+class GetModelRequest(BaseModel):
+    """Request model for getting a model by ID."""
+    model_id: str = Field(..., description="The ID of the model to get.")
+@json_schema_type
+class RegisterModelRequest(BaseModel):
+    """Request model for registering a model."""
+    model_id: str = Field(..., description="The identifier of the model to register.")
+    provider_model_id: str | None = Field(default=None, description="The identifier of the model in the provider.")
+    provider_id: str | None = Field(default=None, description="The identifier of the provider.")
+    metadata: dict[str, Any] | None = Field(default=None, description="Any additional metadata for this model.")
+    model_type: ModelType | None = Field(default=None, description="The type of model to register.")
+@json_schema_type
+class UnregisterModelRequest(BaseModel):
+    """Request model for unregistering a model."""
+    model_id: str = Field(..., description="The ID of the model to unregister.")
+__all__ = [
+    "CommonModelFields",
+    "GetModelRequest",
+    "ListModelsResponse",
+    "Model",
+    "ModelInput",
+    "ModelType",
+    "OpenAIListModelsResponse",
+    "OpenAIModel",
+    "RegisterModelRequest",
+    "UnregisterModelRequest",
+]

llama_stack_api/openai_responses.py CHANGED Viewed

@@ -405,6 +405,19 @@ class OpenAIResponseText(BaseModel):
     format: OpenAIResponseTextFormat | None = None
+@json_schema_type
+class OpenAIResponseReasoning(BaseModel):
+    """Configuration for reasoning effort in OpenAI responses.
+    Controls how much reasoning the model performs before generating a response.
+    :param effort: The effort level for reasoning. "low" favors speed and economical token usage,
+                   "high" favors more complete reasoning, "medium" is a balance between the two.
+    """
+    effort: Literal["none", "minimal", "low", "medium", "high", "xhigh"] | None = None
 # Must match type Literals of OpenAIResponseInputToolWebSearch below
 WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11", "web_search_2025_08_26"]
@@ -491,7 +504,8 @@ class OpenAIResponseInputToolMCP(BaseModel):
     :param type: Tool type identifier, always "mcp"
     :param server_label: Label to identify this MCP server
-    :param server_url: URL endpoint of the MCP server
+    :param connector_id: (Optional) ID of the connector to use for this MCP server
+    :param server_url: (Optional) URL endpoint of the MCP server
     :param headers: (Optional) HTTP headers to include when connecting to the server
     :param authorization: (Optional) OAuth access token for authenticating with the MCP server
     :param require_approval: Approval requirement for tool calls ("always", "never", or filter)
@@ -500,13 +514,20 @@ class OpenAIResponseInputToolMCP(BaseModel):
     type: Literal["mcp"] = "mcp"
     server_label: str
-    server_url: str
+    connector_id: str | None = None
+    server_url: str | None = None
     headers: dict[str, Any] | None = None
     authorization: str | None = Field(default=None, exclude=True)
     require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never"
     allowed_tools: list[str] | AllowedToolsFilter | None = None
+    @model_validator(mode="after")
+    def validate_server_or_connector(self) -> "OpenAIResponseInputToolMCP":
+        if not self.server_url and not self.connector_id:
+            raise ValueError("Either 'server_url' or 'connector_id' must be provided for MCP tool")
+        return self
 OpenAIResponseInputTool = Annotated[
     OpenAIResponseInputToolWebSearch
@@ -647,7 +668,7 @@ class OpenAIResponseUsageOutputTokensDetails(BaseModel):
     :param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)
     """
-    reasoning_tokens: int | None = None
+    reasoning_tokens: int
 class OpenAIResponseUsageInputTokensDetails(BaseModel):
@@ -656,7 +677,7 @@ class OpenAIResponseUsageInputTokensDetails(BaseModel):
     :param cached_tokens: Number of tokens retrieved from cache
     """
-    cached_tokens: int | None = None
+    cached_tokens: int
 @json_schema_type
@@ -673,8 +694,8 @@ class OpenAIResponseUsage(BaseModel):
     input_tokens: int
     output_tokens: int
     total_tokens: int
-    input_tokens_details: OpenAIResponseUsageInputTokensDetails | None = None
-    output_tokens_details: OpenAIResponseUsageOutputTokensDetails | None = None
+    input_tokens_details: OpenAIResponseUsageInputTokensDetails
+    output_tokens_details: OpenAIResponseUsageOutputTokensDetails
 @json_schema_type
@@ -700,10 +721,12 @@ class OpenAIResponseObject(BaseModel):
     :param usage: (Optional) Token usage information for the response
     :param instructions: (Optional) System message inserted into the model's context
     :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
+    :param max_output_tokens: (Optional) An upper bound for the number of tokens that can be generated for a response, including visible output tokens.
     :param metadata: (Optional) Dictionary of metadata key-value pairs
     """
     created_at: int
+    completed_at: int | None = None
     error: OpenAIResponseError | None = None
     id: str
     model: str
@@ -724,7 +747,10 @@ class OpenAIResponseObject(BaseModel):
     usage: OpenAIResponseUsage | None = None
     instructions: str | None = None
     max_tool_calls: int | None = None
+    reasoning: OpenAIResponseReasoning | None = None
+    max_output_tokens: int | None = None
     metadata: dict[str, str] | None = None
+    store: bool
 @json_schema_type

llama_stack_api/post_training/__init__.py ADDED Viewed

@@ -0,0 +1,73 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+"""Post-Training API protocol and models.
+This module contains the Post-Training protocol definition.
+Pydantic models are defined in llama_stack_api.post_training.models.
+The FastAPI router is defined in llama_stack_api.post_training.fastapi_routes.
+"""
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+# Import protocol for re-export
+from .api import PostTraining
+# Import models for re-export
+from .models import (
+    AlgorithmConfig,
+    CancelTrainingJobRequest,
+    DataConfig,
+    DatasetFormat,
+    DPOAlignmentConfig,
+    DPOLossType,
+    EfficiencyConfig,
+    GetTrainingJobArtifactsRequest,
+    GetTrainingJobStatusRequest,
+    ListPostTrainingJobsResponse,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    OptimizerType,
+    PostTrainingJob,
+    PostTrainingJobArtifactsResponse,
+    PostTrainingJobLogStream,
+    PostTrainingJobStatusResponse,
+    PostTrainingRLHFRequest,
+    PreferenceOptimizeRequest,
+    QATFinetuningConfig,
+    RLHFAlgorithm,
+    SupervisedFineTuneRequest,
+    TrainingConfig,
+)
+__all__ = [
+    "PostTraining",
+    "AlgorithmConfig",
+    "CancelTrainingJobRequest",
+    "DataConfig",
+    "DatasetFormat",
+    "DPOAlignmentConfig",
+    "DPOLossType",
+    "EfficiencyConfig",
+    "GetTrainingJobArtifactsRequest",
+    "GetTrainingJobStatusRequest",
+    "ListPostTrainingJobsResponse",
+    "LoraFinetuningConfig",
+    "OptimizerConfig",
+    "OptimizerType",
+    "PostTrainingJob",
+    "PostTrainingJobArtifactsResponse",
+    "PostTrainingJobLogStream",
+    "PostTrainingJobStatusResponse",
+    "PostTrainingRLHFRequest",
+    "PreferenceOptimizeRequest",
+    "QATFinetuningConfig",
+    "RLHFAlgorithm",
+    "SupervisedFineTuneRequest",
+    "TrainingConfig",
+    "fastapi_routes",
+]

llama_stack_api/post_training/api.py ADDED Viewed

@@ -0,0 +1,36 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Protocol, runtime_checkable
+from .models import (
+    CancelTrainingJobRequest,
+    GetTrainingJobArtifactsRequest,
+    GetTrainingJobStatusRequest,
+    ListPostTrainingJobsResponse,
+    PostTrainingJob,
+    PostTrainingJobArtifactsResponse,
+    PostTrainingJobStatusResponse,
+    PreferenceOptimizeRequest,
+    SupervisedFineTuneRequest,
+)
+@runtime_checkable
+class PostTraining(Protocol):
+    async def supervised_fine_tune(self, request: SupervisedFineTuneRequest) -> PostTrainingJob: ...
+    async def preference_optimize(self, request: PreferenceOptimizeRequest) -> PostTrainingJob: ...
+    async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ...
+    async def get_training_job_status(self, request: GetTrainingJobStatusRequest) -> PostTrainingJobStatusResponse: ...
+    async def cancel_training_job(self, request: CancelTrainingJobRequest) -> None: ...
+    async def get_training_job_artifacts(
+        self, request: GetTrainingJobArtifactsRequest
+    ) -> PostTrainingJobArtifactsResponse: ...

llama-stack-api 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

llama-stack-api 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl