llama-stack 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (138)
  1. llama_stack/apis/agents/agents.py +26 -14
  2. llama_stack/apis/batch_inference/batch_inference.py +3 -2
  3. llama_stack/apis/batches/batches.py +5 -4
  4. llama_stack/apis/benchmarks/benchmarks.py +16 -3
  5. llama_stack/apis/datasetio/datasetio.py +3 -2
  6. llama_stack/apis/datasets/datasets.py +5 -4
  7. llama_stack/apis/eval/eval.py +27 -5
  8. llama_stack/apis/files/files.py +7 -6
  9. llama_stack/apis/inference/inference.py +13 -11
  10. llama_stack/apis/inspect/inspect.py +4 -3
  11. llama_stack/apis/models/models.py +6 -5
  12. llama_stack/apis/post_training/post_training.py +13 -6
  13. llama_stack/apis/prompts/prompts.py +8 -7
  14. llama_stack/apis/providers/providers.py +3 -2
  15. llama_stack/apis/safety/safety.py +3 -2
  16. llama_stack/apis/scoring/scoring.py +3 -2
  17. llama_stack/apis/scoring_functions/scoring_functions.py +12 -3
  18. llama_stack/apis/shields/shields.py +5 -4
  19. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +2 -1
  20. llama_stack/apis/telemetry/telemetry.py +21 -8
  21. llama_stack/apis/tools/rag_tool.py +3 -2
  22. llama_stack/apis/tools/tools.py +9 -8
  23. llama_stack/apis/vector_dbs/vector_dbs.py +5 -4
  24. llama_stack/apis/vector_io/vector_io.py +25 -14
  25. llama_stack/apis/version.py +3 -1
  26. llama_stack/cli/stack/_build.py +7 -0
  27. llama_stack/cli/verify_download.py +7 -10
  28. llama_stack/core/build_container.sh +2 -2
  29. llama_stack/core/client.py +18 -2
  30. llama_stack/core/datatypes.py +10 -7
  31. llama_stack/core/distribution.py +7 -20
  32. llama_stack/core/library_client.py +6 -4
  33. llama_stack/core/routers/__init__.py +4 -1
  34. llama_stack/core/routers/inference.py +12 -7
  35. llama_stack/core/routing_tables/benchmarks.py +4 -0
  36. llama_stack/core/routing_tables/common.py +4 -0
  37. llama_stack/core/routing_tables/models.py +1 -1
  38. llama_stack/core/routing_tables/scoring_functions.py +4 -0
  39. llama_stack/core/routing_tables/toolgroups.py +13 -2
  40. llama_stack/core/server/routes.py +15 -15
  41. llama_stack/core/server/server.py +99 -124
  42. llama_stack/core/server/tracing.py +80 -0
  43. llama_stack/core/stack.py +66 -60
  44. llama_stack/core/start_stack.sh +1 -1
  45. llama_stack/distributions/ci-tests/build.yaml +1 -0
  46. llama_stack/distributions/ci-tests/run.yaml +7 -0
  47. llama_stack/distributions/nvidia/build.yaml +2 -0
  48. llama_stack/distributions/nvidia/nvidia.py +12 -10
  49. llama_stack/distributions/nvidia/run-with-safety.yaml +9 -0
  50. llama_stack/distributions/nvidia/run.yaml +10 -84
  51. llama_stack/distributions/starter/build.yaml +1 -0
  52. llama_stack/distributions/starter/run.yaml +7 -0
  53. llama_stack/distributions/starter/starter.py +20 -2
  54. llama_stack/distributions/starter-gpu/build.yaml +1 -0
  55. llama_stack/distributions/starter-gpu/run.yaml +7 -0
  56. llama_stack/distributions/watsonx/run.yaml +9 -0
  57. llama_stack/distributions/watsonx/watsonx.py +10 -2
  58. llama_stack/providers/datatypes.py +17 -71
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +7 -0
  60. llama_stack/providers/inline/files/localfs/files.py +2 -3
  61. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +3 -0
  62. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +6 -6
  63. llama_stack/providers/inline/tool_runtime/rag/memory.py +101 -46
  64. llama_stack/providers/registry/batches.py +1 -1
  65. llama_stack/providers/registry/datasetio.py +19 -22
  66. llama_stack/providers/registry/eval.py +10 -11
  67. llama_stack/providers/registry/files.py +8 -15
  68. llama_stack/providers/registry/inference.py +189 -191
  69. llama_stack/providers/registry/post_training.py +8 -9
  70. llama_stack/providers/registry/safety.py +23 -27
  71. llama_stack/providers/registry/scoring.py +1 -1
  72. llama_stack/providers/registry/tool_runtime.py +41 -47
  73. llama_stack/providers/registry/vector_io.py +59 -59
  74. llama_stack/providers/remote/eval/nvidia/eval.py +12 -4
  75. llama_stack/providers/remote/files/s3/files.py +2 -3
  76. llama_stack/providers/remote/inference/anthropic/__init__.py +0 -6
  77. llama_stack/providers/remote/inference/anthropic/anthropic.py +12 -2
  78. llama_stack/providers/remote/inference/azure/__init__.py +15 -0
  79. llama_stack/providers/remote/inference/azure/azure.py +62 -0
  80. llama_stack/providers/remote/inference/azure/config.py +63 -0
  81. llama_stack/providers/remote/inference/bedrock/bedrock.py +50 -3
  82. llama_stack/providers/remote/inference/cerebras/cerebras.py +14 -14
  83. llama_stack/providers/remote/inference/cerebras/config.py +2 -2
  84. llama_stack/providers/remote/inference/databricks/__init__.py +2 -1
  85. llama_stack/providers/remote/inference/databricks/config.py +5 -5
  86. llama_stack/providers/remote/inference/databricks/databricks.py +84 -94
  87. llama_stack/providers/remote/inference/fireworks/fireworks.py +17 -169
  88. llama_stack/providers/remote/inference/gemini/__init__.py +0 -6
  89. llama_stack/providers/remote/inference/gemini/gemini.py +4 -2
  90. llama_stack/providers/remote/inference/groq/__init__.py +1 -3
  91. llama_stack/providers/remote/inference/groq/groq.py +0 -3
  92. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +0 -3
  93. llama_stack/providers/remote/inference/nvidia/nvidia.py +9 -8
  94. llama_stack/providers/remote/inference/ollama/ollama.py +70 -217
  95. llama_stack/providers/remote/inference/openai/__init__.py +0 -6
  96. llama_stack/providers/remote/inference/openai/openai.py +5 -2
  97. llama_stack/providers/remote/inference/passthrough/passthrough.py +1 -1
  98. llama_stack/providers/remote/inference/sambanova/__init__.py +1 -3
  99. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -3
  100. llama_stack/providers/remote/inference/tgi/tgi.py +43 -15
  101. llama_stack/providers/remote/inference/together/together.py +85 -130
  102. llama_stack/providers/remote/inference/vertexai/vertexai.py +29 -6
  103. llama_stack/providers/remote/inference/vllm/__init__.py +6 -0
  104. llama_stack/providers/remote/inference/vllm/vllm.py +56 -193
  105. llama_stack/providers/remote/inference/watsonx/config.py +2 -2
  106. llama_stack/providers/remote/inference/watsonx/watsonx.py +19 -3
  107. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +6 -2
  108. llama_stack/providers/utils/inference/inference_store.py +130 -22
  109. llama_stack/providers/utils/inference/litellm_openai_mixin.py +3 -3
  110. llama_stack/providers/utils/inference/model_registry.py +9 -22
  111. llama_stack/providers/utils/inference/openai_mixin.py +109 -24
  112. llama_stack/providers/utils/kvstore/config.py +5 -5
  113. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +8 -3
  114. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +7 -0
  115. llama_stack/providers/utils/responses/responses_store.py +2 -5
  116. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +19 -6
  117. llama_stack/providers/utils/telemetry/tracing.py +29 -15
  118. llama_stack/providers/utils/vector_io/vector_utils.py +2 -4
  119. llama_stack/schema_utils.py +15 -1
  120. llama_stack/testing/inference_recorder.py +51 -31
  121. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/METADATA +15 -15
  122. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/RECORD +126 -134
  123. llama_stack/providers/remote/inference/anthropic/models.py +0 -40
  124. llama_stack/providers/remote/inference/cerebras/models.py +0 -28
  125. llama_stack/providers/remote/inference/fireworks/models.py +0 -70
  126. llama_stack/providers/remote/inference/gemini/models.py +0 -34
  127. llama_stack/providers/remote/inference/groq/models.py +0 -48
  128. llama_stack/providers/remote/inference/llama_openai_compat/models.py +0 -25
  129. llama_stack/providers/remote/inference/nvidia/models.py +0 -109
  130. llama_stack/providers/remote/inference/ollama/models.py +0 -106
  131. llama_stack/providers/remote/inference/openai/models.py +0 -60
  132. llama_stack/providers/remote/inference/sambanova/models.py +0 -28
  133. llama_stack/providers/remote/inference/together/models.py +0 -77
  134. llama_stack/providers/remote/inference/vertexai/models.py +0 -20
  135. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/WHEEL +0 -0
  136. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/entry_points.txt +0 -0
  137. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/licenses/LICENSE +0 -0
  138. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/top_level.txt +0 -0
llama_stack/apis/agents/agents.py
@@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 from .openai_responses import (
@@ -481,7 +482,7 @@ class Agents(Protocol):
     - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
     """

-    @webmethod(route="/agents", method="POST", descriptive_name="create_agent")
+    @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
     async def create_agent(
         self,
         agent_config: AgentConfig,
@@ -494,7 +495,10 @@ class Agents(Protocol):
         ...

     @webmethod(
-        route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn"
+        route="/agents/{agent_id}/session/{session_id}/turn",
+        method="POST",
+        descriptive_name="create_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def create_agent_turn(
         self,
@@ -524,6 +528,7 @@ class Agents(Protocol):
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
         method="POST",
         descriptive_name="resume_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def resume_agent_turn(
         self,
@@ -549,6 +554,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_turn(
         self,
@@ -568,6 +574,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_step(
         self,
@@ -586,7 +593,12 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session")
+    @webmethod(
+        route="/agents/{agent_id}/session",
+        method="POST",
+        descriptive_name="create_agent_session",
+        level=LLAMA_STACK_API_V1,
+    )
     async def create_agent_session(
         self,
         agent_id: str,
@@ -600,7 +612,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agents_session(
         self,
         session_id: str,
@@ -616,7 +628,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agents_session(
         self,
         session_id: str,
@@ -629,7 +641,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agent(
         self,
         agent_id: str,
@@ -640,7 +652,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents", method="GET")
+    @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
         """List all agents.

@@ -650,7 +662,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agent(self, agent_id: str) -> Agent:
         """Describe an agent by its ID.

@@ -659,7 +671,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/sessions", method="GET")
+    @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agent_sessions(
         self,
         agent_id: str,
@@ -682,7 +694,7 @@ class Agents(Protocol):
     #
     # Both of these APIs are inherently stateful.

-    @webmethod(route="/openai/v1/responses/{response_id}", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_openai_response(
         self,
         response_id: str,
@@ -694,7 +706,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="POST")
+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
     async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
@@ -719,7 +731,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="GET")
+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_responses(
         self,
         after: str | None = None,
@@ -737,7 +749,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_response_input_items(
         self,
         response_id: str,
@@ -759,7 +771,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         """Delete an OpenAI response by its ID.

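Every agents.py hunk above makes the same change: each @webmethod route now declares the API version it belongs to via a level argument, with the LLAMA_STACK_API_V1 constant imported from llama_stack.apis.version. A minimal sketch of the pattern as it reads after this diff (the decorator and constant are real; the Example protocol is illustrative only):

from typing import Protocol

from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod


class Example(Protocol):
    # The route now carries an explicit API level alongside its path and HTTP method.
    @webmethod(route="/example/{example_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_example(self, example_id: str) -> dict: ...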

llama_stack/apis/batch_inference/batch_inference.py
@@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod


@@ -30,7 +31,7 @@ class BatchInference(Protocol):
     including (post-training, evals, etc).
     """

-    @webmethod(route="/batch-inference/completion", method="POST")
+    @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model: str,
@@ -50,7 +51,7 @@ class BatchInference(Protocol):
         """
         ...

-    @webmethod(route="/batch-inference/chat-completion", method="POST")
+    @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model: str,

llama_stack/apis/batches/batches.py
@@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod

 try:
@@ -42,7 +43,7 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """

-    @webmethod(route="/openai/v1/batches", method="POST")
+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
         input_file_id: str,
@@ -62,7 +63,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.

@@ -71,7 +72,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.

@@ -80,7 +81,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches", method="GET")
+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,
         after: str | None = None,

llama_stack/apis/benchmarks/benchmarks.py
@@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):

 @runtime_checkable
 class Benchmarks(Protocol):
-    @webmethod(route="/eval/benchmarks", method="GET")
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def list_benchmarks(self) -> ListBenchmarksResponse:
         """List all benchmarks.

@@ -61,7 +63,8 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_benchmark(
         self,
         benchmark_id: str,
@@ -73,7 +76,8 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks", method="POST")
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def register_benchmark(
         self,
         benchmark_id: str,
@@ -93,3 +97,12 @@ class Benchmarks(Protocol):
         :param metadata: The metadata to use for the benchmark.
         """
         ...
+
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark.
+
+        :param benchmark_id: The ID of the benchmark to unregister.
+        """
+        ...
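benchmarks.py introduces the deprecation pattern that eval.py repeats below: each route is registered twice, once at the stable v1 level with deprecated=True and once at the new v1alpha level, so existing clients keep working while the endpoint migrates. Distilled from the hunks above (the stacked decorators are straight from the diff; the comments are interpretive):

class Benchmarks(Protocol):
    # The v1 registration keeps the old path alive but flags it as deprecated;
    # the v1alpha registration is where the endpoint now lives.
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_benchmarks(self) -> ListBenchmarksResponse: ...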

llama_stack/apis/datasetio/datasetio.py
@@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable

 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.datasets import Dataset
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod


@@ -20,7 +21,7 @@ class DatasetIO(Protocol):
     # keeping for aligning with inference/safety, but this is not used
     dataset_store: DatasetStore

-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def iterrows(
         self,
         dataset_id: str,
@@ -44,7 +45,7 @@ class DatasetIO(Protocol):
         """
         ...

-    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
+    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
     async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
         """Append rows to a dataset.


llama_stack/apis/datasets/datasets.py
@@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):


 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST")
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
     async def register_dataset(
         self,
         purpose: DatasetPurpose,
@@ -214,7 +215,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_dataset(
         self,
         dataset_id: str,
@@ -226,7 +227,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets", method="GET")
+    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
     async def list_datasets(self) -> ListDatasetsResponse:
         """List all datasets.

@@ -234,7 +235,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_dataset(
         self,
         dataset_id: str,

llama_stack/apis/eval/eval.py
@@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
 class Eval(Protocol):
     """Llama Stack Evaluation API for running evaluations on model and agent candidates."""

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def run_eval(
         self,
         benchmark_id: str,
@@ -97,7 +99,10 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def evaluate_rows(
         self,
         benchmark_id: str,
@@ -115,7 +120,10 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
         """Get the status of a job.

@@ -125,7 +133,13 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
         """Cancel a job.

@@ -134,7 +148,15 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
+    )
     async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
         """Get the result of a job.


llama_stack/apis/files/files.py
@@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field

 from llama_stack.apis.common.responses import Order
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 @trace_protocol
 class Files(Protocol):
     # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST")
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
         file: Annotated[UploadFile, File()],
@@ -119,7 +120,7 @@ class Files(Protocol):
         The file upload should be a multipart form request with:
         - file: The File object (not file name) to be uploaded.
         - purpose: The intended purpose of the uploaded file.
-        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
+        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days).

         :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
         :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
@@ -127,7 +128,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files", method="GET")
+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
         after: str | None = None,
@@ -146,7 +147,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
         file_id: str,
@@ -159,7 +160,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
         file_id: str,
@@ -172,7 +173,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,
         file_id: str,

llama_stack/apis/inference/inference.py
@@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
 from llama_stack.apis.telemetry import MetricResponseMixin
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -913,6 +914,7 @@ class OpenAIEmbeddingData(BaseModel):
     """

     object: Literal["embedding"] = "embedding"
+    # TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData
     embedding: list[float] | str
     index: int

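The embedding: list[float] | str union that the new TODO refers to mirrors the OpenAI embeddings API, where requesting encoding_format="base64" returns the vector as a base64 string of packed little-endian float32 values instead of a JSON array. A client-side sketch for decoding the string form (assumes the OpenAI packing convention; not code from this package):

import base64
import struct


def decode_embedding(data: str) -> list[float]:
    # Base64-encoded embeddings pack little-endian float32 values,
    # so every 4-byte chunk decodes to one vector component.
    raw = base64.b64decode(data)
    return list(struct.unpack(f"<{len(raw) // 4}f", raw))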
@@ -1026,7 +1028,7 @@ class InferenceProvider(Protocol):

     model_store: ModelStore | None = None

-    @webmethod(route="/inference/completion", method="POST")
+    @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model_id: str,
@@ -1049,7 +1051,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/batch-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_completion(
         self,
         model_id: str,
@@ -1070,7 +1072,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/chat-completion", method="POST")
+    @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model_id: str,
@@ -1110,7 +1112,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_chat_completion(
         self,
         model_id: str,
@@ -1135,7 +1137,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch chat completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/embeddings", method="POST")
+    @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def embeddings(
         self,
         model_id: str,
@@ -1155,7 +1157,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/rerank", method="POST", experimental=True)
+    @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def rerank(
         self,
         model: str,
@@ -1174,7 +1176,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/openai/v1/completions", method="POST")
+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
         # Standard OpenAI completion parameters
@@ -1225,7 +1227,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/chat/completions", method="POST")
+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
         model: str,
@@ -1281,7 +1283,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/embeddings", method="POST")
+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
         model: str,
@@ -1310,7 +1312,7 @@ class Inference(InferenceProvider):
     - Embedding models: these models generate embeddings to be used for semantic search.
     """

-    @webmethod(route="/openai/v1/chat/completions", method="GET")
+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
         after: str | None = None,
@@ -1328,7 +1330,7 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")

-    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
+    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Describe a chat completion by its ID.


llama_stack/apis/inspect/inspect.py
@@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable

 from pydantic import BaseModel

+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):

 @runtime_checkable
 class Inspect(Protocol):
-    @webmethod(route="/inspect/routes", method="GET")
+    @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
     async def list_routes(self) -> ListRoutesResponse:
         """List all available API routes with their methods and implementing providers.

@@ -65,7 +66,7 @@ class Inspect(Protocol):
         """
         ...

-    @webmethod(route="/health", method="GET")
+    @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
     async def health(self) -> HealthInfo:
         """Get the current health status of the service.

@@ -73,7 +74,7 @@ class Inspect(Protocol):
         """
         ...

-    @webmethod(route="/version", method="GET")
+    @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
     async def version(self) -> VersionInfo:
         """Get the version of the service.

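Taken together with the llama_stack/core/server/routes.py changes in the file list, the level argument presumably controls which version prefix a route is mounted under, e.g. /health at LLAMA_STACK_API_V1 would be served as /v1/health. An illustrative sketch of that mapping (the constant values and the helper are assumptions, not code from the package):

# Assumed values, mirroring what llama_stack/apis/version.py likely defines.
LLAMA_STACK_API_V1 = "v1"
LLAMA_STACK_API_V1ALPHA = "v1alpha"


def versioned_route(level: str, route: str) -> str:
    # versioned_route(LLAMA_STACK_API_V1, "/health") -> "/v1/health"
    return f"/{level}{route}"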