PyPI - llama-stack - Versions diffs - 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

llama-stack 0.4.3 (py3-none-any.whl) → 0.5.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (311)

llama_stack/cli/stack/_list_deps.py +11 -7
llama_stack/cli/stack/run.py +3 -25
llama_stack/core/access_control/datatypes.py +78 -0
llama_stack/core/configure.py +2 -2
{llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
llama_stack/core/connectors/connectors.py +162 -0
llama_stack/core/conversations/conversations.py +61 -58
llama_stack/core/datatypes.py +54 -8
llama_stack/core/library_client.py +60 -13
llama_stack/core/prompts/prompts.py +43 -42
llama_stack/core/routers/datasets.py +20 -17
llama_stack/core/routers/eval_scoring.py +143 -53
llama_stack/core/routers/inference.py +20 -9
llama_stack/core/routers/safety.py +30 -42
llama_stack/core/routers/vector_io.py +15 -7
llama_stack/core/routing_tables/models.py +42 -3
llama_stack/core/routing_tables/scoring_functions.py +19 -19
llama_stack/core/routing_tables/shields.py +20 -17
llama_stack/core/routing_tables/vector_stores.py +8 -5
llama_stack/core/server/auth.py +192 -17
llama_stack/core/server/fastapi_router_registry.py +40 -5
llama_stack/core/server/server.py +24 -5
llama_stack/core/stack.py +54 -10
llama_stack/core/storage/datatypes.py +9 -0
llama_stack/core/store/registry.py +1 -1
llama_stack/core/utils/exec.py +2 -2
llama_stack/core/utils/type_inspection.py +16 -2
llama_stack/distributions/dell/config.yaml +4 -1
llama_stack/distributions/dell/doc_template.md +209 -0
llama_stack/distributions/dell/run-with-safety.yaml +4 -1
llama_stack/distributions/nvidia/config.yaml +4 -1
llama_stack/distributions/nvidia/doc_template.md +170 -0
llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
llama_stack/distributions/oci/config.yaml +4 -1
llama_stack/distributions/oci/doc_template.md +140 -0
llama_stack/distributions/open-benchmark/config.yaml +9 -1
llama_stack/distributions/postgres-demo/config.yaml +1 -1
llama_stack/distributions/starter/build.yaml +62 -0
llama_stack/distributions/starter/config.yaml +22 -3
llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
llama_stack/distributions/starter/starter.py +13 -1
llama_stack/distributions/starter-gpu/build.yaml +62 -0
llama_stack/distributions/starter-gpu/config.yaml +22 -3
llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
llama_stack/distributions/template.py +10 -2
llama_stack/distributions/watsonx/config.yaml +4 -1
llama_stack/log.py +1 -0
llama_stack/models/llama/resources/dog.jpg +0 -0
llama_stack/models/llama/resources/pasta.jpeg +0 -0
llama_stack/models/llama/resources/small_dog.jpg +0 -0
llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
llama_stack/providers/inline/batches/reference/batches.py +2 -1
llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
llama_stack/providers/registry/agents.py +1 -0
llama_stack/providers/registry/inference.py +1 -9
llama_stack/providers/registry/vector_io.py +136 -16
llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
llama_stack/providers/remote/eval/nvidia/README.md +134 -0
llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
llama_stack/providers/remote/files/s3/README.md +266 -0
llama_stack/providers/remote/files/s3/config.py +5 -3
llama_stack/providers/remote/files/s3/files.py +2 -2
llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
llama_stack/providers/remote/inference/openai/openai.py +2 -0
llama_stack/providers/remote/inference/together/together.py +4 -0
llama_stack/providers/remote/inference/vertexai/config.py +3 -3
llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
llama_stack/providers/remote/inference/vllm/config.py +37 -18
llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
llama_stack/providers/remote/safety/nvidia/README.md +78 -0
llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
llama_stack/providers/remote/vector_io/oci/config.py +41 -0
llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
llama_stack/providers/utils/bedrock/client.py +3 -3
llama_stack/providers/utils/bedrock/config.py +7 -7
llama_stack/providers/utils/inference/__init__.py +0 -25
llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
llama_stack/providers/utils/inference/http_client.py +239 -0
llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
llama_stack/providers/utils/inference/model_registry.py +148 -2
llama_stack/providers/utils/inference/openai_compat.py +1 -158
llama_stack/providers/utils/inference/openai_mixin.py +42 -2
llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
llama_stack/providers/utils/memory/vector_store.py +46 -19
llama_stack/providers/utils/responses/responses_store.py +40 -6
llama_stack/providers/utils/safety.py +114 -0
llama_stack/providers/utils/tools/mcp.py +44 -3
llama_stack/testing/api_recorder.py +9 -3
{llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
{llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
llama_stack-0.5.0.dist-info/top_level.txt +1 -0
llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
llama_stack/models/llama/hadamard_utils.py +0 -88
llama_stack/models/llama/llama3/args.py +0 -74
llama_stack/models/llama/llama3/generation.py +0 -378
llama_stack/models/llama/llama3/model.py +0 -304
llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
llama_stack/models/llama/llama3/quantization/loader.py +0 -316
llama_stack/models/llama/llama3_1/__init__.py +0 -12
llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
llama_stack/models/llama/llama3_1/prompts.py +0 -258
llama_stack/models/llama/llama3_2/__init__.py +0 -5
llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
llama_stack/models/llama/llama3_3/__init__.py +0 -5
llama_stack/models/llama/llama3_3/prompts.py +0 -259
llama_stack/models/llama/llama4/args.py +0 -107
llama_stack/models/llama/llama4/ffn.py +0 -58
llama_stack/models/llama/llama4/moe.py +0 -214
llama_stack/models/llama/llama4/preprocess.py +0 -435
llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
llama_stack/models/llama/llama4/quantization/loader.py +0 -226
llama_stack/models/llama/llama4/vision/__init__.py +0 -5
llama_stack/models/llama/llama4/vision/embedding.py +0 -210
llama_stack/models/llama/llama4/vision/encoder.py +0 -412
llama_stack/models/llama/quantize_impls.py +0 -316
llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
llama_stack-0.4.3.dist-info/top_level.txt +0 -2
llama_stack_api/__init__.py +0 -945
llama_stack_api/admin/__init__.py +0 -45
llama_stack_api/admin/api.py +0 -72
llama_stack_api/admin/fastapi_routes.py +0 -117
llama_stack_api/admin/models.py +0 -113
llama_stack_api/agents.py +0 -173
llama_stack_api/batches/__init__.py +0 -40
llama_stack_api/batches/api.py +0 -53
llama_stack_api/batches/fastapi_routes.py +0 -113
llama_stack_api/batches/models.py +0 -78
llama_stack_api/benchmarks/__init__.py +0 -43
llama_stack_api/benchmarks/api.py +0 -39
llama_stack_api/benchmarks/fastapi_routes.py +0 -109
llama_stack_api/benchmarks/models.py +0 -109
llama_stack_api/common/__init__.py +0 -5
llama_stack_api/common/content_types.py +0 -101
llama_stack_api/common/errors.py +0 -95
llama_stack_api/common/job_types.py +0 -38
llama_stack_api/common/responses.py +0 -77
llama_stack_api/common/training_types.py +0 -47
llama_stack_api/common/type_system.py +0 -146
llama_stack_api/connectors.py +0 -146
llama_stack_api/conversations.py +0 -270
llama_stack_api/datasetio.py +0 -55
llama_stack_api/datasets/__init__.py +0 -61
llama_stack_api/datasets/api.py +0 -35
llama_stack_api/datasets/fastapi_routes.py +0 -104
llama_stack_api/datasets/models.py +0 -152
llama_stack_api/datatypes.py +0 -373
llama_stack_api/eval.py +0 -137
llama_stack_api/file_processors/__init__.py +0 -27
llama_stack_api/file_processors/api.py +0 -64
llama_stack_api/file_processors/fastapi_routes.py +0 -78
llama_stack_api/file_processors/models.py +0 -42
llama_stack_api/files/__init__.py +0 -35
llama_stack_api/files/api.py +0 -51
llama_stack_api/files/fastapi_routes.py +0 -124
llama_stack_api/files/models.py +0 -107
llama_stack_api/inference.py +0 -1169
llama_stack_api/inspect_api/__init__.py +0 -37
llama_stack_api/inspect_api/api.py +0 -25
llama_stack_api/inspect_api/fastapi_routes.py +0 -76
llama_stack_api/inspect_api/models.py +0 -28
llama_stack_api/internal/kvstore.py +0 -28
llama_stack_api/internal/sqlstore.py +0 -81
llama_stack_api/llama_stack_api/__init__.py +0 -945
llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
llama_stack_api/llama_stack_api/admin/api.py +0 -72
llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
llama_stack_api/llama_stack_api/admin/models.py +0 -113
llama_stack_api/llama_stack_api/agents.py +0 -173
llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
llama_stack_api/llama_stack_api/batches/api.py +0 -53
llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
llama_stack_api/llama_stack_api/batches/models.py +0 -78
llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
llama_stack_api/llama_stack_api/common/__init__.py +0 -5
llama_stack_api/llama_stack_api/common/content_types.py +0 -101
llama_stack_api/llama_stack_api/common/errors.py +0 -95
llama_stack_api/llama_stack_api/common/job_types.py +0 -38
llama_stack_api/llama_stack_api/common/responses.py +0 -77
llama_stack_api/llama_stack_api/common/training_types.py +0 -47
llama_stack_api/llama_stack_api/common/type_system.py +0 -146
llama_stack_api/llama_stack_api/connectors.py +0 -146
llama_stack_api/llama_stack_api/conversations.py +0 -270
llama_stack_api/llama_stack_api/datasetio.py +0 -55
llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
llama_stack_api/llama_stack_api/datasets/api.py +0 -35
llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
llama_stack_api/llama_stack_api/datasets/models.py +0 -152
llama_stack_api/llama_stack_api/datatypes.py +0 -373
llama_stack_api/llama_stack_api/eval.py +0 -137
llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
llama_stack_api/llama_stack_api/files/__init__.py +0 -35
llama_stack_api/llama_stack_api/files/api.py +0 -51
llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
llama_stack_api/llama_stack_api/files/models.py +0 -107
llama_stack_api/llama_stack_api/inference.py +0 -1169
llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
llama_stack_api/llama_stack_api/models.py +0 -171
llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
llama_stack_api/llama_stack_api/post_training.py +0 -370
llama_stack_api/llama_stack_api/prompts.py +0 -203
llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
llama_stack_api/llama_stack_api/providers/api.py +0 -16
llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
llama_stack_api/llama_stack_api/providers/models.py +0 -24
llama_stack_api/llama_stack_api/py.typed +0 -0
llama_stack_api/llama_stack_api/rag_tool.py +0 -168
llama_stack_api/llama_stack_api/resource.py +0 -37
llama_stack_api/llama_stack_api/router_utils.py +0 -160
llama_stack_api/llama_stack_api/safety.py +0 -132
llama_stack_api/llama_stack_api/schema_utils.py +0 -208
llama_stack_api/llama_stack_api/scoring.py +0 -93
llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
llama_stack_api/llama_stack_api/shields.py +0 -93
llama_stack_api/llama_stack_api/tools.py +0 -226
llama_stack_api/llama_stack_api/vector_io.py +0 -941
llama_stack_api/llama_stack_api/vector_stores.py +0 -53
llama_stack_api/llama_stack_api/version.py +0 -9
llama_stack_api/models.py +0 -171
llama_stack_api/openai_responses.py +0 -1468
llama_stack_api/post_training.py +0 -370
llama_stack_api/prompts.py +0 -203
llama_stack_api/providers/__init__.py +0 -33
llama_stack_api/providers/api.py +0 -16
llama_stack_api/providers/fastapi_routes.py +0 -57
llama_stack_api/providers/models.py +0 -24
llama_stack_api/py.typed +0 -0
llama_stack_api/rag_tool.py +0 -168
llama_stack_api/resource.py +0 -37
llama_stack_api/router_utils.py +0 -160
llama_stack_api/safety.py +0 -132
llama_stack_api/schema_utils.py +0 -208
llama_stack_api/scoring.py +0 -93
llama_stack_api/scoring_functions.py +0 -211
llama_stack_api/shields.py +0 -93
llama_stack_api/tools.py +0 -226
llama_stack_api/vector_io.py +0 -941
llama_stack_api/vector_stores.py +0 -53
llama_stack_api/version.py +0 -9
{llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
{llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
{llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0

llama_stack/core/routing_tables/shields.py CHANGED Viewed

@@ -4,13 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Any
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )
 from llama_stack.log import get_logger
-from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
+from llama_stack_api import (
+    GetShieldRequest,
+    ListShieldsResponse,
+    RegisterShieldRequest,
+    ResourceType,
+    Shield,
+    Shields,
+    UnregisterShieldRequest,
+)
 from .common import CommonRoutingTableImpl
@@ -21,21 +27,17 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
     async def list_shields(self) -> ListShieldsResponse:
         return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value))
-    async def get_shield(self, identifier: str) -> Shield:
-        shield = await self.get_object_by_identifier("shield", identifier)
+    async def get_shield(self, request: GetShieldRequest) -> Shield:
+        shield = await self.get_object_by_identifier("shield", request.identifier)
         if shield is None:
-            raise ValueError(f"Shield '{identifier}' not found")
+            raise ValueError(f"Shield '{request.identifier}' not found")
         return shield
-    async def register_shield(
-        self,
-        shield_id: str,
-        provider_shield_id: str | None = None,
-        provider_id: str | None = None,
-        params: dict[str, Any] | None = None,
-    ) -> Shield:
+    async def register_shield(self, request: RegisterShieldRequest) -> Shield:
+        provider_shield_id = request.provider_shield_id
         if provider_shield_id is None:
-            provider_shield_id = shield_id
+            provider_shield_id = request.shield_id
+        provider_id = request.provider_id
         if provider_id is None:
             # If provider_id not specified, use the only provider if it supports this shield type
             if len(self.impls_by_provider_id) == 1:
@@ -44,10 +46,11 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
                 raise ValueError(
                     "No provider specified and multiple providers available. Please specify a provider_id."
                 )
+        params = request.params
         if params is None:
             params = {}
         shield = ShieldWithOwner(
-            identifier=shield_id,
+            identifier=request.shield_id,
             provider_resource_id=provider_shield_id,
             provider_id=provider_id,
             params=params,
@@ -55,6 +58,6 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
         await self.register_object(shield)
         return shield
-    async def unregister_shield(self, identifier: str) -> None:
-        existing_shield = await self.get_shield(identifier)
+    async def unregister_shield(self, request: UnregisterShieldRequest) -> None:
+        existing_shield = await self.get_shield(GetShieldRequest(identifier=request.identifier))
         await self.unregister_object(existing_shield)

llama_stack/core/routing_tables/vector_stores.py CHANGED Viewed

@@ -24,10 +24,13 @@ from llama_stack_api import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
+    VectorStoreFileBatchObject,
     VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
+    VectorStoreFilesListInBatchResponse,
     VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
@@ -205,7 +208,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         after: str | None = None,
         before: str | None = None,
         filter: VectorStoreFileStatus | None = None,
-    ) -> list[VectorStoreFileObject]:
+    ) -> VectorStoreListFilesResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_list_files_in_vector_store(
@@ -276,7 +279,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
-    ):
+    ) -> VectorStoreFileBatchObject:
         await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_create_vector_store_file_batch(
@@ -288,7 +291,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         batch_id: str,
         vector_store_id: str,
-    ):
+    ) -> VectorStoreFileBatchObject:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_batch(
@@ -305,7 +308,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         filter: str | None = None,
         limit: int | None = 20,
         order: str | None = "desc",
-    ):
+    ) -> VectorStoreFilesListInBatchResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_list_files_in_vector_store_file_batch(
@@ -322,7 +325,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         batch_id: str,
         vector_store_id: str,
-    ):
+    ) -> VectorStoreFileBatchObject:
         await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_cancel_vector_store_file_batch(

llama_stack/core/server/auth.py CHANGED Viewed

@@ -9,6 +9,8 @@ import json
 import httpx
 from aiohttp import hdrs
+from llama_stack.core.access_control.conditions import parse_conditions
+from llama_stack.core.access_control.datatypes import RouteAccessRule
 from llama_stack.core.datatypes import AuthenticationConfig, User
 from llama_stack.core.request_headers import user_from_scope
 from llama_stack.core.server.auth_providers import create_auth_provider
@@ -152,16 +154,6 @@ class AuthenticationMiddleware:
                 f"Authentication successful: {validation_result.principal} with {len(validation_result.attributes)} attributes"
             )
-            # Scope-based API access control
-            if webmethod and webmethod.required_scope:
-                user = user_from_scope(scope)
-                if not _has_required_scope(webmethod.required_scope, user):
-                    return await self._send_auth_error(
-                        send,
-                        f"Access denied: user does not have required scope: {webmethod.required_scope}",
-                        status=403,
-                    )
         return await self.app(scope, receive, send)
     async def _send_auth_error(self, send, message, status=401):
@@ -177,13 +169,196 @@ class AuthenticationMiddleware:
         await send({"type": "http.response.body", "body": error_msg})
-def _has_required_scope(required_scope: str, user: User | None) -> bool:
-    # if no user, assume auth is not enabled
-    if not user:
-        return True
+class RouteAuthorizationMiddleware:
+    """Middleware that enforces route-level access control.
+    This middleware runs after authentication and checks if the authenticated user
+    has permission to access the requested API route based on route_policy rules.
+    """
+    def __init__(self, app, route_policy: list[RouteAccessRule]):
+        self.app = app
+        self.route_policy = route_policy
+    async def __call__(self, scope, receive, send):
+        # Only process HTTP requests
+        if scope["type"] != "http":
+            return await self.app(scope, receive, send)
+        # If no route policy configured, allow all routes (backward compatible)
+        if not self.route_policy:
+            return await self.app(scope, receive, send)
+        route = scope.get("path", "")
+        # Normalize route: remove trailing slash (except for root "/")
+        if route != "/" and route.endswith("/"):
+            route = route.rstrip("/")
+        # Get authenticated user from scope (set by AuthenticationMiddleware if present)
+        user = user_from_scope(scope)
+        # Check if user has permission to access this route
+        if not self._is_route_allowed(route, user):
+            return await self._send_error(
+                send, f"Access denied: insufficient permissions for route {route}", status=403
+            )
+        return await self.app(scope, receive, send)
+    def _is_route_allowed(self, route: str, user: User | None) -> bool:
+        """Check if the user is allowed to access the given route.
+        Rules are evaluated in order. First matching rule determines access.
+        If no rule matches, access is denied.
+        Args:
+            route: The route being accessed
+            user: The authenticated user, or None if no authentication is configured
+        """
+        user_str = user.principal if user else "anonymous"
+        for index, rule in enumerate(self.route_policy):
+            if self._rule_matches(rule, route, user):
+                # Check if this is a permit or forbid rule
+                if rule.permit:
+                    decision = "APPROVED"
+                    reason = rule.description or ""
+                    logger.debug(
+                        f"ROUTE_AUTHZ,decision={decision},user={user_str},"
+                        f"route={route},rule_index={index},reason={reason!r}"
+                    )
+                    return True
+                else:  # forbid
+                    decision = "DENIED"
+                    reason = rule.description or ""
+                    logger.debug(
+                        f"ROUTE_AUTHZ,decision={decision},user={user_str},"
+                        f"route={route},rule_index={index},reason={reason!r}"
+                    )
+                    return False
+        # No matching rule found - deny by default
+        decision = "DENIED"
+        reason = "no matching rule"
+        logger.debug(f"ROUTE_AUTHZ,decision={decision},user={user_str},route={route},rule_index=-1,reason={reason!r}")
+        return False
+    def _rule_matches(self, rule: RouteAccessRule, route: str, user: User | None) -> bool:
+        """Check if a rule matches the given route and user.
+        Args:
+            rule: The rule to evaluate
+            route: The route being accessed
+            user: The authenticated user, or None if no authentication is configured
+        """
+        # Get the scope (permit or forbid)
+        scope = rule.permit if rule.permit else rule.forbid
+        if not scope:
+            return False
+        # Check if route matches
+        if not self._route_matches(route, scope.paths):
+            return False
+        # Evaluate conditions
+        return self._evaluate_conditions(rule, user)
+    def _route_matches(self, request_route: str, rule_patterns: str | list[str]) -> bool:
+        """Check if request route matches any of the rule patterns.
+        Supports:
+        - Exact match: "/v1/chat/completions"
+        - Prefix wildcard: "/v1/files*" matches "/v1/files", "/v1/files/upload", "/v1/files/list", etc.
+        - Full wildcard: "*" matches all routes
+        """
+        patterns = [rule_patterns] if isinstance(rule_patterns, str) else rule_patterns
+        for pattern in patterns:
+            if pattern == "*":
+                # Full wildcard matches everything
+                return True
+            elif pattern.endswith("*"):
+                # Prefix wildcard: check if request route starts with the prefix
+                prefix = pattern[:-1]  # Remove "*"
+                if request_route.startswith(prefix):
+                    return True
+            elif pattern == request_route:
+                # Exact match
+                return True
-    if not user.attributes:
         return False
-    user_scopes = user.attributes.get("scopes", [])
-    return required_scope in user_scopes
+    def _evaluate_conditions(self, rule: RouteAccessRule, user: User | None) -> bool:
+        """Evaluate when/unless conditions for the rule.
+        Reuses the existing condition parsing from access_control.conditions.
+        Args:
+            rule: The rule whose conditions to evaluate
+            user: The authenticated user, or None if no authentication is configured
+        Returns:
+            True if conditions are met (or no conditions exist), False otherwise
+        """
+        # If rule has conditions but no user is available, conditions cannot be met
+        if (rule.when or rule.unless) and not user:
+            return False
+        if rule.when:
+            # At this point, if rule.when exists and we got past the check above,
+            # user is guaranteed to be non-None
+            assert user is not None
+            conditions_list = rule.when if isinstance(rule.when, list) else [rule.when]
+            conditions = parse_conditions(conditions_list)
+            # For 'when', all conditions must match (AND logic)
+            # Note: Since we're checking route access, we don't have a resource,
+            # so we create a context object to satisfy the interface
+            route_context = _RouteContext()
+            for condition in conditions:
+                if not condition.matches(route_context, user):
+                    return False
+            return True
+        if rule.unless:
+            # At this point, if rule.unless exists and we got past the check above,
+            # user is guaranteed to be non-None
+            assert user is not None
+            conditions_list = rule.unless if isinstance(rule.unless, list) else [rule.unless]
+            conditions = parse_conditions(conditions_list)
+            # For 'unless', no conditions should match (NOT logic)
+            route_context = _RouteContext()
+            for condition in conditions:
+                if condition.matches(route_context, user):
+                    return False
+            return True
+        # No conditions specified - rule applies regardless of user
+        return True
+    async def _send_error(self, send, message: str, status: int = 403):
+        """Send an error response."""
+        await send(
+            {
+                "type": "http.response.start",
+                "status": status,
+                "headers": [[b"content-type", b"application/json"]],
+            }
+        )
+        error_key = "message" if status == 401 else "detail"
+        error_msg = json.dumps({"error": {error_key: message}}).encode()
+        await send({"type": "http.response.body", "body": error_msg})
+class _RouteContext:
+    """Placeholder resource for route-level condition evaluation.
+    Route rules don't operate on actual resources, so we use this context object
+    to satisfy the condition.matches() interface. Route conditions typically check
+    user attributes (e.g., "user with admin in roles") and don't require resource properties.
+    """
+    def __init__(self):
+        self.type = "route"
+        self.identifier = "route"
+        self.owner = None

llama_stack/core/server/fastapi_router_registry.py CHANGED Viewed

@@ -16,20 +16,55 @@ from typing import Any, cast
 from fastapi import APIRouter
 from fastapi.routing import APIRoute
-from llama_stack_api import admin, batches, benchmarks, datasets, files, inspect_api, providers
+from llama_stack_api import (
+    admin,
+    agents,
+    batches,
+    benchmarks,
+    connectors,
+    conversations,
+    datasetio,
+    datasets,
+    eval,
+    files,
+    inference,
+    inspect_api,
+    models,
+    post_training,
+    prompts,
+    providers,
+    safety,
+    scoring,
+    scoring_functions,
+    shields,
+    vector_io,
+)
+from llama_stack_api.datatypes import Api
 # Router factories for APIs that have FastAPI routers
 # Add new APIs here as they are migrated to the router system
-from llama_stack_api.datatypes import Api
 _ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
     "admin": admin.fastapi_routes.create_router,
+    "agents": agents.fastapi_routes.create_router,
     "batches": batches.fastapi_routes.create_router,
     "benchmarks": benchmarks.fastapi_routes.create_router,
+    "connectors": connectors.fastapi_routes.create_router,
+    "conversations": conversations.fastapi_routes.create_router,
+    "datasetio": datasetio.fastapi_routes.create_router,
     "datasets": datasets.fastapi_routes.create_router,
-    "providers": providers.fastapi_routes.create_router,
-    "inspect": inspect_api.fastapi_routes.create_router,
+    "eval": eval.fastapi_routes.create_router,
     "files": files.fastapi_routes.create_router,
+    "inference": inference.fastapi_routes.create_router,
+    "inspect": inspect_api.fastapi_routes.create_router,
+    "models": models.fastapi_routes.create_router,
+    "post_training": post_training.fastapi_routes.create_router,
+    "prompts": prompts.fastapi_routes.create_router,
+    "providers": providers.fastapi_routes.create_router,
+    "safety": safety.fastapi_routes.create_router,
+    "scoring": scoring.fastapi_routes.create_router,
+    "scoring_functions": scoring_functions.fastapi_routes.create_router,
+    "shields": shields.fastapi_routes.create_router,
+    "vector_io": vector_io.fastapi_routes.create_router,
 }

llama_stack/core/server/server.py CHANGED Viewed

@@ -48,7 +48,7 @@ from llama_stack.core.server.fastapi_router_registry import build_fastapi_router
 from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.core.stack import (
     Stack,
-    cast_image_name_to_string,
+    cast_distro_name_to_string,
     replace_env_vars,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
@@ -57,7 +57,7 @@ from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger
 from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
-from .auth import AuthenticationMiddleware
+from .auth import AuthenticationMiddleware, RouteAuthorizationMiddleware
 from .quota import QuotaMiddleware
 REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -88,6 +88,13 @@ async def global_exception_handler(request: Request, exc: Exception):
     traceback.print_exception(type(exc), exc, exc.__traceback__)
     http_exc = translate_exception(exc)
+    # OpenAI-compat Vector Stores endpoints treat many "not found" conditions as 400s.
+    # Our core exceptions model these as ResourceNotFoundError (mapped to 404 by default),
+    # but integration tests (and OpenAI client behavior expectations in this repo)
+    # assert they surface as BadRequestError instead.
+    if isinstance(exc, ResourceNotFoundError) and request.url.path.startswith("/v1/vector_stores"):
+        http_exc = HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=str(exc))
     return JSONResponse(status_code=http_exc.status_code, content={"error": {"detail": http_exc.detail}})
@@ -396,7 +403,7 @@ def create_app() -> StackApp:
         logger = get_logger(name=__name__, category="core::server", config=logger_config)
         config = replace_env_vars(config_contents)
-        config = StackConfig(**cast_image_name_to_string(config))
+        config = StackConfig(**cast_distro_name_to_string(config))
     _log_run_config(run_config=config)
@@ -416,8 +423,19 @@ def create_app() -> StackApp:
     impls = app.stack.impls
     if config.server.auth:
-        logger.info(f"Enabling authentication with provider: {config.server.auth.provider_config.type.value}")
-        app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth, impls=impls)
+        # Add route authorization middleware if route_policy is configured
+        # This can work independently of authentication
+        # NOTE: Add this FIRST because middleware wraps in reverse order (last added runs first)
+        # We want: Request → Auth → RouteAuth → App
+        if config.server.auth.route_policy:
+            logger.info(f"Enabling route-level authorization with {len(config.server.auth.route_policy)} rules")
+            app.add_middleware(RouteAuthorizationMiddleware, route_policy=config.server.auth.route_policy)
+        # Add authentication middleware only if provider is configured
+        # This runs FIRST in the middleware chain (last added = first to run)
+        if config.server.auth.provider_config:
+            logger.info(f"Enabling authentication with provider: {config.server.auth.provider_config.type.value}")
+            app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth, impls=impls)
     else:
         if config.server.quota:
             quota = config.server.quota
@@ -474,6 +492,7 @@ def create_app() -> StackApp:
     apis_to_serve.add("providers")
     apis_to_serve.add("prompts")
     apis_to_serve.add("conversations")
+    apis_to_serve.add("connectors")
     for api_str in apis_to_serve:
         api = Api(api_str)

llama_stack/core/stack.py CHANGED Viewed

@@ -16,6 +16,7 @@ import yaml
 from pydantic import BaseModel
 from llama_stack.core.admin import AdminImpl, AdminImplConfig
+from llama_stack.core.connectors.connectors import ConnectorServiceConfig, ConnectorServiceImpl
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
 from llama_stack.core.distribution import get_provider_registry
@@ -42,6 +43,7 @@ from llama_stack_api import (
     Api,
     Batches,
     Benchmarks,
+    Connectors,
     Conversations,
     DatasetIO,
     Datasets,
@@ -54,6 +56,9 @@ from llama_stack_api import (
     Prompts,
     Providers,
     RegisterBenchmarkRequest,
+    RegisterModelRequest,
+    RegisterScoringFunctionRequest,
+    RegisterShieldRequest,
     Safety,
     Scoring,
     ScoringFunctions,
@@ -89,6 +94,7 @@ class LlamaStack(
     Files,
     Prompts,
     Conversations,
+    Connectors,
 ):
     pass
@@ -96,15 +102,15 @@ class LlamaStack(
 # Resources to register based on configuration.
 # If a request class is specified, the configuration object will be converted to this class before invoking the registration method.
 RESOURCES = [
-    ("models", Api.models, "register_model", "list_models", None),
-    ("shields", Api.shields, "register_shield", "list_shields", None),
+    ("models", Api.models, "register_model", "list_models", RegisterModelRequest),
+    ("shields", Api.shields, "register_shield", "list_shields", RegisterShieldRequest),
     ("datasets", Api.datasets, "register_dataset", "list_datasets", RegisterDatasetRequest),
     (
         "scoring_fns",
         Api.scoring_functions,
         "register_scoring_function",
         "list_scoring_functions",
-        None,
+        RegisterScoringFunctionRequest,
     ),
     ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks", RegisterBenchmarkRequest),
     ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups", None),
@@ -242,6 +248,34 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
             )
+async def register_connectors(run_config: StackConfig, impls: dict[Api, Any]):
+    """Register connectors from config and remove those no longer configured.
+
+    Does nothing when ``Api.connectors`` is not present in ``impls``. Otherwise
+    every entry in ``run_config.connectors`` is passed to ``register_connector``,
+    after which each connector returned by ``list_connectors()`` whose ID is not
+    in the config is unregistered.
+
+    Args:
+        run_config: Stack configuration whose ``connectors`` entries are registered.
+        impls: Mapping of API to implementation; the connectors implementation
+            is looked up under ``Api.connectors``.
+    """
+    if Api.connectors not in impls:
+        return
+    connectors_impl = impls[Api.connectors]
+    # Get connector IDs from config
+    config_connector_ids = {c.connector_id for c in run_config.connectors}
+    # Register/Update config connectors
+    for connector in run_config.connectors:
+        logger.debug(f"Registering connector: {connector.connector_id}")
+        await connectors_impl.register_connector(
+            connector_id=connector.connector_id,
+            connector_type=connector.connector_type,
+            url=connector.url,
+            server_label=connector.server_label,
+        )
+    # Remove connectors not in config (orphan cleanup)
+    # The listing is taken after registration, so it includes the entries registered above.
+    existing_connectors = await connectors_impl.list_connectors()
+    for connector in existing_connectors.data:
+        if connector.connector_id not in config_connector_ids:
+            logger.info(f"Removing orphaned connector: {connector.connector_id}")
+            await connectors_impl.unregister_connector(connector.connector_id)
 async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]):
     """Validate vector stores configuration."""
     if vector_stores_config is None:
@@ -276,7 +310,8 @@ async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict
             f"Embedding model '{model_identifier}' not found. Available embedding models: {list(models_list.keys())}"
         )
-    embedding_dimension = model.metadata.get("embedding_dimension")
+    # If the model metadata has no "embedding_dimension" entry, fall back to the configured embedding_dimensions value.
+    embedding_dimension = model.metadata.get("embedding_dimension", embedding_model.embedding_dimensions)
     if embedding_dimension is None:
         raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata")
@@ -489,10 +524,10 @@ def _convert_string_to_proper_type(value: str) -> Any:
     return value
-def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
-    """Ensure that any value for a key 'image_name' in a config_dict is a string"""
-    if "image_name" in config_dict and config_dict["image_name"] is not None:
-        config_dict["image_name"] = str(config_dict["image_name"])
+def cast_distro_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
+    """Ensure that any value for a key 'distro_name' in a config_dict is a string.
+
+    The dict is modified in place and the same object is returned. A missing
+    key or a None value is left untouched.
+    """
+    if "distro_name" in config_dict and config_dict["distro_name"] is not None:
+        config_dict["distro_name"] = str(config_dict["distro_name"])
     return config_dict
@@ -532,6 +567,11 @@ def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) ->
     )
     impls[Api.conversations] = conversations_impl
+    connectors_impl = ConnectorServiceImpl(
+        ConnectorServiceConfig(config=config),
+    )
+    impls[Api.connectors] = connectors_impl
 def _initialize_storage(run_config: StackConfig):
     kv_backends: dict[str, StorageBackendConfig] = {}
@@ -574,7 +614,7 @@ class Stack:
         stores = self.run_config.storage.stores
         if not stores.metadata:
             raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
-        dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
+        dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.distro_name)
         policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
         internal_impls = {}
@@ -592,8 +632,11 @@ class Stack:
             await impls[Api.prompts].initialize()
         if Api.conversations in impls:
             await impls[Api.conversations].initialize()
+        if Api.connectors in impls:
+            await impls[Api.connectors].initialize()
         await register_resources(self.run_config, impls)
+        await register_connectors(self.run_config, impls)
         await refresh_registry_once(impls)
         await validate_vector_stores_config(self.run_config.vector_stores, impls)
         await validate_safety_config(self.run_config.safety, impls)
@@ -727,7 +770,7 @@ def run_config_from_adhoc_config_spec(
             )
         ]
     config = StackConfig(
-        image_name="distro-test",
+        distro_name="distro-test",
         apis=list(provider_configs_by_api.keys()),
         providers=provider_configs_by_api,
         storage=StorageConfig(
@@ -740,6 +783,7 @@ def run_config_from_adhoc_config_spec(
                 inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
                 conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
                 prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
+                connectors=KVStoreReference(backend="kv_default", namespace="connectors"),
             ),
         ),
     )

llama_stack/core/storage/datatypes.py CHANGED Viewed

@@ -255,6 +255,11 @@ class InferenceStoreReference(SqlStoreReference):
 class ResponsesStoreReference(InferenceStoreReference):
     """Responses store configuration with queue tuning."""
+    table_name: str = Field(
+        default="openai_responses",
+        description="Name of the table to use for storing OpenAI responses",
+    )
 class ServerStoresConfig(BaseModel):
     metadata: KVStoreReference | None = Field(
@@ -286,6 +291,10 @@ class ServerStoresConfig(BaseModel):
         default=KVStoreReference(backend="kv_default", namespace="prompts"),
         description="Prompts store configuration (uses KV backend)",
     )
+    connectors: KVStoreReference | None = Field(
+        default=KVStoreReference(backend="kv_default", namespace="connectors"),
+        description="Connectors store configuration (uses KV backend)",
+    )
 class StorageConfig(BaseModel):

llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

llama-stack 0.4.3py3-none-any.whl → 0.5.0py3-none-any.whl