llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/__init__.py +0 -25
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +1 -158
- llama_stack/providers/utils/inference/openai_mixin.py +42 -2
- llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
- llama_stack-0.5.0.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,13 +4,19 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
7
|
from llama_stack.core.datatypes import (
|
|
10
8
|
ShieldWithOwner,
|
|
11
9
|
)
|
|
12
10
|
from llama_stack.log import get_logger
|
|
13
|
-
from llama_stack_api import
|
|
11
|
+
from llama_stack_api import (
|
|
12
|
+
GetShieldRequest,
|
|
13
|
+
ListShieldsResponse,
|
|
14
|
+
RegisterShieldRequest,
|
|
15
|
+
ResourceType,
|
|
16
|
+
Shield,
|
|
17
|
+
Shields,
|
|
18
|
+
UnregisterShieldRequest,
|
|
19
|
+
)
|
|
14
20
|
|
|
15
21
|
from .common import CommonRoutingTableImpl
|
|
16
22
|
|
|
@@ -21,21 +27,17 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
|
|
21
27
|
async def list_shields(self) -> ListShieldsResponse:
|
|
22
28
|
return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value))
|
|
23
29
|
|
|
24
|
-
async def get_shield(self,
|
|
25
|
-
shield = await self.get_object_by_identifier("shield", identifier)
|
|
30
|
+
async def get_shield(self, request: GetShieldRequest) -> Shield:
|
|
31
|
+
shield = await self.get_object_by_identifier("shield", request.identifier)
|
|
26
32
|
if shield is None:
|
|
27
|
-
raise ValueError(f"Shield '{identifier}' not found")
|
|
33
|
+
raise ValueError(f"Shield '{request.identifier}' not found")
|
|
28
34
|
return shield
|
|
29
35
|
|
|
30
|
-
async def register_shield(
|
|
31
|
-
|
|
32
|
-
shield_id: str,
|
|
33
|
-
provider_shield_id: str | None = None,
|
|
34
|
-
provider_id: str | None = None,
|
|
35
|
-
params: dict[str, Any] | None = None,
|
|
36
|
-
) -> Shield:
|
|
36
|
+
async def register_shield(self, request: RegisterShieldRequest) -> Shield:
|
|
37
|
+
provider_shield_id = request.provider_shield_id
|
|
37
38
|
if provider_shield_id is None:
|
|
38
|
-
provider_shield_id = shield_id
|
|
39
|
+
provider_shield_id = request.shield_id
|
|
40
|
+
provider_id = request.provider_id
|
|
39
41
|
if provider_id is None:
|
|
40
42
|
# If provider_id not specified, use the only provider if it supports this shield type
|
|
41
43
|
if len(self.impls_by_provider_id) == 1:
|
|
@@ -44,10 +46,11 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
|
|
44
46
|
raise ValueError(
|
|
45
47
|
"No provider specified and multiple providers available. Please specify a provider_id."
|
|
46
48
|
)
|
|
49
|
+
params = request.params
|
|
47
50
|
if params is None:
|
|
48
51
|
params = {}
|
|
49
52
|
shield = ShieldWithOwner(
|
|
50
|
-
identifier=shield_id,
|
|
53
|
+
identifier=request.shield_id,
|
|
51
54
|
provider_resource_id=provider_shield_id,
|
|
52
55
|
provider_id=provider_id,
|
|
53
56
|
params=params,
|
|
@@ -55,6 +58,6 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
|
|
55
58
|
await self.register_object(shield)
|
|
56
59
|
return shield
|
|
57
60
|
|
|
58
|
-
async def unregister_shield(self,
|
|
59
|
-
existing_shield = await self.get_shield(identifier)
|
|
61
|
+
async def unregister_shield(self, request: UnregisterShieldRequest) -> None:
|
|
62
|
+
existing_shield = await self.get_shield(GetShieldRequest(identifier=request.identifier))
|
|
60
63
|
await self.unregister_object(existing_shield)
|
|
@@ -24,10 +24,13 @@ from llama_stack_api import (
|
|
|
24
24
|
SearchRankingOptions,
|
|
25
25
|
VectorStoreChunkingStrategy,
|
|
26
26
|
VectorStoreDeleteResponse,
|
|
27
|
+
VectorStoreFileBatchObject,
|
|
27
28
|
VectorStoreFileContentResponse,
|
|
28
29
|
VectorStoreFileDeleteResponse,
|
|
29
30
|
VectorStoreFileObject,
|
|
31
|
+
VectorStoreFilesListInBatchResponse,
|
|
30
32
|
VectorStoreFileStatus,
|
|
33
|
+
VectorStoreListFilesResponse,
|
|
31
34
|
VectorStoreObject,
|
|
32
35
|
VectorStoreSearchResponsePage,
|
|
33
36
|
)
|
|
@@ -205,7 +208,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
205
208
|
after: str | None = None,
|
|
206
209
|
before: str | None = None,
|
|
207
210
|
filter: VectorStoreFileStatus | None = None,
|
|
208
|
-
) ->
|
|
211
|
+
) -> VectorStoreListFilesResponse:
|
|
209
212
|
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
|
210
213
|
provider = await self.get_provider_impl(vector_store_id)
|
|
211
214
|
return await provider.openai_list_files_in_vector_store(
|
|
@@ -276,7 +279,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
276
279
|
self,
|
|
277
280
|
vector_store_id: str,
|
|
278
281
|
params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
|
|
279
|
-
):
|
|
282
|
+
) -> VectorStoreFileBatchObject:
|
|
280
283
|
await self.assert_action_allowed("update", "vector_store", vector_store_id)
|
|
281
284
|
provider = await self.get_provider_impl(vector_store_id)
|
|
282
285
|
return await provider.openai_create_vector_store_file_batch(
|
|
@@ -288,7 +291,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
288
291
|
self,
|
|
289
292
|
batch_id: str,
|
|
290
293
|
vector_store_id: str,
|
|
291
|
-
):
|
|
294
|
+
) -> VectorStoreFileBatchObject:
|
|
292
295
|
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
|
293
296
|
provider = await self.get_provider_impl(vector_store_id)
|
|
294
297
|
return await provider.openai_retrieve_vector_store_file_batch(
|
|
@@ -305,7 +308,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
305
308
|
filter: str | None = None,
|
|
306
309
|
limit: int | None = 20,
|
|
307
310
|
order: str | None = "desc",
|
|
308
|
-
):
|
|
311
|
+
) -> VectorStoreFilesListInBatchResponse:
|
|
309
312
|
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
|
310
313
|
provider = await self.get_provider_impl(vector_store_id)
|
|
311
314
|
return await provider.openai_list_files_in_vector_store_file_batch(
|
|
@@ -322,7 +325,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
322
325
|
self,
|
|
323
326
|
batch_id: str,
|
|
324
327
|
vector_store_id: str,
|
|
325
|
-
):
|
|
328
|
+
) -> VectorStoreFileBatchObject:
|
|
326
329
|
await self.assert_action_allowed("update", "vector_store", vector_store_id)
|
|
327
330
|
provider = await self.get_provider_impl(vector_store_id)
|
|
328
331
|
return await provider.openai_cancel_vector_store_file_batch(
|
llama_stack/core/server/auth.py
CHANGED
|
@@ -9,6 +9,8 @@ import json
|
|
|
9
9
|
import httpx
|
|
10
10
|
from aiohttp import hdrs
|
|
11
11
|
|
|
12
|
+
from llama_stack.core.access_control.conditions import parse_conditions
|
|
13
|
+
from llama_stack.core.access_control.datatypes import RouteAccessRule
|
|
12
14
|
from llama_stack.core.datatypes import AuthenticationConfig, User
|
|
13
15
|
from llama_stack.core.request_headers import user_from_scope
|
|
14
16
|
from llama_stack.core.server.auth_providers import create_auth_provider
|
|
@@ -152,16 +154,6 @@ class AuthenticationMiddleware:
|
|
|
152
154
|
f"Authentication successful: {validation_result.principal} with {len(validation_result.attributes)} attributes"
|
|
153
155
|
)
|
|
154
156
|
|
|
155
|
-
# Scope-based API access control
|
|
156
|
-
if webmethod and webmethod.required_scope:
|
|
157
|
-
user = user_from_scope(scope)
|
|
158
|
-
if not _has_required_scope(webmethod.required_scope, user):
|
|
159
|
-
return await self._send_auth_error(
|
|
160
|
-
send,
|
|
161
|
-
f"Access denied: user does not have required scope: {webmethod.required_scope}",
|
|
162
|
-
status=403,
|
|
163
|
-
)
|
|
164
|
-
|
|
165
157
|
return await self.app(scope, receive, send)
|
|
166
158
|
|
|
167
159
|
async def _send_auth_error(self, send, message, status=401):
|
|
@@ -177,13 +169,196 @@ class AuthenticationMiddleware:
|
|
|
177
169
|
await send({"type": "http.response.body", "body": error_msg})
|
|
178
170
|
|
|
179
171
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
172
|
+
class RouteAuthorizationMiddleware:
|
|
173
|
+
"""Middleware that enforces route-level access control.
|
|
174
|
+
|
|
175
|
+
This middleware runs after authentication and checks if the authenticated user
|
|
176
|
+
has permission to access the requested API route based on route_policy rules.
|
|
177
|
+
|
|
178
|
+
"""
|
|
179
|
+
|
|
180
|
+
def __init__(self, app, route_policy: list[RouteAccessRule]):
|
|
181
|
+
self.app = app
|
|
182
|
+
self.route_policy = route_policy
|
|
183
|
+
|
|
184
|
+
async def __call__(self, scope, receive, send):
|
|
185
|
+
# Only process HTTP requests
|
|
186
|
+
if scope["type"] != "http":
|
|
187
|
+
return await self.app(scope, receive, send)
|
|
188
|
+
|
|
189
|
+
# If no route policy configured, allow all routes (backward compatible)
|
|
190
|
+
if not self.route_policy:
|
|
191
|
+
return await self.app(scope, receive, send)
|
|
192
|
+
|
|
193
|
+
route = scope.get("path", "")
|
|
194
|
+
# Normalize route: remove trailing slash (except for root "/")
|
|
195
|
+
if route != "/" and route.endswith("/"):
|
|
196
|
+
route = route.rstrip("/")
|
|
197
|
+
|
|
198
|
+
# Get authenticated user from scope (set by AuthenticationMiddleware if present)
|
|
199
|
+
user = user_from_scope(scope)
|
|
200
|
+
|
|
201
|
+
# Check if user has permission to access this route
|
|
202
|
+
if not self._is_route_allowed(route, user):
|
|
203
|
+
return await self._send_error(
|
|
204
|
+
send, f"Access denied: insufficient permissions for route {route}", status=403
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
return await self.app(scope, receive, send)
|
|
208
|
+
|
|
209
|
+
def _is_route_allowed(self, route: str, user: User | None) -> bool:
|
|
210
|
+
"""Check if the user is allowed to access the given route.
|
|
211
|
+
|
|
212
|
+
Rules are evaluated in order. First matching rule determines access.
|
|
213
|
+
If no rule matches, access is denied.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
route: The route being accessed
|
|
217
|
+
user: The authenticated user, or None if no authentication is configured
|
|
218
|
+
"""
|
|
219
|
+
user_str = user.principal if user else "anonymous"
|
|
220
|
+
|
|
221
|
+
for index, rule in enumerate(self.route_policy):
|
|
222
|
+
if self._rule_matches(rule, route, user):
|
|
223
|
+
# Check if this is a permit or forbid rule
|
|
224
|
+
if rule.permit:
|
|
225
|
+
decision = "APPROVED"
|
|
226
|
+
reason = rule.description or ""
|
|
227
|
+
logger.debug(
|
|
228
|
+
f"ROUTE_AUTHZ,decision={decision},user={user_str},"
|
|
229
|
+
f"route={route},rule_index={index},reason={reason!r}"
|
|
230
|
+
)
|
|
231
|
+
return True
|
|
232
|
+
else: # forbid
|
|
233
|
+
decision = "DENIED"
|
|
234
|
+
reason = rule.description or ""
|
|
235
|
+
logger.debug(
|
|
236
|
+
f"ROUTE_AUTHZ,decision={decision},user={user_str},"
|
|
237
|
+
f"route={route},rule_index={index},reason={reason!r}"
|
|
238
|
+
)
|
|
239
|
+
return False
|
|
240
|
+
|
|
241
|
+
# No matching rule found - deny by default
|
|
242
|
+
decision = "DENIED"
|
|
243
|
+
reason = "no matching rule"
|
|
244
|
+
logger.debug(f"ROUTE_AUTHZ,decision={decision},user={user_str},route={route},rule_index=-1,reason={reason!r}")
|
|
245
|
+
return False
|
|
246
|
+
|
|
247
|
+
def _rule_matches(self, rule: RouteAccessRule, route: str, user: User | None) -> bool:
|
|
248
|
+
"""Check if a rule matches the given route and user.
|
|
249
|
+
|
|
250
|
+
Args:
|
|
251
|
+
rule: The rule to evaluate
|
|
252
|
+
route: The route being accessed
|
|
253
|
+
user: The authenticated user, or None if no authentication is configured
|
|
254
|
+
"""
|
|
255
|
+
# Get the scope (permit or forbid)
|
|
256
|
+
scope = rule.permit if rule.permit else rule.forbid
|
|
257
|
+
if not scope:
|
|
258
|
+
return False
|
|
259
|
+
|
|
260
|
+
# Check if route matches
|
|
261
|
+
if not self._route_matches(route, scope.paths):
|
|
262
|
+
return False
|
|
263
|
+
|
|
264
|
+
# Evaluate conditions
|
|
265
|
+
return self._evaluate_conditions(rule, user)
|
|
266
|
+
|
|
267
|
+
def _route_matches(self, request_route: str, rule_patterns: str | list[str]) -> bool:
|
|
268
|
+
"""Check if request route matches any of the rule patterns.
|
|
269
|
+
|
|
270
|
+
Supports:
|
|
271
|
+
- Exact match: "/v1/chat/completions"
|
|
272
|
+
- Prefix wildcard: "/v1/files*" matches "/v1/files", "/v1/files/upload", "/v1/files/list", etc.
|
|
273
|
+
- Full wildcard: "*" matches all routes
|
|
274
|
+
"""
|
|
275
|
+
patterns = [rule_patterns] if isinstance(rule_patterns, str) else rule_patterns
|
|
276
|
+
|
|
277
|
+
for pattern in patterns:
|
|
278
|
+
if pattern == "*":
|
|
279
|
+
# Full wildcard matches everything
|
|
280
|
+
return True
|
|
281
|
+
elif pattern.endswith("*"):
|
|
282
|
+
# Prefix wildcard: check if request route starts with the prefix
|
|
283
|
+
prefix = pattern[:-1] # Remove "*"
|
|
284
|
+
if request_route.startswith(prefix):
|
|
285
|
+
return True
|
|
286
|
+
elif pattern == request_route:
|
|
287
|
+
# Exact match
|
|
288
|
+
return True
|
|
184
289
|
|
|
185
|
-
if not user.attributes:
|
|
186
290
|
return False
|
|
187
291
|
|
|
188
|
-
|
|
189
|
-
|
|
292
|
+
def _evaluate_conditions(self, rule: RouteAccessRule, user: User | None) -> bool:
    """Evaluate when/unless conditions for the rule.

    Reuses the existing condition parsing from access_control.conditions.

    Args:
        rule: The rule whose conditions to evaluate
        user: The authenticated user, or None if no authentication is configured

    Returns:
        True if conditions are met (or no conditions exist), False otherwise.
        ``when`` requires every condition to match (AND logic); ``unless``
        requires that no condition matches (NOT logic). If a rule carries
        both clauses, both must hold.
    """
    if not rule.when and not rule.unless:
        # No conditions specified - rule applies regardless of user
        return True

    # Conditions are evaluated against a user, so without one they cannot be met
    if not user:
        return False

    # Since we're checking route access, we don't have a resource, so a
    # placeholder context object satisfies the condition.matches() interface.
    route_context = _RouteContext()

    if rule.when:
        when_list = rule.when if isinstance(rule.when, list) else [rule.when]
        if not all(cond.matches(route_context, user) for cond in parse_conditions(when_list)):
            return False
        # Fall through: an 'unless' clause on the same rule must still be honoured.

    if rule.unless:
        unless_list = rule.unless if isinstance(rule.unless, list) else [rule.unless]
        if any(cond.matches(route_context, user) for cond in parse_conditions(unless_list)):
            return False

    return True
|
|
338
|
+
|
|
339
|
+
async def _send_error(self, send, message: str, status: int = 403):
    """Emit a JSON error response through the ASGI ``send`` callable.

    Two messages are sent: the response start (status plus a JSON
    content-type header) and the body ``{"error": {<key>: message}}``, where
    the key is "message" for a 401 status and "detail" for any other status.

    Args:
        send: Awaitable callable that receives each response message dict
        message: Human-readable error text
        status: HTTP status code to report (defaults to 403)
    """
    response_start = {
        "type": "http.response.start",
        "status": status,
        "headers": [[b"content-type", b"application/json"]],
    }
    await send(response_start)

    if status == 401:
        error_key = "message"
    else:
        error_key = "detail"
    payload = {"error": {error_key: message}}
    await send({"type": "http.response.body", "body": json.dumps(payload).encode()})
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
class _RouteContext:
    """Stand-in resource used when evaluating conditions for a route.

    Route rules are not tied to an actual resource, so this object is passed
    where condition.matches() expects one. Route conditions typically check
    user attributes (e.g., "user with admin in roles") and don't require
    resource properties.
    """

    def __init__(self):
        # Fixed placeholder values; a route has no owner.
        self.type = self.identifier = "route"
        self.owner = None
|
|
@@ -16,20 +16,55 @@ from typing import Any, cast
|
|
|
16
16
|
from fastapi import APIRouter
|
|
17
17
|
from fastapi.routing import APIRoute
|
|
18
18
|
|
|
19
|
-
from llama_stack_api import
|
|
19
|
+
from llama_stack_api import (
|
|
20
|
+
admin,
|
|
21
|
+
agents,
|
|
22
|
+
batches,
|
|
23
|
+
benchmarks,
|
|
24
|
+
connectors,
|
|
25
|
+
conversations,
|
|
26
|
+
datasetio,
|
|
27
|
+
datasets,
|
|
28
|
+
eval,
|
|
29
|
+
files,
|
|
30
|
+
inference,
|
|
31
|
+
inspect_api,
|
|
32
|
+
models,
|
|
33
|
+
post_training,
|
|
34
|
+
prompts,
|
|
35
|
+
providers,
|
|
36
|
+
safety,
|
|
37
|
+
scoring,
|
|
38
|
+
scoring_functions,
|
|
39
|
+
shields,
|
|
40
|
+
vector_io,
|
|
41
|
+
)
|
|
42
|
+
from llama_stack_api.datatypes import Api
|
|
20
43
|
|
|
21
44
|
# Router factories for APIs that have FastAPI routers
|
|
22
45
|
# Add new APIs here as they are migrated to the router system
|
|
23
|
-
from llama_stack_api.datatypes import Api
|
|
24
|
-
|
|
25
46
|
_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
|
|
26
47
|
"admin": admin.fastapi_routes.create_router,
|
|
48
|
+
"agents": agents.fastapi_routes.create_router,
|
|
27
49
|
"batches": batches.fastapi_routes.create_router,
|
|
28
50
|
"benchmarks": benchmarks.fastapi_routes.create_router,
|
|
51
|
+
"connectors": connectors.fastapi_routes.create_router,
|
|
52
|
+
"conversations": conversations.fastapi_routes.create_router,
|
|
53
|
+
"datasetio": datasetio.fastapi_routes.create_router,
|
|
29
54
|
"datasets": datasets.fastapi_routes.create_router,
|
|
30
|
-
"
|
|
31
|
-
"inspect": inspect_api.fastapi_routes.create_router,
|
|
55
|
+
"eval": eval.fastapi_routes.create_router,
|
|
32
56
|
"files": files.fastapi_routes.create_router,
|
|
57
|
+
"inference": inference.fastapi_routes.create_router,
|
|
58
|
+
"inspect": inspect_api.fastapi_routes.create_router,
|
|
59
|
+
"models": models.fastapi_routes.create_router,
|
|
60
|
+
"post_training": post_training.fastapi_routes.create_router,
|
|
61
|
+
"prompts": prompts.fastapi_routes.create_router,
|
|
62
|
+
"providers": providers.fastapi_routes.create_router,
|
|
63
|
+
"safety": safety.fastapi_routes.create_router,
|
|
64
|
+
"scoring": scoring.fastapi_routes.create_router,
|
|
65
|
+
"scoring_functions": scoring_functions.fastapi_routes.create_router,
|
|
66
|
+
"shields": shields.fastapi_routes.create_router,
|
|
67
|
+
"vector_io": vector_io.fastapi_routes.create_router,
|
|
33
68
|
}
|
|
34
69
|
|
|
35
70
|
|
|
@@ -48,7 +48,7 @@ from llama_stack.core.server.fastapi_router_registry import build_fastapi_router
|
|
|
48
48
|
from llama_stack.core.server.routes import get_all_api_routes
|
|
49
49
|
from llama_stack.core.stack import (
|
|
50
50
|
Stack,
|
|
51
|
-
|
|
51
|
+
cast_distro_name_to_string,
|
|
52
52
|
replace_env_vars,
|
|
53
53
|
)
|
|
54
54
|
from llama_stack.core.utils.config import redact_sensitive_fields
|
|
@@ -57,7 +57,7 @@ from llama_stack.core.utils.context import preserve_contexts_async_generator
|
|
|
57
57
|
from llama_stack.log import LoggingConfig, get_logger
|
|
58
58
|
from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
|
|
59
59
|
|
|
60
|
-
from .auth import AuthenticationMiddleware
|
|
60
|
+
from .auth import AuthenticationMiddleware, RouteAuthorizationMiddleware
|
|
61
61
|
from .quota import QuotaMiddleware
|
|
62
62
|
|
|
63
63
|
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
|
@@ -88,6 +88,13 @@ async def global_exception_handler(request: Request, exc: Exception):
|
|
|
88
88
|
traceback.print_exception(type(exc), exc, exc.__traceback__)
|
|
89
89
|
http_exc = translate_exception(exc)
|
|
90
90
|
|
|
91
|
+
# OpenAI-compat Vector Stores endpoints treat many "not found" conditions as 400s.
|
|
92
|
+
# Our core exceptions model these as ResourceNotFoundError (mapped to 404 by default),
|
|
93
|
+
# but integration tests (and OpenAI client behavior expectations in this repo)
|
|
94
|
+
# assert they surface as BadRequestError instead.
|
|
95
|
+
if isinstance(exc, ResourceNotFoundError) and request.url.path.startswith("/v1/vector_stores"):
|
|
96
|
+
http_exc = HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=str(exc))
|
|
97
|
+
|
|
91
98
|
return JSONResponse(status_code=http_exc.status_code, content={"error": {"detail": http_exc.detail}})
|
|
92
99
|
|
|
93
100
|
|
|
@@ -396,7 +403,7 @@ def create_app() -> StackApp:
|
|
|
396
403
|
logger = get_logger(name=__name__, category="core::server", config=logger_config)
|
|
397
404
|
|
|
398
405
|
config = replace_env_vars(config_contents)
|
|
399
|
-
config = StackConfig(**
|
|
406
|
+
config = StackConfig(**cast_distro_name_to_string(config))
|
|
400
407
|
|
|
401
408
|
_log_run_config(run_config=config)
|
|
402
409
|
|
|
@@ -416,8 +423,19 @@ def create_app() -> StackApp:
|
|
|
416
423
|
impls = app.stack.impls
|
|
417
424
|
|
|
418
425
|
if config.server.auth:
|
|
419
|
-
|
|
420
|
-
|
|
426
|
+
# Add route authorization middleware if route_policy is configured
|
|
427
|
+
# This can work independently of authentication
|
|
428
|
+
# NOTE: Add this FIRST because middleware wraps in reverse order (last added runs first)
|
|
429
|
+
# We want: Request → Auth → RouteAuth → App
|
|
430
|
+
if config.server.auth.route_policy:
|
|
431
|
+
logger.info(f"Enabling route-level authorization with {len(config.server.auth.route_policy)} rules")
|
|
432
|
+
app.add_middleware(RouteAuthorizationMiddleware, route_policy=config.server.auth.route_policy)
|
|
433
|
+
|
|
434
|
+
# Add authentication middleware only if provider is configured
|
|
435
|
+
# This runs FIRST in the middleware chain (last added = first to run)
|
|
436
|
+
if config.server.auth.provider_config:
|
|
437
|
+
logger.info(f"Enabling authentication with provider: {config.server.auth.provider_config.type.value}")
|
|
438
|
+
app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth, impls=impls)
|
|
421
439
|
else:
|
|
422
440
|
if config.server.quota:
|
|
423
441
|
quota = config.server.quota
|
|
@@ -474,6 +492,7 @@ def create_app() -> StackApp:
|
|
|
474
492
|
apis_to_serve.add("providers")
|
|
475
493
|
apis_to_serve.add("prompts")
|
|
476
494
|
apis_to_serve.add("conversations")
|
|
495
|
+
apis_to_serve.add("connectors")
|
|
477
496
|
|
|
478
497
|
for api_str in apis_to_serve:
|
|
479
498
|
api = Api(api_str)
|
llama_stack/core/stack.py
CHANGED
|
@@ -16,6 +16,7 @@ import yaml
|
|
|
16
16
|
from pydantic import BaseModel
|
|
17
17
|
|
|
18
18
|
from llama_stack.core.admin import AdminImpl, AdminImplConfig
|
|
19
|
+
from llama_stack.core.connectors.connectors import ConnectorServiceConfig, ConnectorServiceImpl
|
|
19
20
|
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
|
|
20
21
|
from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
|
|
21
22
|
from llama_stack.core.distribution import get_provider_registry
|
|
@@ -42,6 +43,7 @@ from llama_stack_api import (
|
|
|
42
43
|
Api,
|
|
43
44
|
Batches,
|
|
44
45
|
Benchmarks,
|
|
46
|
+
Connectors,
|
|
45
47
|
Conversations,
|
|
46
48
|
DatasetIO,
|
|
47
49
|
Datasets,
|
|
@@ -54,6 +56,9 @@ from llama_stack_api import (
|
|
|
54
56
|
Prompts,
|
|
55
57
|
Providers,
|
|
56
58
|
RegisterBenchmarkRequest,
|
|
59
|
+
RegisterModelRequest,
|
|
60
|
+
RegisterScoringFunctionRequest,
|
|
61
|
+
RegisterShieldRequest,
|
|
57
62
|
Safety,
|
|
58
63
|
Scoring,
|
|
59
64
|
ScoringFunctions,
|
|
@@ -89,6 +94,7 @@ class LlamaStack(
|
|
|
89
94
|
Files,
|
|
90
95
|
Prompts,
|
|
91
96
|
Conversations,
|
|
97
|
+
Connectors,
|
|
92
98
|
):
|
|
93
99
|
pass
|
|
94
100
|
|
|
@@ -96,15 +102,15 @@ class LlamaStack(
|
|
|
96
102
|
# Resources to register based on configuration.
|
|
97
103
|
# If a request class is specified, the configuration object will be converted to this class before invoking the registration method.
|
|
98
104
|
RESOURCES = [
|
|
99
|
-
("models", Api.models, "register_model", "list_models",
|
|
100
|
-
("shields", Api.shields, "register_shield", "list_shields",
|
|
105
|
+
("models", Api.models, "register_model", "list_models", RegisterModelRequest),
|
|
106
|
+
("shields", Api.shields, "register_shield", "list_shields", RegisterShieldRequest),
|
|
101
107
|
("datasets", Api.datasets, "register_dataset", "list_datasets", RegisterDatasetRequest),
|
|
102
108
|
(
|
|
103
109
|
"scoring_fns",
|
|
104
110
|
Api.scoring_functions,
|
|
105
111
|
"register_scoring_function",
|
|
106
112
|
"list_scoring_functions",
|
|
107
|
-
|
|
113
|
+
RegisterScoringFunctionRequest,
|
|
108
114
|
),
|
|
109
115
|
("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks", RegisterBenchmarkRequest),
|
|
110
116
|
("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups", None),
|
|
@@ -242,6 +248,34 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
|
|
|
242
248
|
)
|
|
243
249
|
|
|
244
250
|
|
|
251
|
+
async def register_connectors(run_config: StackConfig, impls: dict[Api, Any]):
    """Bring the registered connectors in line with the run configuration.

    Does nothing when ``impls`` has no connectors implementation. Otherwise
    each connector in ``run_config.connectors`` is registered, and afterwards
    every connector returned by ``list_connectors()`` whose ID is not in the
    config is unregistered.

    Args:
        run_config: Stack configuration holding the declared connectors
        impls: Mapping of API to its implementation
    """
    if Api.connectors not in impls:
        return

    service = impls[Api.connectors]

    # IDs declared in config; anything else found later counts as an orphan
    declared_ids = {declared.connector_id for declared in run_config.connectors}

    # Register/Update config connectors
    for declared in run_config.connectors:
        logger.debug(f"Registering connector: {declared.connector_id}")
        await service.register_connector(
            connector_id=declared.connector_id,
            connector_type=declared.connector_type,
            url=declared.url,
            server_label=declared.server_label,
        )

    # Remove connectors not in config (orphan cleanup)
    listing = await service.list_connectors()
    for stored in listing.data:
        if stored.connector_id in declared_ids:
            continue
        logger.info(f"Removing orphaned connector: {stored.connector_id}")
        await service.unregister_connector(stored.connector_id)
|
|
277
|
+
|
|
278
|
+
|
|
245
279
|
async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]):
|
|
246
280
|
"""Validate vector stores configuration."""
|
|
247
281
|
if vector_stores_config is None:
|
|
@@ -276,7 +310,8 @@ async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict
|
|
|
276
310
|
f"Embedding model '{model_identifier}' not found. Available embedding models: {list(models_list.keys())}"
|
|
277
311
|
)
|
|
278
312
|
|
|
279
|
-
|
|
313
|
+
# if not in metadata, fetch from config default
|
|
314
|
+
embedding_dimension = model.metadata.get("embedding_dimension", embedding_model.embedding_dimensions)
|
|
280
315
|
if embedding_dimension is None:
|
|
281
316
|
raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata")
|
|
282
317
|
|
|
@@ -489,10 +524,10 @@ def _convert_string_to_proper_type(value: str) -> Any:
|
|
|
489
524
|
return value
|
|
490
525
|
|
|
491
526
|
|
|
492
|
-
def
|
|
493
|
-
"""Ensure that any value for a key '
|
|
494
|
-
if "
|
|
495
|
-
config_dict["
|
|
527
|
+
def cast_distro_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
    """Coerce a non-None 'distro_name' entry of ``config_dict`` to a string.

    The dict is modified in place and the same object is returned. A missing
    key or a None value is left untouched.
    """
    distro_name = config_dict.get("distro_name")
    if distro_name is not None:
        config_dict["distro_name"] = str(distro_name)
    return config_dict
|
|
497
532
|
|
|
498
533
|
|
|
@@ -532,6 +567,11 @@ def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) ->
|
|
|
532
567
|
)
|
|
533
568
|
impls[Api.conversations] = conversations_impl
|
|
534
569
|
|
|
570
|
+
connectors_impl = ConnectorServiceImpl(
|
|
571
|
+
ConnectorServiceConfig(config=config),
|
|
572
|
+
)
|
|
573
|
+
impls[Api.connectors] = connectors_impl
|
|
574
|
+
|
|
535
575
|
|
|
536
576
|
def _initialize_storage(run_config: StackConfig):
|
|
537
577
|
kv_backends: dict[str, StorageBackendConfig] = {}
|
|
@@ -574,7 +614,7 @@ class Stack:
|
|
|
574
614
|
stores = self.run_config.storage.stores
|
|
575
615
|
if not stores.metadata:
|
|
576
616
|
raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
|
|
577
|
-
dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.
|
|
617
|
+
dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.distro_name)
|
|
578
618
|
policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
|
|
579
619
|
|
|
580
620
|
internal_impls = {}
|
|
@@ -592,8 +632,11 @@ class Stack:
|
|
|
592
632
|
await impls[Api.prompts].initialize()
|
|
593
633
|
if Api.conversations in impls:
|
|
594
634
|
await impls[Api.conversations].initialize()
|
|
635
|
+
if Api.connectors in impls:
|
|
636
|
+
await impls[Api.connectors].initialize()
|
|
595
637
|
|
|
596
638
|
await register_resources(self.run_config, impls)
|
|
639
|
+
await register_connectors(self.run_config, impls)
|
|
597
640
|
await refresh_registry_once(impls)
|
|
598
641
|
await validate_vector_stores_config(self.run_config.vector_stores, impls)
|
|
599
642
|
await validate_safety_config(self.run_config.safety, impls)
|
|
@@ -727,7 +770,7 @@ def run_config_from_adhoc_config_spec(
|
|
|
727
770
|
)
|
|
728
771
|
]
|
|
729
772
|
config = StackConfig(
|
|
730
|
-
|
|
773
|
+
distro_name="distro-test",
|
|
731
774
|
apis=list(provider_configs_by_api.keys()),
|
|
732
775
|
providers=provider_configs_by_api,
|
|
733
776
|
storage=StorageConfig(
|
|
@@ -740,6 +783,7 @@ def run_config_from_adhoc_config_spec(
|
|
|
740
783
|
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
|
|
741
784
|
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
|
|
742
785
|
prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
|
|
786
|
+
connectors=KVStoreReference(backend="kv_default", namespace="connectors"),
|
|
743
787
|
),
|
|
744
788
|
),
|
|
745
789
|
)
|
|
@@ -255,6 +255,11 @@ class InferenceStoreReference(SqlStoreReference):
|
|
|
255
255
|
class ResponsesStoreReference(InferenceStoreReference):
    """Responses store configuration with queue tuning."""

    # Table used for stored OpenAI responses; defaults to "openai_responses".
    # NOTE(review): presumably overrides a table_name field inherited from the
    # parent store reference - confirm against the base class.
    table_name: str = Field(
        default="openai_responses",
        description="Name of the table to use for storing OpenAI responses",
    )
|
|
262
|
+
|
|
258
263
|
|
|
259
264
|
class ServerStoresConfig(BaseModel):
|
|
260
265
|
metadata: KVStoreReference | None = Field(
|
|
@@ -286,6 +291,10 @@ class ServerStoresConfig(BaseModel):
|
|
|
286
291
|
default=KVStoreReference(backend="kv_default", namespace="prompts"),
|
|
287
292
|
description="Prompts store configuration (uses KV backend)",
|
|
288
293
|
)
|
|
294
|
+
connectors: KVStoreReference | None = Field(
|
|
295
|
+
default=KVStoreReference(backend="kv_default", namespace="connectors"),
|
|
296
|
+
description="Connectors store configuration (uses KV backend)",
|
|
297
|
+
)
|
|
289
298
|
|
|
290
299
|
|
|
291
300
|
class StorageConfig(BaseModel):
|