llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +2 -1
- llama_stack/providers/utils/inference/openai_mixin.py +41 -2
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
- llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
llama_stack/core/datatypes.py
CHANGED
```diff
@@ -11,7 +11,7 @@ from urllib.parse import urlparse
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.access_control.datatypes import AccessRule, RouteAccessRule
 from llama_stack.core.storage.datatypes import (
     KVStoreReference,
     StorageBackendType,
@@ -329,13 +329,17 @@ AuthProviderConfig = Annotated[
 class AuthenticationConfig(BaseModel):
     """Top-level authentication configuration."""
 
-    provider_config: AuthProviderConfig = Field(
-        ...,
-        description="Authentication provider configuration",
+    provider_config: AuthProviderConfig | None = Field(
+        default=None,
+        description="Authentication provider configuration (optional if only using route_policy)",
+    )
+    route_policy: list[RouteAccessRule] = Field(
+        default=[],
+        description="Rules for determining access to API routes (infrastructure-level)",
     )
     access_policy: list[AccessRule] = Field(
         default=[],
-        description="Rules for determining access to resources",
+        description="Rules for determining access to resources (data-level)",
    )
```
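The change above splits authorization into two layers: `route_policy` gates whole API routes (infrastructure-level), `access_policy` continues to gate individual resources (data-level), and `provider_config` becomes optional so a deployment can ship route rules alone. A minimal sketch of the resulting shape; `Rule` here is a hypothetical stand-in, not the real `AccessRule`/`RouteAccessRule` types from `llama_stack/core/access_control/datatypes.py`:

```python
# Minimal sketch of the new AuthenticationConfig shape. Rule is a hypothetical
# stand-in; the real AccessRule/RouteAccessRule fields live in
# llama_stack/core/access_control/datatypes.py.
from pydantic import BaseModel, Field


class Rule(BaseModel):
    permit: dict = Field(default_factory=dict)  # hypothetical rule payload


class AuthenticationConfig(BaseModel):
    provider_config: dict | None = Field(default=None)  # optional as of 0.5.0rc1
    route_policy: list[Rule] = Field(default=[])  # infrastructure-level
    access_policy: list[Rule] = Field(default=[])  # data-level


# A config that only restricts routes now validates without a provider_config:
cfg = AuthenticationConfig(route_policy=[Rule()])
assert cfg.provider_config is None
```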
```diff
@@ -348,6 +352,7 @@ class QualifiedModel(BaseModel):
 
     provider_id: str
     model_id: str
+    embedding_dimensions: int | None = None
 
 
 class RewriteQueryParams(BaseModel):
@@ -654,7 +659,6 @@ class RegisteredResources(BaseModel):
     scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
     benchmarks: list[BenchmarkInput] = Field(default_factory=list)
     tool_groups: list[ToolGroupInput] = Field(default_factory=list)
-    connectors: list[ConnectorInput] = Field(default_factory=list)
 
 
 class ServerConfig(BaseModel):
@@ -703,11 +707,20 @@ class ServerConfig(BaseModel):
 class StackConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
-    image_name: str = Field(
-        ...,
+    distro_name: str | None = Field(
+        default=None,
         description="""
 Reference to the distribution this package refers to. For unregistered (adhoc) packages,
 this could be just a hash
+""",
+    )
+    image_name: str | None = Field(
+        default=None,
+        deprecated=True,
+        description="""
+DEPRECATED: Use 'distro_name' instead. This field is maintained for backward compatibility.
+Reference to the distribution this package refers to. For unregistered (adhoc) packages,
+this could be just a hash
 """,
     )
     container_image: str | None = Field(
@@ -763,6 +776,11 @@ can be instantiated multiple times (with different configs) if necessary.
         description="Configuration for default moderations model",
     )
 
+    connectors: list[ConnectorInput] = Field(
+        default_factory=list,
+        description="List of connectors to register at stack startup",
+    )
+
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):
@@ -772,6 +790,34 @@ can be instantiated multiple times (with different configs) if necessary.
             return Path(v)
         return v
 
+    @model_validator(mode="after")
+    def validate_distro_name_migration(self) -> "StackConfig":
+        """Handle migration from image_name to distro_name."""
+        import warnings
+
+        if self.distro_name is None and self.image_name is None:
+            raise ValueError("Either 'distro_name' or 'image_name' must be provided")
+
+        if self.image_name is not None and self.distro_name is None:
+            # Migrate from image_name to distro_name
+            warnings.warn(
+                "The 'image_name' field is deprecated. Please use 'distro_name' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            self.distro_name = self.image_name
+        elif self.image_name is not None and self.distro_name is not None:
+            # Both provided - warn and prefer distro_name
+            warnings.warn(
+                "Both 'image_name' and 'distro_name' were provided. "
+                "The 'image_name' field is deprecated and will be ignored. "
+                "Please use only 'distro_name' in your configuration.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
+        return self
+
     @model_validator(mode="after")
     def validate_server_stores(self) -> "StackConfig":
         backend_map = self.storage.backends
```
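The new `validate_distro_name_migration` validator keeps old configs working: `image_name` alone migrates with a `DeprecationWarning`, both fields together warn and prefer `distro_name`, and neither raises. A trimmed, self-contained reproduction of that behavior (only the two fields involved, not the full `StackConfig`):

```python
# Self-contained reproduction of the image_name -> distro_name migration
# validator shown above, reduced to the two fields it touches.
import warnings

from pydantic import BaseModel, model_validator


class Config(BaseModel):
    distro_name: str | None = None
    image_name: str | None = None

    @model_validator(mode="after")
    def migrate(self) -> "Config":
        if self.distro_name is None and self.image_name is None:
            raise ValueError("Either 'distro_name' or 'image_name' must be provided")
        if self.image_name is not None and self.distro_name is None:
            warnings.warn("'image_name' is deprecated; use 'distro_name'", DeprecationWarning, stacklevel=2)
            self.distro_name = self.image_name  # old configs keep working
        return self


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    cfg = Config(image_name="starter")

assert cfg.distro_name == "starter"  # migrated from the deprecated field
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```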
llama_stack/core/library_client.py
CHANGED

```diff
@@ -20,7 +20,7 @@ import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
 
-from llama_stack.core.utils.type_inspection import is_unwrapped_body_param
+from llama_stack.core.utils.type_inspection import is_body_param, is_unwrapped_body_param
 
 try:
     from llama_stack_client import (
@@ -504,11 +504,30 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         # Prepare body for the function call (handles both Pydantic and traditional params)
         body = self._convert_body(func, body)
 
+        result = await func(**body)
+        content_type = "application/json"
+        if isinstance(result, FastAPIResponse):
+            content_type = result.media_type or content_type
+
         async def gen():
-            async for chunk in await func(**body):
-                data = json.dumps(convert_pydantic_to_json_value(chunk))
-                sse_event = f"data: {data}\n\n"
-                yield sse_event.encode("utf-8")
+            # Handle FastAPI StreamingResponse (returned by router endpoints)
+            # Extract the async generator from the StreamingResponse body
+            from fastapi.responses import StreamingResponse
+
+            if isinstance(result, StreamingResponse):
+                # StreamingResponse.body_iterator is the async generator
+                async for chunk in result.body_iterator:
+                    # Chunk is already SSE-formatted string from sse_generator, encode to bytes
+                    if isinstance(chunk, str):
+                        yield chunk.encode("utf-8")
+                    else:
+                        yield chunk
+            else:
+                # Direct async generator from implementation
+                async for chunk in result:
+                    data = json.dumps(convert_pydantic_to_json_value(chunk))
+                    sse_event = f"data: {data}\n\n"
+                    yield sse_event.encode("utf-8")
 
         wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
 
```
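The streaming path previously assumed `func(**body)` returned a bare async iterator; now that FastAPI router endpoints return a `StreamingResponse`, `gen()` has to unwrap `body_iterator` instead of re-encoding chunks that are already SSE-formatted. A standalone toy showing the two shapes the new code distinguishes:

```python
# Sketch of the two streaming shapes the new gen() handles: a FastAPI
# StreamingResponse (drain .body_iterator) versus a plain async generator
# (JSON-encode each chunk as an SSE event). Standalone toy, not the client itself.
import asyncio
import json

from fastapi.responses import StreamingResponse


async def tokens():
    for t in ("hello", "world"):
        yield f"data: {json.dumps(t)}\n\n"  # already SSE-formatted strings


async def main() -> None:
    result = StreamingResponse(tokens(), media_type="text/event-stream")
    out: list[bytes] = []
    if isinstance(result, StreamingResponse):
        async for chunk in result.body_iterator:  # unwrap instead of re-encoding
            out.append(chunk.encode("utf-8") if isinstance(chunk, str) else chunk)
    print(b"".join(out).decode())


asyncio.run(main())
```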
```diff
@@ -516,7 +535,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             status_code=httpx.codes.OK,
             content=wrapped_gen,
             headers={
-                "Content-Type": "text/event-stream",
+                "Content-Type": content_type,
             },
             request=httpx.Request(
                 method=options.method,
@@ -549,10 +568,26 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         sig = inspect.signature(func)
         params_list = [p for p in sig.parameters.values() if p.name != "self"]
 
+        # Resolve string annotations (from `from __future__ import annotations`) to actual types
+        try:
+            type_hints = typing.get_type_hints(func, include_extras=True)
+        except NameError as e:
+            # Forward reference could not be resolved - fall back to raw annotations
+            logger.debug(f"Could not resolve type hints for {func.__name__}: {e}")
+            type_hints = {}
+        except Exception as e:
+            # Unexpected error - log and fall back
+            logger.warning(f"Failed to resolve type hints for {func.__name__}: {e}")
+            type_hints = {}
+
+        # Helper to get the resolved type for a parameter
+        def get_param_type(param: inspect.Parameter) -> Any:
+            return type_hints.get(param.name, param.annotation)
+
         # Flatten if there's a single unwrapped body parameter (BaseModel or Annotated[BaseModel, Body(embed=False)])
         if len(params_list) == 1:
             param = params_list[0]
-            param_type = param.annotation
+            param_type = get_param_type(param)
             if is_unwrapped_body_param(param_type):
                 base_type = get_args(param_type)[0]
                 return {param.name: base_type(**body)}
```
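The `typing.get_type_hints` fallback matters because modules using `from __future__ import annotations` store every annotation as a string, so `param.annotation` compares unequal to any real type. A small demonstration of the failure mode and the fix:

```python
# Why get_type_hints() is needed: under `from __future__ import annotations`
# every annotation is stored as a string, so param.annotation is "MyModel",
# not the class. get_type_hints() resolves it back to the real type.
from __future__ import annotations

import inspect
import typing

from pydantic import BaseModel


class MyModel(BaseModel):
    x: int


def handler(params: MyModel) -> None: ...


param = inspect.signature(handler).parameters["params"]
print(repr(param.annotation))  # 'MyModel' (a string, not a type)

hints = typing.get_type_hints(handler, include_extras=True)
print(hints["params"] is MyModel)  # True: the resolved class
```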
```diff
@@ -563,16 +598,22 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         # Check if there's an unwrapped body parameter among multiple parameters
         # (e.g., path param + body param like: vector_store_id: str, params: Annotated[Model, Body(...)])
         unwrapped_body_param = None
+        unwrapped_body_param_type = None
+        body_param = None
         for param in params_list:
-            if is_unwrapped_body_param(param.annotation):
+            param_type = get_param_type(param)
+            if is_unwrapped_body_param(param_type):
                 unwrapped_body_param = param
+                unwrapped_body_param_type = param_type
                 break
+            if body_param is None and is_body_param(param_type):
+                body_param = param
 
         # Check for parameters with Depends() annotation (FastAPI router endpoints)
         # These need special handling: construct the request model from body
         depends_param = None
         for param in params_list:
-            param_type = param.annotation
+            param_type = get_param_type(param)
             if get_origin(param_type) is typing.Annotated:
                 args = get_args(param_type)
                 if len(args) > 1:
@@ -595,11 +636,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             if param_name in exclude_params:
                 converted_body[param_name] = value
             else:
-                converted_body[param_name] = convert_to_pydantic(param.annotation, value)
+                resolved_type = get_param_type(param)
+                converted_body[param_name] = convert_to_pydantic(resolved_type, value)
 
         # Handle Depends parameter: construct request model from body
         if depends_param and depends_param.name not in converted_body:
-            param_type = depends_param.annotation
+            param_type = get_param_type(depends_param)
             if get_origin(param_type) is typing.Annotated:
                 base_type = get_args(param_type)[0]
                 # Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
@@ -619,10 +661,15 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 converted_body[depends_param.name] = base_type(**body)
 
         # handle unwrapped body parameter after processing all named parameters
-        if unwrapped_body_param:
-            base_type = get_args(unwrapped_body_param.annotation)[0]
+        if unwrapped_body_param and unwrapped_body_param_type:
+            base_type = get_args(unwrapped_body_param_type)[0]
             # extract only keys not already used by other params
             remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
             converted_body[unwrapped_body_param.name] = base_type(**remaining_keys)
+        elif body_param and body_param.name not in converted_body:
+            body_param_type = get_param_type(body_param)
+            base_type = get_args(body_param_type)[0]
+            remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
+            converted_body[body_param.name] = base_type(**remaining_keys)
 
         return converted_body
```
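`_convert_body` now dispatches in two steps: an `Annotated[Model, Body(embed=False)]` parameter absorbs the whole request body, and otherwise the first plain body parameter (the new `is_body_param` branch) receives the leftover keys. A simplified stand-in for the real `type_inspection` helpers, showing what the unwrapped check looks for; the actual implementations live in `llama_stack/core/utils/type_inspection.py` and may differ:

```python
# Simplified stand-in for is_unwrapped_body_param: detect an
# Annotated[Model, Body(embed=False)] parameter that should absorb
# the entire request body rather than a single embedded key.
import typing

from fastapi import Body
from pydantic import BaseModel


class CreateRequest(BaseModel):
    name: str


def is_unwrapped_body_param(t) -> bool:
    if typing.get_origin(t) is not typing.Annotated:
        return False
    # Body(embed=False) markers live in the Annotated metadata
    return any(getattr(a, "embed", None) is False for a in typing.get_args(t)[1:])


t = typing.Annotated[CreateRequest, Body(embed=False)]
print(is_unwrapped_body_param(t))  # True: body maps straight onto CreateRequest
print(is_unwrapped_body_param(CreateRequest))  # False: plain annotation
```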
llama_stack/core/prompts/prompts.py
CHANGED

```diff
@@ -11,7 +11,17 @@ from pydantic import BaseModel
 
 from llama_stack.core.datatypes import StackConfig
 from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
-from llama_stack_api import ListPromptsResponse, Prompt, Prompts
+from llama_stack_api import (
+    CreatePromptRequest,
+    DeletePromptRequest,
+    GetPromptRequest,
+    ListPromptsResponse,
+    ListPromptVersionsRequest,
+    Prompt,
+    Prompts,
+    SetDefaultVersionRequest,
+    UpdatePromptRequest,
+)
 
 
 class PromptServiceConfig(BaseModel):
@@ -114,26 +124,23 @@ class PromptServiceImpl(Prompts):
         prompts.sort(key=lambda p: p.prompt_id or "", reverse=True)
         return ListPromptsResponse(data=prompts)
 
-    async def get_prompt(self, prompt_id: str, version: int | None = None) -> Prompt:
+    async def get_prompt(self, request: GetPromptRequest) -> Prompt:
         """Get a prompt by its identifier and optional version."""
-        key = await self._get_prompt_key(prompt_id, version)
+        key = await self._get_prompt_key(request.prompt_id, request.version)
         data = await self.kvstore.get(key)
         if data is None:
-            raise ValueError(f"Prompt {prompt_id}:{version if version else 'default'} not found")
+            raise ValueError(
+                f"Prompt {request.prompt_id}:{request.version if request.version else 'default'} not found"
+            )
         return self._deserialize_prompt(data)
 
-    async def create_prompt(
-        self,
-        prompt: str,
-        variables: list[str] | None = None,
-    ) -> Prompt:
+    async def create_prompt(self, request: CreatePromptRequest) -> Prompt:
         """Create a new prompt."""
-        if variables is None:
-            variables = []
+        variables = request.variables if request.variables is not None else []
 
         prompt_obj = Prompt(
             prompt_id=Prompt.generate_prompt_id(),
-            prompt=prompt,
+            prompt=request.prompt,
             version=1,
             variables=variables,
         )
```
```diff
@@ -147,55 +154,49 @@ class PromptServiceImpl(Prompts):
 
         return prompt_obj
 
-    async def update_prompt(
-        self,
-        prompt_id: str,
-        prompt: str,
-        version: int,
-        variables: list[str] | None = None,
-        set_as_default: bool = True,
-    ) -> Prompt:
+    async def update_prompt(self, request: UpdatePromptRequest) -> Prompt:
         """Update an existing prompt (increments version)."""
-        if version < 1:
+        if request.version < 1:
             raise ValueError("Version must be >= 1")
-        if variables is None:
-            variables = []
+        variables = request.variables if request.variables is not None else []
 
-        prompt_versions = await self.list_prompt_versions(prompt_id)
+        prompt_versions = await self.list_prompt_versions(ListPromptVersionsRequest(prompt_id=request.prompt_id))
         latest_prompt = max(prompt_versions.data, key=lambda x: int(x.version))
 
-        if version and latest_prompt.version != version:
+        if request.version and latest_prompt.version != request.version:
             raise ValueError(
-                f"'{version}' is not the latest prompt version for prompt_id='{prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
+                f"'{request.version}' is not the latest prompt version for prompt_id='{request.prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
             )
 
-        current_version = latest_prompt.version if version is None else version
+        current_version = latest_prompt.version if request.version is None else request.version
         new_version = current_version + 1
 
-        updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)
+        updated_prompt = Prompt(
+            prompt_id=request.prompt_id, prompt=request.prompt, version=new_version, variables=variables
+        )
 
-        version_key = self._get_version_key(prompt_id, str(new_version))
+        version_key = self._get_version_key(request.prompt_id, str(new_version))
         data = self._serialize_prompt(updated_prompt)
         await self.kvstore.set(version_key, data)
 
-        if set_as_default:
-            await self.set_default_version(prompt_id, new_version)
+        if request.set_as_default:
+            await self.set_default_version(SetDefaultVersionRequest(prompt_id=request.prompt_id, version=new_version))
 
         return updated_prompt
 
-    async def delete_prompt(self, prompt_id: str) -> None:
+    async def delete_prompt(self, request: DeletePromptRequest) -> None:
         """Delete a prompt and all its versions."""
-        await self.get_prompt(prompt_id)
+        await self.get_prompt(GetPromptRequest(prompt_id=request.prompt_id))
 
-        prefix = f"prompts:v1:{prompt_id}:"
+        prefix = f"prompts:v1:{request.prompt_id}:"
         keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
 
         for key in keys:
             await self.kvstore.delete(key)
 
-    async def list_prompt_versions(self, prompt_id: str) -> ListPromptsResponse:
+    async def list_prompt_versions(self, request: ListPromptVersionsRequest) -> ListPromptsResponse:
         """List all versions of a specific prompt."""
-        prefix = f"prompts:v1:{prompt_id}:"
+        prefix = f"prompts:v1:{request.prompt_id}:"
         keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
 
         default_version = None
@@ -211,7 +212,7 @@ class PromptServiceImpl(Prompts):
             prompts.append(prompt_obj)
 
         if not prompts:
-            raise ValueError(f"Prompt {prompt_id} not found")
+            raise ValueError(f"Prompt {request.prompt_id} not found")
 
         for prompt in prompts:
             prompt.is_default = str(prompt.version) == default_version
@@ -219,15 +220,15 @@ class PromptServiceImpl(Prompts):
         prompts.sort(key=lambda x: x.version)
         return ListPromptsResponse(data=prompts)
 
-    async def set_default_version(self, prompt_id: str, version: int) -> Prompt:
+    async def set_default_version(self, request: SetDefaultVersionRequest) -> Prompt:
         """Set which version of a prompt should be the default, If not set. the default is the latest."""
-        version_key = self._get_version_key(prompt_id, str(version))
+        version_key = self._get_version_key(request.prompt_id, str(request.version))
         data = await self.kvstore.get(version_key)
         if data is None:
-            raise ValueError(f"Prompt {prompt_id} version {version} not found")
+            raise ValueError(f"Prompt {request.prompt_id} version {request.version} not found")
 
-        default_key = self._get_default_key(prompt_id)
-        await self.kvstore.set(default_key, str(version))
+        default_key = self._get_default_key(request.prompt_id)
+        await self.kvstore.set(default_key, str(request.version))
 
         return self._deserialize_prompt(data)
```
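All `Prompts` methods now take a single request model instead of flat keyword arguments, which keeps the FastAPI route signatures and the inline implementation in sync. A call-site sketch, assuming `impl` is a `PromptServiceImpl` instance; the request and field names are taken from the hunks above:

```python
# Call-site sketch of the new request-model style; `impl` is assumed to be a
# constructed PromptServiceImpl. Field names come from the diff above.
from llama_stack_api import CreatePromptRequest, GetPromptRequest, UpdatePromptRequest


async def demo(impl) -> None:
    created = await impl.create_prompt(
        CreatePromptRequest(prompt="Summarize: {{ document }}", variables=["document"])
    )
    updated = await impl.update_prompt(
        UpdatePromptRequest(
            prompt_id=created.prompt_id,
            prompt="Summarize briefly: {{ document }}",
            version=created.version,  # must be the latest version or update_prompt raises
            set_as_default=True,
        )
    )
    latest = await impl.get_prompt(GetPromptRequest(prompt_id=updated.prompt_id))
    print(latest.version)
```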
llama_stack/core/routers/datasets.py
CHANGED

```diff
@@ -7,7 +7,15 @@
 from typing import Any
 
 from llama_stack.log import get_logger
-from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
+from llama_stack_api import (
+    AppendRowsRequest,
+    DatasetIO,
+    DatasetPurpose,
+    DataSource,
+    IterRowsRequest,
+    PaginatedResponse,
+    RoutingTable,
+)
 
 logger = get_logger(name=__name__, category="core::routers")
 
@@ -45,26 +53,21 @@ class DatasetIORouter(DatasetIO):
             dataset_id=dataset_id,
         )
 
-    async def iterrows(
-        self,
-        dataset_id: str,
-        start_index: int | None = None,
-        limit: int | None = None,
-    ) -> PaginatedResponse:
+    async def iterrows(self, request: IterRowsRequest) -> PaginatedResponse:
         logger.debug(
-            f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}",
+            f"DatasetIORouter.iterrows: {request.dataset_id}, start_index={request.start_index} limit={request.limit}",
         )
-        provider = await self.routing_table.get_provider_impl(dataset_id)
+        provider = await self.routing_table.get_provider_impl(request.dataset_id)
         return await provider.iterrows(
-            dataset_id=dataset_id,
-            start_index=start_index,
-            limit=limit,
+            dataset_id=request.dataset_id,
+            start_index=request.start_index,
+            limit=request.limit,
         )
 
-    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
-        logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows")
-        provider = await self.routing_table.get_provider_impl(dataset_id)
+    async def append_rows(self, request: AppendRowsRequest) -> None:
+        logger.debug(f"DatasetIORouter.append_rows: {request.dataset_id}, {len(request.rows)} rows")
+        provider = await self.routing_table.get_provider_impl(request.dataset_id)
         return await provider.append_rows(
-            dataset_id=dataset_id,
-            rows=rows,
+            dataset_id=request.dataset_id,
+            rows=request.rows,
         )
```