llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/stack.py
CHANGED
|
@@ -6,36 +6,18 @@
|
|
|
6
6
|
|
|
7
7
|
import asyncio
|
|
8
8
|
import importlib.resources
|
|
9
|
+
import inspect
|
|
9
10
|
import os
|
|
10
11
|
import re
|
|
11
12
|
import tempfile
|
|
12
|
-
from typing import Any
|
|
13
|
+
from typing import Any, get_type_hints
|
|
13
14
|
|
|
14
15
|
import yaml
|
|
16
|
+
from pydantic import BaseModel
|
|
15
17
|
|
|
16
|
-
from llama_stack.
|
|
17
|
-
from llama_stack.apis.benchmarks import Benchmarks
|
|
18
|
-
from llama_stack.apis.conversations import Conversations
|
|
19
|
-
from llama_stack.apis.datasetio import DatasetIO
|
|
20
|
-
from llama_stack.apis.datasets import Datasets
|
|
21
|
-
from llama_stack.apis.eval import Eval
|
|
22
|
-
from llama_stack.apis.files import Files
|
|
23
|
-
from llama_stack.apis.inference import Inference
|
|
24
|
-
from llama_stack.apis.inspect import Inspect
|
|
25
|
-
from llama_stack.apis.models import Models
|
|
26
|
-
from llama_stack.apis.post_training import PostTraining
|
|
27
|
-
from llama_stack.apis.prompts import Prompts
|
|
28
|
-
from llama_stack.apis.providers import Providers
|
|
29
|
-
from llama_stack.apis.safety import Safety
|
|
30
|
-
from llama_stack.apis.scoring import Scoring
|
|
31
|
-
from llama_stack.apis.scoring_functions import ScoringFunctions
|
|
32
|
-
from llama_stack.apis.shields import Shields
|
|
33
|
-
from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
|
|
34
|
-
from llama_stack.apis.telemetry import Telemetry
|
|
35
|
-
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
|
|
36
|
-
from llama_stack.apis.vector_io import VectorIO
|
|
18
|
+
from llama_stack.core.admin import AdminImpl, AdminImplConfig
|
|
37
19
|
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
|
|
38
|
-
from llama_stack.core.datatypes import Provider,
|
|
20
|
+
from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
|
|
39
21
|
from llama_stack.core.distribution import get_provider_registry
|
|
40
22
|
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
|
|
41
23
|
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
|
|
@@ -55,7 +37,30 @@ from llama_stack.core.storage.datatypes import (
|
|
|
55
37
|
from llama_stack.core.store.registry import create_dist_registry
|
|
56
38
|
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
57
39
|
from llama_stack.log import get_logger
|
|
58
|
-
from
|
|
40
|
+
from llama_stack_api import (
|
|
41
|
+
Agents,
|
|
42
|
+
Api,
|
|
43
|
+
Batches,
|
|
44
|
+
Benchmarks,
|
|
45
|
+
Conversations,
|
|
46
|
+
DatasetIO,
|
|
47
|
+
Datasets,
|
|
48
|
+
Eval,
|
|
49
|
+
Files,
|
|
50
|
+
Inference,
|
|
51
|
+
Inspect,
|
|
52
|
+
Models,
|
|
53
|
+
PostTraining,
|
|
54
|
+
Prompts,
|
|
55
|
+
Providers,
|
|
56
|
+
Safety,
|
|
57
|
+
Scoring,
|
|
58
|
+
ScoringFunctions,
|
|
59
|
+
Shields,
|
|
60
|
+
ToolGroups,
|
|
61
|
+
ToolRuntime,
|
|
62
|
+
VectorIO,
|
|
63
|
+
)
|
|
59
64
|
|
|
60
65
|
logger = get_logger(name=__name__, category="core")
|
|
61
66
|
|
|
@@ -64,10 +69,9 @@ class LlamaStack(
|
|
|
64
69
|
Providers,
|
|
65
70
|
Inference,
|
|
66
71
|
Agents,
|
|
72
|
+
Batches,
|
|
67
73
|
Safety,
|
|
68
|
-
SyntheticDataGeneration,
|
|
69
74
|
Datasets,
|
|
70
|
-
Telemetry,
|
|
71
75
|
PostTraining,
|
|
72
76
|
VectorIO,
|
|
73
77
|
Eval,
|
|
@@ -80,7 +84,6 @@ class LlamaStack(
|
|
|
80
84
|
Inspect,
|
|
81
85
|
ToolGroups,
|
|
82
86
|
ToolRuntime,
|
|
83
|
-
RAGToolRuntime,
|
|
84
87
|
Files,
|
|
85
88
|
Prompts,
|
|
86
89
|
Conversations,
|
|
@@ -107,8 +110,95 @@ REGISTRY_REFRESH_INTERVAL_SECONDS = 300
|
|
|
107
110
|
REGISTRY_REFRESH_TASK = None
|
|
108
111
|
TEST_RECORDING_CONTEXT = None
|
|
109
112
|
|
|
113
|
+
# ID fields for registered resources that should trigger skipping
|
|
114
|
+
# when they resolve to empty/None (from conditional env vars like :+)
|
|
115
|
+
RESOURCE_ID_FIELDS = [
|
|
116
|
+
"vector_store_id",
|
|
117
|
+
"model_id",
|
|
118
|
+
"shield_id",
|
|
119
|
+
"dataset_id",
|
|
120
|
+
"scoring_fn_id",
|
|
121
|
+
"benchmark_id",
|
|
122
|
+
"toolgroup_id",
|
|
123
|
+
]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def is_request_model(t: Any) -> bool:
|
|
127
|
+
"""Check if a type is a request model (Pydantic BaseModel).
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
t: The type to check
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
True if the type is a Pydantic BaseModel subclass, False otherwise
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
return inspect.isclass(t) and issubclass(t, BaseModel)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
async def invoke_with_optional_request(method: Any) -> Any:
|
|
140
|
+
"""Invoke a method, automatically creating a request instance if needed.
|
|
141
|
+
|
|
142
|
+
For APIs that use request models, this will create an empty request object.
|
|
143
|
+
For backward compatibility, falls back to calling without arguments.
|
|
144
|
+
|
|
145
|
+
Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class).
|
|
146
|
+
|
|
147
|
+
Handles methods with:
|
|
148
|
+
- No parameters: calls without arguments
|
|
149
|
+
- One or more request model parameters: creates empty instances for each
|
|
150
|
+
- Mixed parameters: creates request models, uses defaults for others
|
|
151
|
+
- Required non-request-model parameters without defaults: falls back to calling without arguments
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
method: The method to invoke
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
The result of calling the method
|
|
158
|
+
"""
|
|
159
|
+
try:
|
|
160
|
+
hints = get_type_hints(method)
|
|
161
|
+
except Exception:
|
|
162
|
+
# Forward references can't be resolved, fall back to calling without request
|
|
163
|
+
return await method()
|
|
110
164
|
|
|
111
|
-
|
|
165
|
+
params = list(inspect.signature(method).parameters.values())
|
|
166
|
+
params = [p for p in params if p.name != "self"]
|
|
167
|
+
|
|
168
|
+
if not params:
|
|
169
|
+
return await method()
|
|
170
|
+
|
|
171
|
+
# Build arguments for the method call
|
|
172
|
+
args: dict[str, Any] = {}
|
|
173
|
+
can_call = True
|
|
174
|
+
|
|
175
|
+
for param in params:
|
|
176
|
+
param_type = hints.get(param.name)
|
|
177
|
+
|
|
178
|
+
# If it's a request model, try to create an empty instance
|
|
179
|
+
if param_type and is_request_model(param_type):
|
|
180
|
+
try:
|
|
181
|
+
args[param.name] = param_type()
|
|
182
|
+
except Exception:
|
|
183
|
+
# Request model requires arguments, can't create empty instance
|
|
184
|
+
can_call = False
|
|
185
|
+
break
|
|
186
|
+
# If it has a default value, we can skip it (will use default)
|
|
187
|
+
elif param.default != inspect.Parameter.empty:
|
|
188
|
+
continue
|
|
189
|
+
# Required parameter that's not a request model - can't provide it
|
|
190
|
+
else:
|
|
191
|
+
can_call = False
|
|
192
|
+
break
|
|
193
|
+
|
|
194
|
+
if can_call and args:
|
|
195
|
+
return await method(**args)
|
|
196
|
+
|
|
197
|
+
# Fall back to calling without arguments for backward compatibility
|
|
198
|
+
return await method()
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
|
|
112
202
|
for rsrc, api, register_method, list_method in RESOURCES:
|
|
113
203
|
objects = getattr(run_config.registered_resources, rsrc)
|
|
114
204
|
if api not in impls:
|
|
@@ -129,7 +219,7 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
|
|
|
129
219
|
await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
|
|
130
220
|
|
|
131
221
|
method = getattr(impls[api], list_method)
|
|
132
|
-
response = await method
|
|
222
|
+
response = await invoke_with_optional_request(method)
|
|
133
223
|
|
|
134
224
|
objects_to_process = response.data if hasattr(response, "data") else response
|
|
135
225
|
|
|
@@ -144,35 +234,93 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig
|
|
|
144
234
|
if vector_stores_config is None:
|
|
145
235
|
return
|
|
146
236
|
|
|
147
|
-
|
|
148
|
-
if default_embedding_model is None:
|
|
149
|
-
|
|
237
|
+
# Validate default embedding model
|
|
238
|
+
if vector_stores_config.default_embedding_model is not None:
|
|
239
|
+
await _validate_embedding_model(vector_stores_config.default_embedding_model, impls)
|
|
240
|
+
|
|
241
|
+
# Validate rewrite query params
|
|
242
|
+
if vector_stores_config.rewrite_query_params:
|
|
243
|
+
if vector_stores_config.rewrite_query_params.model:
|
|
244
|
+
await _validate_rewrite_query_model(vector_stores_config.rewrite_query_params.model, impls)
|
|
150
245
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
246
|
+
|
|
247
|
+
async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict[Api, Any]) -> None:
|
|
248
|
+
"""Validate that an embedding model exists and has required metadata."""
|
|
249
|
+
provider_id = embedding_model.provider_id
|
|
250
|
+
model_id = embedding_model.model_id
|
|
251
|
+
model_identifier = f"{provider_id}/{model_id}"
|
|
154
252
|
|
|
155
253
|
if Api.models not in impls:
|
|
156
|
-
raise ValueError(f"Models API is not available but vector_stores config requires model '{
|
|
254
|
+
raise ValueError(f"Models API is not available but vector_stores config requires model '{model_identifier}'")
|
|
157
255
|
|
|
158
256
|
models_impl = impls[Api.models]
|
|
159
257
|
response = await models_impl.list_models()
|
|
160
258
|
models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}
|
|
161
259
|
|
|
162
|
-
|
|
163
|
-
if
|
|
164
|
-
raise ValueError(
|
|
260
|
+
model = models_list.get(model_identifier)
|
|
261
|
+
if model is None:
|
|
262
|
+
raise ValueError(
|
|
263
|
+
f"Embedding model '{model_identifier}' not found. Available embedding models: {list(models_list.keys())}"
|
|
264
|
+
)
|
|
165
265
|
|
|
166
|
-
embedding_dimension =
|
|
266
|
+
embedding_dimension = model.metadata.get("embedding_dimension")
|
|
167
267
|
if embedding_dimension is None:
|
|
168
|
-
raise ValueError(f"Embedding model '{
|
|
268
|
+
raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata")
|
|
169
269
|
|
|
170
270
|
try:
|
|
171
271
|
int(embedding_dimension)
|
|
172
272
|
except ValueError as err:
|
|
173
273
|
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
|
|
174
274
|
|
|
175
|
-
logger.debug(f"Validated
|
|
275
|
+
logger.debug(f"Validated embedding model: {model_identifier} (dimension: {embedding_dimension})")
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
async def _validate_rewrite_query_model(rewrite_query_model: QualifiedModel, impls: dict[Api, Any]) -> None:
|
|
279
|
+
"""Validate that a rewrite query model exists and is accessible."""
|
|
280
|
+
provider_id = rewrite_query_model.provider_id
|
|
281
|
+
model_id = rewrite_query_model.model_id
|
|
282
|
+
model_identifier = f"{provider_id}/{model_id}"
|
|
283
|
+
|
|
284
|
+
if Api.models not in impls:
|
|
285
|
+
raise ValueError(
|
|
286
|
+
f"Models API is not available but vector_stores config requires rewrite query model '{model_identifier}'"
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
models_impl = impls[Api.models]
|
|
290
|
+
response = await models_impl.list_models()
|
|
291
|
+
llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"}
|
|
292
|
+
|
|
293
|
+
model = llm_models_list.get(model_identifier)
|
|
294
|
+
if model is None:
|
|
295
|
+
raise ValueError(
|
|
296
|
+
f"Rewrite query model '{model_identifier}' not found. Available LLM models: {list(llm_models_list.keys())}"
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
logger.debug(f"Validated rewrite query model: {model_identifier}")
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]):
|
|
303
|
+
if safety_config is None or safety_config.default_shield_id is None:
|
|
304
|
+
return
|
|
305
|
+
|
|
306
|
+
if Api.shields not in impls:
|
|
307
|
+
raise ValueError("Safety configuration requires the shields API to be enabled")
|
|
308
|
+
|
|
309
|
+
if Api.safety not in impls:
|
|
310
|
+
raise ValueError("Safety configuration requires the safety API to be enabled")
|
|
311
|
+
|
|
312
|
+
shields_impl = impls[Api.shields]
|
|
313
|
+
response = await shields_impl.list_shields()
|
|
314
|
+
shields_by_id = {shield.identifier: shield for shield in response.data}
|
|
315
|
+
|
|
316
|
+
default_shield_id = safety_config.default_shield_id
|
|
317
|
+
# don't validate if there are no shields registered
|
|
318
|
+
if shields_by_id and default_shield_id not in shields_by_id:
|
|
319
|
+
available = sorted(shields_by_id)
|
|
320
|
+
raise ValueError(
|
|
321
|
+
f"Configured default_shield_id '{default_shield_id}' not found among registered shields."
|
|
322
|
+
f" Available shields: {available}"
|
|
323
|
+
)
|
|
176
324
|
|
|
177
325
|
|
|
178
326
|
class EnvVarError(Exception):
|
|
@@ -210,15 +358,33 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
|
|
|
210
358
|
logger.debug(
|
|
211
359
|
f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
|
|
212
360
|
)
|
|
213
|
-
# Create a copy with resolved provider_id but original config
|
|
214
|
-
disabled_provider = v.copy()
|
|
215
|
-
disabled_provider["provider_id"] = resolved_provider_id
|
|
216
361
|
continue
|
|
217
362
|
except EnvVarError:
|
|
218
363
|
# If we can't resolve the provider_id, continue with normal processing
|
|
219
364
|
pass
|
|
220
365
|
|
|
221
|
-
#
|
|
366
|
+
# Special handling for registered resources: check if ID field resolves to empty/None
|
|
367
|
+
# from conditional env vars (e.g., ${env.VAR:+value}) and skip the entry if so
|
|
368
|
+
if isinstance(v, dict):
|
|
369
|
+
should_skip = False
|
|
370
|
+
for id_field in RESOURCE_ID_FIELDS:
|
|
371
|
+
if id_field in v:
|
|
372
|
+
try:
|
|
373
|
+
resolved_id = replace_env_vars(v[id_field], f"{path}[{i}].{id_field}")
|
|
374
|
+
if resolved_id is None or resolved_id == "":
|
|
375
|
+
logger.debug(
|
|
376
|
+
f"Skipping {path}[{i}] with empty {id_field} (conditional env var not set)"
|
|
377
|
+
)
|
|
378
|
+
should_skip = True
|
|
379
|
+
break
|
|
380
|
+
except EnvVarError as e:
|
|
381
|
+
logger.warning(
|
|
382
|
+
f"Could not resolve {id_field} in {path}[{i}], env var '{e.var_name}': {e}"
|
|
383
|
+
)
|
|
384
|
+
if should_skip:
|
|
385
|
+
continue
|
|
386
|
+
|
|
387
|
+
# Normal processing
|
|
222
388
|
result.append(replace_env_vars(v, f"{path}[{i}]"))
|
|
223
389
|
except EnvVarError as e:
|
|
224
390
|
raise EnvVarError(e.var_name, e.path) from None
|
|
@@ -317,39 +483,44 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
|
|
|
317
483
|
return config_dict
|
|
318
484
|
|
|
319
485
|
|
|
320
|
-
def add_internal_implementations(impls: dict[Api, Any],
|
|
321
|
-
"""Add internal implementations (inspect and
|
|
322
|
-
|
|
486
|
+
def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) -> None:
|
|
487
|
+
"""Add internal implementations (inspect, providers, and admin) to the implementations dictionary.
|
|
323
488
|
Args:
|
|
324
489
|
impls: Dictionary of API implementations
|
|
325
490
|
run_config: Stack run configuration
|
|
326
491
|
"""
|
|
327
492
|
inspect_impl = DistributionInspectImpl(
|
|
328
|
-
DistributionInspectConfig(
|
|
493
|
+
DistributionInspectConfig(config=config),
|
|
329
494
|
deps=impls,
|
|
330
495
|
)
|
|
331
496
|
impls[Api.inspect] = inspect_impl
|
|
332
497
|
|
|
333
498
|
providers_impl = ProviderImpl(
|
|
334
|
-
ProviderImplConfig(
|
|
499
|
+
ProviderImplConfig(config=config),
|
|
335
500
|
deps=impls,
|
|
336
501
|
)
|
|
337
502
|
impls[Api.providers] = providers_impl
|
|
338
503
|
|
|
504
|
+
admin_impl = AdminImpl(
|
|
505
|
+
AdminImplConfig(config=config),
|
|
506
|
+
deps=impls,
|
|
507
|
+
)
|
|
508
|
+
impls[Api.admin] = admin_impl
|
|
509
|
+
|
|
339
510
|
prompts_impl = PromptServiceImpl(
|
|
340
|
-
PromptServiceConfig(
|
|
511
|
+
PromptServiceConfig(config=config),
|
|
341
512
|
deps=impls,
|
|
342
513
|
)
|
|
343
514
|
impls[Api.prompts] = prompts_impl
|
|
344
515
|
|
|
345
516
|
conversations_impl = ConversationServiceImpl(
|
|
346
|
-
ConversationServiceConfig(
|
|
517
|
+
ConversationServiceConfig(config=config),
|
|
347
518
|
deps=impls,
|
|
348
519
|
)
|
|
349
520
|
impls[Api.conversations] = conversations_impl
|
|
350
521
|
|
|
351
522
|
|
|
352
|
-
def _initialize_storage(run_config:
|
|
523
|
+
def _initialize_storage(run_config: StackConfig):
|
|
353
524
|
kv_backends: dict[str, StorageBackendConfig] = {}
|
|
354
525
|
sql_backends: dict[str, StorageBackendConfig] = {}
|
|
355
526
|
for backend_name, backend_config in run_config.storage.backends.items():
|
|
@@ -361,15 +532,15 @@ def _initialize_storage(run_config: StackRunConfig):
|
|
|
361
532
|
else:
|
|
362
533
|
raise ValueError(f"Unknown storage backend type: {type}")
|
|
363
534
|
|
|
364
|
-
from llama_stack.
|
|
365
|
-
from llama_stack.
|
|
535
|
+
from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends
|
|
536
|
+
from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends
|
|
366
537
|
|
|
367
538
|
register_kvstore_backends(kv_backends)
|
|
368
539
|
register_sqlstore_backends(sql_backends)
|
|
369
540
|
|
|
370
541
|
|
|
371
542
|
class Stack:
|
|
372
|
-
def __init__(self, run_config:
|
|
543
|
+
def __init__(self, run_config: StackConfig, provider_registry: ProviderRegistry | None = None):
|
|
373
544
|
self.run_config = run_config
|
|
374
545
|
self.provider_registry = provider_registry
|
|
375
546
|
self.impls = None
|
|
@@ -412,6 +583,7 @@ class Stack:
|
|
|
412
583
|
await register_resources(self.run_config, impls)
|
|
413
584
|
await refresh_registry_once(impls)
|
|
414
585
|
await validate_vector_stores_config(self.run_config.vector_stores, impls)
|
|
586
|
+
await validate_safety_config(self.run_config.safety, impls)
|
|
415
587
|
self.impls = impls
|
|
416
588
|
|
|
417
589
|
def create_registry_refresh_task(self):
|
|
@@ -474,20 +646,20 @@ async def refresh_registry_task(impls: dict[Api, Any]):
|
|
|
474
646
|
await asyncio.sleep(REGISTRY_REFRESH_INTERVAL_SECONDS)
|
|
475
647
|
|
|
476
648
|
|
|
477
|
-
def get_stack_run_config_from_distro(distro: str) ->
|
|
478
|
-
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/
|
|
649
|
+
def get_stack_run_config_from_distro(distro: str) -> StackConfig:
|
|
650
|
+
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml"
|
|
479
651
|
|
|
480
652
|
with importlib.resources.as_file(distro_path) as path:
|
|
481
653
|
if not path.exists():
|
|
482
654
|
raise ValueError(f"Distribution '{distro}' not found at {distro_path}")
|
|
483
655
|
run_config = yaml.safe_load(path.open())
|
|
484
656
|
|
|
485
|
-
return
|
|
657
|
+
return StackConfig(**replace_env_vars(run_config))
|
|
486
658
|
|
|
487
659
|
|
|
488
660
|
def run_config_from_adhoc_config_spec(
|
|
489
661
|
adhoc_config_spec: str, provider_registry: ProviderRegistry | None = None
|
|
490
|
-
) ->
|
|
662
|
+
) -> StackConfig:
|
|
491
663
|
"""
|
|
492
664
|
Create an adhoc distribution from a list of API providers.
|
|
493
665
|
|
|
@@ -527,7 +699,7 @@ def run_config_from_adhoc_config_spec(
|
|
|
527
699
|
config=provider_config,
|
|
528
700
|
)
|
|
529
701
|
]
|
|
530
|
-
config =
|
|
702
|
+
config = StackConfig(
|
|
531
703
|
image_name="distro-test",
|
|
532
704
|
apis=list(provider_configs_by_api.keys()),
|
|
533
705
|
providers=provider_configs_by_api,
|
|
@@ -540,6 +712,7 @@ def run_config_from_adhoc_config_spec(
|
|
|
540
712
|
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
|
|
541
713
|
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
|
|
542
714
|
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
|
|
715
|
+
prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
|
|
543
716
|
),
|
|
544
717
|
),
|
|
545
718
|
)
|
|
@@ -12,6 +12,8 @@ from typing import Annotated, Literal
|
|
|
12
12
|
|
|
13
13
|
from pydantic import BaseModel, Field, field_validator
|
|
14
14
|
|
|
15
|
+
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
class StorageBackendType(StrEnum):
|
|
17
19
|
KV_REDIS = "kv_redis"
|
|
@@ -256,25 +258,46 @@ class ResponsesStoreReference(InferenceStoreReference):
|
|
|
256
258
|
|
|
257
259
|
class ServerStoresConfig(BaseModel):
|
|
258
260
|
metadata: KVStoreReference | None = Field(
|
|
259
|
-
default=
|
|
261
|
+
default=KVStoreReference(
|
|
262
|
+
backend="kv_default",
|
|
263
|
+
namespace="registry",
|
|
264
|
+
),
|
|
260
265
|
description="Metadata store configuration (uses KV backend)",
|
|
261
266
|
)
|
|
262
267
|
inference: InferenceStoreReference | None = Field(
|
|
263
|
-
default=
|
|
268
|
+
default=InferenceStoreReference(
|
|
269
|
+
backend="sql_default",
|
|
270
|
+
table_name="inference_store",
|
|
271
|
+
),
|
|
264
272
|
description="Inference store configuration (uses SQL backend)",
|
|
265
273
|
)
|
|
266
274
|
conversations: SqlStoreReference | None = Field(
|
|
267
|
-
default=
|
|
275
|
+
default=SqlStoreReference(
|
|
276
|
+
backend="sql_default",
|
|
277
|
+
table_name="openai_conversations",
|
|
278
|
+
),
|
|
268
279
|
description="Conversations store configuration (uses SQL backend)",
|
|
269
280
|
)
|
|
270
281
|
responses: ResponsesStoreReference | None = Field(
|
|
271
282
|
default=None,
|
|
272
283
|
description="Responses store configuration (uses SQL backend)",
|
|
273
284
|
)
|
|
285
|
+
prompts: KVStoreReference | None = Field(
|
|
286
|
+
default=KVStoreReference(backend="kv_default", namespace="prompts"),
|
|
287
|
+
description="Prompts store configuration (uses KV backend)",
|
|
288
|
+
)
|
|
274
289
|
|
|
275
290
|
|
|
276
291
|
class StorageConfig(BaseModel):
|
|
277
292
|
backends: dict[str, StorageBackendConfig] = Field(
|
|
293
|
+
default={
|
|
294
|
+
"kv_default": SqliteKVStoreConfig(
|
|
295
|
+
db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db",
|
|
296
|
+
),
|
|
297
|
+
"sql_default": SqliteSqlStoreConfig(
|
|
298
|
+
db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db",
|
|
299
|
+
),
|
|
300
|
+
},
|
|
278
301
|
description="Named backend configurations (e.g., 'default', 'cache')",
|
|
279
302
|
)
|
|
280
303
|
stores: ServerStoresConfig = Field(
|
|
@@ -11,10 +11,21 @@
|
|
|
11
11
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
import asyncio
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import cast
|
|
15
18
|
|
|
16
|
-
from .
|
|
17
|
-
from .
|
|
19
|
+
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig
|
|
20
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
21
|
+
|
|
22
|
+
from .config import (
|
|
23
|
+
KVStoreConfig,
|
|
24
|
+
MongoDBKVStoreConfig,
|
|
25
|
+
PostgresKVStoreConfig,
|
|
26
|
+
RedisKVStoreConfig,
|
|
27
|
+
SqliteKVStoreConfig,
|
|
28
|
+
)
|
|
18
29
|
|
|
19
30
|
|
|
20
31
|
def kvstore_dependencies():
|
|
@@ -30,7 +41,7 @@ def kvstore_dependencies():
|
|
|
30
41
|
|
|
31
42
|
class InmemoryKVStoreImpl(KVStore):
|
|
32
43
|
def __init__(self):
|
|
33
|
-
self._store = {}
|
|
44
|
+
self._store: dict[str, str] = {}
|
|
34
45
|
|
|
35
46
|
async def initialize(self) -> None:
|
|
36
47
|
pass
|
|
@@ -38,7 +49,7 @@ class InmemoryKVStoreImpl(KVStore):
|
|
|
38
49
|
async def get(self, key: str) -> str | None:
|
|
39
50
|
return self._store.get(key)
|
|
40
51
|
|
|
41
|
-
async def set(self, key: str, value: str) -> None:
|
|
52
|
+
async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
|
|
42
53
|
self._store[key] = value
|
|
43
54
|
|
|
44
55
|
async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
|
|
@@ -53,45 +64,65 @@ class InmemoryKVStoreImpl(KVStore):
|
|
|
53
64
|
|
|
54
65
|
|
|
55
66
|
_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
|
|
67
|
+
_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
|
|
68
|
+
_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)
|
|
56
69
|
|
|
57
70
|
|
|
58
71
|
def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
|
|
59
72
|
"""Register the set of available KV store backends for reference resolution."""
|
|
60
73
|
global _KVSTORE_BACKENDS
|
|
74
|
+
global _KVSTORE_INSTANCES
|
|
75
|
+
global _KVSTORE_LOCKS
|
|
61
76
|
|
|
62
77
|
_KVSTORE_BACKENDS.clear()
|
|
78
|
+
_KVSTORE_INSTANCES.clear()
|
|
79
|
+
_KVSTORE_LOCKS.clear()
|
|
63
80
|
for name, cfg in backends.items():
|
|
64
|
-
|
|
81
|
+
typed_cfg = cast(KVStoreConfig, cfg)
|
|
82
|
+
_KVSTORE_BACKENDS[name] = typed_cfg
|
|
65
83
|
|
|
66
84
|
|
|
67
85
|
async def kvstore_impl(reference: KVStoreReference) -> KVStore:
|
|
68
86
|
backend_name = reference.backend
|
|
87
|
+
cache_key = (backend_name, reference.namespace)
|
|
88
|
+
|
|
89
|
+
existing = _KVSTORE_INSTANCES.get(cache_key)
|
|
90
|
+
if existing:
|
|
91
|
+
return existing
|
|
69
92
|
|
|
70
93
|
backend_config = _KVSTORE_BACKENDS.get(backend_name)
|
|
71
94
|
if backend_config is None:
|
|
72
95
|
raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}")
|
|
73
96
|
|
|
74
|
-
|
|
75
|
-
|
|
97
|
+
lock = _KVSTORE_LOCKS[cache_key]
|
|
98
|
+
async with lock:
|
|
99
|
+
existing = _KVSTORE_INSTANCES.get(cache_key)
|
|
100
|
+
if existing:
|
|
101
|
+
return existing
|
|
102
|
+
|
|
103
|
+
config = backend_config.model_copy()
|
|
104
|
+
config.namespace = reference.namespace
|
|
76
105
|
|
|
77
|
-
|
|
78
|
-
|
|
106
|
+
impl: KVStore
|
|
107
|
+
if isinstance(config, RedisKVStoreConfig):
|
|
108
|
+
from .redis import RedisKVStoreImpl
|
|
79
109
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
110
|
+
impl = RedisKVStoreImpl(config)
|
|
111
|
+
elif isinstance(config, SqliteKVStoreConfig):
|
|
112
|
+
from .sqlite import SqliteKVStoreImpl
|
|
83
113
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
114
|
+
impl = SqliteKVStoreImpl(config)
|
|
115
|
+
elif isinstance(config, PostgresKVStoreConfig):
|
|
116
|
+
from .postgres import PostgresKVStoreImpl
|
|
87
117
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
118
|
+
impl = PostgresKVStoreImpl(config)
|
|
119
|
+
elif isinstance(config, MongoDBKVStoreConfig):
|
|
120
|
+
from .mongodb import MongoDBKVStoreImpl
|
|
91
121
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
122
|
+
impl = MongoDBKVStoreImpl(config)
|
|
123
|
+
else:
|
|
124
|
+
raise ValueError(f"Unknown kvstore type {config.type}")
|
|
95
125
|
|
|
96
|
-
|
|
97
|
-
|
|
126
|
+
await impl.initialize()
|
|
127
|
+
_KVSTORE_INSTANCES[cache_key] = impl
|
|
128
|
+
return impl
|