llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/stack.py
CHANGED
|
@@ -6,36 +6,18 @@
|
|
|
6
6
|
|
|
7
7
|
import asyncio
|
|
8
8
|
import importlib.resources
|
|
9
|
+
import inspect
|
|
9
10
|
import os
|
|
10
11
|
import re
|
|
11
12
|
import tempfile
|
|
12
|
-
from typing import Any
|
|
13
|
+
from typing import Any, get_type_hints
|
|
13
14
|
|
|
14
15
|
import yaml
|
|
16
|
+
from pydantic import BaseModel
|
|
15
17
|
|
|
16
|
-
from llama_stack.
|
|
17
|
-
from llama_stack.apis.benchmarks import Benchmarks
|
|
18
|
-
from llama_stack.apis.conversations import Conversations
|
|
19
|
-
from llama_stack.apis.datasetio import DatasetIO
|
|
20
|
-
from llama_stack.apis.datasets import Datasets
|
|
21
|
-
from llama_stack.apis.eval import Eval
|
|
22
|
-
from llama_stack.apis.files import Files
|
|
23
|
-
from llama_stack.apis.inference import Inference
|
|
24
|
-
from llama_stack.apis.inspect import Inspect
|
|
25
|
-
from llama_stack.apis.models import Models
|
|
26
|
-
from llama_stack.apis.post_training import PostTraining
|
|
27
|
-
from llama_stack.apis.prompts import Prompts
|
|
28
|
-
from llama_stack.apis.providers import Providers
|
|
29
|
-
from llama_stack.apis.safety import Safety
|
|
30
|
-
from llama_stack.apis.scoring import Scoring
|
|
31
|
-
from llama_stack.apis.scoring_functions import ScoringFunctions
|
|
32
|
-
from llama_stack.apis.shields import Shields
|
|
33
|
-
from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
|
|
34
|
-
from llama_stack.apis.telemetry import Telemetry
|
|
35
|
-
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
|
|
36
|
-
from llama_stack.apis.vector_io import VectorIO
|
|
18
|
+
from llama_stack.core.admin import AdminImpl, AdminImplConfig
|
|
37
19
|
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
|
|
38
|
-
from llama_stack.core.datatypes import Provider,
|
|
20
|
+
from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
|
|
39
21
|
from llama_stack.core.distribution import get_provider_registry
|
|
40
22
|
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
|
|
41
23
|
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
|
|
@@ -55,7 +37,30 @@ from llama_stack.core.storage.datatypes import (
|
|
|
55
37
|
from llama_stack.core.store.registry import create_dist_registry
|
|
56
38
|
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
57
39
|
from llama_stack.log import get_logger
|
|
58
|
-
from
|
|
40
|
+
from llama_stack_api import (
|
|
41
|
+
Agents,
|
|
42
|
+
Api,
|
|
43
|
+
Batches,
|
|
44
|
+
Benchmarks,
|
|
45
|
+
Conversations,
|
|
46
|
+
DatasetIO,
|
|
47
|
+
Datasets,
|
|
48
|
+
Eval,
|
|
49
|
+
Files,
|
|
50
|
+
Inference,
|
|
51
|
+
Inspect,
|
|
52
|
+
Models,
|
|
53
|
+
PostTraining,
|
|
54
|
+
Prompts,
|
|
55
|
+
Providers,
|
|
56
|
+
Safety,
|
|
57
|
+
Scoring,
|
|
58
|
+
ScoringFunctions,
|
|
59
|
+
Shields,
|
|
60
|
+
ToolGroups,
|
|
61
|
+
ToolRuntime,
|
|
62
|
+
VectorIO,
|
|
63
|
+
)
|
|
59
64
|
|
|
60
65
|
logger = get_logger(name=__name__, category="core")
|
|
61
66
|
|
|
@@ -64,10 +69,9 @@ class LlamaStack(
|
|
|
64
69
|
Providers,
|
|
65
70
|
Inference,
|
|
66
71
|
Agents,
|
|
72
|
+
Batches,
|
|
67
73
|
Safety,
|
|
68
|
-
SyntheticDataGeneration,
|
|
69
74
|
Datasets,
|
|
70
|
-
Telemetry,
|
|
71
75
|
PostTraining,
|
|
72
76
|
VectorIO,
|
|
73
77
|
Eval,
|
|
@@ -80,7 +84,6 @@ class LlamaStack(
|
|
|
80
84
|
Inspect,
|
|
81
85
|
ToolGroups,
|
|
82
86
|
ToolRuntime,
|
|
83
|
-
RAGToolRuntime,
|
|
84
87
|
Files,
|
|
85
88
|
Prompts,
|
|
86
89
|
Conversations,
|
|
@@ -108,7 +111,82 @@ REGISTRY_REFRESH_TASK = None
|
|
|
108
111
|
TEST_RECORDING_CONTEXT = None
|
|
109
112
|
|
|
110
113
|
|
|
111
|
-
|
|
114
|
+
def is_request_model(t: Any) -> bool:
|
|
115
|
+
"""Check if a type is a request model (Pydantic BaseModel).
|
|
116
|
+
|
|
117
|
+
Args:
|
|
118
|
+
t: The type to check
|
|
119
|
+
|
|
120
|
+
Returns:
|
|
121
|
+
True if the type is a Pydantic BaseModel subclass, False otherwise
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
return inspect.isclass(t) and issubclass(t, BaseModel)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
async def invoke_with_optional_request(method: Any) -> Any:
|
|
128
|
+
"""Invoke a method, automatically creating a request instance if needed.
|
|
129
|
+
|
|
130
|
+
For APIs that use request models, this will create an empty request object.
|
|
131
|
+
For backward compatibility, falls back to calling without arguments.
|
|
132
|
+
|
|
133
|
+
Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class).
|
|
134
|
+
|
|
135
|
+
Handles methods with:
|
|
136
|
+
- No parameters: calls without arguments
|
|
137
|
+
- One or more request model parameters: creates empty instances for each
|
|
138
|
+
- Mixed parameters: creates request models, uses defaults for others
|
|
139
|
+
- Required non-request-model parameters without defaults: falls back to calling without arguments
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
method: The method to invoke
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
The result of calling the method
|
|
146
|
+
"""
|
|
147
|
+
try:
|
|
148
|
+
hints = get_type_hints(method)
|
|
149
|
+
except Exception:
|
|
150
|
+
# Forward references can't be resolved, fall back to calling without request
|
|
151
|
+
return await method()
|
|
152
|
+
|
|
153
|
+
params = list(inspect.signature(method).parameters.values())
|
|
154
|
+
params = [p for p in params if p.name != "self"]
|
|
155
|
+
|
|
156
|
+
if not params:
|
|
157
|
+
return await method()
|
|
158
|
+
|
|
159
|
+
# Build arguments for the method call
|
|
160
|
+
args: dict[str, Any] = {}
|
|
161
|
+
can_call = True
|
|
162
|
+
|
|
163
|
+
for param in params:
|
|
164
|
+
param_type = hints.get(param.name)
|
|
165
|
+
|
|
166
|
+
# If it's a request model, try to create an empty instance
|
|
167
|
+
if param_type and is_request_model(param_type):
|
|
168
|
+
try:
|
|
169
|
+
args[param.name] = param_type()
|
|
170
|
+
except Exception:
|
|
171
|
+
# Request model requires arguments, can't create empty instance
|
|
172
|
+
can_call = False
|
|
173
|
+
break
|
|
174
|
+
# If it has a default value, we can skip it (will use default)
|
|
175
|
+
elif param.default != inspect.Parameter.empty:
|
|
176
|
+
continue
|
|
177
|
+
# Required parameter that's not a request model - can't provide it
|
|
178
|
+
else:
|
|
179
|
+
can_call = False
|
|
180
|
+
break
|
|
181
|
+
|
|
182
|
+
if can_call and args:
|
|
183
|
+
return await method(**args)
|
|
184
|
+
|
|
185
|
+
# Fall back to calling without arguments for backward compatibility
|
|
186
|
+
return await method()
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
|
|
112
190
|
for rsrc, api, register_method, list_method in RESOURCES:
|
|
113
191
|
objects = getattr(run_config.registered_resources, rsrc)
|
|
114
192
|
if api not in impls:
|
|
@@ -129,7 +207,7 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
|
|
|
129
207
|
await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
|
|
130
208
|
|
|
131
209
|
method = getattr(impls[api], list_method)
|
|
132
|
-
response = await method
|
|
210
|
+
response = await invoke_with_optional_request(method)
|
|
133
211
|
|
|
134
212
|
objects_to_process = response.data if hasattr(response, "data") else response
|
|
135
213
|
|
|
@@ -144,35 +222,93 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig
|
|
|
144
222
|
if vector_stores_config is None:
|
|
145
223
|
return
|
|
146
224
|
|
|
147
|
-
|
|
148
|
-
if default_embedding_model is None:
|
|
149
|
-
|
|
225
|
+
# Validate default embedding model
|
|
226
|
+
if vector_stores_config.default_embedding_model is not None:
|
|
227
|
+
await _validate_embedding_model(vector_stores_config.default_embedding_model, impls)
|
|
228
|
+
|
|
229
|
+
# Validate rewrite query params
|
|
230
|
+
if vector_stores_config.rewrite_query_params:
|
|
231
|
+
if vector_stores_config.rewrite_query_params.model:
|
|
232
|
+
await _validate_rewrite_query_model(vector_stores_config.rewrite_query_params.model, impls)
|
|
233
|
+
|
|
150
234
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
235
|
+
async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict[Api, Any]) -> None:
|
|
236
|
+
"""Validate that an embedding model exists and has required metadata."""
|
|
237
|
+
provider_id = embedding_model.provider_id
|
|
238
|
+
model_id = embedding_model.model_id
|
|
239
|
+
model_identifier = f"{provider_id}/{model_id}"
|
|
154
240
|
|
|
155
241
|
if Api.models not in impls:
|
|
156
|
-
raise ValueError(f"Models API is not available but vector_stores config requires model '{
|
|
242
|
+
raise ValueError(f"Models API is not available but vector_stores config requires model '{model_identifier}'")
|
|
157
243
|
|
|
158
244
|
models_impl = impls[Api.models]
|
|
159
245
|
response = await models_impl.list_models()
|
|
160
246
|
models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}
|
|
161
247
|
|
|
162
|
-
|
|
163
|
-
if
|
|
164
|
-
raise ValueError(
|
|
248
|
+
model = models_list.get(model_identifier)
|
|
249
|
+
if model is None:
|
|
250
|
+
raise ValueError(
|
|
251
|
+
f"Embedding model '{model_identifier}' not found. Available embedding models: {list(models_list.keys())}"
|
|
252
|
+
)
|
|
165
253
|
|
|
166
|
-
embedding_dimension =
|
|
254
|
+
embedding_dimension = model.metadata.get("embedding_dimension")
|
|
167
255
|
if embedding_dimension is None:
|
|
168
|
-
raise ValueError(f"Embedding model '{
|
|
256
|
+
raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata")
|
|
169
257
|
|
|
170
258
|
try:
|
|
171
259
|
int(embedding_dimension)
|
|
172
260
|
except ValueError as err:
|
|
173
261
|
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
|
|
174
262
|
|
|
175
|
-
logger.debug(f"Validated
|
|
263
|
+
logger.debug(f"Validated embedding model: {model_identifier} (dimension: {embedding_dimension})")
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
async def _validate_rewrite_query_model(rewrite_query_model: QualifiedModel, impls: dict[Api, Any]) -> None:
|
|
267
|
+
"""Validate that a rewrite query model exists and is accessible."""
|
|
268
|
+
provider_id = rewrite_query_model.provider_id
|
|
269
|
+
model_id = rewrite_query_model.model_id
|
|
270
|
+
model_identifier = f"{provider_id}/{model_id}"
|
|
271
|
+
|
|
272
|
+
if Api.models not in impls:
|
|
273
|
+
raise ValueError(
|
|
274
|
+
f"Models API is not available but vector_stores config requires rewrite query model '{model_identifier}'"
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
models_impl = impls[Api.models]
|
|
278
|
+
response = await models_impl.list_models()
|
|
279
|
+
llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"}
|
|
280
|
+
|
|
281
|
+
model = llm_models_list.get(model_identifier)
|
|
282
|
+
if model is None:
|
|
283
|
+
raise ValueError(
|
|
284
|
+
f"Rewrite query model '{model_identifier}' not found. Available LLM models: {list(llm_models_list.keys())}"
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
logger.debug(f"Validated rewrite query model: {model_identifier}")
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]):
|
|
291
|
+
if safety_config is None or safety_config.default_shield_id is None:
|
|
292
|
+
return
|
|
293
|
+
|
|
294
|
+
if Api.shields not in impls:
|
|
295
|
+
raise ValueError("Safety configuration requires the shields API to be enabled")
|
|
296
|
+
|
|
297
|
+
if Api.safety not in impls:
|
|
298
|
+
raise ValueError("Safety configuration requires the safety API to be enabled")
|
|
299
|
+
|
|
300
|
+
shields_impl = impls[Api.shields]
|
|
301
|
+
response = await shields_impl.list_shields()
|
|
302
|
+
shields_by_id = {shield.identifier: shield for shield in response.data}
|
|
303
|
+
|
|
304
|
+
default_shield_id = safety_config.default_shield_id
|
|
305
|
+
# don't validate if there are no shields registered
|
|
306
|
+
if shields_by_id and default_shield_id not in shields_by_id:
|
|
307
|
+
available = sorted(shields_by_id)
|
|
308
|
+
raise ValueError(
|
|
309
|
+
f"Configured default_shield_id '{default_shield_id}' not found among registered shields."
|
|
310
|
+
f" Available shields: {available}"
|
|
311
|
+
)
|
|
176
312
|
|
|
177
313
|
|
|
178
314
|
class EnvVarError(Exception):
|
|
@@ -317,39 +453,44 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
|
|
|
317
453
|
return config_dict
|
|
318
454
|
|
|
319
455
|
|
|
320
|
-
def add_internal_implementations(impls: dict[Api, Any],
|
|
321
|
-
"""Add internal implementations (inspect and
|
|
322
|
-
|
|
456
|
+
def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) -> None:
|
|
457
|
+
"""Add internal implementations (inspect, providers, and admin) to the implementations dictionary.
|
|
323
458
|
Args:
|
|
324
459
|
impls: Dictionary of API implementations
|
|
325
460
|
run_config: Stack run configuration
|
|
326
461
|
"""
|
|
327
462
|
inspect_impl = DistributionInspectImpl(
|
|
328
|
-
DistributionInspectConfig(
|
|
463
|
+
DistributionInspectConfig(config=config),
|
|
329
464
|
deps=impls,
|
|
330
465
|
)
|
|
331
466
|
impls[Api.inspect] = inspect_impl
|
|
332
467
|
|
|
333
468
|
providers_impl = ProviderImpl(
|
|
334
|
-
ProviderImplConfig(
|
|
469
|
+
ProviderImplConfig(config=config),
|
|
335
470
|
deps=impls,
|
|
336
471
|
)
|
|
337
472
|
impls[Api.providers] = providers_impl
|
|
338
473
|
|
|
474
|
+
admin_impl = AdminImpl(
|
|
475
|
+
AdminImplConfig(config=config),
|
|
476
|
+
deps=impls,
|
|
477
|
+
)
|
|
478
|
+
impls[Api.admin] = admin_impl
|
|
479
|
+
|
|
339
480
|
prompts_impl = PromptServiceImpl(
|
|
340
|
-
PromptServiceConfig(
|
|
481
|
+
PromptServiceConfig(config=config),
|
|
341
482
|
deps=impls,
|
|
342
483
|
)
|
|
343
484
|
impls[Api.prompts] = prompts_impl
|
|
344
485
|
|
|
345
486
|
conversations_impl = ConversationServiceImpl(
|
|
346
|
-
ConversationServiceConfig(
|
|
487
|
+
ConversationServiceConfig(config=config),
|
|
347
488
|
deps=impls,
|
|
348
489
|
)
|
|
349
490
|
impls[Api.conversations] = conversations_impl
|
|
350
491
|
|
|
351
492
|
|
|
352
|
-
def _initialize_storage(run_config:
|
|
493
|
+
def _initialize_storage(run_config: StackConfig):
|
|
353
494
|
kv_backends: dict[str, StorageBackendConfig] = {}
|
|
354
495
|
sql_backends: dict[str, StorageBackendConfig] = {}
|
|
355
496
|
for backend_name, backend_config in run_config.storage.backends.items():
|
|
@@ -361,15 +502,15 @@ def _initialize_storage(run_config: StackRunConfig):
|
|
|
361
502
|
else:
|
|
362
503
|
raise ValueError(f"Unknown storage backend type: {type}")
|
|
363
504
|
|
|
364
|
-
from llama_stack.
|
|
365
|
-
from llama_stack.
|
|
505
|
+
from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends
|
|
506
|
+
from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends
|
|
366
507
|
|
|
367
508
|
register_kvstore_backends(kv_backends)
|
|
368
509
|
register_sqlstore_backends(sql_backends)
|
|
369
510
|
|
|
370
511
|
|
|
371
512
|
class Stack:
|
|
372
|
-
def __init__(self, run_config:
|
|
513
|
+
def __init__(self, run_config: StackConfig, provider_registry: ProviderRegistry | None = None):
|
|
373
514
|
self.run_config = run_config
|
|
374
515
|
self.provider_registry = provider_registry
|
|
375
516
|
self.impls = None
|
|
@@ -412,6 +553,7 @@ class Stack:
|
|
|
412
553
|
await register_resources(self.run_config, impls)
|
|
413
554
|
await refresh_registry_once(impls)
|
|
414
555
|
await validate_vector_stores_config(self.run_config.vector_stores, impls)
|
|
556
|
+
await validate_safety_config(self.run_config.safety, impls)
|
|
415
557
|
self.impls = impls
|
|
416
558
|
|
|
417
559
|
def create_registry_refresh_task(self):
|
|
@@ -474,20 +616,20 @@ async def refresh_registry_task(impls: dict[Api, Any]):
|
|
|
474
616
|
await asyncio.sleep(REGISTRY_REFRESH_INTERVAL_SECONDS)
|
|
475
617
|
|
|
476
618
|
|
|
477
|
-
def get_stack_run_config_from_distro(distro: str) ->
|
|
478
|
-
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/
|
|
619
|
+
def get_stack_run_config_from_distro(distro: str) -> StackConfig:
|
|
620
|
+
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml"
|
|
479
621
|
|
|
480
622
|
with importlib.resources.as_file(distro_path) as path:
|
|
481
623
|
if not path.exists():
|
|
482
624
|
raise ValueError(f"Distribution '{distro}' not found at {distro_path}")
|
|
483
625
|
run_config = yaml.safe_load(path.open())
|
|
484
626
|
|
|
485
|
-
return
|
|
627
|
+
return StackConfig(**replace_env_vars(run_config))
|
|
486
628
|
|
|
487
629
|
|
|
488
630
|
def run_config_from_adhoc_config_spec(
|
|
489
631
|
adhoc_config_spec: str, provider_registry: ProviderRegistry | None = None
|
|
490
|
-
) ->
|
|
632
|
+
) -> StackConfig:
|
|
491
633
|
"""
|
|
492
634
|
Create an adhoc distribution from a list of API providers.
|
|
493
635
|
|
|
@@ -527,7 +669,7 @@ def run_config_from_adhoc_config_spec(
|
|
|
527
669
|
config=provider_config,
|
|
528
670
|
)
|
|
529
671
|
]
|
|
530
|
-
config =
|
|
672
|
+
config = StackConfig(
|
|
531
673
|
image_name="distro-test",
|
|
532
674
|
apis=list(provider_configs_by_api.keys()),
|
|
533
675
|
providers=provider_configs_by_api,
|
|
@@ -540,6 +682,7 @@ def run_config_from_adhoc_config_spec(
|
|
|
540
682
|
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
|
|
541
683
|
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
|
|
542
684
|
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
|
|
685
|
+
prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
|
|
543
686
|
),
|
|
544
687
|
),
|
|
545
688
|
)
|
|
@@ -12,6 +12,8 @@ from typing import Annotated, Literal
|
|
|
12
12
|
|
|
13
13
|
from pydantic import BaseModel, Field, field_validator
|
|
14
14
|
|
|
15
|
+
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
class StorageBackendType(StrEnum):
|
|
17
19
|
KV_REDIS = "kv_redis"
|
|
@@ -256,25 +258,46 @@ class ResponsesStoreReference(InferenceStoreReference):
|
|
|
256
258
|
|
|
257
259
|
class ServerStoresConfig(BaseModel):
|
|
258
260
|
metadata: KVStoreReference | None = Field(
|
|
259
|
-
default=
|
|
261
|
+
default=KVStoreReference(
|
|
262
|
+
backend="kv_default",
|
|
263
|
+
namespace="registry",
|
|
264
|
+
),
|
|
260
265
|
description="Metadata store configuration (uses KV backend)",
|
|
261
266
|
)
|
|
262
267
|
inference: InferenceStoreReference | None = Field(
|
|
263
|
-
default=
|
|
268
|
+
default=InferenceStoreReference(
|
|
269
|
+
backend="sql_default",
|
|
270
|
+
table_name="inference_store",
|
|
271
|
+
),
|
|
264
272
|
description="Inference store configuration (uses SQL backend)",
|
|
265
273
|
)
|
|
266
274
|
conversations: SqlStoreReference | None = Field(
|
|
267
|
-
default=
|
|
275
|
+
default=SqlStoreReference(
|
|
276
|
+
backend="sql_default",
|
|
277
|
+
table_name="openai_conversations",
|
|
278
|
+
),
|
|
268
279
|
description="Conversations store configuration (uses SQL backend)",
|
|
269
280
|
)
|
|
270
281
|
responses: ResponsesStoreReference | None = Field(
|
|
271
282
|
default=None,
|
|
272
283
|
description="Responses store configuration (uses SQL backend)",
|
|
273
284
|
)
|
|
285
|
+
prompts: KVStoreReference | None = Field(
|
|
286
|
+
default=KVStoreReference(backend="kv_default", namespace="prompts"),
|
|
287
|
+
description="Prompts store configuration (uses KV backend)",
|
|
288
|
+
)
|
|
274
289
|
|
|
275
290
|
|
|
276
291
|
class StorageConfig(BaseModel):
|
|
277
292
|
backends: dict[str, StorageBackendConfig] = Field(
|
|
293
|
+
default={
|
|
294
|
+
"kv_default": SqliteKVStoreConfig(
|
|
295
|
+
db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db",
|
|
296
|
+
),
|
|
297
|
+
"sql_default": SqliteSqlStoreConfig(
|
|
298
|
+
db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db",
|
|
299
|
+
),
|
|
300
|
+
},
|
|
278
301
|
description="Named backend configurations (e.g., 'default', 'cache')",
|
|
279
302
|
)
|
|
280
303
|
stores: ServerStoresConfig = Field(
|
|
@@ -11,10 +11,21 @@
|
|
|
11
11
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
import asyncio
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import cast
|
|
15
18
|
|
|
16
|
-
from .
|
|
17
|
-
from .
|
|
19
|
+
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig
|
|
20
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
21
|
+
|
|
22
|
+
from .config import (
|
|
23
|
+
KVStoreConfig,
|
|
24
|
+
MongoDBKVStoreConfig,
|
|
25
|
+
PostgresKVStoreConfig,
|
|
26
|
+
RedisKVStoreConfig,
|
|
27
|
+
SqliteKVStoreConfig,
|
|
28
|
+
)
|
|
18
29
|
|
|
19
30
|
|
|
20
31
|
def kvstore_dependencies():
|
|
@@ -30,7 +41,7 @@ def kvstore_dependencies():
|
|
|
30
41
|
|
|
31
42
|
class InmemoryKVStoreImpl(KVStore):
|
|
32
43
|
def __init__(self):
|
|
33
|
-
self._store = {}
|
|
44
|
+
self._store: dict[str, str] = {}
|
|
34
45
|
|
|
35
46
|
async def initialize(self) -> None:
|
|
36
47
|
pass
|
|
@@ -38,7 +49,7 @@ class InmemoryKVStoreImpl(KVStore):
|
|
|
38
49
|
async def get(self, key: str) -> str | None:
|
|
39
50
|
return self._store.get(key)
|
|
40
51
|
|
|
41
|
-
async def set(self, key: str, value: str) -> None:
|
|
52
|
+
async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
|
|
42
53
|
self._store[key] = value
|
|
43
54
|
|
|
44
55
|
async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
|
|
@@ -53,45 +64,65 @@ class InmemoryKVStoreImpl(KVStore):
|
|
|
53
64
|
|
|
54
65
|
|
|
55
66
|
_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
|
|
67
|
+
_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
|
|
68
|
+
_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)
|
|
56
69
|
|
|
57
70
|
|
|
58
71
|
def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
|
|
59
72
|
"""Register the set of available KV store backends for reference resolution."""
|
|
60
73
|
global _KVSTORE_BACKENDS
|
|
74
|
+
global _KVSTORE_INSTANCES
|
|
75
|
+
global _KVSTORE_LOCKS
|
|
61
76
|
|
|
62
77
|
_KVSTORE_BACKENDS.clear()
|
|
78
|
+
_KVSTORE_INSTANCES.clear()
|
|
79
|
+
_KVSTORE_LOCKS.clear()
|
|
63
80
|
for name, cfg in backends.items():
|
|
64
|
-
|
|
81
|
+
typed_cfg = cast(KVStoreConfig, cfg)
|
|
82
|
+
_KVSTORE_BACKENDS[name] = typed_cfg
|
|
65
83
|
|
|
66
84
|
|
|
67
85
|
async def kvstore_impl(reference: KVStoreReference) -> KVStore:
|
|
68
86
|
backend_name = reference.backend
|
|
87
|
+
cache_key = (backend_name, reference.namespace)
|
|
88
|
+
|
|
89
|
+
existing = _KVSTORE_INSTANCES.get(cache_key)
|
|
90
|
+
if existing:
|
|
91
|
+
return existing
|
|
69
92
|
|
|
70
93
|
backend_config = _KVSTORE_BACKENDS.get(backend_name)
|
|
71
94
|
if backend_config is None:
|
|
72
95
|
raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}")
|
|
73
96
|
|
|
74
|
-
|
|
75
|
-
|
|
97
|
+
lock = _KVSTORE_LOCKS[cache_key]
|
|
98
|
+
async with lock:
|
|
99
|
+
existing = _KVSTORE_INSTANCES.get(cache_key)
|
|
100
|
+
if existing:
|
|
101
|
+
return existing
|
|
102
|
+
|
|
103
|
+
config = backend_config.model_copy()
|
|
104
|
+
config.namespace = reference.namespace
|
|
76
105
|
|
|
77
|
-
|
|
78
|
-
|
|
106
|
+
impl: KVStore
|
|
107
|
+
if isinstance(config, RedisKVStoreConfig):
|
|
108
|
+
from .redis import RedisKVStoreImpl
|
|
79
109
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
110
|
+
impl = RedisKVStoreImpl(config)
|
|
111
|
+
elif isinstance(config, SqliteKVStoreConfig):
|
|
112
|
+
from .sqlite import SqliteKVStoreImpl
|
|
83
113
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
114
|
+
impl = SqliteKVStoreImpl(config)
|
|
115
|
+
elif isinstance(config, PostgresKVStoreConfig):
|
|
116
|
+
from .postgres import PostgresKVStoreImpl
|
|
87
117
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
118
|
+
impl = PostgresKVStoreImpl(config)
|
|
119
|
+
elif isinstance(config, MongoDBKVStoreConfig):
|
|
120
|
+
from .mongodb import MongoDBKVStoreImpl
|
|
91
121
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
122
|
+
impl = MongoDBKVStoreImpl(config)
|
|
123
|
+
else:
|
|
124
|
+
raise ValueError(f"Unknown kvstore type {config.type}")
|
|
95
125
|
|
|
96
|
-
|
|
97
|
-
|
|
126
|
+
await impl.initialize()
|
|
127
|
+
_KVSTORE_INSTANCES[cache_key] = impl
|
|
128
|
+
return impl
|
|
@@ -9,8 +9,8 @@ from datetime import datetime
|
|
|
9
9
|
from pymongo import AsyncMongoClient
|
|
10
10
|
from pymongo.asynchronous.collection import AsyncCollection
|
|
11
11
|
|
|
12
|
+
from llama_stack.core.storage.kvstore import KVStore
|
|
12
13
|
from llama_stack.log import get_logger
|
|
13
|
-
from llama_stack.providers.utils.kvstore import KVStore
|
|
14
14
|
|
|
15
15
|
from ..config import MongoDBKVStoreConfig
|
|
16
16
|
|
|
@@ -30,14 +30,13 @@ class MongoDBKVStoreImpl(KVStore):
|
|
|
30
30
|
|
|
31
31
|
async def initialize(self) -> None:
|
|
32
32
|
try:
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
self.conn = AsyncMongoClient(**conn_creds)
|
|
33
|
+
# Pass parameters explicitly to satisfy mypy - AsyncMongoClient doesn't accept **dict
|
|
34
|
+
self.conn = AsyncMongoClient(
|
|
35
|
+
host=self.config.host if self.config.host is not None else None,
|
|
36
|
+
port=self.config.port if self.config.port is not None else None,
|
|
37
|
+
username=self.config.user if self.config.user is not None else None,
|
|
38
|
+
password=self.config.password if self.config.password is not None else None,
|
|
39
|
+
)
|
|
41
40
|
except Exception as e:
|
|
42
41
|
log.exception("Could not connect to MongoDB database server")
|
|
43
42
|
raise RuntimeError("Could not connect to MongoDB database server") from e
|
|
@@ -79,4 +78,8 @@ class MongoDBKVStoreImpl(KVStore):
|
|
|
79
78
|
end_key = self._namespaced_key(end_key)
|
|
80
79
|
query = {"key": {"$gte": start_key, "$lt": end_key}}
|
|
81
80
|
cursor = self.collection.find(query, {"key": 1, "_id": 0}).sort("key", 1)
|
|
82
|
-
|
|
81
|
+
# AsyncCursor requires async iteration
|
|
82
|
+
result = []
|
|
83
|
+
async for doc in cursor:
|
|
84
|
+
result.append(doc["key"])
|
|
85
|
+
return result
|