llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/__init__.py
CHANGED
llama_stack/cli/llama.py
CHANGED
|
@@ -8,6 +8,9 @@ import argparse
|
|
|
8
8
|
|
|
9
9
|
from llama_stack.log import setup_logging
|
|
10
10
|
|
|
11
|
+
# Initialize logging early before any loggers get created
|
|
12
|
+
setup_logging()
|
|
13
|
+
|
|
11
14
|
from .stack import StackParser
|
|
12
15
|
from .stack.utils import print_subcommand_description
|
|
13
16
|
|
|
@@ -44,9 +47,6 @@ class LlamaCLIParser:
|
|
|
44
47
|
|
|
45
48
|
|
|
46
49
|
def main():
|
|
47
|
-
# Initialize logging from environment variables before any other operations
|
|
48
|
-
setup_logging()
|
|
49
|
-
|
|
50
50
|
parser = LlamaCLIParser()
|
|
51
51
|
args = parser.parse_args()
|
|
52
52
|
parser.run(args)
|
|
@@ -11,17 +11,11 @@ from pathlib import Path
|
|
|
11
11
|
import yaml
|
|
12
12
|
from termcolor import cprint
|
|
13
13
|
|
|
14
|
-
from llama_stack.cli.stack.utils import ImageType
|
|
15
14
|
from llama_stack.core.build import get_provider_dependencies
|
|
16
|
-
from llama_stack.core.datatypes import
|
|
17
|
-
BuildConfig,
|
|
18
|
-
BuildProvider,
|
|
19
|
-
DistributionSpec,
|
|
20
|
-
)
|
|
15
|
+
from llama_stack.core.datatypes import Provider, StackConfig
|
|
21
16
|
from llama_stack.core.distribution import get_provider_registry
|
|
22
|
-
from llama_stack.core.stack import replace_env_vars
|
|
23
17
|
from llama_stack.log import get_logger
|
|
24
|
-
from
|
|
18
|
+
from llama_stack_api import Api
|
|
25
19
|
|
|
26
20
|
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
|
|
27
21
|
|
|
@@ -70,9 +64,9 @@ def format_output_deps_only(
|
|
|
70
64
|
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
|
71
65
|
if args.config:
|
|
72
66
|
try:
|
|
73
|
-
from llama_stack.core.utils.config_resolution import
|
|
67
|
+
from llama_stack.core.utils.config_resolution import resolve_config_or_distro
|
|
74
68
|
|
|
75
|
-
config_file = resolve_config_or_distro(args.config
|
|
69
|
+
config_file = resolve_config_or_distro(args.config)
|
|
76
70
|
except ValueError as e:
|
|
77
71
|
cprint(
|
|
78
72
|
f"Could not parse config file {args.config}: {e}",
|
|
@@ -84,9 +78,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
|
|
84
78
|
with open(config_file) as f:
|
|
85
79
|
try:
|
|
86
80
|
contents = yaml.safe_load(f)
|
|
87
|
-
|
|
88
|
-
build_config = BuildConfig(**contents)
|
|
89
|
-
build_config.image_type = "venv"
|
|
81
|
+
config = StackConfig(**contents)
|
|
90
82
|
except Exception as e:
|
|
91
83
|
cprint(
|
|
92
84
|
f"Could not parse config file {config_file}: {e}",
|
|
@@ -95,7 +87,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
|
|
95
87
|
)
|
|
96
88
|
sys.exit(1)
|
|
97
89
|
elif args.providers:
|
|
98
|
-
provider_list: dict[str, list[
|
|
90
|
+
provider_list: dict[str, list[Provider]] = dict()
|
|
99
91
|
for api_provider in args.providers.split(","):
|
|
100
92
|
if "=" not in api_provider:
|
|
101
93
|
cprint(
|
|
@@ -114,8 +106,9 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
|
|
114
106
|
)
|
|
115
107
|
sys.exit(1)
|
|
116
108
|
if provider_type in providers_for_api:
|
|
117
|
-
provider =
|
|
109
|
+
provider = Provider(
|
|
118
110
|
provider_type=provider_type,
|
|
111
|
+
provider_id=provider_type.split("::")[1],
|
|
119
112
|
module=None,
|
|
120
113
|
)
|
|
121
114
|
provider_list.setdefault(api, []).append(provider)
|
|
@@ -126,20 +119,16 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
|
|
126
119
|
file=sys.stderr,
|
|
127
120
|
)
|
|
128
121
|
sys.exit(1)
|
|
129
|
-
|
|
130
|
-
providers=provider_list,
|
|
131
|
-
description=",".join(args.providers),
|
|
132
|
-
)
|
|
133
|
-
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
|
|
122
|
+
config = StackConfig(providers=provider_list, image_name="providers-run")
|
|
134
123
|
|
|
135
|
-
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(
|
|
124
|
+
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(config)
|
|
136
125
|
normal_deps += SERVER_DEPENDENCIES
|
|
137
126
|
|
|
138
127
|
# Add external API dependencies
|
|
139
|
-
if
|
|
128
|
+
if config.external_apis_dir:
|
|
140
129
|
from llama_stack.core.external import load_external_apis
|
|
141
130
|
|
|
142
|
-
external_apis = load_external_apis(
|
|
131
|
+
external_apis = load_external_apis(config)
|
|
143
132
|
if external_apis:
|
|
144
133
|
for _, api_spec in external_apis.items():
|
|
145
134
|
normal_deps.extend(api_spec.pip_packages)
|
|
@@ -9,48 +9,67 @@ from pathlib import Path
|
|
|
9
9
|
|
|
10
10
|
from llama_stack.cli.subcommand import Subcommand
|
|
11
11
|
from llama_stack.cli.table import print_table
|
|
12
|
+
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class StackListBuilds(Subcommand):
|
|
15
|
-
"""List
|
|
16
|
+
"""List available distributions (both built-in and custom)"""
|
|
16
17
|
|
|
17
18
|
def __init__(self, subparsers: argparse._SubParsersAction):
|
|
18
19
|
super().__init__()
|
|
19
20
|
self.parser = subparsers.add_parser(
|
|
20
21
|
"list",
|
|
21
22
|
prog="llama stack list",
|
|
22
|
-
description="list
|
|
23
|
+
description="list available distributions",
|
|
23
24
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
24
25
|
)
|
|
25
26
|
self._add_arguments()
|
|
26
27
|
self.parser.set_defaults(func=self._list_stack_command)
|
|
27
28
|
|
|
28
|
-
def _get_distribution_dirs(self) -> dict[str, Path]:
|
|
29
|
-
"""Return a dictionary of distribution names and their paths
|
|
29
|
+
def _get_distribution_dirs(self) -> dict[str, tuple[Path, str]]:
|
|
30
|
+
"""Return a dictionary of distribution names and their paths with source type
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
dict mapping distro name to (path, source_type) where source_type is 'built-in' or 'custom'
|
|
34
|
+
"""
|
|
30
35
|
distributions = {}
|
|
31
|
-
dist_dir = Path.home() / ".llama" / "distributions"
|
|
32
36
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
# Get built-in distributions from source code
|
|
38
|
+
distro_dir = Path(__file__).parent.parent.parent / "distributions"
|
|
39
|
+
if distro_dir.exists():
|
|
40
|
+
for stack_dir in distro_dir.iterdir():
|
|
41
|
+
if stack_dir.is_dir() and not stack_dir.name.startswith(".") and not stack_dir.name.startswith("__"):
|
|
42
|
+
distributions[stack_dir.name] = (stack_dir, "built-in")
|
|
43
|
+
|
|
44
|
+
# Get custom/run distributions from ~/.llama/distributions
|
|
45
|
+
# These override built-in ones if they have the same name
|
|
46
|
+
if DISTRIBS_BASE_DIR.exists():
|
|
47
|
+
for stack_dir in DISTRIBS_BASE_DIR.iterdir():
|
|
48
|
+
if stack_dir.is_dir() and not stack_dir.name.startswith("."):
|
|
49
|
+
# Clean up the name (remove llamastack- prefix if present)
|
|
50
|
+
name = stack_dir.name.replace("llamastack-", "")
|
|
51
|
+
distributions[name] = (stack_dir, "custom")
|
|
52
|
+
|
|
37
53
|
return distributions
|
|
38
54
|
|
|
39
55
|
def _list_stack_command(self, args: argparse.Namespace) -> None:
|
|
40
56
|
distributions = self._get_distribution_dirs()
|
|
41
57
|
|
|
42
58
|
if not distributions:
|
|
43
|
-
print("No
|
|
59
|
+
print("No distributions found")
|
|
44
60
|
return
|
|
45
61
|
|
|
46
|
-
headers = ["Stack Name", "Path"]
|
|
47
|
-
headers.extend(["Build Config", "Run Config"])
|
|
62
|
+
headers = ["Stack Name", "Source", "Path", "Config"]
|
|
48
63
|
rows = []
|
|
49
|
-
for name, path in distributions.items():
|
|
50
|
-
row = [name, str(path)]
|
|
51
|
-
# Check for
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
64
|
+
for name, (path, source_type) in sorted(distributions.items()):
|
|
65
|
+
row = [name, source_type, str(path)]
|
|
66
|
+
# Check for config files
|
|
67
|
+
# For built-in distributions, configs are named config.yaml
|
|
68
|
+
# For custom distributions, configs are named {name}-config.yaml
|
|
69
|
+
if source_type == "built-in":
|
|
70
|
+
config = "Yes" if (path / "config.yaml").exists() else "No"
|
|
71
|
+
else:
|
|
72
|
+
config = "Yes" if (path / f"{name}-config.yaml").exists() else "No"
|
|
73
|
+
row.extend([config])
|
|
55
74
|
rows.append(row)
|
|
56
75
|
print_table(rows, headers, separate_rows=True)
|
llama_stack/cli/stack/run.py
CHANGED
|
@@ -8,17 +8,31 @@ import argparse
|
|
|
8
8
|
import os
|
|
9
9
|
import ssl
|
|
10
10
|
import subprocess
|
|
11
|
+
import sys
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
|
|
13
14
|
import uvicorn
|
|
14
15
|
import yaml
|
|
16
|
+
from termcolor import cprint
|
|
15
17
|
|
|
16
18
|
from llama_stack.cli.stack.utils import ImageType
|
|
17
19
|
from llama_stack.cli.subcommand import Subcommand
|
|
18
|
-
from llama_stack.core.datatypes import
|
|
20
|
+
from llama_stack.core.datatypes import Api, Provider, StackConfig
|
|
21
|
+
from llama_stack.core.distribution import get_provider_registry
|
|
19
22
|
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
|
20
|
-
from llama_stack.core.
|
|
21
|
-
|
|
23
|
+
from llama_stack.core.storage.datatypes import (
|
|
24
|
+
InferenceStoreReference,
|
|
25
|
+
KVStoreReference,
|
|
26
|
+
ServerStoresConfig,
|
|
27
|
+
SqliteKVStoreConfig,
|
|
28
|
+
SqliteSqlStoreConfig,
|
|
29
|
+
SqlStoreReference,
|
|
30
|
+
StorageConfig,
|
|
31
|
+
)
|
|
32
|
+
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
|
33
|
+
from llama_stack.core.utils.config_resolution import resolve_config_or_distro
|
|
34
|
+
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
35
|
+
from llama_stack.log import LoggingConfig, get_logger
|
|
22
36
|
|
|
23
37
|
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
|
24
38
|
|
|
@@ -68,6 +82,12 @@ class StackRun(Subcommand):
|
|
|
68
82
|
action="store_true",
|
|
69
83
|
help="Start the UI server",
|
|
70
84
|
)
|
|
85
|
+
self.parser.add_argument(
|
|
86
|
+
"--providers",
|
|
87
|
+
type=str,
|
|
88
|
+
default=None,
|
|
89
|
+
help="Run a stack with only a list of providers. This list is formatted like: api1=provider1,api1=provider2,api2=provider3. Where there can be multiple providers per API.",
|
|
90
|
+
)
|
|
71
91
|
|
|
72
92
|
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
|
73
93
|
import yaml
|
|
@@ -88,16 +108,65 @@ class StackRun(Subcommand):
|
|
|
88
108
|
|
|
89
109
|
if args.config:
|
|
90
110
|
try:
|
|
91
|
-
from llama_stack.core.utils.config_resolution import
|
|
111
|
+
from llama_stack.core.utils.config_resolution import resolve_config_or_distro
|
|
92
112
|
|
|
93
|
-
config_file = resolve_config_or_distro(args.config
|
|
113
|
+
config_file = resolve_config_or_distro(args.config)
|
|
94
114
|
except ValueError as e:
|
|
95
115
|
self.parser.error(str(e))
|
|
116
|
+
elif args.providers:
|
|
117
|
+
provider_list: dict[str, list[Provider]] = dict()
|
|
118
|
+
for api_provider in args.providers.split(","):
|
|
119
|
+
if "=" not in api_provider:
|
|
120
|
+
cprint(
|
|
121
|
+
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
|
122
|
+
color="red",
|
|
123
|
+
file=sys.stderr,
|
|
124
|
+
)
|
|
125
|
+
sys.exit(1)
|
|
126
|
+
api, provider_type = api_provider.split("=")
|
|
127
|
+
providers_for_api = get_provider_registry().get(Api(api), None)
|
|
128
|
+
if providers_for_api is None:
|
|
129
|
+
cprint(
|
|
130
|
+
f"{api} is not a valid API.",
|
|
131
|
+
color="red",
|
|
132
|
+
file=sys.stderr,
|
|
133
|
+
)
|
|
134
|
+
sys.exit(1)
|
|
135
|
+
if provider_type in providers_for_api:
|
|
136
|
+
config_type = instantiate_class_type(providers_for_api[provider_type].config_class)
|
|
137
|
+
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
|
138
|
+
config = config_type.sample_run_config(__distro_dir__="~/.llama/distributions/providers-run")
|
|
139
|
+
else:
|
|
140
|
+
config = {}
|
|
141
|
+
provider = Provider(
|
|
142
|
+
provider_type=provider_type,
|
|
143
|
+
config=config,
|
|
144
|
+
provider_id=provider_type.split("::")[1],
|
|
145
|
+
)
|
|
146
|
+
provider_list.setdefault(api, []).append(provider)
|
|
147
|
+
else:
|
|
148
|
+
cprint(
|
|
149
|
+
f"{provider} is not a valid provider for the {api} API.",
|
|
150
|
+
color="red",
|
|
151
|
+
file=sys.stderr,
|
|
152
|
+
)
|
|
153
|
+
sys.exit(1)
|
|
154
|
+
run_config = self._generate_run_config_from_providers(providers=provider_list)
|
|
155
|
+
config_dict = run_config.model_dump(mode="json")
|
|
156
|
+
|
|
157
|
+
# Write config to disk in providers-run directory
|
|
158
|
+
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
|
|
159
|
+
config_file = distro_dir / "config.yaml"
|
|
160
|
+
|
|
161
|
+
logger.info(f"Writing generated config to: {config_file}")
|
|
162
|
+
with open(config_file, "w") as f:
|
|
163
|
+
yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
|
|
164
|
+
|
|
96
165
|
else:
|
|
97
166
|
config_file = None
|
|
98
167
|
|
|
99
168
|
if config_file:
|
|
100
|
-
logger.info(f"Using
|
|
169
|
+
logger.info(f"Using stack configuration: {config_file}")
|
|
101
170
|
|
|
102
171
|
try:
|
|
103
172
|
config_dict = yaml.safe_load(config_file.read_text())
|
|
@@ -118,17 +187,17 @@ class StackRun(Subcommand):
|
|
|
118
187
|
if not config_file:
|
|
119
188
|
self.parser.error("Config file is required")
|
|
120
189
|
|
|
121
|
-
config_file = resolve_config_or_distro(str(config_file)
|
|
190
|
+
config_file = resolve_config_or_distro(str(config_file))
|
|
122
191
|
with open(config_file) as fp:
|
|
123
192
|
config_contents = yaml.safe_load(fp)
|
|
124
193
|
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
|
125
194
|
logger_config = LoggingConfig(**cfg)
|
|
126
195
|
else:
|
|
127
196
|
logger_config = None
|
|
128
|
-
config =
|
|
197
|
+
config = StackConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
|
129
198
|
|
|
130
199
|
port = args.port or config.server.port
|
|
131
|
-
host = config.server.host or "0.0.0.0"
|
|
200
|
+
host = config.server.host or ["::", "0.0.0.0"]
|
|
132
201
|
|
|
133
202
|
# Set the config file in environment so create_app can find it
|
|
134
203
|
os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
|
|
@@ -170,7 +239,7 @@ class StackRun(Subcommand):
|
|
|
170
239
|
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
|
171
240
|
# signal handling but this is quite intrusive and not worth the effort.
|
|
172
241
|
try:
|
|
173
|
-
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
|
|
242
|
+
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config) # type: ignore[arg-type]
|
|
174
243
|
except (KeyboardInterrupt, SystemExit):
|
|
175
244
|
logger.info("Received interrupt signal, shutting down gracefully...")
|
|
176
245
|
|
|
@@ -184,7 +253,7 @@ class StackRun(Subcommand):
|
|
|
184
253
|
)
|
|
185
254
|
return
|
|
186
255
|
|
|
187
|
-
ui_dir = REPO_ROOT / "
|
|
256
|
+
ui_dir = REPO_ROOT / "llama_stack_ui"
|
|
188
257
|
logs_dir = Path("~/.llama/ui/logs").expanduser()
|
|
189
258
|
try:
|
|
190
259
|
# Create logs directory if it doesn't exist
|
|
@@ -214,3 +283,44 @@ class StackRun(Subcommand):
|
|
|
214
283
|
)
|
|
215
284
|
except Exception as e:
|
|
216
285
|
logger.error(f"Failed to start UI development server in {ui_dir}: {e}")
|
|
286
|
+
|
|
287
|
+
def _generate_run_config_from_providers(self, providers: dict[str, list[Provider]]):
|
|
288
|
+
apis = list(providers.keys())
|
|
289
|
+
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
|
|
290
|
+
# need somewhere to put the storage.
|
|
291
|
+
os.makedirs(distro_dir, exist_ok=True)
|
|
292
|
+
storage = StorageConfig(
|
|
293
|
+
backends={
|
|
294
|
+
"kv_default": SqliteKVStoreConfig(
|
|
295
|
+
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
|
|
296
|
+
),
|
|
297
|
+
"sql_default": SqliteSqlStoreConfig(
|
|
298
|
+
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
|
|
299
|
+
),
|
|
300
|
+
},
|
|
301
|
+
stores=ServerStoresConfig(
|
|
302
|
+
metadata=KVStoreReference(
|
|
303
|
+
backend="kv_default",
|
|
304
|
+
namespace="registry",
|
|
305
|
+
),
|
|
306
|
+
inference=InferenceStoreReference(
|
|
307
|
+
backend="sql_default",
|
|
308
|
+
table_name="inference_store",
|
|
309
|
+
),
|
|
310
|
+
conversations=SqlStoreReference(
|
|
311
|
+
backend="sql_default",
|
|
312
|
+
table_name="openai_conversations",
|
|
313
|
+
),
|
|
314
|
+
prompts=KVStoreReference(
|
|
315
|
+
backend="kv_default",
|
|
316
|
+
namespace="prompts",
|
|
317
|
+
),
|
|
318
|
+
),
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
return StackConfig(
|
|
322
|
+
image_name="providers-run",
|
|
323
|
+
apis=apis,
|
|
324
|
+
providers=providers,
|
|
325
|
+
storage=storage,
|
|
326
|
+
)
|
llama_stack/cli/stack/utils.py
CHANGED
|
@@ -4,36 +4,9 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
import json
|
|
8
|
-
import sys
|
|
9
7
|
from enum import Enum
|
|
10
|
-
from functools import lru_cache
|
|
11
8
|
from pathlib import Path
|
|
12
9
|
|
|
13
|
-
import yaml
|
|
14
|
-
from termcolor import cprint
|
|
15
|
-
|
|
16
|
-
from llama_stack.core.datatypes import (
|
|
17
|
-
BuildConfig,
|
|
18
|
-
Provider,
|
|
19
|
-
StackRunConfig,
|
|
20
|
-
StorageConfig,
|
|
21
|
-
)
|
|
22
|
-
from llama_stack.core.distribution import get_provider_registry
|
|
23
|
-
from llama_stack.core.resolver import InvalidProviderError
|
|
24
|
-
from llama_stack.core.storage.datatypes import (
|
|
25
|
-
InferenceStoreReference,
|
|
26
|
-
KVStoreReference,
|
|
27
|
-
ServerStoresConfig,
|
|
28
|
-
SqliteKVStoreConfig,
|
|
29
|
-
SqliteSqlStoreConfig,
|
|
30
|
-
SqlStoreReference,
|
|
31
|
-
)
|
|
32
|
-
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
|
|
33
|
-
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
34
|
-
from llama_stack.core.utils.image_types import LlamaStackImageType
|
|
35
|
-
from llama_stack.providers.datatypes import Api
|
|
36
|
-
|
|
37
10
|
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
|
38
11
|
|
|
39
12
|
|
|
@@ -49,103 +22,3 @@ def print_subcommand_description(parser, subparsers):
|
|
|
49
22
|
description = subcommand.description
|
|
50
23
|
description_text += f" {name:<21} {description}\n"
|
|
51
24
|
parser.epilog = description_text
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def generate_run_config(
|
|
55
|
-
build_config: BuildConfig,
|
|
56
|
-
build_dir: Path,
|
|
57
|
-
image_name: str,
|
|
58
|
-
) -> Path:
|
|
59
|
-
"""
|
|
60
|
-
Generate a run.yaml template file for user to edit from a build.yaml file
|
|
61
|
-
"""
|
|
62
|
-
apis = list(build_config.distribution_spec.providers.keys())
|
|
63
|
-
distro_dir = DISTRIBS_BASE_DIR / image_name
|
|
64
|
-
run_config = StackRunConfig(
|
|
65
|
-
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
|
66
|
-
image_name=image_name,
|
|
67
|
-
apis=apis,
|
|
68
|
-
providers={},
|
|
69
|
-
storage=StorageConfig(
|
|
70
|
-
backends={
|
|
71
|
-
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
|
|
72
|
-
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
|
|
73
|
-
},
|
|
74
|
-
stores=ServerStoresConfig(
|
|
75
|
-
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
|
|
76
|
-
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
|
|
77
|
-
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
|
|
78
|
-
),
|
|
79
|
-
),
|
|
80
|
-
external_providers_dir=build_config.external_providers_dir
|
|
81
|
-
if build_config.external_providers_dir
|
|
82
|
-
else EXTERNAL_PROVIDERS_DIR,
|
|
83
|
-
)
|
|
84
|
-
# build providers dict
|
|
85
|
-
provider_registry = get_provider_registry(build_config)
|
|
86
|
-
for api in apis:
|
|
87
|
-
run_config.providers[api] = []
|
|
88
|
-
providers = build_config.distribution_spec.providers[api]
|
|
89
|
-
|
|
90
|
-
for provider in providers:
|
|
91
|
-
pid = provider.provider_type.split("::")[-1]
|
|
92
|
-
|
|
93
|
-
p = provider_registry[Api(api)][provider.provider_type]
|
|
94
|
-
if p.deprecation_error:
|
|
95
|
-
raise InvalidProviderError(p.deprecation_error)
|
|
96
|
-
|
|
97
|
-
try:
|
|
98
|
-
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
|
99
|
-
except (ModuleNotFoundError, ValueError) as exc:
|
|
100
|
-
# HACK ALERT:
|
|
101
|
-
# This code executes after building is done, the import cannot work since the
|
|
102
|
-
# package is either available in the venv or container - not available on the host.
|
|
103
|
-
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
|
104
|
-
# external
|
|
105
|
-
cprint(
|
|
106
|
-
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
|
107
|
-
color="yellow",
|
|
108
|
-
file=sys.stderr,
|
|
109
|
-
)
|
|
110
|
-
# Set config_type to None to avoid UnboundLocalError
|
|
111
|
-
config_type = None
|
|
112
|
-
|
|
113
|
-
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
|
114
|
-
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
|
115
|
-
else:
|
|
116
|
-
config = {}
|
|
117
|
-
|
|
118
|
-
p_spec = Provider(
|
|
119
|
-
provider_id=pid,
|
|
120
|
-
provider_type=provider.provider_type,
|
|
121
|
-
config=config,
|
|
122
|
-
module=provider.module,
|
|
123
|
-
)
|
|
124
|
-
run_config.providers[api].append(p_spec)
|
|
125
|
-
|
|
126
|
-
run_config_file = build_dir / f"{image_name}-run.yaml"
|
|
127
|
-
|
|
128
|
-
with open(run_config_file, "w") as f:
|
|
129
|
-
to_write = json.loads(run_config.model_dump_json())
|
|
130
|
-
f.write(yaml.dump(to_write, sort_keys=False))
|
|
131
|
-
|
|
132
|
-
# Only print this message for non-container builds since it will be displayed before the
|
|
133
|
-
# container is built
|
|
134
|
-
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
|
135
|
-
# makes sense to display this message
|
|
136
|
-
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
|
137
|
-
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
|
138
|
-
return run_config_file
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
@lru_cache
|
|
142
|
-
def available_templates_specs() -> dict[str, BuildConfig]:
|
|
143
|
-
import yaml
|
|
144
|
-
|
|
145
|
-
template_specs = {}
|
|
146
|
-
for p in TEMPLATES_PATH.rglob("*build.yaml"):
|
|
147
|
-
template_name = p.parent.name
|
|
148
|
-
with open(p) as f:
|
|
149
|
-
build_config = BuildConfig(**yaml.safe_load(f))
|
|
150
|
-
template_specs[template_name] = build_config
|
|
151
|
-
return template_specs
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
9
|
from llama_stack.core.datatypes import User
|
|
10
|
+
from llama_stack.log import get_logger
|
|
10
11
|
|
|
11
12
|
from .conditions import (
|
|
12
13
|
Condition,
|
|
@@ -19,6 +20,8 @@ from .datatypes import (
|
|
|
19
20
|
Scope,
|
|
20
21
|
)
|
|
21
22
|
|
|
23
|
+
logger = get_logger(name=__name__, category="core::auth")
|
|
24
|
+
|
|
22
25
|
|
|
23
26
|
def matches_resource(resource_scope: str, actual_resource: str) -> bool:
|
|
24
27
|
if resource_scope == actual_resource:
|
|
@@ -63,7 +66,17 @@ def default_policy() -> list[AccessRule]:
|
|
|
63
66
|
return [
|
|
64
67
|
AccessRule(
|
|
65
68
|
permit=Scope(actions=list(Action)),
|
|
66
|
-
when=["user in owners " + name
|
|
69
|
+
when=["user in owners " + name],
|
|
70
|
+
)
|
|
71
|
+
for name in ["roles", "teams", "projects", "namespaces"]
|
|
72
|
+
] + [
|
|
73
|
+
AccessRule(
|
|
74
|
+
permit=Scope(actions=list(Action)),
|
|
75
|
+
when=["user is owner"],
|
|
76
|
+
),
|
|
77
|
+
AccessRule(
|
|
78
|
+
permit=Scope(actions=list(Action)),
|
|
79
|
+
when=["resource is unowned"],
|
|
67
80
|
),
|
|
68
81
|
]
|
|
69
82
|
|
|
@@ -74,35 +87,63 @@ def is_action_allowed(
|
|
|
74
87
|
resource: ProtectedResource,
|
|
75
88
|
user: User | None,
|
|
76
89
|
) -> bool:
|
|
90
|
+
qualified_resource_id = f"{resource.type}::{resource.identifier}"
|
|
91
|
+
decision = False
|
|
92
|
+
reason = ""
|
|
93
|
+
index = -1
|
|
94
|
+
|
|
77
95
|
# If user is not set, assume authentication is not enabled
|
|
78
96
|
if not user:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
97
|
+
decision = True
|
|
98
|
+
reason = "no auth"
|
|
99
|
+
else:
|
|
100
|
+
if not len(policy):
|
|
101
|
+
policy = default_policy()
|
|
102
|
+
|
|
103
|
+
for index, rule in enumerate(policy): # noqa: B007
|
|
104
|
+
if rule.forbid and matches_scope(rule.forbid, action, qualified_resource_id, user.principal):
|
|
105
|
+
if rule.when:
|
|
106
|
+
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
|
|
107
|
+
decision = False
|
|
108
|
+
reason = rule.description or ""
|
|
109
|
+
break
|
|
110
|
+
elif rule.unless:
|
|
111
|
+
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
|
|
112
|
+
decision = False
|
|
113
|
+
reason = rule.description or ""
|
|
114
|
+
break
|
|
115
|
+
else:
|
|
116
|
+
decision = False
|
|
117
|
+
reason = rule.description or ""
|
|
118
|
+
break
|
|
119
|
+
elif rule.permit and matches_scope(rule.permit, action, qualified_resource_id, user.principal):
|
|
120
|
+
if rule.when:
|
|
121
|
+
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
|
|
122
|
+
decision = True
|
|
123
|
+
reason = rule.description or ""
|
|
124
|
+
break
|
|
125
|
+
elif rule.unless:
|
|
126
|
+
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
|
|
127
|
+
decision = True
|
|
128
|
+
reason = rule.description or ""
|
|
129
|
+
break
|
|
130
|
+
else:
|
|
131
|
+
decision = True
|
|
132
|
+
reason = rule.description or ""
|
|
133
|
+
break
|
|
134
|
+
else:
|
|
135
|
+
reason = "no matching rule"
|
|
136
|
+
index = -1
|
|
137
|
+
|
|
138
|
+
# print apprived or denied
|
|
139
|
+
decision_str = "APPROVED" if decision else "DENIED"
|
|
140
|
+
user_str = user.principal if user else "none"
|
|
141
|
+
logger.debug(
|
|
142
|
+
f"AUTHZ,decision={decision_str},user={user_str},"
|
|
143
|
+
f"resource_id={qualified_resource_id},action={action},"
|
|
144
|
+
f"rule_index={index},reason={reason!r}"
|
|
145
|
+
)
|
|
146
|
+
return decision
|
|
106
147
|
|
|
107
148
|
|
|
108
149
|
class AccessDeniedError(RuntimeError):
|