llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -7,13 +7,23 @@
|
|
|
7
7
|
import time
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
|
-
from llama_stack.apis.common.errors import ModelNotFoundError
|
|
11
|
-
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
|
|
10
|
+
from llama_stack.core.access_control.access_control import is_action_allowed
|
|
12
11
|
from llama_stack.core.datatypes import (
|
|
13
12
|
ModelWithOwner,
|
|
14
13
|
RegistryEntrySource,
|
|
15
14
|
)
|
|
15
|
+
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData, get_authenticated_user
|
|
16
|
+
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
16
17
|
from llama_stack.log import get_logger
|
|
18
|
+
from llama_stack_api import (
|
|
19
|
+
ListModelsResponse,
|
|
20
|
+
Model,
|
|
21
|
+
ModelNotFoundError,
|
|
22
|
+
Models,
|
|
23
|
+
ModelType,
|
|
24
|
+
OpenAIListModelsResponse,
|
|
25
|
+
OpenAIModel,
|
|
26
|
+
)
|
|
17
27
|
|
|
18
28
|
from .common import CommonRoutingTableImpl, lookup_model
|
|
19
29
|
|
|
@@ -42,19 +52,122 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
|
|
42
52
|
|
|
43
53
|
await self.update_registered_models(provider_id, models)
|
|
44
54
|
|
|
55
|
+
async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
|
|
56
|
+
"""
|
|
57
|
+
Fetch models from providers that have credentials in the current request's provider_data.
|
|
58
|
+
|
|
59
|
+
This allows users to see models available to them from providers that require
|
|
60
|
+
per-request API keys (via X-LlamaStack-Provider-Data header).
|
|
61
|
+
|
|
62
|
+
Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
|
|
63
|
+
cache them in the registry since they are user-specific.
|
|
64
|
+
"""
|
|
65
|
+
provider_data = PROVIDER_DATA_VAR.get()
|
|
66
|
+
if not provider_data:
|
|
67
|
+
return []
|
|
68
|
+
|
|
69
|
+
dynamic_models = []
|
|
70
|
+
user = get_authenticated_user()
|
|
71
|
+
|
|
72
|
+
for provider_id, provider in self.impls_by_provider_id.items():
|
|
73
|
+
# Check if this provider supports provider_data
|
|
74
|
+
if not isinstance(provider, NeedsRequestProviderData):
|
|
75
|
+
continue
|
|
76
|
+
|
|
77
|
+
# Check if provider has a validator (some providers like ollama don't need per-request credentials)
|
|
78
|
+
spec = getattr(provider, "__provider_spec__", None)
|
|
79
|
+
if not spec or not getattr(spec, "provider_data_validator", None):
|
|
80
|
+
continue
|
|
81
|
+
|
|
82
|
+
# Validate provider_data silently - we're speculatively checking all providers
|
|
83
|
+
# so validation failures are expected when user didn't provide keys for this provider
|
|
84
|
+
try:
|
|
85
|
+
validator = instantiate_class_type(spec.provider_data_validator)
|
|
86
|
+
validator(**provider_data)
|
|
87
|
+
except Exception:
|
|
88
|
+
# User didn't provide credentials for this provider - skip silently
|
|
89
|
+
continue
|
|
90
|
+
|
|
91
|
+
# Validation succeeded! User has credentials for this provider
|
|
92
|
+
# Now try to list models
|
|
93
|
+
try:
|
|
94
|
+
models = await provider.list_models()
|
|
95
|
+
if not models:
|
|
96
|
+
continue
|
|
97
|
+
|
|
98
|
+
# Ensure models have fully qualified identifiers and apply RBAC filtering
|
|
99
|
+
for model in models:
|
|
100
|
+
# Only add prefix if model identifier doesn't already have it
|
|
101
|
+
if not model.identifier.startswith(f"{provider_id}/"):
|
|
102
|
+
model.identifier = f"{provider_id}/{model.provider_resource_id}"
|
|
103
|
+
|
|
104
|
+
# Convert to ModelWithOwner for RBAC check
|
|
105
|
+
temp_model = ModelWithOwner(
|
|
106
|
+
identifier=model.identifier,
|
|
107
|
+
provider_id=provider_id,
|
|
108
|
+
provider_resource_id=model.provider_resource_id,
|
|
109
|
+
model_type=model.model_type,
|
|
110
|
+
metadata=model.metadata,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
# Apply RBAC check - only include models user has read permission for
|
|
114
|
+
if is_action_allowed(self.policy, "read", temp_model, user):
|
|
115
|
+
dynamic_models.append(model)
|
|
116
|
+
else:
|
|
117
|
+
logger.debug(
|
|
118
|
+
f"Access denied to dynamic model '{model.identifier}' for user {user.principal if user else 'anonymous'}"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
logger.debug(
|
|
122
|
+
f"Fetched {len(dynamic_models)} accessible models from provider {provider_id} using provider_data"
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
except Exception as e:
|
|
126
|
+
logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
|
|
127
|
+
continue
|
|
128
|
+
|
|
129
|
+
return dynamic_models
|
|
130
|
+
|
|
45
131
|
async def list_models(self) -> ListModelsResponse:
    """Return registry models plus models discovered through provider_data.

    When a dynamically discovered model has the same identifier as a
    registry model, the registry entry is kept and the dynamic one dropped.
    """
    # Models stored in the registry.
    from_registry = await self.get_all_with_type("model")

    # Extra models reachable via provider_data (user-specific, not cached).
    from_provider_data = await self._get_dynamic_models_from_provider_data()

    # Keep only the dynamic entries the registry does not already know about.
    known_ids = {entry.identifier for entry in from_registry}
    extras = []
    for candidate in from_provider_data:
        if candidate.identifier not in known_ids:
            extras.append(candidate)

    return ListModelsResponse(data=from_registry + extras)
|
|
47
143
|
|
|
48
144
|
async def openai_list_models(self) -> OpenAIListModelsResponse:
    """Return registry and provider_data models in the OpenAI listing shape.

    Registry models take precedence over dynamically discovered models that
    share an identifier. Each model is converted to an ``OpenAIModel`` whose
    ``custom_metadata`` carries the model type, provider id and provider
    resource id alongside the model's own metadata.
    """
    # Models stored in the registry.
    from_registry = await self.get_all_with_type("model")

    # Extra models reachable via provider_data (user-specific, not cached).
    from_provider_data = await self._get_dynamic_models_from_provider_data()

    # Registry entries first, then dynamic entries with unseen identifiers.
    known_ids = {entry.identifier for entry in from_registry}
    combined = from_registry + [
        candidate for candidate in from_provider_data if candidate.identifier not in known_ids
    ]

    listing = []
    for entry in combined:
        listing.append(
            OpenAIModel(
                id=entry.identifier,
                object="model",
                created=int(time.time()),
                owned_by="llama_stack",
                # The metadata is spread last, so a metadata key with the same
                # name as one of the fixed keys overrides it.
                custom_metadata={
                    "model_type": entry.model_type,
                    "provider_id": entry.provider_id,
                    "provider_resource_id": entry.provider_resource_id,
                    **entry.metadata,
                },
            )
        )
    return OpenAIListModelsResponse(data=listing)
|
|
60
173
|
|
|
@@ -130,7 +243,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
|
|
130
243
|
existing_models = await self.get_all_with_type("model")
|
|
131
244
|
|
|
132
245
|
# we may have an alias for the model registered by the user (or during initialization
|
|
133
|
-
# from
|
|
246
|
+
# from config.yaml) that we need to keep track of
|
|
134
247
|
model_ids = {}
|
|
135
248
|
for model in existing_models:
|
|
136
249
|
if model.provider_id != provider_id:
|
|
@@ -4,18 +4,18 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from llama_stack.
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
from llama_stack.core.datatypes import (
|
|
8
|
+
ScoringFnWithOwner,
|
|
9
|
+
)
|
|
10
|
+
from llama_stack.log import get_logger
|
|
11
|
+
from llama_stack_api import (
|
|
10
12
|
ListScoringFunctionsResponse,
|
|
13
|
+
ParamType,
|
|
14
|
+
ResourceType,
|
|
11
15
|
ScoringFn,
|
|
12
16
|
ScoringFnParams,
|
|
13
17
|
ScoringFunctions,
|
|
14
18
|
)
|
|
15
|
-
from llama_stack.core.datatypes import (
|
|
16
|
-
ScoringFnWithOwner,
|
|
17
|
-
)
|
|
18
|
-
from llama_stack.log import get_logger
|
|
19
19
|
|
|
20
20
|
from .common import CommonRoutingTableImpl
|
|
21
21
|
|
|
@@ -6,12 +6,11 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.apis.resource import ResourceType
|
|
10
|
-
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
|
|
11
9
|
from llama_stack.core.datatypes import (
|
|
12
10
|
ShieldWithOwner,
|
|
13
11
|
)
|
|
14
12
|
from llama_stack.log import get_logger
|
|
13
|
+
from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
|
|
15
14
|
|
|
16
15
|
from .common import CommonRoutingTableImpl
|
|
17
16
|
|
|
@@ -6,11 +6,17 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.apis.common.content_types import URL
|
|
10
|
-
from llama_stack.apis.common.errors import ToolGroupNotFoundError
|
|
11
|
-
from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
|
|
12
9
|
from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
|
|
13
10
|
from llama_stack.log import get_logger
|
|
11
|
+
from llama_stack_api import (
|
|
12
|
+
URL,
|
|
13
|
+
ListToolDefsResponse,
|
|
14
|
+
ListToolGroupsResponse,
|
|
15
|
+
ToolDef,
|
|
16
|
+
ToolGroup,
|
|
17
|
+
ToolGroupNotFoundError,
|
|
18
|
+
ToolGroups,
|
|
19
|
+
)
|
|
14
20
|
|
|
15
21
|
from .common import CommonRoutingTableImpl
|
|
16
22
|
|
|
@@ -43,7 +49,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|
|
43
49
|
routing_key = self.tool_to_toolgroup[routing_key]
|
|
44
50
|
return await super().get_provider_impl(routing_key, provider_id)
|
|
45
51
|
|
|
46
|
-
async def list_tools(
|
|
52
|
+
async def list_tools(
|
|
53
|
+
self, toolgroup_id: str | None = None, authorization: str | None = None
|
|
54
|
+
) -> ListToolDefsResponse:
|
|
47
55
|
if toolgroup_id:
|
|
48
56
|
if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id):
|
|
49
57
|
toolgroup_id = group_id
|
|
@@ -55,7 +63,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|
|
55
63
|
for toolgroup in toolgroups:
|
|
56
64
|
if toolgroup.identifier not in self.toolgroups_to_tools:
|
|
57
65
|
try:
|
|
58
|
-
await self._index_tools(toolgroup)
|
|
66
|
+
await self._index_tools(toolgroup, authorization=authorization)
|
|
59
67
|
except AuthenticationRequiredError:
|
|
60
68
|
# Send authentication errors back to the client so it knows
|
|
61
69
|
# that it needs to supply credentials for remote MCP servers.
|
|
@@ -70,9 +78,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|
|
70
78
|
|
|
71
79
|
return ListToolDefsResponse(data=all_tools)
|
|
72
80
|
|
|
73
|
-
async def _index_tools(self, toolgroup: ToolGroup):
|
|
81
|
+
async def _index_tools(self, toolgroup: ToolGroup, authorization: str | None = None):
|
|
74
82
|
provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id)
|
|
75
|
-
tooldefs_response = await provider_impl.list_runtime_tools(
|
|
83
|
+
tooldefs_response = await provider_impl.list_runtime_tools(
|
|
84
|
+
toolgroup.identifier, toolgroup.mcp_endpoint, authorization=authorization
|
|
85
|
+
)
|
|
76
86
|
|
|
77
87
|
tooldefs = tooldefs_response.data
|
|
78
88
|
for t in tooldefs:
|
|
@@ -6,26 +6,31 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
from llama_stack.core.datatypes import (
|
|
10
|
+
VectorStoreWithOwner,
|
|
11
|
+
)
|
|
12
|
+
from llama_stack.log import get_logger
|
|
12
13
|
|
|
13
14
|
# Removed VectorStores import to avoid exposing public API
|
|
14
|
-
from
|
|
15
|
+
from llama_stack_api import (
|
|
16
|
+
EmbeddedChunk,
|
|
17
|
+
InterleavedContent,
|
|
18
|
+
ModelNotFoundError,
|
|
19
|
+
ModelType,
|
|
20
|
+
ModelTypeError,
|
|
21
|
+
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
|
|
22
|
+
QueryChunksResponse,
|
|
23
|
+
ResourceType,
|
|
15
24
|
SearchRankingOptions,
|
|
16
25
|
VectorStoreChunkingStrategy,
|
|
17
26
|
VectorStoreDeleteResponse,
|
|
18
|
-
|
|
27
|
+
VectorStoreFileContentResponse,
|
|
19
28
|
VectorStoreFileDeleteResponse,
|
|
20
29
|
VectorStoreFileObject,
|
|
21
30
|
VectorStoreFileStatus,
|
|
22
31
|
VectorStoreObject,
|
|
23
32
|
VectorStoreSearchResponsePage,
|
|
24
33
|
)
|
|
25
|
-
from llama_stack.core.datatypes import (
|
|
26
|
-
VectorStoreWithOwner,
|
|
27
|
-
)
|
|
28
|
-
from llama_stack.log import get_logger
|
|
29
34
|
|
|
30
35
|
from .common import CommonRoutingTableImpl, lookup_model
|
|
31
36
|
|
|
@@ -39,6 +44,15 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
39
44
|
Only provides internal routing functionality for VectorIORouter.
|
|
40
45
|
"""
|
|
41
46
|
|
|
47
|
+
def __init__(
    self,
    impls_by_provider_id: dict[str, Any],
    dist_registry: Any,
    policy: list[Any],
) -> None:
    """Initialize the routing table and reserve a slot for the vector IO router.

    All three arguments are forwarded unchanged, positionally, to the parent
    constructor.
    """
    super().__init__(impls_by_provider_id, dist_registry, policy)
    # Starts out unset; nothing in this constructor supplies the router.
    self.vector_io_router = None  # Will be set post-instantiation
|
|
55
|
+
|
|
42
56
|
# Internal methods only - no public API exposure
|
|
43
57
|
|
|
44
58
|
async def register_vector_store(
|
|
@@ -77,6 +91,26 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
77
91
|
await self.register_object(vector_store)
|
|
78
92
|
return vector_store
|
|
79
93
|
|
|
94
|
+
async def insert_chunks(
    self,
    vector_store_id: str,
    chunks: list[EmbeddedChunk],
    ttl_seconds: int | None = None,
) -> None:
    """Insert chunks into a vector store after an "update" permission check.

    The call is delegated to the provider implementation resolved for
    ``vector_store_id``; whatever that provider returns is passed back.
    """
    # Authorization comes first so the provider is never touched on denial.
    await self.assert_action_allowed("update", "vector_store", vector_store_id)
    backend = await self.get_provider_impl(vector_store_id)
    outcome = await backend.insert_chunks(vector_store_id, chunks, ttl_seconds)
    return outcome
|
|
103
|
+
|
|
104
|
+
async def query_chunks(
    self,
    vector_store_id: str,
    query: InterleavedContent,
    params: dict[str, Any] | None = None,
) -> QueryChunksResponse:
    """Query a vector store after a "read" permission check.

    The call is delegated to the provider implementation resolved for
    ``vector_store_id`` and its response is returned as-is.
    """
    # Authorization comes first so the provider is never touched on denial.
    await self.assert_action_allowed("read", "vector_store", vector_store_id)
    backend = await self.get_provider_impl(vector_store_id)
    response = await backend.query_chunks(vector_store_id, query, params)
    return response
|
|
113
|
+
|
|
80
114
|
async def openai_retrieve_vector_store(
|
|
81
115
|
self,
|
|
82
116
|
vector_store_id: str,
|
|
@@ -195,12 +229,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
195
229
|
self,
|
|
196
230
|
vector_store_id: str,
|
|
197
231
|
file_id: str,
|
|
198
|
-
|
|
232
|
+
include_embeddings: bool | None = False,
|
|
233
|
+
include_metadata: bool | None = False,
|
|
234
|
+
) -> VectorStoreFileContentResponse:
|
|
199
235
|
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
|
236
|
+
|
|
200
237
|
provider = await self.get_provider_impl(vector_store_id)
|
|
201
238
|
return await provider.openai_retrieve_vector_store_file_contents(
|
|
202
239
|
vector_store_id=vector_store_id,
|
|
203
240
|
file_id=file_id,
|
|
241
|
+
include_embeddings=include_embeddings,
|
|
242
|
+
include_metadata=include_metadata,
|
|
204
243
|
)
|
|
205
244
|
|
|
206
245
|
async def openai_update_vector_store_file(
|
|
@@ -232,17 +271,13 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|
|
232
271
|
async def openai_create_vector_store_file_batch(
    self,
    vector_store_id: str,
    params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
):
    """Create a file batch on a vector store after an "update" permission check.

    ``params`` is handed to the resolved provider untouched, and the
    provider's result is returned to the caller.
    """
    # Authorization comes first so the provider is never touched on denial.
    await self.assert_action_allowed("update", "vector_store", vector_store_id)
    backend = await self.get_provider_impl(vector_store_id)
    batch = await backend.openai_create_vector_store_file_batch(
        vector_store_id=vector_store_id,
        params=params,
    )
    return batch
|
|
247
282
|
|
|
248
283
|
async def openai_retrieve_vector_store_file_batch(
|
llama_stack/core/server/auth.py
CHANGED
|
@@ -28,9 +28,11 @@ class AuthenticationMiddleware:
|
|
|
28
28
|
4. Makes these attributes available to the route handlers for access control
|
|
29
29
|
|
|
30
30
|
Unauthenticated Access:
|
|
31
|
-
Endpoints can opt out of authentication by
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
Endpoints can opt out of authentication by:
|
|
32
|
+
- For legacy @webmethod routes: setting require_authentication=False in the decorator
|
|
33
|
+
- For FastAPI router routes: setting openapi_extra={PUBLIC_ROUTE_KEY: True}
|
|
34
|
+
This is typically used for operational endpoints like /health and /version to support
|
|
35
|
+
monitoring, load balancers, and observability tools.
|
|
34
36
|
|
|
35
37
|
The middleware supports multiple authentication providers through the AuthProvider interface:
|
|
36
38
|
- Kubernetes: Validates tokens against the Kubernetes API server
|
|
@@ -6,13 +6,13 @@
|
|
|
6
6
|
|
|
7
7
|
import ssl
|
|
8
8
|
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import Any
|
|
9
10
|
from urllib.parse import parse_qs, urljoin, urlparse
|
|
10
11
|
|
|
11
12
|
import httpx
|
|
12
13
|
import jwt
|
|
13
14
|
from pydantic import BaseModel, Field
|
|
14
15
|
|
|
15
|
-
from llama_stack.apis.common.errors import TokenValidationError
|
|
16
16
|
from llama_stack.core.datatypes import (
|
|
17
17
|
AuthenticationConfig,
|
|
18
18
|
CustomAuthConfig,
|
|
@@ -22,6 +22,7 @@ from llama_stack.core.datatypes import (
|
|
|
22
22
|
User,
|
|
23
23
|
)
|
|
24
24
|
from llama_stack.log import get_logger
|
|
25
|
+
from llama_stack_api import TokenValidationError
|
|
25
26
|
|
|
26
27
|
logger = get_logger(name=__name__, category="core::auth")
|
|
27
28
|
|
|
@@ -143,14 +144,21 @@ class OAuth2TokenAuthProvider(AuthProvider):
|
|
|
143
144
|
if self.config.jwks and self.config.jwks.token:
|
|
144
145
|
headers["Authorization"] = f"Bearer {self.config.jwks.token}"
|
|
145
146
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
147
|
+
# Ensure uri is not None for PyJWKClient
|
|
148
|
+
if not self.config.jwks or not self.config.jwks.uri:
|
|
149
|
+
raise ValueError("JWKS configuration requires a valid URI")
|
|
150
|
+
|
|
151
|
+
# Build kwargs conditionally to avoid passing None values
|
|
152
|
+
jwks_kwargs: dict[str, Any] = {
|
|
153
|
+
"cache_keys": True,
|
|
154
|
+
"max_cached_keys": 10,
|
|
155
|
+
"headers": headers,
|
|
156
|
+
"ssl_context": ssl_context,
|
|
157
|
+
}
|
|
158
|
+
if self.config.jwks.key_recheck_period is not None:
|
|
159
|
+
jwks_kwargs["lifespan"] = self.config.jwks.key_recheck_period
|
|
160
|
+
|
|
161
|
+
self._jwks_client = jwt.PyJWKClient(self.config.jwks.uri, **jwks_kwargs)
|
|
154
162
|
return self._jwks_client
|
|
155
163
|
|
|
156
164
|
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User:
|
|
@@ -197,23 +205,31 @@ class OAuth2TokenAuthProvider(AuthProvider):
|
|
|
197
205
|
if self.config.introspection is None:
|
|
198
206
|
raise ValueError("Introspection is not configured")
|
|
199
207
|
|
|
208
|
+
# ssl_ctxt is either an SSLContext built from tls_cafile or False; False disables TLS certificate verification in httpx
|
|
209
|
+
ssl_ctxt: ssl.SSLContext | bool = False # Default to no verification if no cafile
|
|
210
|
+
if self.config.tls_cafile:
|
|
211
|
+
ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
|
|
212
|
+
|
|
213
|
+
# Build post kwargs conditionally based on auth method
|
|
214
|
+
post_kwargs: dict[str, Any] = {
|
|
215
|
+
"url": self.config.introspection.url,
|
|
216
|
+
"data": form,
|
|
217
|
+
"timeout": 10.0,
|
|
218
|
+
}
|
|
219
|
+
|
|
200
220
|
if self.config.introspection.send_secret_in_body:
|
|
201
221
|
form["client_id"] = self.config.introspection.client_id
|
|
202
222
|
form["client_secret"] = self.config.introspection.client_secret
|
|
203
|
-
auth = None
|
|
204
223
|
else:
|
|
205
|
-
auth
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
224
|
+
# httpx auth parameter expects tuple[str | bytes, str | bytes]
|
|
225
|
+
post_kwargs["auth"] = (
|
|
226
|
+
self.config.introspection.client_id,
|
|
227
|
+
self.config.introspection.client_secret,
|
|
228
|
+
)
|
|
229
|
+
|
|
209
230
|
try:
|
|
210
231
|
async with httpx.AsyncClient(verify=ssl_ctxt) as client:
|
|
211
|
-
response = await client.post(
|
|
212
|
-
self.config.introspection.url,
|
|
213
|
-
data=form,
|
|
214
|
-
auth=auth,
|
|
215
|
-
timeout=10.0, # Add a reasonable timeout
|
|
216
|
-
)
|
|
232
|
+
response = await client.post(**post_kwargs)
|
|
217
233
|
if response.status_code != httpx.codes.OK:
|
|
218
234
|
logger.warning(f"Token introspection failed with status code: {response.status_code}")
|
|
219
235
|
raise ValueError(f"Token introspection failed: {response.status_code}")
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""Router utilities for FastAPI routers.
|
|
8
|
+
|
|
9
|
+
This module provides utilities to create FastAPI routers from API packages.
|
|
10
|
+
APIs with routers are explicitly listed here.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from collections.abc import Callable
|
|
14
|
+
from typing import Any, cast
|
|
15
|
+
|
|
16
|
+
from fastapi import APIRouter
|
|
17
|
+
from fastapi.routing import APIRoute
|
|
18
|
+
|
|
19
|
+
from llama_stack_api import admin, batches, benchmarks, datasets, files, inspect_api, providers
|
|
20
|
+
|
|
21
|
+
# Router factories for APIs that have FastAPI routers
|
|
22
|
+
# Add new APIs here as they are migrated to the router system
|
|
23
|
+
from llama_stack_api.datatypes import Api
|
|
24
|
+
|
|
25
|
+
# Registry of router factories, keyed by API name string. Each factory takes
# one argument and returns an APIRouter. An API that has been migrated to the
# router system needs an entry here.
_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
    "admin": admin.fastapi_routes.create_router,
    "batches": batches.fastapi_routes.create_router,
    "benchmarks": benchmarks.fastapi_routes.create_router,
    "datasets": datasets.fastapi_routes.create_router,
    "providers": providers.fastapi_routes.create_router,
    "inspect": inspect_api.fastapi_routes.create_router,
    "files": files.fastapi_routes.create_router,
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def has_router(api: "Api") -> bool:
    """Report whether a router factory is registered for the given API.

    Args:
        api: The API enum member to look up; its ``value`` is the lookup key.

    Returns:
        True when ``_ROUTER_FACTORIES`` has an entry for the API, else False.
    """
    api_name = api.value
    return api_name in _ROUTER_FACTORIES
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def build_fastapi_router(api: "Api", impl: Any) -> APIRouter | None:
    """Create the FastAPI router for an API from its registered factory.

    Args:
        api: The API enum member; its ``value`` selects the factory.
        impl: The implementation instance passed to the factory.

    Returns:
        The router produced by the factory, or None when no factory is
        registered for the API.
    """
    factory = _ROUTER_FACTORIES.get(api.value)
    if factory is not None:
        # The cast only informs the type checker; it does nothing at runtime.
        # Factories are required to return an APIRouter, and a wrong return
        # type would surface when app.include_router(router) is called.
        return cast(APIRouter, factory(impl))
    return None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_router_routes(router: APIRouter) -> list[APIRoute]:
    """Collect the APIRoute entries registered on a FastAPI router.

    Args:
        router: The FastAPI router whose routes are inspected.

    Returns:
        The router's routes that are ``APIRoute`` instances, in their original
        order. Entries of any other type are left out.
    """
    # Only APIRoute objects carry the path, methods, tags and related metadata.
    return [candidate for candidate in router.routes if isinstance(candidate, APIRoute)]
|
llama_stack/core/server/quota.py
CHANGED
|
@@ -11,9 +11,9 @@ from datetime import UTC, datetime, timedelta
|
|
|
11
11
|
from starlette.types import ASGIApp, Receive, Scope, Send
|
|
12
12
|
|
|
13
13
|
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
|
|
14
|
+
from llama_stack.core.storage.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
|
|
14
15
|
from llama_stack.log import get_logger
|
|
15
|
-
from
|
|
16
|
-
from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
|
|
16
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
17
17
|
|
|
18
18
|
logger = get_logger(name=__name__, category="core::server")
|
|
19
19
|
|