llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/core/routing_tables/toolgroups.py
@@ -0,0 +1,129 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from llama_stack.apis.common.content_types import URL
+from llama_stack.apis.common.errors import ToolGroupNotFoundError
+from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
+from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
+from llama_stack.log import get_logger
+
+from .common import CommonRoutingTableImpl
+
+logger = get_logger(name=__name__, category="core::routing_tables")
+
+
+def parse_toolgroup_from_toolgroup_name_pair(toolgroup_name_with_maybe_tool_name: str) -> str | None:
+    # handle the funny case like "builtin::rag/knowledge_search"
+    parts = toolgroup_name_with_maybe_tool_name.split("/")
+    if len(parts) == 2:
+        return parts[0]
+    else:
+        return None
+
+
+class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
+    toolgroups_to_tools: dict[str, list[ToolDef]] = {}
+    tool_to_toolgroup: dict[str, str] = {}
+
+    # overridden
+    async def get_provider_impl(self, routing_key: str, provider_id: str | None = None) -> Any:
+        # we don't index tools in the registry anymore, but only keep a cache of them by toolgroup_id
+        # TODO: we may want to invalidate the cache (for a given toolgroup_id) every once in a while?
+
+        toolgroup_id = parse_toolgroup_from_toolgroup_name_pair(routing_key)
+        if toolgroup_id:
+            routing_key = toolgroup_id
+
+        if routing_key in self.tool_to_toolgroup:
+            routing_key = self.tool_to_toolgroup[routing_key]
+        return await super().get_provider_impl(routing_key, provider_id)
+
+    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
+        if toolgroup_id:
+            if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id):
+                toolgroup_id = group_id
+            toolgroups = [await self.get_tool_group(toolgroup_id)]
+        else:
+            toolgroups = await self.get_all_with_type("tool_group")
+
+        all_tools = []
+        for toolgroup in toolgroups:
+            if toolgroup.identifier not in self.toolgroups_to_tools:
+                try:
+                    await self._index_tools(toolgroup)
+                except AuthenticationRequiredError:
+                    # Send authentication errors back to the client so it knows
+                    # that it needs to supply credentials for remote MCP servers.
+                    raise
+                except Exception as e:
+                    # Other errors that the client cannot fix are logged and
+                    # those specific toolgroups are skipped.
+                    logger.warning(f"Error listing tools for toolgroup {toolgroup.identifier}: {e}")
+                    logger.debug(e, exc_info=True)
+                    continue
+            all_tools.extend(self.toolgroups_to_tools[toolgroup.identifier])
+
+        return ListToolDefsResponse(data=all_tools)
+
+    async def _index_tools(self, toolgroup: ToolGroup):
+        provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id)
+        tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint)
+
+        tooldefs = tooldefs_response.data
+        for t in tooldefs:
+            t.toolgroup_id = toolgroup.identifier
+
+        self.toolgroups_to_tools[toolgroup.identifier] = tooldefs
+        for tool in tooldefs:
+            self.tool_to_toolgroup[tool.name] = toolgroup.identifier
+
+    async def list_tool_groups(self) -> ListToolGroupsResponse:
+        return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group"))
+
+    async def get_tool_group(self, toolgroup_id: str) -> ToolGroup:
+        tool_group = await self.get_object_by_identifier("tool_group", toolgroup_id)
+        if tool_group is None:
+            raise ToolGroupNotFoundError(toolgroup_id)
+        return tool_group
+
+    async def get_tool(self, tool_name: str) -> ToolDef:
+        if tool_name in self.tool_to_toolgroup:
+            toolgroup_id = self.tool_to_toolgroup[tool_name]
+            tools = self.toolgroups_to_tools[toolgroup_id]
+            for tool in tools:
+                if tool.name == tool_name:
+                    return tool
+        raise ValueError(f"Tool '{tool_name}' not found")
+
+    async def register_tool_group(
+        self,
+        toolgroup_id: str,
+        provider_id: str,
+        mcp_endpoint: URL | None = None,
+        args: dict[str, Any] | None = None,
+    ) -> None:
+        toolgroup = ToolGroupWithOwner(
+            identifier=toolgroup_id,
+            provider_id=provider_id,
+            provider_resource_id=toolgroup_id,
+            mcp_endpoint=mcp_endpoint,
+            args=args,
+        )
+        await self.register_object(toolgroup)
+
+        # ideally, indexing of the tools should not be necessary because anyone using
+        # the tools should first list the tools and then use them. but there are assumptions
+        # baked in some of the code and tests right now.
+        if not toolgroup.mcp_endpoint:
+            await self._index_tools(toolgroup)
+
+    async def unregister_toolgroup(self, toolgroup_id: str) -> None:
+        await self.unregister_object(await self.get_tool_group(toolgroup_id))
+
+    async def shutdown(self) -> None:
+        pass
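
Note on the hunk above: parse_toolgroup_from_toolgroup_name_pair is what lets a routing key name either a bare toolgroup id or a toolgroup/tool pair such as "builtin::rag/knowledge_search". A minimal standalone sketch of that behaviour follows; the function body mirrors the code in the hunk, while the example identifiers are illustrative only and not taken from the package.

# Sketch: toolgroup-name parsing as used by ToolGroupsRoutingTable.get_provider_impl.
# The body mirrors parse_toolgroup_from_toolgroup_name_pair above; example ids are illustrative.
def parse_toolgroup_from_toolgroup_name_pair(name: str) -> str | None:
    parts = name.split("/")
    # "builtin::rag/knowledge_search" -> "builtin::rag"; a bare toolgroup id yields None
    return parts[0] if len(parts) == 2 else None

assert parse_toolgroup_from_toolgroup_name_pair("builtin::rag/knowledge_search") == "builtin::rag"
assert parse_toolgroup_from_toolgroup_name_pair("builtin::rag") is None
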
@@ -0,0 +1,292 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
+from llama_stack.apis.models import ModelType
+from llama_stack.apis.resource import ResourceType
+
+# Removed VectorStores import to avoid exposing public API
+from llama_stack.apis.vector_io.vector_io import (
+    SearchRankingOptions,
+    VectorStoreChunkingStrategy,
+    VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreObject,
+    VectorStoreSearchResponsePage,
+)
+from llama_stack.core.datatypes import (
+    VectorStoreWithOwner,
+)
+from llama_stack.log import get_logger
+
+from .common import CommonRoutingTableImpl, lookup_model
+
+logger = get_logger(name=__name__, category="core::routing_tables")
+
+
+class VectorStoresRoutingTable(CommonRoutingTableImpl):
+    """Internal routing table for vector_store operations.
+
+    Does not inherit from VectorStores to avoid exposing public API endpoints.
+    Only provides internal routing functionality for VectorIORouter.
+    """
+
+    # Internal methods only - no public API exposure
+
+    async def register_vector_store(
+        self,
+        vector_store_id: str,
+        embedding_model: str,
+        embedding_dimension: int | None = 384,
+        provider_id: str | None = None,
+        provider_vector_store_id: str | None = None,
+        vector_store_name: str | None = None,
+    ) -> Any:
+        if provider_id is None:
+            if len(self.impls_by_provider_id) > 0:
+                provider_id = list(self.impls_by_provider_id.keys())[0]
+                if len(self.impls_by_provider_id) > 1:
+                    logger.warning(
+                        f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}."
+                    )
+            else:
+                raise ValueError("No provider available. Please configure a vector_io provider.")
+        model = await lookup_model(self, embedding_model)
+        if model is None:
+            raise ModelNotFoundError(embedding_model)
+        if model.model_type != ModelType.embedding:
+            raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
+
+        vector_store = VectorStoreWithOwner(
+            identifier=vector_store_id,
+            type=ResourceType.vector_store.value,
+            provider_id=provider_id,
+            provider_resource_id=provider_vector_store_id,
+            embedding_model=embedding_model,
+            embedding_dimension=embedding_dimension,
+            vector_store_name=vector_store_name,
+        )
+        await self.register_object(vector_store)
+        return vector_store
+
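Editor's note: `register_vector_store` falls back to the first configured `vector_io` provider when no `provider_id` is given, and rejects any `embedding_model` whose registered `model_type` is not `embedding`. A hedged usage sketch; the store identifier and model name below are placeholders, not defaults shipped with the package:

```python
# Sketch only: how a caller might exercise the routing-table method above.
# "vs_demo" and "all-MiniLM-L6-v2" are illustrative placeholder values.
async def create_demo_store(table) -> None:
    vector_store = await table.register_vector_store(
        vector_store_id="vs_demo",
        embedding_model="all-MiniLM-L6-v2",  # must be registered with model_type == "embedding"
        embedding_dimension=384,
        # provider_id omitted: the first configured vector_io provider is chosen,
        # with a warning logged if more than one is available.
    )
    print(vector_store.identifier)
```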
+    async def openai_retrieve_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreObject:
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store(vector_store_id)
+
+    async def openai_update_vector_store(
+        self,
+        vector_store_id: str,
+        name: str | None = None,
+        expires_after: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> VectorStoreObject:
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_update_vector_store(
+            vector_store_id=vector_store_id,
+            name=name,
+            expires_after=expires_after,
+            metadata=metadata,
+        )
+
+    async def openai_delete_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreDeleteResponse:
+        await self.assert_action_allowed("delete", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        result = await provider.openai_delete_vector_store(vector_store_id)
+        await self.unregister_vector_store(vector_store_id)
+        return result
+
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
+        """Remove the vector store from the routing table registry."""
+        try:
+            vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id)
+            if vector_store_obj:
+                await self.unregister_object(vector_store_obj)
+        except Exception as e:
+            # Log the error but don't fail the operation
+            logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
+
+    async def openai_search_vector_store(
+        self,
+        vector_store_id: str,
+        query: str | list[str],
+        filters: dict[str, Any] | None = None,
+        max_num_results: int | None = 10,
+        ranking_options: SearchRankingOptions | None = None,
+        rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
+    ) -> VectorStoreSearchResponsePage:
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_search_vector_store(
+            vector_store_id=vector_store_id,
+            query=query,
+            filters=filters,
+            max_num_results=max_num_results,
+            ranking_options=ranking_options,
+            rewrite_query=rewrite_query,
+            search_mode=search_mode,
+        )
+
+    async def openai_attach_file_to_vector_store(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any] | None = None,
+        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+    ) -> VectorStoreFileObject:
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_attach_file_to_vector_store(
+            vector_store_id=vector_store_id,
+            file_id=file_id,
+            attributes=attributes,
+            chunking_strategy=chunking_strategy,
+        )
+
+    async def openai_list_files_in_vector_store(
+        self,
+        vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
+    ) -> list[VectorStoreFileObject]:
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_list_files_in_vector_store(
+            vector_store_id=vector_store_id,
+            limit=limit,
+            order=order,
+            after=after,
+            before=before,
+            filter=filter,
+        )
+
+    async def openai_retrieve_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileObject:
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store_file(
+            vector_store_id=vector_store_id,
+            file_id=file_id,
+        )
+
+    async def openai_retrieve_vector_store_file_contents(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileContentsResponse:
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store_file_contents(
+            vector_store_id=vector_store_id,
+            file_id=file_id,
+        )
+
+    async def openai_update_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any],
+    ) -> VectorStoreFileObject:
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_update_vector_store_file(
+            vector_store_id=vector_store_id,
+            file_id=file_id,
+            attributes=attributes,
+        )
+
+    async def openai_delete_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileDeleteResponse:
+        await self.assert_action_allowed("delete", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_delete_vector_store_file(
+            vector_store_id=vector_store_id,
+            file_id=file_id,
+        )
+
+    async def openai_create_vector_store_file_batch(
+        self,
+        vector_store_id: str,
+        file_ids: list[str],
+        attributes: dict[str, Any] | None = None,
+        chunking_strategy: Any | None = None,
+    ):
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_create_vector_store_file_batch(
+            vector_store_id=vector_store_id,
+            file_ids=file_ids,
+            attributes=attributes,
+            chunking_strategy=chunking_strategy,
+        )
+
+    async def openai_retrieve_vector_store_file_batch(
+        self,
+        batch_id: str,
+        vector_store_id: str,
+    ):
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store_file_batch(
+            batch_id=batch_id,
+            vector_store_id=vector_store_id,
+        )
+
+    async def openai_list_files_in_vector_store_file_batch(
+        self,
+        batch_id: str,
+        vector_store_id: str,
+        after: str | None = None,
+        before: str | None = None,
+        filter: str | None = None,
+        limit: int | None = 20,
+        order: str | None = "desc",
+    ):
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_list_files_in_vector_store_file_batch(
+            batch_id=batch_id,
+            vector_store_id=vector_store_id,
+            after=after,
+            before=before,
+            filter=filter,
+            limit=limit,
+            order=order,
+        )
+
+    async def openai_cancel_vector_store_file_batch(
+        self,
+        batch_id: str,
+        vector_store_id: str,
+    ):
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
+        provider = await self.get_provider_impl(vector_store_id)
+        return await provider.openai_cancel_vector_store_file_batch(
+            batch_id=batch_id,
+            vector_store_id=vector_store_id,
+        )
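Editor's note: every `openai_*` method in the routing table above follows the same three-step pattern: assert the caller's permission on the `vector_store` resource, resolve the provider that owns the store, then delegate the call unchanged. A condensed sketch of that shared pattern (the helper name `_route` is illustrative; the table methods it references are the ones shown above):

```python
# Sketch only: the access-check -> provider-lookup -> delegate pattern shared by
# the openai_* methods above. `action` is "read", "update", or "delete".
async def _route(table, action: str, vector_store_id: str, method_name: str, **kwargs):
    await table.assert_action_allowed(action, "vector_store", vector_store_id)
    provider = await table.get_provider_impl(vector_store_id)
    return await getattr(provider, method_name)(vector_store_id=vector_store_id, **kwargs)
```

The one deviation is `openai_delete_vector_store`, which also removes the store from the routing-table registry after the provider-side delete succeeds, tolerating registry failures so the delete itself is not rolled back.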
@@ -0,0 +1,187 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+import httpx
+from aiohttp import hdrs
+
+from llama_stack.core.datatypes import AuthenticationConfig, User
+from llama_stack.core.request_headers import user_from_scope
+from llama_stack.core.server.auth_providers import create_auth_provider
+from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
+from llama_stack.log import get_logger
+
+logger = get_logger(name=__name__, category="core::auth")
+
+
+class AuthenticationMiddleware:
+    """Middleware that authenticates requests using configured authentication provider.
+
+    This middleware:
+    1. Extracts the Bearer token from the Authorization header
+    2. Uses the configured auth provider to validate the token
+    3. Extracts user attributes from the provider's response
+    4. Makes these attributes available to the route handlers for access control
+
+    Unauthenticated Access:
+    Endpoints can opt out of authentication by setting require_authentication=False
+    in their @webmethod decorator. This is typically used for operational endpoints
+    like /health and /version to support monitoring, load balancers, and observability tools.
+
+    The middleware supports multiple authentication providers through the AuthProvider interface:
+    - Kubernetes: Validates tokens against the Kubernetes API server
+    - Custom: Validates tokens against a custom endpoint
+
+    Authentication Request Format for Custom Auth Provider:
+    ```json
+    {
+        "api_key": "the-api-key-extracted-from-auth-header",
+        "request": {
+            "path": "/models/list",
+            "headers": {
+                "content-type": "application/json",
+                "user-agent": "..."
+                // All headers except Authorization
+            },
+            "params": {
+                "limit": ["100"],
+                "offset": ["0"]
+                // Query parameters as key -> list of values
+            }
+        }
+    }
+    ```
+
+    Expected Auth Endpoint Response Format:
+    ```json
+    {
+        "access_attributes": {    // Structured attribute format
+            "roles": ["admin", "user"],
+            "teams": ["ml-team", "nlp-team"],
+            "projects": ["llama-3", "project-x"],
+            "namespaces": ["research"]
+        },
+        "message": "Optional message about auth result"
+    }
+    ```
+
+    Token Validation:
+    Each provider implements its own token validation logic:
+    - Kubernetes: Uses TokenReview API to validate service account tokens
+    - Custom: Sends token to custom endpoint for validation
+
+    Attribute-Based Access Control:
+    The attributes returned by the auth provider are used to determine which
+    resources the user can access. Resources can specify required attributes
+    using the access_attributes field. For a user to access a resource:
+
+    1. All attribute categories specified in the resource must be present in the user's attributes
+    2. For each category, the user must have at least one matching value
+
+    If the auth provider doesn't return any attributes, the user will only be able to
+    access resources that don't have access_attributes defined.
+    """
+
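Editor's note: the docstring above defines the contract a custom auth endpoint must honour: it receives the API key plus request metadata and returns the caller's access attributes. A minimal sketch of such an endpoint (FastAPI is chosen here only for brevity; the `/validate` path, token table, and attribute values are placeholders, not part of llama-stack):

```python
# Sketch only: a toy custom auth endpoint matching the request/response format
# documented above. The token table and attributes are illustrative placeholders.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()

KNOWN_TOKENS = {"demo-token": {"roles": ["admin"], "teams": ["ml-team"]}}


class AuthRequest(BaseModel):
    api_key: str
    request: dict  # {"path": ..., "headers": {...}, "params": {...}} as documented above


@app.post("/validate")
def validate(body: AuthRequest) -> dict:
    attrs = KNOWN_TOKENS.get(body.api_key)
    if attrs is None:
        # Assumption: the custom provider treats a non-2xx response as a failed validation.
        raise HTTPException(status_code=401, detail="invalid token")
    return {"access_attributes": attrs, "message": "token accepted"}
```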
+    def __init__(self, app, auth_config: AuthenticationConfig, impls):
+        self.app = app
+        self.impls = impls
+        self.auth_provider = create_auth_provider(auth_config)
+
+    async def __call__(self, scope, receive, send):
+        if scope["type"] == "http":
+            # Find the route and check if authentication is required
+            path = scope.get("path", "")
+            method = scope.get("method", hdrs.METH_GET)
+
+            if not hasattr(self, "route_impls"):
+                self.route_impls = initialize_route_impls(self.impls)
+
+            webmethod = None
+            try:
+                _, _, _, webmethod = find_matching_route(method, path, self.route_impls)
+            except ValueError:
+                # If no matching endpoint is found, pass here to run auth anyways
+                pass
+
+            # If webmethod explicitly sets require_authentication=False, allow without auth
+            if webmethod and webmethod.require_authentication is False:
+                logger.debug(f"Allowing unauthenticated access to endpoint: {path}")
+                return await self.app(scope, receive, send)
+
+            # Handle authentication
+            headers = dict(scope.get("headers", []))
+            auth_header = headers.get(b"authorization", b"").decode()
+
+            if not auth_header:
+                error_msg = self.auth_provider.get_auth_error_message(scope)
+                return await self._send_auth_error(send, error_msg)
+
+            if not auth_header.startswith("Bearer "):
+                return await self._send_auth_error(send, "Invalid Authorization header format")
+
+            token = auth_header.split("Bearer ", 1)[1]
+
+            # Validate token and get access attributes
+            try:
+                validation_result = await self.auth_provider.validate_token(token, scope)
+            except httpx.TimeoutException:
+                logger.exception("Authentication request timed out")
+                return await self._send_auth_error(send, "Authentication service timeout")
+            except ValueError as e:
+                logger.exception("Error during authentication")
+                return await self._send_auth_error(send, str(e))
+            except Exception:
+                logger.exception("Error during authentication")
+                return await self._send_auth_error(send, "Authentication service error")
+
+            # Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware)
+            # can identify the requester and enforce per-client rate limits.
+            scope["authenticated_client_id"] = token
+
+            # Store attributes in request scope
+            scope["principal"] = validation_result.principal
+            if validation_result.attributes:
+                scope["user_attributes"] = validation_result.attributes
+                logger.debug(
+                    f"Authentication successful: {validation_result.principal} with {len(validation_result.attributes)} attributes"
+                )
+
+            # Scope-based API access control
+            if webmethod and webmethod.required_scope:
+                user = user_from_scope(scope)
+                if not _has_required_scope(webmethod.required_scope, user):
+                    return await self._send_auth_error(
+                        send,
+                        f"Access denied: user does not have required scope: {webmethod.required_scope}",
+                        status=403,
+                    )
+
+        return await self.app(scope, receive, send)
+
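Editor's note: the middleware is a plain ASGI wrapper, so enabling it amounts to wrapping the server app once at startup. A hedged sketch follows; the FastAPI app and route registration are placeholders, and the `AuthenticationConfig` fields depend on the configured provider, so its construction is left abstract:

```python
# Sketch only: wiring AuthenticationMiddleware (defined above) into an ASGI stack.
from fastapi import FastAPI


def build_app(auth_config, impls) -> FastAPI:
    app = FastAPI()
    # ... register llama-stack routes on `app` ...
    # Starlette instantiates the class as AuthenticationMiddleware(app, auth_config=..., impls=...),
    # which matches the __init__ signature shown above.
    app.add_middleware(AuthenticationMiddleware, auth_config=auth_config, impls=impls)
    return app
```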
+    async def _send_auth_error(self, send, message, status=401):
+        await send(
+            {
+                "type": "http.response.start",
+                "status": status,
+                "headers": [[b"content-type", b"application/json"]],
+            }
+        )
+        error_key = "message" if status == 401 else "detail"
+        error_msg = json.dumps({"error": {error_key: message}}).encode()
+        await send({"type": "http.response.body", "body": error_msg})
+
+
+def _has_required_scope(required_scope: str, user: User | None) -> bool:
+    # if no user, assume auth is not enabled
+    if not user:
+        return True
+
+    if not user.attributes:
+        return False
+
+    user_scopes = user.attributes.get("scopes", [])
+    return required_scope in user_scopes
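Editor's note: `_has_required_scope` treats a missing user as "auth disabled", a user without attributes as unauthorized, and otherwise requires the scope to appear in the user's `scopes` attribute. A small behavioural sketch; `DemoUser` is a stand-in carrying only the two fields the function reads, not the real `User` type from `llama_stack.core.datatypes`:

```python
# Sketch only: expected behaviour of _has_required_scope (defined above).
from dataclasses import dataclass


@dataclass
class DemoUser:
    principal: str
    attributes: dict | None


assert _has_required_scope("models:read", None) is True                      # no auth configured
assert _has_required_scope("models:read", DemoUser("alice", None)) is False  # no attributes returned
assert _has_required_scope("models:read", DemoUser("alice", {"scopes": ["models:read"]})) is True
```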