llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from enum import StrEnum
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Annotated, Any, Literal, Self
|
|
10
|
+
from urllib.parse import urlparse
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
13
|
+
|
|
14
|
+
from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
|
|
15
|
+
from llama_stack.apis.datasetio import DatasetIO
|
|
16
|
+
from llama_stack.apis.datasets import Dataset, DatasetInput
|
|
17
|
+
from llama_stack.apis.eval import Eval
|
|
18
|
+
from llama_stack.apis.inference import Inference
|
|
19
|
+
from llama_stack.apis.models import Model, ModelInput
|
|
20
|
+
from llama_stack.apis.resource import Resource
|
|
21
|
+
from llama_stack.apis.safety import Safety
|
|
22
|
+
from llama_stack.apis.scoring import Scoring
|
|
23
|
+
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
|
|
24
|
+
from llama_stack.apis.shields import Shield, ShieldInput
|
|
25
|
+
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
|
|
26
|
+
from llama_stack.apis.vector_io import VectorIO
|
|
27
|
+
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
|
|
28
|
+
from llama_stack.core.access_control.datatypes import AccessRule
|
|
29
|
+
from llama_stack.core.storage.datatypes import (
|
|
30
|
+
KVStoreReference,
|
|
31
|
+
StorageBackendType,
|
|
32
|
+
StorageConfig,
|
|
33
|
+
)
|
|
34
|
+
from llama_stack.providers.datatypes import Api, ProviderSpec
|
|
35
|
+
|
|
36
|
+
LLAMA_STACK_BUILD_CONFIG_VERSION = 2
|
|
37
|
+
LLAMA_STACK_RUN_CONFIG_VERSION = 2
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
RoutingKey = str | list[str]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class RegistryEntrySource(StrEnum):
|
|
44
|
+
via_register_api = "via_register_api"
|
|
45
|
+
listed_from_provider = "listed_from_provider"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class User(BaseModel):
|
|
49
|
+
principal: str
|
|
50
|
+
# further attributes that may be used for access control decisions
|
|
51
|
+
attributes: dict[str, list[str]] | None = None
|
|
52
|
+
|
|
53
|
+
def __init__(self, principal: str, attributes: dict[str, list[str]] | None):
|
|
54
|
+
super().__init__(principal=principal, attributes=attributes)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class ResourceWithOwner(Resource):
|
|
58
|
+
"""Extension of Resource that adds an optional owner, i.e. the user that created the
|
|
59
|
+
resource. This can be used to constrain access to the resource."""
|
|
60
|
+
|
|
61
|
+
owner: User | None = None
|
|
62
|
+
source: RegistryEntrySource = RegistryEntrySource.via_register_api
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Use the extended Resource for all routable objects
|
|
66
|
+
class ModelWithOwner(Model, ResourceWithOwner):
|
|
67
|
+
pass
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ShieldWithOwner(Shield, ResourceWithOwner):
|
|
71
|
+
pass
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
|
|
75
|
+
pass
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class DatasetWithOwner(Dataset, ResourceWithOwner):
|
|
79
|
+
pass
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ScoringFnWithOwner(ScoringFn, ResourceWithOwner):
|
|
83
|
+
pass
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
|
|
87
|
+
pass
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
|
|
91
|
+
pass
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
|
|
95
|
+
|
|
96
|
+
RoutableObjectWithProvider = Annotated[
|
|
97
|
+
ModelWithOwner
|
|
98
|
+
| ShieldWithOwner
|
|
99
|
+
| VectorStoreWithOwner
|
|
100
|
+
| DatasetWithOwner
|
|
101
|
+
| ScoringFnWithOwner
|
|
102
|
+
| BenchmarkWithOwner
|
|
103
|
+
| ToolGroupWithOwner,
|
|
104
|
+
Field(discriminator="type"),
|
|
105
|
+
]
|
|
106
|
+
|
|
107
|
+
RoutedProtocol = Inference | Safety | VectorIO | DatasetIO | Scoring | Eval | ToolRuntime
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Example: /inference, /safety
|
|
111
|
+
class AutoRoutedProviderSpec(ProviderSpec):
|
|
112
|
+
provider_type: str = "router"
|
|
113
|
+
config_class: str = ""
|
|
114
|
+
|
|
115
|
+
container_image: str | None = None
|
|
116
|
+
routing_table_api: Api
|
|
117
|
+
module: str
|
|
118
|
+
provider_data_validator: str | None = Field(
|
|
119
|
+
default=None,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Example: /models, /shields
|
|
124
|
+
class RoutingTableProviderSpec(ProviderSpec):
|
|
125
|
+
provider_type: str = "routing_table"
|
|
126
|
+
config_class: str = ""
|
|
127
|
+
container_image: str | None = None
|
|
128
|
+
|
|
129
|
+
router_api: Api
|
|
130
|
+
module: str
|
|
131
|
+
pip_packages: list[str] = Field(default_factory=list)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class Provider(BaseModel):
|
|
135
|
+
# provider_id of None means that the provider is not enabled - this happens
|
|
136
|
+
# when the provider is enabled via a conditional environment variable
|
|
137
|
+
provider_id: str | None
|
|
138
|
+
provider_type: str
|
|
139
|
+
config: dict[str, Any] = {}
|
|
140
|
+
module: str | None = Field(
|
|
141
|
+
default=None,
|
|
142
|
+
description="""
|
|
143
|
+
Fully-qualified name of the external provider module to import. The module is expected to have:
|
|
144
|
+
|
|
145
|
+
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
|
146
|
+
|
|
147
|
+
Example: `module: ramalama_stack`
|
|
148
|
+
""",
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class BuildProvider(BaseModel):
|
|
153
|
+
provider_type: str
|
|
154
|
+
module: str | None = Field(
|
|
155
|
+
default=None,
|
|
156
|
+
description="""
|
|
157
|
+
Fully-qualified name of the external provider module to import. The module is expected to have:
|
|
158
|
+
|
|
159
|
+
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
|
160
|
+
|
|
161
|
+
Example: `module: ramalama_stack`
|
|
162
|
+
""",
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class DistributionSpec(BaseModel):
|
|
167
|
+
description: str | None = Field(
|
|
168
|
+
default="",
|
|
169
|
+
description="Description of the distribution",
|
|
170
|
+
)
|
|
171
|
+
container_image: str | None = None
|
|
172
|
+
providers: dict[str, list[BuildProvider]] = Field(
|
|
173
|
+
default_factory=dict,
|
|
174
|
+
description="""
|
|
175
|
+
Provider Types for each of the APIs provided by this distribution. If you
|
|
176
|
+
select multiple providers, you should provide an appropriate 'routing_map'
|
|
177
|
+
in the runtime configuration to help route to the correct provider.
|
|
178
|
+
""",
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class TelemetryConfig(BaseModel):
|
|
183
|
+
"""
|
|
184
|
+
Configuration for telemetry.
|
|
185
|
+
|
|
186
|
+
Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
|
|
187
|
+
for env variables to configure the OpenTelemetry SDK.
|
|
188
|
+
|
|
189
|
+
Example:
|
|
190
|
+
```bash
|
|
191
|
+
OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
|
|
192
|
+
```
|
|
193
|
+
"""
|
|
194
|
+
|
|
195
|
+
enabled: bool = Field(default=False, description="enable or disable telemetry")
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class LoggingConfig(BaseModel):
|
|
199
|
+
category_levels: dict[str, str] = Field(
|
|
200
|
+
default_factory=dict,
|
|
201
|
+
description="""
|
|
202
|
+
Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class OAuth2JWKSConfig(BaseModel):
|
|
207
|
+
# The JWKS URI for collecting public keys
|
|
208
|
+
uri: str
|
|
209
|
+
token: str | None = Field(default=None, description="token to authorise access to jwks")
|
|
210
|
+
key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates")
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class OAuth2IntrospectionConfig(BaseModel):
|
|
214
|
+
url: str
|
|
215
|
+
client_id: str
|
|
216
|
+
client_secret: str
|
|
217
|
+
send_secret_in_body: bool = False
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class AuthProviderType(StrEnum):
|
|
221
|
+
"""Supported authentication provider types."""
|
|
222
|
+
|
|
223
|
+
OAUTH2_TOKEN = "oauth2_token"
|
|
224
|
+
GITHUB_TOKEN = "github_token"
|
|
225
|
+
CUSTOM = "custom"
|
|
226
|
+
KUBERNETES = "kubernetes"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class OAuth2TokenAuthConfig(BaseModel):
    """Configuration for OAuth2 token authentication."""

    type: Literal[AuthProviderType.OAUTH2_TOKEN] = AuthProviderType.OAUTH2_TOKEN
    audience: str = Field(default="llama-stack")
    verify_tls: bool = Field(default=True)
    tls_cafile: Path | None = Field(default=None)
    issuer: str | None = Field(default=None, description="The OIDC issuer URL.")
    claims_mapping: dict[str, str] = Field(
        default_factory=lambda: {
            "sub": "roles",
            "username": "roles",
            "groups": "teams",
            "team": "teams",
            "project": "projects",
            "tenant": "namespaces",
            "namespace": "namespaces",
        },
    )
    jwks: OAuth2JWKSConfig | None = Field(default=None, description="JWKS configuration")
    introspection: OAuth2IntrospectionConfig | None = Field(
        default=None, description="OAuth2 introspection configuration"
    )

    @field_validator("claims_mapping")
    @classmethod
    def validate_claims_mapping(cls, v):
        for key, value in v.items():
            if not value:
                raise ValueError(f"claims_mapping value cannot be empty: {key}")
        return v

    @model_validator(mode="after")
    def validate_mode(self) -> Self:
        if not self.jwks and not self.introspection:
            raise ValueError("One of jwks or introspection must be configured")
        if self.jwks and self.introspection:
            raise ValueError("At present only one of jwks or introspection should be configured")
        return self


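
# Illustrative usage (hypothetical issuer/JWKS URLs): exactly one of `jwks` or
# `introspection` must be set, otherwise the model validator above raises a ValueError.
#
#     auth = OAuth2TokenAuthConfig(
#         issuer="https://issuer.example.com",
#         jwks=OAuth2JWKSConfig(uri="https://issuer.example.com/.well-known/jwks.json"),
#     )
#     assert auth.audience == "llama-stack" and auth.introspection is None
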
class CustomAuthConfig(BaseModel):
    """Configuration for custom authentication."""

    type: Literal[AuthProviderType.CUSTOM] = AuthProviderType.CUSTOM
    endpoint: str = Field(
        ...,
        description="Custom authentication endpoint URL",
    )


class GitHubTokenAuthConfig(BaseModel):
    """Configuration for GitHub token authentication."""

    type: Literal[AuthProviderType.GITHUB_TOKEN] = AuthProviderType.GITHUB_TOKEN
    github_api_base_url: str = Field(
        default="https://api.github.com",
        description="Base URL for GitHub API (use https://api.github.com for public GitHub)",
    )
    claims_mapping: dict[str, str] = Field(
        default_factory=lambda: {
            "login": "roles",
            "organizations": "teams",
        },
        description="Mapping from GitHub user fields to access attributes",
    )


class KubernetesAuthProviderConfig(BaseModel):
    """Configuration for Kubernetes authentication provider."""

    type: Literal[AuthProviderType.KUBERNETES] = AuthProviderType.KUBERNETES
    api_server_url: str = Field(
        default="https://kubernetes.default.svc",
        description="Kubernetes API server URL (e.g., https://api.cluster.domain:6443)",
    )
    verify_tls: bool = Field(default=True, description="Whether to verify TLS certificates")
    tls_cafile: Path | None = Field(default=None, description="Path to CA certificate file for TLS verification")
    claims_mapping: dict[str, str] = Field(
        default_factory=lambda: {
            "username": "roles",
            "groups": "roles",
        },
        description="Mapping of Kubernetes user claims to access attributes",
    )

    @field_validator("api_server_url")
    @classmethod
    def validate_api_server_url(cls, v):
        parsed = urlparse(v)
        if not parsed.scheme or not parsed.netloc:
            raise ValueError(f"api_server_url must be a valid URL with scheme and host: {v}")
        if parsed.scheme not in ["http", "https"]:
            raise ValueError(f"api_server_url scheme must be http or https: {v}")
        return v

    @field_validator("claims_mapping")
    @classmethod
    def validate_claims_mapping(cls, v):
        for key, value in v.items():
            if not value:
                raise ValueError(f"claims_mapping value cannot be empty: {key}")
        return v


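
# Illustrative usage (hypothetical cluster URL): the `api_server_url` validator above
# rejects values without an http(s) scheme and host.
#
#     k8s = KubernetesAuthProviderConfig(api_server_url="https://api.my-cluster.example:6443")
#
#     from pydantic import ValidationError
#     try:
#         KubernetesAuthProviderConfig(api_server_url="not-a-url")
#     except ValidationError:
#         pass  # missing scheme/host is rejected
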
AuthProviderConfig = Annotated[
    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig | KubernetesAuthProviderConfig,
    Field(discriminator="type"),
]


class AuthenticationConfig(BaseModel):
    """Top-level authentication configuration."""

    provider_config: AuthProviderConfig = Field(
        ...,
        description="Authentication provider configuration",
    )
    access_policy: list[AccessRule] = Field(
        default=[],
        description="Rules for determining access to resources",
    )


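
# Illustrative usage: `provider_config` is a discriminated union keyed on `type`, so a
# plain dict is parsed into the matching provider config class (GitHub shown here).
#
#     cfg = AuthenticationConfig.model_validate(
#         {"provider_config": {"type": "github_token"}}
#     )
#     assert isinstance(cfg.provider_config, GitHubTokenAuthConfig)
#     assert cfg.access_policy == []
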
class AuthenticationRequiredError(Exception):
    pass


class QualifiedModel(BaseModel):
    """A qualified model identifier, consisting of a provider ID and a model ID."""

    provider_id: str
    model_id: str


class VectorStoresConfig(BaseModel):
    """Configuration for vector stores in the stack."""

    default_provider_id: str | None = Field(
        default=None,
        description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
    )
    default_embedding_model: QualifiedModel | None = Field(
        default=None,
        description="Default embedding model configuration for vector stores.",
    )


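
# Illustrative usage (hypothetical provider and model IDs): pin a default vector_io
# provider and a default embedding model for vector stores.
#
#     vs = VectorStoresConfig(
#         default_provider_id="faiss",
#         default_embedding_model=QualifiedModel(provider_id="ollama", model_id="all-minilm"),
#     )
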
class QuotaPeriod(StrEnum):
    DAY = "day"


class QuotaConfig(BaseModel):
    kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
    anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
    authenticated_max_requests: int = Field(
        default=1000, description="Max requests for authenticated clients per period"
    )
    period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set")


class CORSConfig(BaseModel):
    allow_origins: list[str] = Field(default_factory=list)
    allow_origin_regex: str | None = Field(default=None)
    allow_methods: list[str] = Field(default=["OPTIONS"])
    allow_headers: list[str] = Field(default_factory=list)
    allow_credentials: bool = Field(default=False)
    expose_headers: list[str] = Field(default_factory=list)
    max_age: int = Field(default=600, ge=0)

    @model_validator(mode="after")
    def validate_credentials_config(self) -> Self:
        if self.allow_credentials and (self.allow_origins == ["*"] or "*" in self.allow_origins):
            raise ValueError("Cannot use wildcard origins with credentials enabled")
        return self


def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | None:
    if cors_config is False or cors_config is None:
        return None

    if cors_config is True:
        # dev mode: allow localhost on any port
        return CORSConfig(
            allow_origins=[],
            allow_origin_regex=r"https?://localhost:\d+",
            allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
            allow_headers=["Content-Type", "Authorization", "X-Requested-With"],
        )

    if isinstance(cors_config, CORSConfig):
        return cors_config

    raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}")


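
# Illustrative usage: `cors: true` expands to a localhost-only development policy, while an
# explicit CORSConfig is passed through unchanged and False/None disables CORS entirely.
#
#     dev = process_cors_config(True)
#     assert dev is not None and dev.allow_origin_regex == r"https?://localhost:\d+"
#     assert process_cors_config(None) is None
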
class RegisteredResources(BaseModel):
    """Registry of resources available in the distribution."""

    models: list[ModelInput] = Field(default_factory=list)
    shields: list[ShieldInput] = Field(default_factory=list)
    vector_stores: list[VectorStoreInput] = Field(default_factory=list)
    datasets: list[DatasetInput] = Field(default_factory=list)
    scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
    benchmarks: list[BenchmarkInput] = Field(default_factory=list)
    tool_groups: list[ToolGroupInput] = Field(default_factory=list)


class ServerConfig(BaseModel):
    port: int = Field(
        default=8321,
        description="Port to listen on",
        ge=1024,
        le=65535,
    )
    tls_certfile: str | None = Field(
        default=None,
        description="Path to TLS certificate file for HTTPS",
    )
    tls_keyfile: str | None = Field(
        default=None,
        description="Path to TLS key file for HTTPS",
    )
    tls_cafile: str | None = Field(
        default=None,
        description="Path to TLS CA file for HTTPS with mutual TLS authentication",
    )
    auth: AuthenticationConfig | None = Field(
        default=None,
        description="Authentication configuration for the server",
    )
    host: str | None = Field(
        default=None,
        description="The host the server should listen on",
    )
    quota: QuotaConfig | None = Field(
        default=None,
        description="Per client quota request configuration",
    )
    cors: bool | CORSConfig | None = Field(
        default=None,
        description="CORS configuration for cross-origin requests. Can be:\n"
        "- true: Enable localhost CORS for development\n"
        "- {allow_origins: [...], allow_methods: [...], ...}: Full configuration",
    )
    workers: int = Field(
        default=1,
        description="Number of workers to use for the server",
    )


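
# Illustrative usage: ServerConfig is fully defaulted, so a bare instance listens on 8321
# with a single worker; ports below 1024 are rejected by the `ge=1024` field constraint.
#
#     srv = ServerConfig(cors=True)
#     assert srv.port == 8321 and srv.workers == 1
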
class StackRunConfig(BaseModel):
    version: int = LLAMA_STACK_RUN_CONFIG_VERSION

    image_name: str = Field(
        ...,
        description="""
Reference to the distribution this package refers to. For unregistered (adhoc) packages,
this could be just a hash
""",
    )
    container_image: str | None = Field(
        default=None,
        description="Reference to the container image if this package refers to a container",
    )
    apis: list[str] = Field(
        default_factory=list,
        description="""
The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""",
    )

    providers: dict[str, list[Provider]] = Field(
        description="""
One or more providers to use for each API. The same provider_type (e.g., meta-reference)
can be instantiated multiple times (with different configs) if necessary.
""",
    )
    storage: StorageConfig = Field(
        description="Catalog of named storage backends and references available to the stack",
    )

    registered_resources: RegisteredResources = Field(
        default_factory=RegisteredResources,
        description="Registry of resources available in the distribution",
    )

    logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")

    telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")

    server: ServerConfig = Field(
        default_factory=ServerConfig,
        description="Configuration for the HTTP(S) server",
    )

    external_providers_dir: Path | None = Field(
        default=None,
        description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.",
    )

    external_apis_dir: Path | None = Field(
        default=None,
        description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
    )

    vector_stores: VectorStoresConfig | None = Field(
        default=None,
        description="Configuration for vector stores, including default embedding model",
    )

    @field_validator("external_providers_dir")
    @classmethod
    def validate_external_providers_dir(cls, v):
        if v is None:
            return None
        if isinstance(v, str):
            return Path(v)
        return v

    @model_validator(mode="after")
    def validate_server_stores(self) -> "StackRunConfig":
        backend_map = self.storage.backends
        stores = self.storage.stores
        kv_backends = {
            name
            for name, cfg in backend_map.items()
            if cfg.type
            in {
                StorageBackendType.KV_REDIS,
                StorageBackendType.KV_SQLITE,
                StorageBackendType.KV_POSTGRES,
                StorageBackendType.KV_MONGODB,
            }
        }
        sql_backends = {
            name
            for name, cfg in backend_map.items()
            if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
        }

        def _ensure_backend(reference, expected_set, store_name: str) -> None:
            if reference is None:
                return
            backend_name = reference.backend
            if backend_name not in backend_map:
                raise ValueError(
                    f"{store_name} references unknown backend '{backend_name}'. "
                    f"Available backends: {sorted(backend_map)}"
                )
            if backend_name not in expected_set:
                raise ValueError(
                    f"{store_name} references backend '{backend_name}' of type "
                    f"'{backend_map[backend_name].type.value}', but a backend of type "
                    f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
                )

        _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
        _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
        _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
        _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
        return self


class BuildConfig(BaseModel):
    version: int = LLAMA_STACK_BUILD_CONFIG_VERSION

    distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
    image_type: str = Field(
        default="venv",
        description="Type of package to build (container | venv)",
    )
    image_name: str | None = Field(
        default=None,
        description="Name of the distribution to build",
    )
    external_providers_dir: Path | None = Field(
        default=None,
        description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
        "pip_packages MUST contain the provider package name.",
    )
    additional_pip_packages: list[str] = Field(
        default_factory=list,
        description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.",
    )
    external_apis_dir: Path | None = Field(
        default=None,
        description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
    )

    @field_validator("external_providers_dir")
    @classmethod
    def validate_external_providers_dir(cls, v):
        if v is None:
            return None
        if isinstance(v, str):
            return Path(v)
        return v