llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
import importlib
|
|
7
|
+
import importlib.metadata
|
|
8
|
+
import inspect
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from llama_stack.apis.agents import Agents
|
|
12
|
+
from llama_stack.apis.batches import Batches
|
|
13
|
+
from llama_stack.apis.benchmarks import Benchmarks
|
|
14
|
+
from llama_stack.apis.conversations import Conversations
|
|
15
|
+
from llama_stack.apis.datasetio import DatasetIO
|
|
16
|
+
from llama_stack.apis.datasets import Datasets
|
|
17
|
+
from llama_stack.apis.datatypes import ExternalApiSpec
|
|
18
|
+
from llama_stack.apis.eval import Eval
|
|
19
|
+
from llama_stack.apis.files import Files
|
|
20
|
+
from llama_stack.apis.inference import Inference, InferenceProvider
|
|
21
|
+
from llama_stack.apis.inspect import Inspect
|
|
22
|
+
from llama_stack.apis.models import Models
|
|
23
|
+
from llama_stack.apis.post_training import PostTraining
|
|
24
|
+
from llama_stack.apis.prompts import Prompts
|
|
25
|
+
from llama_stack.apis.providers import Providers as ProvidersAPI
|
|
26
|
+
from llama_stack.apis.safety import Safety
|
|
27
|
+
from llama_stack.apis.scoring import Scoring
|
|
28
|
+
from llama_stack.apis.scoring_functions import ScoringFunctions
|
|
29
|
+
from llama_stack.apis.shields import Shields
|
|
30
|
+
from llama_stack.apis.telemetry import Telemetry
|
|
31
|
+
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
|
32
|
+
from llama_stack.apis.vector_io import VectorIO
|
|
33
|
+
from llama_stack.apis.vector_stores import VectorStore
|
|
34
|
+
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
|
35
|
+
from llama_stack.core.client import get_client_impl
|
|
36
|
+
from llama_stack.core.datatypes import (
|
|
37
|
+
AccessRule,
|
|
38
|
+
AutoRoutedProviderSpec,
|
|
39
|
+
Provider,
|
|
40
|
+
RoutingTableProviderSpec,
|
|
41
|
+
StackRunConfig,
|
|
42
|
+
)
|
|
43
|
+
from llama_stack.core.distribution import builtin_automatically_routed_apis
|
|
44
|
+
from llama_stack.core.external import load_external_apis
|
|
45
|
+
from llama_stack.core.store import DistributionRegistry
|
|
46
|
+
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
47
|
+
from llama_stack.log import get_logger
|
|
48
|
+
from llama_stack.providers.datatypes import (
|
|
49
|
+
Api,
|
|
50
|
+
BenchmarksProtocolPrivate,
|
|
51
|
+
DatasetsProtocolPrivate,
|
|
52
|
+
InlineProviderSpec,
|
|
53
|
+
ModelsProtocolPrivate,
|
|
54
|
+
ProviderSpec,
|
|
55
|
+
RemoteProviderConfig,
|
|
56
|
+
RemoteProviderSpec,
|
|
57
|
+
ScoringFunctionsProtocolPrivate,
|
|
58
|
+
ShieldsProtocolPrivate,
|
|
59
|
+
ToolGroupsProtocolPrivate,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Module-level logger for provider-resolution diagnostics.
logger = get_logger(name=__name__, category="core")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class InvalidProviderError(Exception):
    """Raised when a configured provider is invalid, e.g. deprecated beyond use."""

    pass
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> dict[Api, Any]:
    """Build the mapping from each API type to its protocol class.

    Args:
        external_apis: Optional dictionary of external API specifications;
            each successfully imported spec is merged into the result.

    Returns:
        Dictionary mapping API types to their protocol classes.
    """
    mapping: dict[Api, Any] = {
        Api.providers: ProvidersAPI,
        Api.agents: Agents,
        Api.inference: Inference,
        Api.inspect: Inspect,
        Api.batches: Batches,
        Api.vector_io: VectorIO,
        Api.vector_stores: VectorStore,
        Api.models: Models,
        Api.safety: Safety,
        Api.shields: Shields,
        Api.datasetio: DatasetIO,
        Api.datasets: Datasets,
        Api.scoring: Scoring,
        Api.scoring_functions: ScoringFunctions,
        Api.eval: Eval,
        Api.benchmarks: Benchmarks,
        Api.post_training: PostTraining,
        Api.tool_groups: ToolGroups,
        Api.tool_runtime: ToolRuntime,
        Api.files: Files,
        Api.prompts: Prompts,
        Api.conversations: Conversations,
        Api.telemetry: Telemetry,
    }

    # External APIs are loaded best-effort: a broken module is logged and skipped
    # rather than failing the whole stack.
    for api, api_spec in (external_apis or {}).items():
        try:
            mapping[api] = getattr(importlib.import_module(api_spec.module), api_spec.protocol)
        except (ImportError, AttributeError):
            logger.exception(f"Failed to load external API {api_spec.name}")

    return mapping
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def api_protocol_map_for_compliance_check(config: Any) -> dict[Api, Any]:
    """Return the protocol map used when checking provider compliance.

    Identical to :func:`api_protocol_map` except that inference providers are
    checked against the narrower ``InferenceProvider`` protocol rather than the
    full routed ``Inference`` API surface.
    """
    protocols = api_protocol_map(load_external_apis(config))
    protocols[Api.inference] = InferenceProvider
    return protocols
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def additional_protocols_map() -> dict[Api, Any]:
    """Map router APIs to (private registration protocol, public protocol, registry API)."""
    mapping: dict[Api, Any] = {}
    mapping[Api.inference] = (ModelsProtocolPrivate, Models, Api.models)
    mapping[Api.tool_groups] = (ToolGroupsProtocolPrivate, ToolGroups, Api.tool_groups)
    mapping[Api.safety] = (ShieldsProtocolPrivate, Shields, Api.shields)
    mapping[Api.datasetio] = (DatasetsProtocolPrivate, Datasets, Api.datasets)
    mapping[Api.scoring] = (ScoringFunctionsProtocolPrivate, ScoringFunctions, Api.scoring_functions)
    mapping[Api.eval] = (BenchmarksProtocolPrivate, Benchmarks, Api.benchmarks)
    return mapping
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# TODO: make all this naming far less atrocious. Provider. ProviderSpec. ProviderWithSpec. WTF!
|
|
141
|
+
class ProviderWithSpec(Provider):
    """A configured provider bundled with the registry spec describing its implementation."""

    spec: ProviderSpec
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Registry of available provider specs: api -> provider_type -> spec.
ProviderRegistry = dict[Api, dict[str, ProviderSpec]]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
async def resolve_impls(
    run_config: StackRunConfig,
    provider_registry: ProviderRegistry,
    dist_registry: DistributionRegistry,
    policy: list[AccessRule],
    internal_impls: dict[Api, Any] | None = None,
) -> dict[Api, Any]:
    """Resolve provider implementations for the stack.

    The resolution proceeds in three phases:
    1. Validating and organizing providers.
    2. Sorting them in dependency order.
    3. Instantiating them with required dependencies.
    """
    routing_table_apis = {entry.routing_table_api for entry in builtin_automatically_routed_apis()}
    router_apis = {entry.router_api for entry in builtin_automatically_routed_apis()}

    providers_with_specs = validate_and_prepare_providers(
        run_config, provider_registry, routing_table_apis, router_apis
    )

    # If the config does not pin the APIs to serve, serve everything configured
    # plus all of the automatically routed APIs.
    if run_config.apis:
        apis_to_serve = run_config.apis
    else:
        apis_to_serve = set(providers_with_specs)
        apis_to_serve.update(api.value for api in routing_table_apis)
        apis_to_serve.update(api.value for api in router_apis)

    providers_with_specs.update(specs_for_autorouted_apis(apis_to_serve))

    ordered = sort_providers_by_deps(providers_with_specs, run_config)
    return await instantiate_providers(ordered, router_apis, dist_registry, run_config, policy, internal_impls)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, dict[str, ProviderWithSpec]]:
    """Generate synthetic provider specs for the automatically routed APIs.

    For every served router API this emits a routing-table provider and an
    auto-routed provider, each registered under the ``__builtin__`` key.
    """
    specs: dict[str, dict[str, ProviderWithSpec]] = {}
    for info in builtin_automatically_routed_apis():
        if info.router_api.value not in apis_to_serve:
            continue

        # The routing table depends on the providers registered under the
        # corresponding "inner-" key.
        table_provider = ProviderWithSpec(
            provider_id="__routing_table__",
            provider_type="__routing_table__",
            config={},
            spec=RoutingTableProviderSpec(
                api=info.routing_table_api,
                router_api=info.router_api,
                module="llama_stack.core.routers",
                api_dependencies=[],
                deps__=[f"inner-{info.router_api.value}"],
            ),
        )
        router_provider = ProviderWithSpec(
            provider_id="__autorouted__",
            provider_type="__autorouted__",
            config={},
            spec=AutoRoutedProviderSpec(
                api=info.router_api,
                module="llama_stack.core.routers",
                routing_table_api=info.routing_table_api,
                api_dependencies=[info.routing_table_api],
                deps__=[info.routing_table_api.value],
            ),
        )

        specs[info.routing_table_api.value] = {"__builtin__": table_provider}
        specs[info.router_api.value] = {"__builtin__": router_provider}

    return specs
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def validate_and_prepare_providers(
    run_config: StackRunConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api]
) -> dict[str, dict[str, ProviderWithSpec]]:
    """Validate configured providers, handle deprecations, and group them by API key."""
    prepared: dict[str, dict[str, ProviderWithSpec]] = {}

    for api_str, providers in run_config.providers.items():
        api = Api(api_str)
        if api in routing_table_apis:
            raise ValueError(f"Provider for `{api_str}` is automatically provided and cannot be overridden")

        specs_by_id: dict[str, ProviderWithSpec] = {}
        for provider in providers:
            # A missing or "__disabled__" provider_id marks the provider as turned off.
            if not provider.provider_id or provider.provider_id == "__disabled__":
                logger.debug(f"Provider `{provider.provider_type}` for API `{api}` is disabled")
                continue

            validate_provider(provider, api, provider_registry)
            registry_spec = provider_registry[api][provider.provider_type]
            registry_spec.deps__ = [dep.value for dep in registry_spec.api_dependencies] + [
                dep.value for dep in registry_spec.optional_api_dependencies
            ]
            specs_by_id[provider.provider_id] = ProviderWithSpec(spec=registry_spec, **provider.model_dump())

        # Providers behind an auto-router are registered under an "inner-" key.
        key = f"inner-{api_str}" if api in router_apis else api_str
        prepared[key] = specs_by_id

    # TODO: remove this logic, telemetry should not have providers.
    # if telemetry has been enabled in the config initialize our internal impl
    # telemetry is not an external API so it SHOULD NOT be auto-routed.
    if run_config.telemetry.enabled:
        telemetry_spec = InlineProviderSpec(
            api=Api.telemetry,
            provider_type="inline::meta-reference",
            pip_packages=[],
            optional_api_dependencies=[Api.datasetio],
            module="llama_stack.providers.inline.telemetry.meta_reference",
            config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
            description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
        )
        prepared["telemetry"] = {
            "meta-reference": ProviderWithSpec(
                spec=telemetry_spec, provider_type="inline::meta-reference", provider_id="meta-reference"
            )
        }

    return prepared
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def validate_provider(provider: Provider, api: Api, provider_registry: ProviderRegistry):
    """Validate that the provider type is registered for the API and surface deprecations.

    Raises:
        ValueError: if the provider type is unknown for this API.
        InvalidProviderError: if the provider carries a deprecation error.
    """
    registry_for_api = provider_registry[api]
    if provider.provider_type not in registry_for_api:
        raise ValueError(f"Provider `{provider.provider_type}` is not available for API `{api}`")

    spec = registry_for_api[provider.provider_type]
    if spec.deprecation_error:
        logger.error(spec.deprecation_error)
        raise InvalidProviderError(spec.deprecation_error)

    if spec.deprecation_warning:
        logger.warning(
            f"Provider `{provider.provider_type}` for API `{api}` is deprecated and will be removed in a future release: {spec.deprecation_warning}",
        )
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def sort_providers_by_deps(
    providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackRunConfig
) -> list[tuple[str, ProviderWithSpec]]:
    """Return (api, provider) pairs ordered so that dependencies come first.

    NOTE(review): ``run_config`` is unused here; kept to preserve the call signature.
    """
    flattened = {api_str: list(by_id.values()) for api_str, by_id in providers_with_specs.items()}
    ordered: list[tuple[str, ProviderWithSpec]] = topological_sort(flattened)

    logger.debug(f"Resolved {len(ordered)} providers")
    for api_str, provider in ordered:
        logger.debug(f" {api_str} => {provider.provider_id}")
    return ordered
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
async def instantiate_providers(
    sorted_providers: list[tuple[str, ProviderWithSpec]],
    router_apis: set[Api],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    policy: list[AccessRule],
    internal_impls: dict[Api, Any] | None = None,
) -> dict[Api, Any]:
    """Instantiate providers in dependency order, threading resolved impls into each one.

    Raises:
        RuntimeError: when a required API dependency has no resolved implementation.
    """
    impls: dict[Api, Any] = dict(internal_impls) if internal_impls else {}
    # Implementations for providers living behind an auto-router, keyed "inner-<api>".
    inner_impls_by_provider_id: dict[str, dict[str, Any]] = {f"inner-{api.value}": {} for api in router_apis}

    for api_str, provider in sorted_providers:
        # Providers without an id were disabled during preparation; skip them.
        if provider.provider_id is None:
            continue

        try:
            deps = {dep: impls[dep] for dep in provider.spec.api_dependencies}
        except KeyError as err:
            missing_api = err.args[0]
            raise RuntimeError(
                f"Failed to resolve '{provider.spec.api.value}' provider '{provider.provider_id}' of type '{provider.spec.provider_type}': "
                f"required dependency '{missing_api.value}' is not available. "
                f"Please add a '{missing_api.value}' provider to your configuration or check if the provider is properly configured."
            ) from err

        # Optional dependencies are included only when an implementation exists.
        deps.update({opt: impls[opt] for opt in provider.spec.optional_api_dependencies if opt in impls})

        inner_impls: dict[str, Any] = {}
        if isinstance(provider.spec, RoutingTableProviderSpec):
            inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"]

        impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config, policy)

        if api_str.startswith("inner-"):
            inner_impls_by_provider_id[api_str][provider.provider_id] = impl
        else:
            impls[Api(api_str)] = impl

    return impls
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def topological_sort(
    providers_with_specs: "dict[str, list[ProviderWithSpec]]",
) -> "list[tuple[str, ProviderWithSpec]]":
    """Order providers so that every dependency appears before its dependents.

    A depth-first traversal over the API keys; dependencies outside the given
    mapping are ignored. Returns the flattened (api, provider) pairs in
    dependency order.
    """
    order: list[str] = []
    seen: set[str] = set()

    def visit(api_str: str, providers) -> None:
        seen.add(api_str)
        dep_names = [dep for provider in providers for dep in provider.spec.deps__]
        for dep in dep_names:
            if dep not in seen and dep in providers_with_specs:
                visit(dep, providers_with_specs[dep])
        order.append(api_str)

    for api_str, providers in providers_with_specs.items():
        if api_str not in seen:
            visit(api_str, providers)

    return [(api_str, provider) for api_str in order for provider in providers_with_specs[api_str]]
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# returns a class implementing the protocol corresponding to the Api
async def instantiate_provider(
    provider: ProviderWithSpec,
    deps: dict[Api, Any],
    inner_impls: dict[str, Any],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    policy: list[AccessRule],
):
    """Import a provider's module and construct its implementation object.

    Dispatches on the provider spec type to pick the module-level factory
    (``get_adapter_impl``, ``get_auto_router_impl``, ``get_routing_table_impl``,
    or ``get_provider_impl``), builds the factory's argument list, awaits it,
    tags the resulting impl with provider metadata, and verifies protocol
    compliance against the API's protocol (and any additional protocol).

    Raises:
        AttributeError: if the spec has no usable ``module`` attribute.
    """
    provider_spec = provider.spec
    if not hasattr(provider_spec, "module") or provider_spec.module is None:
        raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute")

    logger.debug(f"Instantiating provider {provider.provider_id} from {provider_spec.module}")
    module = importlib.import_module(provider_spec.module)

    if isinstance(provider_spec, RemoteProviderSpec):
        config_type = instantiate_class_type(provider_spec.config_class)
        config = config_type(**provider.config)

        fn = module.get_adapter_impl
        args = [config, deps]

        # Remote adapters may optionally accept the access policy.
        if "policy" in inspect.signature(fn).parameters:
            args.append(policy)

    elif isinstance(provider_spec, AutoRoutedProviderSpec):
        fn = module.get_auto_router_impl

        config = None
        args = [provider_spec.api, deps[provider_spec.routing_table_api], deps, run_config, policy]
    elif isinstance(provider_spec, RoutingTableProviderSpec):
        fn = module.get_routing_table_impl

        config = None
        args = [provider_spec.api, inner_impls, deps, dist_registry, policy]
    else:
        fn = module.get_provider_impl

        config_type = instantiate_class_type(provider_spec.config_class)
        config = config_type(**provider.config)
        args = [config, deps]

        # Inspect the factory signature once; inline providers may optionally
        # accept the access policy and/or a telemetry flag. Order matters:
        # policy is appended (positionally) before telemetry_enabled.
        params = inspect.signature(fn).parameters
        if "policy" in params:
            args.append(policy)
        if "telemetry_enabled" in params and run_config.telemetry:
            args.append(run_config.telemetry.enabled)

    impl = await fn(*args)
    impl.__provider_id__ = provider.provider_id
    impl.__provider_spec__ = provider_spec
    impl.__provider_config__ = config

    protocols = api_protocol_map_for_compliance_check(run_config)
    additional_protocols = additional_protocols_map()
    # TODO: check compliance for special tool groups
    # the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol
    check_protocol_compliance(impl, protocols[provider_spec.api])
    if not isinstance(provider_spec, AutoRoutedProviderSpec) and provider_spec.api in additional_protocols:
        additional_api, _, _ = additional_protocols[provider_spec.api]
        check_protocol_compliance(impl, additional_api)

    return impl
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def check_protocol_compliance(obj: Any, protocol: Any) -> None:
    """Verify that ``obj`` genuinely implements every webmethod route of ``protocol``.

    A route fails the check when it is absent, not callable, missing one or
    more protocol parameters in its signature, or defined only by the protocol
    class itself (i.e. an abstract stub with no concrete implementation in the
    object's MRO). Routes whose webmethods include a v1alpha level are exempt.

    Raises:
        ValueError: listing every offending method and the reason it failed.
    """
    failures: list[tuple[str, str]] = []

    mro = type(obj).__mro__
    for name, value in inspect.getmembers(protocol):
        if not (inspect.isfunction(value) and hasattr(value, "__webmethods__")):
            continue

        # if this API has multiple webmethods, and one of them is an alpha API, this
        # API should be skipped when checking for missing or not callable routes
        if any(webmethod.level == LLAMA_STACK_API_V1ALPHA for webmethod in value.__webmethods__):
            continue

        if not hasattr(obj, name):
            failures.append((name, "missing"))
            continue

        candidate = getattr(obj, name)
        if not callable(candidate):
            failures.append((name, "not_callable"))
            continue

        # The implementation's signature must accept at least every parameter
        # declared on the protocol method (self excluded on both sides).
        proto_params = set(inspect.signature(value).parameters) - {"self"}
        obj_params = set(inspect.signature(candidate).parameters) - {"self"}
        if not proto_params <= obj_params:
            logger.error(f"Method {name} incompatible proto: {proto_params} vs. obj: {obj_params}")
            failures.append((name, "signature_mismatch"))
            continue

        # Check the method has a concrete implementation, not just a protocol stub:
        # allow definitions from mixins/parents, reject only when the sole
        # definition in the MRO is the protocol class itself.
        owners = [cls for cls in mro if name in cls.__dict__]
        if len(owners) == 1 and owners[0].__name__ == protocol.__name__:
            failures.append((name, "not_actually_implemented"))

    if failures:
        raise ValueError(
            f"Provider `{obj.__provider_id__} ({obj.__provider_spec__.api})` does not implement the following methods:\n{failures}"
        )
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
async def resolve_remote_stack_impls(
    config: RemoteProviderConfig,
    apis: list[str],
) -> dict[Api, Any]:
    """Build client-side implementations for each requested API of a remote stack.

    Every API name gets a client impl pointed at the remote endpoint; APIs that
    carry an additional protocol also get a second client impl registered under
    the additional API key.
    """
    protocol_map = api_protocol_map()
    extra_protocol_map = additional_protocols_map()

    impls: dict[Api, Any] = {}
    for api_str in apis:
        api = Api(api_str)
        impls[api] = await get_client_impl(protocol_map[api], config, {})

        if api in extra_protocol_map:
            _, extra_protocol, extra_api = extra_protocol_map[api]
            impls[extra_api] = await get_client_impl(extra_protocol, config, {})

    return impls
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from llama_stack.core.datatypes import (
|
|
10
|
+
AccessRule,
|
|
11
|
+
RoutedProtocol,
|
|
12
|
+
)
|
|
13
|
+
from llama_stack.core.stack import StackRunConfig
|
|
14
|
+
from llama_stack.core.store import DistributionRegistry
|
|
15
|
+
from llama_stack.providers.datatypes import Api, RoutingTable
|
|
16
|
+
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def get_routing_table_impl(
    api: Api,
    impls_by_provider_id: dict[str, RoutedProtocol],
    _deps,
    dist_registry: DistributionRegistry,
    policy: list[AccessRule],
) -> Any:
    """Construct and initialize the routing table for a routed API.

    Looks up the routing-table class registered for ``api``, instantiates it
    over the per-provider implementations, and awaits its initialization.

    Raises:
        ValueError: if ``api`` has no registered routing table.
    """
    from ..routing_tables.benchmarks import BenchmarksRoutingTable
    from ..routing_tables.datasets import DatasetsRoutingTable
    from ..routing_tables.models import ModelsRoutingTable
    from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
    from ..routing_tables.shields import ShieldsRoutingTable
    from ..routing_tables.toolgroups import ToolGroupsRoutingTable
    from ..routing_tables.vector_stores import VectorStoresRoutingTable

    table_classes = {
        "models": ModelsRoutingTable,
        "shields": ShieldsRoutingTable,
        "datasets": DatasetsRoutingTable,
        "scoring_functions": ScoringFunctionsRoutingTable,
        "benchmarks": BenchmarksRoutingTable,
        "tool_groups": ToolGroupsRoutingTable,
        "vector_stores": VectorStoresRoutingTable,
    }

    table_cls = table_classes.get(api.value)
    if table_cls is None:
        raise ValueError(f"API {api.value} not found in router map")

    routing_table = table_cls(impls_by_provider_id, dist_registry, policy)
    await routing_table.initialize()
    return routing_table
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
async def get_auto_router_impl(
    api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackRunConfig, policy: list[AccessRule]
) -> Any:
    """Construct and initialize the auto-router for an auto-routed API.

    Picks the router class for ``api``, wires in optional dependencies
    (telemetry for inference when enabled), and attaches per-API config
    (inference store, vector-stores config) before instantiating.

    Raises:
        ValueError: if ``api`` has no registered router, or the inference
            store reference is missing from the run config.
    """
    from .datasets import DatasetIORouter
    from .eval_scoring import EvalRouter, ScoringRouter
    from .inference import InferenceRouter
    from .safety import SafetyRouter
    from .tool_runtime import ToolRuntimeRouter
    from .vector_io import VectorIORouter

    api_to_routers = {
        "vector_io": VectorIORouter,
        "inference": InferenceRouter,
        "safety": SafetyRouter,
        "datasetio": DatasetIORouter,
        "scoring": ScoringRouter,
        "eval": EvalRouter,
        "tool_runtime": ToolRuntimeRouter,
    }
    if api.value not in api_to_routers:
        raise ValueError(f"API {api.value} not found in router map")

    api_to_dep_impl = {}
    # Guard against an unset telemetry config: instantiate_provider uses the
    # same `run_config.telemetry and ...` pattern, so telemetry may be None.
    if run_config.telemetry and run_config.telemetry.enabled:
        api_to_deps = {
            "inference": {"telemetry": Api.telemetry},
        }
        for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
            if dep_api in deps:
                api_to_dep_impl[dep_name] = deps[dep_api]

    # TODO: move pass configs to routers instead
    if api == Api.inference:
        inference_ref = run_config.storage.stores.inference
        if not inference_ref:
            raise ValueError("storage.stores.inference must be configured in run config")

        inference_store = InferenceStore(
            reference=inference_ref,
            policy=policy,
        )
        await inference_store.initialize()
        api_to_dep_impl["store"] = inference_store

    elif api == Api.vector_io:
        api_to_dep_impl["vector_stores_config"] = run_config.vector_stores

    impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
    await impl.initialize()
    return impl
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from llama_stack.apis.common.responses import PaginatedResponse
|
|
10
|
+
from llama_stack.apis.datasetio import DatasetIO
|
|
11
|
+
from llama_stack.apis.datasets import DatasetPurpose, DataSource
|
|
12
|
+
from llama_stack.log import get_logger
|
|
13
|
+
from llama_stack.providers.datatypes import RoutingTable
|
|
14
|
+
|
|
15
|
+
logger = get_logger(name=__name__, category="core::routers")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DatasetIORouter(DatasetIO):
    """Routes DatasetIO calls to the provider that owns each dataset.

    Registration goes through the routing table; row-level operations are
    forwarded to the provider impl resolved for the given ``dataset_id``.
    """

    def __init__(self, routing_table: RoutingTable) -> None:
        logger.debug("Initializing DatasetIORouter")
        self.routing_table = routing_table

    async def initialize(self) -> None:
        """No-op: the router holds no resources of its own."""
        logger.debug("DatasetIORouter.initialize")

    async def shutdown(self) -> None:
        """No-op: nothing to tear down."""
        logger.debug("DatasetIORouter.shutdown")

    async def register_dataset(
        self,
        purpose: DatasetPurpose,
        source: DataSource,
        metadata: dict[str, Any] | None = None,
        dataset_id: str | None = None,
    ) -> None:
        """Register a dataset via the routing table."""
        logger.debug(f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}")
        await self.routing_table.register_dataset(
            purpose=purpose, source=source, metadata=metadata, dataset_id=dataset_id
        )

    async def iterrows(
        self,
        dataset_id: str,
        start_index: int | None = None,
        limit: int | None = None,
    ) -> PaginatedResponse:
        """Fetch a page of rows from the provider that owns ``dataset_id``."""
        logger.debug(f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}")
        dataset_impl = await self.routing_table.get_provider_impl(dataset_id)
        return await dataset_impl.iterrows(dataset_id=dataset_id, start_index=start_index, limit=limit)

    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
        """Append rows to the provider that owns ``dataset_id``."""
        logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows")
        dataset_impl = await self.routing_table.get_provider_impl(dataset_id)
        return await dataset_impl.append_rows(dataset_id=dataset_id, rows=rows)
|