llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -4,12 +4,15 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from .config import HuggingfaceDatasetIOConfig
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
async def get_adapter_impl(
|
|
11
|
-
|
|
10
|
+
async def get_adapter_impl(
|
|
11
|
+
config: HuggingfaceDatasetIOConfig,
|
|
12
|
+
_deps,
|
|
13
|
+
):
|
|
14
|
+
from .huggingface import HuggingfaceDatasetIOImpl
|
|
12
15
|
|
|
13
|
-
impl =
|
|
16
|
+
impl = HuggingfaceDatasetIOImpl(config)
|
|
14
17
|
await impl.initialize()
|
|
15
18
|
return impl
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from llama_stack.core.storage.datatypes import KVStoreReference
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HuggingfaceDatasetIOConfig(BaseModel):
|
|
14
|
+
kvstore: KVStoreReference
|
|
15
|
+
|
|
16
|
+
@classmethod
|
|
17
|
+
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
|
18
|
+
return {
|
|
19
|
+
"kvstore": KVStoreReference(
|
|
20
|
+
backend="kv_default",
|
|
21
|
+
namespace="datasetio::huggingface",
|
|
22
|
+
).model_dump(exclude_none=True)
|
|
23
|
+
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
from typing import Any
|
|
7
|
+
from urllib.parse import parse_qs, urlparse
|
|
8
|
+
|
|
9
|
+
from llama_stack.apis.common.responses import PaginatedResponse
|
|
10
|
+
from llama_stack.apis.datasetio import DatasetIO
|
|
11
|
+
from llama_stack.apis.datasets import Dataset
|
|
12
|
+
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
|
|
13
|
+
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
14
|
+
from llama_stack.providers.utils.pagination import paginate_records
|
|
15
|
+
|
|
16
|
+
from .config import HuggingfaceDatasetIOConfig
|
|
17
|
+
|
|
18
|
+
DATASETS_PREFIX = "datasets:"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def parse_hf_params(dataset_def: Dataset):
|
|
22
|
+
uri = dataset_def.source.uri
|
|
23
|
+
parsed_uri = urlparse(uri)
|
|
24
|
+
params = parse_qs(parsed_uri.query)
|
|
25
|
+
params = {k: v[0] for k, v in params.items()}
|
|
26
|
+
path = parsed_uri.path.lstrip("/")
|
|
27
|
+
|
|
28
|
+
return path, params
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
|
32
|
+
def __init__(self, config: HuggingfaceDatasetIOConfig) -> None:
|
|
33
|
+
self.config = config
|
|
34
|
+
# local registry for keeping track of datasets within the provider
|
|
35
|
+
self.dataset_infos = {}
|
|
36
|
+
self.kvstore = None
|
|
37
|
+
|
|
38
|
+
async def initialize(self) -> None:
|
|
39
|
+
self.kvstore = await kvstore_impl(self.config.kvstore)
|
|
40
|
+
# Load existing datasets from kvstore
|
|
41
|
+
start_key = DATASETS_PREFIX
|
|
42
|
+
end_key = f"{DATASETS_PREFIX}\xff"
|
|
43
|
+
stored_datasets = await self.kvstore.values_in_range(start_key, end_key)
|
|
44
|
+
|
|
45
|
+
for dataset in stored_datasets:
|
|
46
|
+
dataset = Dataset.model_validate_json(dataset)
|
|
47
|
+
self.dataset_infos[dataset.identifier] = dataset
|
|
48
|
+
|
|
49
|
+
async def shutdown(self) -> None: ...
|
|
50
|
+
|
|
51
|
+
async def register_dataset(
|
|
52
|
+
self,
|
|
53
|
+
dataset_def: Dataset,
|
|
54
|
+
) -> None:
|
|
55
|
+
# Store in kvstore
|
|
56
|
+
key = f"{DATASETS_PREFIX}{dataset_def.identifier}"
|
|
57
|
+
await self.kvstore.set(
|
|
58
|
+
key=key,
|
|
59
|
+
value=dataset_def.model_dump_json(),
|
|
60
|
+
)
|
|
61
|
+
self.dataset_infos[dataset_def.identifier] = dataset_def
|
|
62
|
+
|
|
63
|
+
async def unregister_dataset(self, dataset_id: str) -> None:
|
|
64
|
+
key = f"{DATASETS_PREFIX}{dataset_id}"
|
|
65
|
+
await self.kvstore.delete(key=key)
|
|
66
|
+
del self.dataset_infos[dataset_id]
|
|
67
|
+
|
|
68
|
+
async def iterrows(
|
|
69
|
+
self,
|
|
70
|
+
dataset_id: str,
|
|
71
|
+
start_index: int | None = None,
|
|
72
|
+
limit: int | None = None,
|
|
73
|
+
) -> PaginatedResponse:
|
|
74
|
+
import datasets as hf_datasets
|
|
75
|
+
|
|
76
|
+
dataset_def = self.dataset_infos[dataset_id]
|
|
77
|
+
path, params = parse_hf_params(dataset_def)
|
|
78
|
+
loaded_dataset = hf_datasets.load_dataset(path, **params)
|
|
79
|
+
|
|
80
|
+
records = [loaded_dataset[i] for i in range(len(loaded_dataset))]
|
|
81
|
+
return paginate_records(records, start_index, limit)
|
|
82
|
+
|
|
83
|
+
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
|
|
84
|
+
import datasets as hf_datasets
|
|
85
|
+
|
|
86
|
+
dataset_def = self.dataset_infos[dataset_id]
|
|
87
|
+
path, params = parse_hf_params(dataset_def)
|
|
88
|
+
loaded_dataset = hf_datasets.load_dataset(path, **params)
|
|
89
|
+
|
|
90
|
+
# Convert rows to HF Dataset format
|
|
91
|
+
new_dataset = hf_datasets.Dataset.from_list(rows)
|
|
92
|
+
|
|
93
|
+
# Concatenate the new rows with existing dataset
|
|
94
|
+
updated_dataset = hf_datasets.concatenate_datasets([loaded_dataset, new_dataset])
|
|
95
|
+
|
|
96
|
+
if dataset_def.metadata.get("path", None):
|
|
97
|
+
updated_dataset.push_to_hub(dataset_def.metadata["path"])
|
|
98
|
+
else:
|
|
99
|
+
raise NotImplementedError("Uploading to URL-based datasets is not supported yet")
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from .config import NvidiaDatasetIOConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def get_adapter_impl(
|
|
11
|
+
config: NvidiaDatasetIOConfig,
|
|
12
|
+
_deps,
|
|
13
|
+
):
|
|
14
|
+
from .datasetio import NvidiaDatasetIOAdapter
|
|
15
|
+
|
|
16
|
+
if not isinstance(config, NvidiaDatasetIOConfig):
|
|
17
|
+
raise RuntimeError(f"Unexpected config type: {type(config)}")
|
|
18
|
+
|
|
19
|
+
impl = NvidiaDatasetIOAdapter(config)
|
|
20
|
+
return impl
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
__all__ = ["get_adapter_impl", "NvidiaDatasetIOAdapter"]
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import warnings
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NvidiaDatasetIOConfig(BaseModel):
|
|
15
|
+
"""Configuration for NVIDIA DatasetIO implementation."""
|
|
16
|
+
|
|
17
|
+
api_key: str | None = Field(
|
|
18
|
+
default_factory=lambda: os.getenv("NVIDIA_API_KEY"),
|
|
19
|
+
description="The NVIDIA API key.",
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
dataset_namespace: str | None = Field(
|
|
23
|
+
default_factory=lambda: os.getenv("NVIDIA_DATASET_NAMESPACE", "default"),
|
|
24
|
+
description="The NVIDIA dataset namespace.",
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
project_id: str | None = Field(
|
|
28
|
+
default_factory=lambda: os.getenv("NVIDIA_PROJECT_ID", "test-project"),
|
|
29
|
+
description="The NVIDIA project ID.",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
datasets_url: str = Field(
|
|
33
|
+
default_factory=lambda: os.getenv("NVIDIA_DATASETS_URL", "http://nemo.test"),
|
|
34
|
+
description="Base URL for the NeMo Dataset API",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# warning for default values
|
|
38
|
+
def __post_init__(self):
|
|
39
|
+
default_values = []
|
|
40
|
+
if os.getenv("NVIDIA_PROJECT_ID") is None:
|
|
41
|
+
default_values.append("project_id='test-project'")
|
|
42
|
+
if os.getenv("NVIDIA_DATASET_NAMESPACE") is None:
|
|
43
|
+
default_values.append("dataset_namespace='default'")
|
|
44
|
+
if os.getenv("NVIDIA_DATASETS_URL") is None:
|
|
45
|
+
default_values.append("datasets_url='http://nemo.test'")
|
|
46
|
+
|
|
47
|
+
if default_values:
|
|
48
|
+
warnings.warn(
|
|
49
|
+
f"Using default values: {', '.join(default_values)}. \
|
|
50
|
+
Please set the environment variables to avoid this default behavior.",
|
|
51
|
+
stacklevel=2,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
@classmethod
|
|
55
|
+
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
|
56
|
+
return {
|
|
57
|
+
"api_key": "${env.NVIDIA_API_KEY:=}",
|
|
58
|
+
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
|
|
59
|
+
"project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
|
|
60
|
+
"datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}",
|
|
61
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import aiohttp
|
|
10
|
+
|
|
11
|
+
from llama_stack.apis.common.content_types import URL
|
|
12
|
+
from llama_stack.apis.common.responses import PaginatedResponse
|
|
13
|
+
from llama_stack.apis.common.type_system import ParamType
|
|
14
|
+
from llama_stack.apis.datasets import Dataset
|
|
15
|
+
|
|
16
|
+
from .config import NvidiaDatasetIOConfig
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class NvidiaDatasetIOAdapter:
|
|
20
|
+
"""Nvidia NeMo DatasetIO API."""
|
|
21
|
+
|
|
22
|
+
def __init__(self, config: NvidiaDatasetIOConfig):
|
|
23
|
+
self.config = config
|
|
24
|
+
self.headers = {}
|
|
25
|
+
|
|
26
|
+
async def _make_request(
|
|
27
|
+
self,
|
|
28
|
+
method: str,
|
|
29
|
+
path: str,
|
|
30
|
+
headers: dict[str, Any] | None = None,
|
|
31
|
+
params: dict[str, Any] | None = None,
|
|
32
|
+
json: dict[str, Any] | None = None,
|
|
33
|
+
**kwargs,
|
|
34
|
+
) -> dict[str, Any]:
|
|
35
|
+
"""Helper method to make HTTP requests to the Customizer API."""
|
|
36
|
+
url = f"{self.config.datasets_url}{path}"
|
|
37
|
+
request_headers = self.headers.copy()
|
|
38
|
+
|
|
39
|
+
# Set default Content-Type for JSON requests
|
|
40
|
+
if json is not None:
|
|
41
|
+
request_headers["Content-Type"] = "application/json"
|
|
42
|
+
|
|
43
|
+
if headers:
|
|
44
|
+
request_headers.update(headers)
|
|
45
|
+
|
|
46
|
+
async with aiohttp.ClientSession(headers=request_headers) as session:
|
|
47
|
+
async with session.request(method, url, params=params, json=json, **kwargs) as response:
|
|
48
|
+
if response.status != 200:
|
|
49
|
+
error_data = await response.json()
|
|
50
|
+
raise Exception(f"API request failed: {error_data}")
|
|
51
|
+
return await response.json()
|
|
52
|
+
|
|
53
|
+
async def register_dataset(
|
|
54
|
+
self,
|
|
55
|
+
dataset_def: Dataset,
|
|
56
|
+
) -> Dataset:
|
|
57
|
+
"""Register a new dataset.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
dataset_def [Dataset]: The dataset definition.
|
|
61
|
+
dataset_id [str]: The ID of the dataset.
|
|
62
|
+
source [DataSource]: The source of the dataset.
|
|
63
|
+
metadata [Dict[str, Any]]: The metadata of the dataset.
|
|
64
|
+
format [str]: The format of the dataset.
|
|
65
|
+
description [str]: The description of the dataset.
|
|
66
|
+
Returns:
|
|
67
|
+
Dataset
|
|
68
|
+
"""
|
|
69
|
+
# add warnings for unsupported params
|
|
70
|
+
request_body = {
|
|
71
|
+
"name": dataset_def.identifier,
|
|
72
|
+
"namespace": self.config.dataset_namespace,
|
|
73
|
+
"files_url": dataset_def.source.uri,
|
|
74
|
+
"project": self.config.project_id,
|
|
75
|
+
}
|
|
76
|
+
if dataset_def.metadata:
|
|
77
|
+
request_body["format"] = dataset_def.metadata.get("format")
|
|
78
|
+
request_body["description"] = dataset_def.metadata.get("description")
|
|
79
|
+
await self._make_request(
|
|
80
|
+
"POST",
|
|
81
|
+
"/v1/datasets",
|
|
82
|
+
json=request_body,
|
|
83
|
+
)
|
|
84
|
+
return dataset_def
|
|
85
|
+
|
|
86
|
+
async def update_dataset(
|
|
87
|
+
self,
|
|
88
|
+
dataset_id: str,
|
|
89
|
+
dataset_schema: dict[str, ParamType],
|
|
90
|
+
url: URL,
|
|
91
|
+
provider_dataset_id: str | None = None,
|
|
92
|
+
provider_id: str | None = None,
|
|
93
|
+
metadata: dict[str, Any] | None = None,
|
|
94
|
+
) -> None:
|
|
95
|
+
raise NotImplementedError("Not implemented")
|
|
96
|
+
|
|
97
|
+
async def unregister_dataset(
|
|
98
|
+
self,
|
|
99
|
+
dataset_id: str,
|
|
100
|
+
) -> None:
|
|
101
|
+
await self._make_request(
|
|
102
|
+
"DELETE",
|
|
103
|
+
f"/v1/datasets/{self.config.dataset_namespace}/{dataset_id}",
|
|
104
|
+
headers={"Accept": "application/json", "Content-Type": "application/json"},
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
async def iterrows(
|
|
108
|
+
self,
|
|
109
|
+
dataset_id: str,
|
|
110
|
+
start_index: int | None = None,
|
|
111
|
+
limit: int | None = None,
|
|
112
|
+
) -> PaginatedResponse:
|
|
113
|
+
raise NotImplementedError("Not implemented")
|
|
114
|
+
|
|
115
|
+
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
|
|
116
|
+
raise NotImplementedError("Not implemented")
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from llama_stack.core.datatypes import Api
|
|
9
|
+
|
|
10
|
+
from .config import NVIDIAEvalConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
async def get_adapter_impl(
|
|
14
|
+
config: NVIDIAEvalConfig,
|
|
15
|
+
deps: dict[Api, Any],
|
|
16
|
+
):
|
|
17
|
+
from .eval import NVIDIAEvalImpl
|
|
18
|
+
|
|
19
|
+
impl = NVIDIAEvalImpl(
|
|
20
|
+
config,
|
|
21
|
+
deps[Api.datasetio],
|
|
22
|
+
deps[Api.datasets],
|
|
23
|
+
deps[Api.scoring],
|
|
24
|
+
deps[Api.inference],
|
|
25
|
+
deps[Api.agents],
|
|
26
|
+
)
|
|
27
|
+
await impl.initialize()
|
|
28
|
+
return impl
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
__all__ = ["get_adapter_impl", "NVIDIAEvalImpl"]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
import os
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NVIDIAEvalConfig(BaseModel):
|
|
13
|
+
"""
|
|
14
|
+
Configuration for the NVIDIA NeMo Evaluator microservice endpoint.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
evaluator_url (str): A base url for accessing the NVIDIA evaluation endpoint, e.g. http://localhost:8000.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
evaluator_url: str = Field(
|
|
21
|
+
default_factory=lambda: os.getenv("NVIDIA_EVALUATOR_URL", "http://0.0.0.0:7331"),
|
|
22
|
+
description="The url for accessing the evaluator service",
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
@classmethod
|
|
26
|
+
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
|
27
|
+
return {
|
|
28
|
+
"evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}",
|
|
29
|
+
}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from llama_stack.apis.agents import Agents
|
|
11
|
+
from llama_stack.apis.benchmarks import Benchmark
|
|
12
|
+
from llama_stack.apis.datasetio import DatasetIO
|
|
13
|
+
from llama_stack.apis.datasets import Datasets
|
|
14
|
+
from llama_stack.apis.inference import Inference
|
|
15
|
+
from llama_stack.apis.scoring import Scoring, ScoringResult
|
|
16
|
+
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
|
|
17
|
+
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
|
18
|
+
|
|
19
|
+
from .....apis.common.job_types import Job, JobStatus
|
|
20
|
+
from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
|
|
21
|
+
from .config import NVIDIAEvalConfig
|
|
22
|
+
|
|
23
|
+
DEFAULT_NAMESPACE = "nvidia"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class NVIDIAEvalImpl(
    Eval,
    BenchmarksProtocolPrivate,
    ModelRegistryHelper,
):
    """Eval provider backed by the NVIDIA NeMo Evaluator microservice.

    Benchmarks are registered as evaluator "configs" under ``DEFAULT_NAMESPACE``
    and evaluation runs are delegated to the remote service via its REST API.
    """

    def __init__(
        self,
        config: NVIDIAEvalConfig,
        datasetio_api: DatasetIO,
        datasets_api: Datasets,
        scoring_api: Scoring,
        inference_api: Inference,
        agents_api: Agents,
    ) -> None:
        self.config = config
        self.datasetio_api = datasetio_api
        self.datasets_api = datasets_api
        self.scoring_api = scoring_api
        self.inference_api = inference_api
        self.agents_api = agents_api

        ModelRegistryHelper.__init__(self)

    async def initialize(self) -> None: ...

    async def shutdown(self) -> None: ...

    # NOTE(review): these helpers call the blocking `requests` library from
    # async methods, which stalls the event loop for the duration of each
    # HTTP round-trip. Consider an async HTTP client or a thread executor.
    async def _evaluator_get(self, path: str):
        """Helper for making GET requests to the evaluator service."""
        response = requests.get(url=f"{self.config.evaluator_url}{path}")
        response.raise_for_status()
        return response.json()

    async def _evaluator_post(self, path: str, data: dict[str, Any]):
        """Helper for making POST requests to the evaluator service."""
        response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data)
        response.raise_for_status()
        return response.json()

    async def _evaluator_delete(self, path: str) -> None:
        """Helper for making DELETE requests to the evaluator service."""
        response = requests.delete(url=f"{self.config.evaluator_url}{path}")
        response.raise_for_status()

    async def register_benchmark(self, task_def: Benchmark) -> None:
        """Register a benchmark as an evaluation configuration."""
        await self._evaluator_post(
            "/v1/evaluation/configs",
            {
                "namespace": DEFAULT_NAMESPACE,
                "name": task_def.benchmark_id,
                # metadata is copied to request body as-is
                **task_def.metadata,
            },
        )

    async def unregister_benchmark(self, benchmark_id: str) -> None:
        """Unregister a benchmark evaluation configuration from NeMo Evaluator."""
        await self._evaluator_delete(f"/v1/evaluation/configs/{DEFAULT_NAMESPACE}/{benchmark_id}")

    async def run_eval(
        self,
        benchmark_id: str,
        benchmark_config: BenchmarkConfig,
    ) -> Job:
        """Run an evaluation job for a benchmark.

        The candidate model id is translated to the provider-side model id
        when a mapping is registered; otherwise it is used verbatim.
        """
        model = (
            benchmark_config.eval_candidate.model
            if benchmark_config.eval_candidate.type == "model"
            else benchmark_config.eval_candidate.config.model
        )
        nvidia_model = self.get_provider_model_id(model) or model

        result = await self._evaluator_post(
            "/v1/evaluation/jobs",
            {
                "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
                "target": {"type": "model", "model": nvidia_model},
            },
        )

        return Job(job_id=result["id"], status=JobStatus.in_progress)

    async def evaluate_rows(
        self,
        benchmark_id: str,
        input_rows: list[dict[str, Any]],
        scoring_functions: list[str],
        benchmark_config: BenchmarkConfig,
    ) -> EvaluateResponse:
        # Row-level evaluation is not supported by this provider.
        raise NotImplementedError()

    async def job_status(self, benchmark_id: str, job_id: str) -> Job:
        """Get the status of an evaluation job.

        EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed".
        JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed"
        """
        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}")
        # Evaluator states not listed below (e.g. "cancelling", "failed")
        # intentionally map to JobStatus.failed.
        status_map = {
            "created": JobStatus.scheduled,
            "pending": JobStatus.scheduled,
            "running": JobStatus.in_progress,
            "completed": JobStatus.completed,
            "cancelled": JobStatus.cancelled,
        }
        return Job(job_id=job_id, status=status_map.get(result["status"], JobStatus.failed))

    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
        """Cancel the evaluation job."""
        await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {})

    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
        """Returns the results of the evaluation job.

        Raises:
            ValueError: if the job has not completed successfully.
        """
        job = await self.job_status(benchmark_id, job_id)
        status = job.status
        if status != JobStatus.completed:
            # Fix: guard the `.value` access so an unset status reports
            # "unknown" instead of raising AttributeError while formatting.
            status_label = status.value if status else "unknown"
            raise ValueError(f"Job {job_id} not completed. Status: {status_label}")

        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results")

        return EvaluateResponse(
            # TODO: these are stored in detailed results on NeMo Evaluator side; can be added
            generations=[],
            scores={
                benchmark_id: ScoringResult(
                    score_rows=[],
                    aggregated_results=result,
                )
            },
        )
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from llama_stack.core.datatypes import AccessRule, Api
|
|
10
|
+
|
|
11
|
+
from .config import S3FilesImplConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def get_adapter_impl(config: S3FilesImplConfig, deps: dict[Api, Any], policy: list[AccessRule] | None = None):
    """Construct, initialize, and return the S3-backed files provider."""
    # Imported lazily so the adapter module stays importable without S3 deps.
    from .files import S3FilesImpl

    access_policy = policy if policy else []
    provider = S3FilesImpl(config, access_policy)
    await provider.initialize()
    return provider
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from llama_stack.core.storage.datatypes import SqlStoreReference
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class S3FilesImplConfig(BaseModel):
    """Settings for the S3-backed files provider (AWS S3 or any S3-compatible store)."""

    # Bucket names are globally unique across AWS, so no default is provided.
    bucket_name: str = Field(description="S3 bucket name to store files")
    region: str = Field(description="AWS region where the bucket is located", default="us-east-1")
    # Credentials may be omitted entirely when the runtime relies on IAM roles.
    aws_access_key_id: str | None = Field(
        description="AWS access key ID (optional if using IAM roles)", default=None
    )
    aws_secret_access_key: str | None = Field(
        description="AWS secret access key (optional if using IAM roles)", default=None
    )
    # Overriding the endpoint points the client at MinIO, LocalStack, etc.
    endpoint_url: str | None = Field(
        description="Custom S3 endpoint URL (for MinIO, LocalStack, etc.)", default=None
    )
    auto_create_bucket: bool = Field(
        description="Automatically create the S3 bucket if it doesn't exist", default=False
    )
    metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata")

    @classmethod
    def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
        """Return a sample run config; ${env...} templates are expanded by the stack at load time."""
        metadata_ref = SqlStoreReference(
            backend="sql_default",
            table_name="s3_files_metadata",
        )
        return {
            "bucket_name": "${env.S3_BUCKET_NAME}",  # no default, buckets must be globally unique
            "region": "${env.AWS_REGION:=us-east-1}",
            "aws_access_key_id": "${env.AWS_ACCESS_KEY_ID:=}",
            "aws_secret_access_key": "${env.AWS_SECRET_ACCESS_KEY:=}",
            "endpoint_url": "${env.S3_ENDPOINT_URL:=}",
            "auto_create_bucket": "${env.S3_AUTO_CREATE_BUCKET:=false}",
            "metadata_store": metadata_ref.model_dump(exclude_none=True),
        }
|