llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from enum import Enum, StrEnum
|
|
8
|
+
from typing import Annotated, Any, Literal, Protocol
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
from llama_stack.apis.resource import Resource, ResourceType
|
|
13
|
+
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
|
|
14
|
+
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DatasetPurpose(StrEnum):
    """
    Purpose of the dataset. Each purpose has a required input data schema.

    :cvar post-training/messages: The dataset contains messages used for post-training.
        {
            "messages": [
                {"role": "user", "content": "Hello, world!"},
                {"role": "assistant", "content": "Hello, world!"},
            ]
        }
    :cvar eval/question-answer: The dataset contains a question column and an answer column.
        {
            "question": "What is the capital of France?",
            "answer": "Paris"
        }
    :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
        {
            "messages": [
                {"role": "user", "content": "Hello, my name is John Doe."},
                {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
                {"role": "user", "content": "What's my name?"},
            ],
            "answer": "John Doe"
        }
    """

    # Member names are Python identifiers; the string values are the
    # "<area>/<schema>" names documented in the docstring above.
    post_training_messages = "post-training/messages"
    eval_question_answer = "eval/question-answer"
    eval_messages_answer = "eval/messages-answer"

    # TODO: add more schemas here
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class DatasetType(Enum):
    """
    Type of the dataset source.

    :cvar uri: The dataset can be obtained from a URI.
    :cvar rows: The dataset is stored in rows.
    """

    # The values mirror the `type` literals declared on URIDataSource and
    # RowsDataSource in this module.
    uri = "uri"
    rows = "rows"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@json_schema_type
class URIDataSource(BaseModel):
    """A dataset that can be obtained from a URI.

    :param type: Discriminator for the data source kind, always "uri"
    :param uri: The dataset can be obtained from a URI. E.g.
        - "https://mywebsite.com/mydata.jsonl"
        - "lsfs://mydata.jsonl"
        - "data:csv;base64,{base64_content}"
    """

    # Fixed literal; used as the discriminator of the DataSource union.
    type: Literal["uri"] = "uri"
    uri: str
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@json_schema_type
class RowsDataSource(BaseModel):
    """A dataset stored in rows.

    :param type: Discriminator for the data source kind, always "rows"
    :param rows: The dataset is stored in rows. E.g.
        - [
            {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
        ]
    """

    # Fixed literal; used as the discriminator of the DataSource union.
    type: Literal["rows"] = "rows"
    # Each row is a free-form mapping of column name to value.
    rows: list[dict[str, Any]]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Tagged union of the supported data sources; the `type` field of the payload
# ("uri" or "rows") selects which model is used.
DataSource = Annotated[
    URIDataSource | RowsDataSource,
    Field(discriminator="type"),
]
# Register the union under the schema name "DataSource".
register_schema(DataSource, name="DataSource")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class CommonDatasetFields(BaseModel):
    """
    Common fields for a dataset.

    Base class for both Dataset and DatasetInput in this module.

    :param purpose: Purpose of the dataset indicating its intended use
    :param source: Data source configuration for the dataset
    :param metadata: Additional metadata for the dataset
    """

    purpose: DatasetPurpose
    source: DataSource
    # default_factory gives every instance its own empty dict.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Any additional metadata for this dataset",
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@json_schema_type
class Dataset(CommonDatasetFields, Resource):
    """Dataset resource for storing and accessing training or evaluation data.

    :param type: Type of resource, always 'dataset' for datasets
    """

    type: Literal[ResourceType.dataset] = ResourceType.dataset

    @property
    def dataset_id(self) -> str:
        """Return the dataset ID, which is this resource's `identifier`."""
        # NOTE(review): `identifier` is presumably declared on Resource — confirm.
        return self.identifier

    @property
    def provider_dataset_id(self) -> str | None:
        """Return this resource's `provider_resource_id`, which may be None."""
        # NOTE(review): `provider_resource_id` is presumably declared on Resource — confirm.
        return self.provider_resource_id
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class DatasetInput(CommonDatasetFields, BaseModel):
    """Input parameters for dataset operations.

    Adds a required `dataset_id` to the purpose, source and metadata fields
    inherited from CommonDatasetFields.

    :param dataset_id: Unique identifier for the dataset
    """

    dataset_id: str
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ListDatasetsResponse(BaseModel):
    """Response from listing datasets.

    Return type of Datasets.list_datasets.

    :param data: List of datasets
    """

    data: list[Dataset]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class Datasets(Protocol):
    """Protocol for registering, retrieving, listing and unregistering datasets.

    Every method carries two `webmethod` decorators for the same route and
    HTTP method: one at the LLAMA_STACK_API_V1 level flagged `deprecated=True`
    and one at the LLAMA_STACK_API_V1BETA level.
    """

    @webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
    async def register_dataset(
        self,
        purpose: DatasetPurpose,
        source: DataSource,
        metadata: dict[str, Any] | None = None,
        dataset_id: str | None = None,
    ) -> Dataset:
        """
        Register a new dataset.

        :param purpose: The purpose of the dataset.
            One of:
            - "post-training/messages": The dataset contains a messages column with list of messages for post-training.
                {
                    "messages": [
                        {"role": "user", "content": "Hello, world!"},
                        {"role": "assistant", "content": "Hello, world!"},
                    ]
                }
            - "eval/question-answer": The dataset contains a question column and an answer column for evaluation.
                {
                    "question": "What is the capital of France?",
                    "answer": "Paris"
                }
            - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation.
                {
                    "messages": [
                        {"role": "user", "content": "Hello, my name is John Doe."},
                        {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
                        {"role": "user", "content": "What's my name?"},
                    ],
                    "answer": "John Doe"
                }
        :param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples:
            - {
                "type": "uri",
                "uri": "https://mywebsite.com/mydata.jsonl"
            }
            - {
                "type": "uri",
                "uri": "lsfs://mydata.jsonl"
            }
            - {
                "type": "uri",
                "uri": "data:csv;base64,{base64_content}"
            }
            - {
                "type": "uri",
                "uri": "huggingface://llamastack/simpleqa?split=train"
            }
            - {
                "type": "rows",
                "rows": [
                    {
                        "messages": [
                            {"role": "user", "content": "Hello, world!"},
                            {"role": "assistant", "content": "Hello, world!"},
                        ]
                    }
                ]
            }
        :param metadata: The metadata for the dataset.
            - E.g. {"description": "My dataset"}.
        :param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
        :returns: A Dataset.
        """
        ...

    # The `:path` suffix on the route parameter is presumably a path converter
    # that lets the ID contain "/" — confirm against the webmethod/router code.
    @webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
    async def get_dataset(
        self,
        dataset_id: str,
    ) -> Dataset:
        """Get a dataset by its ID.

        :param dataset_id: The ID of the dataset to get.
        :returns: A Dataset.
        """
        ...

    @webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
    async def list_datasets(self) -> ListDatasetsResponse:
        """List all datasets.

        :returns: A ListDatasetsResponse.
        """
        ...

    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
    async def unregister_dataset(
        self,
        dataset_id: str,
    ) -> None:
        """Unregister a dataset by its ID.

        :param dataset_id: The ID of the dataset to unregister.
        """
        ...
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from enum import Enum, EnumMeta
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from llama_stack.schema_utils import json_schema_type
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DynamicApiMeta(EnumMeta):
    """Enum metaclass that lets members be registered after class creation.

    Members declared in the class body behave like ordinary enum members.
    Additional members can be registered at runtime with ``add()``; they are
    stored in the regular member map and in ``_dynamic_values`` (keyed by the
    raw value), so lookups by value and iteration can see them.
    """

    def __new__(cls, name, bases, namespace):
        # Snapshot the public names from the class body before EnumMeta
        # processes the namespace.
        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}

        # Create the enum class
        enum_cls = super().__new__(cls, name, bases, namespace)

        # Keep the declared values for reference and start with no dynamic members.
        enum_cls._original_values = original_values
        enum_cls._dynamic_values = {}

        return enum_cls

    def __call__(cls, value):
        """Look up a member by value, falling back to dynamic members.

        Resolution order: regular enum lookup, then the exact value among
        dynamically added members, then the normalized name (lower-cased,
        ``-`` replaced by ``_``).

        :raises ValueError: if no member matches; new APIs must be registered
            explicitly with ``add()``.
        """
        try:
            return super().__call__(value)
        except ValueError as e:
            # Only strings can be normalized into a member name. For anything
            # else keep the standard enum ValueError instead of failing with an
            # AttributeError on ``value.lower()``.
            if not isinstance(value, str):
                raise

            # If this value was already dynamically added, return it
            if value in cls._dynamic_values:
                return cls._dynamic_values[value]

            # Try the normalized member name derived from the value
            member_name = value.lower().replace("-", "_")
            if member_name in cls._member_map_:
                return cls._member_map_[member_name]

            # Never create members implicitly: force users to call Api.add()
            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e

    def __iter__(cls):
        """Iterate over static and dynamic members, each exactly once."""
        # add() appends dynamic members to _member_names_/_member_map_, so the
        # base iterator already yields them. Track what was produced so the
        # _dynamic_values pass does not yield the same member a second time.
        yielded = set()
        for member in super().__iter__():
            yielded.add(member._name_)
            yield member
        for member in getattr(cls, "_dynamic_values", {}).values():
            if member._name_ not in yielded:
                yielded.add(member._name_)
                yield member

    def add(cls, value):
        """
        Add a new API to the enum.
        Used to register external APIs.

        The member name is the value lower-cased with ``-`` replaced by ``_``.
        If a member with that name already exists it is returned unchanged.

        :param value: String value of the API to register.
        :returns: The new or already existing enum member.
        """
        member_name = value.lower().replace("-", "_")

        # If this member name already exists in the enum, return it
        if member_name in cls._member_map_:
            return cls._member_map_[member_name]

        # Build the member by hand, bypassing the normal enum machinery
        member = object.__new__(cls)
        member._name_ = member_name
        member._value_ = value

        # Add it to the enum class
        cls._member_map_[member_name] = member
        cls._member_names_.append(member_name)
        # NOTE(review): overrides the enum's member type for the whole class;
        # kept as in the original — confirm it is still required.
        cls._member_type_ = str

        # Store it in our dynamic values, keyed by the raw (un-normalized) value
        cls._dynamic_values[value] = member

        return member
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@json_schema_type
class Api(Enum, metaclass=DynamicApiMeta):
    """Enumeration of all available APIs in the Llama Stack system.
    :cvar providers: Provider management and configuration
    :cvar inference: Text generation, chat completions, and embeddings
    :cvar safety: Content moderation and safety shields
    :cvar agents: Agent orchestration and execution
    :cvar batches: Batch processing for asynchronous API requests
    :cvar vector_io: Vector database operations and queries
    :cvar datasetio: Dataset input/output operations
    :cvar scoring: Model output evaluation and scoring
    :cvar eval: Model evaluation and benchmarking framework
    :cvar post_training: Fine-tuning and model training
    :cvar tool_runtime: Tool execution and management
    :cvar telemetry: Observability and system monitoring
    :cvar models: Model metadata and management
    :cvar shields: Safety shield implementations
    :cvar vector_stores: Vector stores (only used for routing table)
    :cvar datasets: Dataset creation and management
    :cvar scoring_functions: Scoring function definitions
    :cvar benchmarks: Benchmark suite management
    :cvar tool_groups: Tool group organization
    :cvar files: File storage and management
    :cvar prompts: Prompt versions and management
    :cvar conversations: Conversations API
    :cvar inspect: Built-in system inspection and introspection
    """

    # Every member's value is identical to its name.
    providers = "providers"
    inference = "inference"
    safety = "safety"
    agents = "agents"
    batches = "batches"
    vector_io = "vector_io"
    datasetio = "datasetio"
    scoring = "scoring"
    eval = "eval"
    post_training = "post_training"
    tool_runtime = "tool_runtime"

    telemetry = "telemetry"

    models = "models"
    shields = "shields"
    vector_stores = "vector_stores"  # only used for routing table
    datasets = "datasets"
    scoring_functions = "scoring_functions"
    benchmarks = "benchmarks"
    tool_groups = "tool_groups"
    files = "files"
    prompts = "prompts"
    conversations = "conversations"

    # built-in API
    inspect = "inspect"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@json_schema_type
class Error(BaseModel):
    """
    Error response from the API. Roughly follows RFC 7807.

    :param status: HTTP status code
    :param title: Error title, a short summary of the error which is invariant for an error type
    :param detail: Error detail, a longer human-readable description of the error
    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
    """

    status: int
    title: str
    detail: str
    # Optional; defaults to None when not supplied.
    instance: str | None = None
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class ExternalApiSpec(BaseModel):
    """Specification for an external API implementation.

    `module`, `name` and `protocol` are required; `pip_packages` defaults to
    an empty list.
    """

    module: str = Field(..., description="Python module containing the API implementation")
    name: str = Field(..., description="Name of the API")
    # NOTE(review): a literal [] default relies on pydantic copying mutable
    # defaults per instance — confirm for the pydantic version in use.
    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
    protocol: str = Field(..., description="Name of the protocol class for the API")
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Annotated, Any, Literal, Protocol
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from llama_stack.apis.agents import AgentConfig
|
|
12
|
+
from llama_stack.apis.common.job_types import Job
|
|
13
|
+
from llama_stack.apis.inference import SamplingParams, SystemMessage
|
|
14
|
+
from llama_stack.apis.scoring import ScoringResult
|
|
15
|
+
from llama_stack.apis.scoring_functions import ScoringFnParams
|
|
16
|
+
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
|
17
|
+
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@json_schema_type
class ModelCandidate(BaseModel):
    """Evaluation candidate that is a model.

    :param model: ID of the model to evaluate.
    :param sampling_params: Sampling parameters to use with the model.
    :param system_message: (Optional) System message that supplies instructions or context to the model.
    """

    # Fixed tag; EvalCandidate uses the "type" field as its discriminator.
    type: Literal["model"] = "model"
    model: str
    sampling_params: SamplingParams
    system_message: SystemMessage | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@json_schema_type
class AgentCandidate(BaseModel):
    """Evaluation candidate that is an agent.

    :param config: Configuration of the agent candidate.
    """

    # Fixed tag; EvalCandidate uses the "type" field as its discriminator.
    type: Literal["agent"] = "agent"
    config: AgentConfig
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Union of the candidate kinds; the "type" field selects which variant applies.
EvalCandidate = Annotated[
    ModelCandidate | AgentCandidate,
    Field(discriminator="type"),
]
register_schema(EvalCandidate, name="EvalCandidate")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@json_schema_type
class BenchmarkConfig(BaseModel):
    """Configuration for evaluating a candidate on a benchmark.

    :param eval_candidate: The candidate to evaluate.
    :param scoring_params: Mapping from scoring function id to the parameters for each scoring function you want to run
    :param num_examples: (Optional) Number of examples to evaluate. When omitted, every example in the dataset is evaluated
    """

    eval_candidate: EvalCandidate
    # Defaults to an empty mapping.
    scoring_params: dict[str, ScoringFnParams] = Field(
        default_factory=dict,
        description="Map between scoring function id and parameters for each scoring function you want to run",
    )
    num_examples: int | None = Field(
        default=None,
        description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated",
    )
    # we could optionally add any specific dataset config here
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@json_schema_type
class EvaluateResponse(BaseModel):
    """Result of an evaluation.

    :param generations: Generations produced by the evaluation.
    :param scores: Scores produced by the evaluation.
    """

    generations: list[dict[str, Any]]
    # keyed by scoring function name
    scores: dict[str, ScoringResult]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class Eval(Protocol):
    """Evaluations

    Llama Stack Evaluation API for running evaluations on model and agent candidates.
    """

    # Every endpoint is declared twice: once at the V1 level flagged as
    # deprecated, and once at the V1ALPHA level.

    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs",
        method="POST",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs",
        method="POST",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def run_eval(self, benchmark_id: str, benchmark_config: BenchmarkConfig) -> Job:
        """Run an evaluation on a benchmark.

        :param benchmark_id: ID of the benchmark to run the evaluation on.
        :param benchmark_config: Configuration for the benchmark.
        :returns: The job created to run the evaluation.
        """
        ...

    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/evaluations",
        method="POST",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/evaluations",
        method="POST",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def evaluate_rows(
        self,
        benchmark_id: str,
        input_rows: list[dict[str, Any]],
        scoring_functions: list[str],
        benchmark_config: BenchmarkConfig,
    ) -> EvaluateResponse:
        """Evaluate a list of rows on a benchmark.

        :param benchmark_id: ID of the benchmark to run the evaluation on.
        :param input_rows: Rows to evaluate.
        :param scoring_functions: Scoring functions to use for the evaluation.
        :param benchmark_config: Configuration for the benchmark.
        :returns: EvaluateResponse object containing generations and scores.
        """
        ...

    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
        method="GET",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
        method="GET",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def job_status(
        self,
        benchmark_id: str,
        job_id: str,
    ) -> Job:
        """Get the status of a job.

        :param benchmark_id: ID of the benchmark the job was started for.
        :param job_id: ID of the job to get the status of.
        :returns: The status of the evaluation job.
        """
        ...

    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
        method="DELETE",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
        method="DELETE",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def job_cancel(
        self,
        benchmark_id: str,
        job_id: str,
    ) -> None:
        """Cancel a job.

        :param benchmark_id: ID of the benchmark the job was started for.
        :param job_id: ID of the job to cancel.
        """
        ...

    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
        method="GET",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
        method="GET",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def job_result(
        self,
        benchmark_id: str,
        job_id: str,
    ) -> EvaluateResponse:
        """Get the result of a job.

        :param benchmark_id: ID of the benchmark the job was started for.
        :param job_id: ID of the job to get the result of.
        :returns: The result of the job.
        """
        ...
|