llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff compares the contents of two publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
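
The most visible change for downstream code is the `llama_stack.distribution` → `llama_stack.core` package move recorded in the renames above (for example, `llama_stack/{distribution → core}/utils/config_dirs.py`). Below is a minimal sketch of the corresponding import update; the module paths are taken from the file list, while the try/except fallback is illustrative and not something the package itself provides:

```python
# Illustrative only: adapt imports across the 0.0.42 -> 0.3.4 rename of
# llama_stack.distribution to llama_stack.core (see the renames listed above).
try:
    from llama_stack.core.utils import config_dirs  # 0.3.4 layout
except ImportError:
    from llama_stack.distribution.utils import config_dirs  # 0.0.42 layout
```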
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
import warnings
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Any, Literal
|
|
9
|
+
|
|
10
|
+
import aiohttp
|
|
11
|
+
from pydantic import BaseModel, ConfigDict
|
|
12
|
+
|
|
13
|
+
from llama_stack.apis.post_training import (
|
|
14
|
+
AlgorithmConfig,
|
|
15
|
+
DPOAlignmentConfig,
|
|
16
|
+
JobStatus,
|
|
17
|
+
PostTrainingJob,
|
|
18
|
+
PostTrainingJobArtifactsResponse,
|
|
19
|
+
PostTrainingJobStatusResponse,
|
|
20
|
+
TrainingConfig,
|
|
21
|
+
)
|
|
22
|
+
from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
|
|
23
|
+
from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
|
|
24
|
+
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
|
25
|
+
|
|
26
|
+
from .models import _MODEL_ENTRIES
|
|
27
|
+
|
|
28
|
+
# Map API status to JobStatus enum
|
|
29
|
+
STATUS_MAPPING = {
|
|
30
|
+
"running": JobStatus.in_progress.value,
|
|
31
|
+
"completed": JobStatus.completed.value,
|
|
32
|
+
"failed": JobStatus.failed.value,
|
|
33
|
+
"cancelled": JobStatus.cancelled.value,
|
|
34
|
+
"pending": JobStatus.scheduled.value,
|
|
35
|
+
"unknown": JobStatus.scheduled.value,
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class NvidiaPostTrainingJob(PostTrainingJob):
|
|
40
|
+
"""Parse the response from the Customizer API.
|
|
41
|
+
Inherits job_uuid from PostTrainingJob.
|
|
42
|
+
Adds status, created_at, updated_at parameters.
|
|
43
|
+
Passes through all other parameters from data field in the response.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
model_config = ConfigDict(extra="allow")
|
|
47
|
+
status: JobStatus
|
|
48
|
+
created_at: datetime
|
|
49
|
+
updated_at: datetime
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ListNvidiaPostTrainingJobs(BaseModel):
|
|
53
|
+
data: list[NvidiaPostTrainingJob]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class NvidiaPostTrainingJobStatusResponse(PostTrainingJobStatusResponse):
|
|
57
|
+
model_config = ConfigDict(extra="allow")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class NvidiaPostTrainingAdapter(ModelRegistryHelper):
|
|
61
|
+
def __init__(self, config: NvidiaPostTrainingConfig):
|
|
62
|
+
self.config = config
|
|
63
|
+
self.headers = {}
|
|
64
|
+
if config.api_key:
|
|
65
|
+
self.headers["Authorization"] = f"Bearer {config.api_key}"
|
|
66
|
+
|
|
67
|
+
self.timeout = aiohttp.ClientTimeout(total=config.timeout)
|
|
68
|
+
# TODO: filter by available models based on /config endpoint
|
|
69
|
+
ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
|
|
70
|
+
self.session = None
|
|
71
|
+
|
|
72
|
+
self.customizer_url = config.customizer_url
|
|
73
|
+
if not self.customizer_url:
|
|
74
|
+
warnings.warn("Customizer URL is not set, using default value: http://nemo.test", stacklevel=2)
|
|
75
|
+
self.customizer_url = "http://nemo.test"
|
|
76
|
+
|
|
77
|
+
async def _get_session(self) -> aiohttp.ClientSession:
|
|
78
|
+
if self.session is None or self.session.closed:
|
|
79
|
+
self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout)
|
|
80
|
+
return self.session
|
|
81
|
+
|
|
82
|
+
async def _make_request(
|
|
83
|
+
self,
|
|
84
|
+
method: str,
|
|
85
|
+
path: str,
|
|
86
|
+
headers: dict[str, Any] | None = None,
|
|
87
|
+
params: dict[str, Any] | None = None,
|
|
88
|
+
json: dict[str, Any] | None = None,
|
|
89
|
+
**kwargs,
|
|
90
|
+
) -> dict[str, Any]:
|
|
91
|
+
"""Helper method to make HTTP requests to the Customizer API."""
|
|
92
|
+
url = f"{self.customizer_url}{path}"
|
|
93
|
+
request_headers = self.headers.copy()
|
|
94
|
+
|
|
95
|
+
if headers:
|
|
96
|
+
request_headers.update(headers)
|
|
97
|
+
|
|
98
|
+
# Add content-type header for JSON requests
|
|
99
|
+
if json and "Content-Type" not in request_headers:
|
|
100
|
+
request_headers["Content-Type"] = "application/json"
|
|
101
|
+
|
|
102
|
+
session = await self._get_session()
|
|
103
|
+
for _ in range(self.config.max_retries):
|
|
104
|
+
async with session.request(method, url, params=params, json=json, **kwargs) as response:
|
|
105
|
+
if response.status >= 400:
|
|
106
|
+
error_data = await response.json()
|
|
107
|
+
raise Exception(f"API request failed: {error_data}")
|
|
108
|
+
return await response.json()
|
|
109
|
+
|
|
110
|
+
async def get_training_jobs(
|
|
111
|
+
self,
|
|
112
|
+
page: int | None = 1,
|
|
113
|
+
page_size: int | None = 10,
|
|
114
|
+
sort: Literal["created_at", "-created_at"] | None = "created_at",
|
|
115
|
+
) -> ListNvidiaPostTrainingJobs:
|
|
116
|
+
"""Get all customization jobs.
|
|
117
|
+
Updated the base class return type from ListPostTrainingJobsResponse to ListNvidiaPostTrainingJobs.
|
|
118
|
+
|
|
119
|
+
Returns a ListNvidiaPostTrainingJobs object with the following fields:
|
|
120
|
+
- data: List[NvidiaPostTrainingJob] - List of NvidiaPostTrainingJob objects
|
|
121
|
+
|
|
122
|
+
ToDo: Support for schema input for filtering.
|
|
123
|
+
"""
|
|
124
|
+
params = {"page": page, "page_size": page_size, "sort": sort}
|
|
125
|
+
|
|
126
|
+
response = await self._make_request("GET", "/v1/customization/jobs", params=params)
|
|
127
|
+
|
|
128
|
+
jobs = []
|
|
129
|
+
for job in response.get("data", []):
|
|
130
|
+
job_id = job.pop("id")
|
|
131
|
+
job_status = job.pop("status", "scheduled").lower()
|
|
132
|
+
+            mapped_status = STATUS_MAPPING.get(job_status, "scheduled")
+
+            # Convert string timestamps to datetime objects
+            created_at = (
+                datetime.fromisoformat(job.pop("created_at"))
+                if "created_at" in job
+                else datetime.now(tz=timezone.utc)
+            )
+            updated_at = (
+                datetime.fromisoformat(job.pop("updated_at"))
+                if "updated_at" in job
+                else datetime.now(tz=timezone.utc)
+            )
+
+            # Create NvidiaPostTrainingJob instance
+            jobs.append(
+                NvidiaPostTrainingJob(
+                    job_uuid=job_id,
+                    status=JobStatus(mapped_status),
+                    created_at=created_at,
+                    updated_at=updated_at,
+                    **job,
+                )
+            )
+
+        return ListNvidiaPostTrainingJobs(data=jobs)
+
+    async def get_training_job_status(self, job_uuid: str) -> NvidiaPostTrainingJobStatusResponse:
+        """Get the status of a customization job.
+        Updates the base class return type from PostTrainingJobStatusResponse to NvidiaPostTrainingJobStatusResponse.
+
+        Returns a NvidiaPostTrainingJobStatusResponse object with the following fields:
+        - job_uuid: str - Unique identifier for the job
+        - status: JobStatus - Current status of the job (in_progress, completed, failed, cancelled, scheduled)
+        - created_at: datetime - The time when the job was created
+        - updated_at: datetime - The last time the job status was updated
+
+        Additional fields that may be included:
+        - steps_completed: Optional[int] - Number of training steps completed
+        - epochs_completed: Optional[int] - Number of epochs completed
+        - percentage_done: Optional[float] - Percentage of training completed (0-100)
+        - best_epoch: Optional[int] - The epoch with the best performance
+        - train_loss: Optional[float] - Training loss of the best checkpoint
+        - val_loss: Optional[float] - Validation loss of the best checkpoint
+        - metrics: Optional[Dict] - Additional training metrics
+        - status_logs: Optional[List] - Detailed logs of status changes
+        """
+        response = await self._make_request(
+            "GET",
+            f"/v1/customization/jobs/{job_uuid}/status",
+            params={"job_id": job_uuid},
+        )
+
+        api_status = response.pop("status").lower()
+        mapped_status = STATUS_MAPPING.get(api_status, "scheduled")
+
+        return NvidiaPostTrainingJobStatusResponse(
+            status=JobStatus(mapped_status),
+            job_uuid=job_uuid,
+            started_at=datetime.fromisoformat(response.pop("created_at")),
+            updated_at=datetime.fromisoformat(response.pop("updated_at")),
+            **response,
+        )
+
+    async def cancel_training_job(self, job_uuid: str) -> None:
+        await self._make_request(
+            method="POST", path=f"/v1/customization/jobs/{job_uuid}/cancel", params={"job_id": job_uuid}
+        )
+
+    async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
+        raise NotImplementedError("Job artifacts are not implemented yet")
+
+    async def get_post_training_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
+        raise NotImplementedError("Job artifacts are not implemented yet")
+
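The status API above maps NeMo Customizer job states onto JobStatus via STATUS_MAPPING, which lends itself to a simple polling loop. A minimal sketch, assuming `adapter` is an already-initialized instance of this provider and that JobStatus is the string-valued enum implied by JobStatus(mapped_status); nothing below is part of the diff itself:

```python
import asyncio


async def wait_for_job(adapter, job_uuid: str, poll_seconds: float = 30.0):
    """Poll get_training_job_status until the job reaches a terminal state."""
    while True:
        status = await adapter.get_training_job_status(job_uuid=job_uuid)
        # percentage_done is one of the optional NeMo-specific fields documented above
        done = getattr(status, "percentage_done", None)
        print(f"{job_uuid}: {status.status.value} ({done}% done)")
        if status.status.value in ("completed", "failed", "cancelled"):
            return status
        await asyncio.sleep(poll_seconds)
```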
+    async def supervised_fine_tune(
+        self,
+        job_uuid: str,
+        training_config: dict[str, Any],
+        hyperparam_search_config: dict[str, Any],
+        logger_config: dict[str, Any],
+        model: str,
+        checkpoint_dir: str | None,
+        algorithm_config: AlgorithmConfig | None = None,
+    ) -> NvidiaPostTrainingJob:
+        """
+        Fine-tunes a model on a dataset.
+        Currently only supports LoRA fine-tuning in the standalone docker container.
+        Assumptions:
+        - the NeMo microservice is running and its endpoint is set in config.customizer_url
+        - the dataset is registered separately in the NeMo datastore
+        - the model checkpoint is downloaded as required by the NeMo Customizer
+
+        Parameters:
+        training_config: TrainingConfig - Configuration for training
+        model: str - NeMo Customizer configuration name
+        algorithm_config: Optional[AlgorithmConfig] - Algorithm-specific configuration
+        checkpoint_dir: Optional[str] - Directory containing model checkpoints, currently ignored
+        job_uuid: str - Unique identifier for the job, currently ignored
+        hyperparam_search_config: Dict[str, Any] - Configuration for hyperparameter search, currently ignored
+        logger_config: Dict[str, Any] - Configuration for logging, currently ignored
+
+        Environment Variables:
+        - NVIDIA_API_KEY: str - API key for the NVIDIA API
+            Default: None
+        - NVIDIA_DATASET_NAMESPACE: str - Namespace of the dataset
+            Default: "default"
+        - NVIDIA_CUSTOMIZER_URL: str - URL of the NeMo Customizer API
+            Default: "http://nemo.test"
+        - NVIDIA_PROJECT_ID: str - ID of the project
+            Default: "test-project"
+        - NVIDIA_OUTPUT_MODEL_DIR: str - Directory to save the output model
+            Default: "test-example-model@v1"
+
+        Supported models:
+        - meta/llama-3.1-8b-instruct
+        - meta/llama-3.2-1b-instruct
+
+        Supported algorithm configs:
+        - LoRA, SFT
+
+        Supported Parameters:
+        - TrainingConfig:
+            - n_epochs: int - Number of epochs to train
+                Default: 50
+            - data_config: DataConfig - Configuration for the dataset
+            - optimizer_config: OptimizerConfig - Configuration for the optimizer
+            - dtype: str - Data type for training
+                not supported (users are informed via warnings)
+            - efficiency_config: EfficiencyConfig - Configuration for efficiency
+                not supported
+            - max_steps_per_epoch: int - Maximum number of steps per epoch
+                Default: 1000
+            ## NeMo Customizer specific parameters
+            - log_every_n_steps: int - Log every n steps
+                Default: None
+            - val_check_interval: float - Validation check interval
+                Default: 0.25
+            - sequence_packing_enabled: bool - Sequence packing enabled
+                Default: False
+            ## NeMo Customizer specific SFT parameters
+            - hidden_dropout: float - Hidden dropout
+                Default: None (0.0-1.0)
+            - attention_dropout: float - Attention dropout
+                Default: None (0.0-1.0)
+            - ffn_dropout: float - FFN dropout
+                Default: None (0.0-1.0)
+
+        - DataConfig:
+            - dataset_id: str - Dataset ID
+            - batch_size: int - Batch size
+                Default: 8
+
+        - OptimizerConfig:
+            - lr: float - Learning rate
+                Default: 0.0001
+            ## NeMo Customizer specific parameter
+            - weight_decay: float - Weight decay
+                Default: 0.01
+
+        - LoRA config:
+            ## NeMo Customizer specific LoRA parameters
+            - alpha: int - Scaling factor for the LoRA update
+                Default: 16
+
+        Note:
+        - checkpoint_dir, hyperparam_search_config, logger_config are not supported; users are informed via warnings
+        - Some parameters from TrainingConfig, DataConfig, OptimizerConfig are not supported; users are informed via warnings
+        """
+
+        # Check for unsupported method parameters
+        unsupported_method_params = []
+        if checkpoint_dir:
+            unsupported_method_params.append(f"checkpoint_dir={checkpoint_dir}")
+        if hyperparam_search_config:
+            unsupported_method_params.append("hyperparam_search_config")
+        if logger_config:
+            unsupported_method_params.append("logger_config")
+
+        if unsupported_method_params:
+            warnings.warn(
+                f"Parameters: {', '.join(unsupported_method_params)} are not supported and will be ignored",
+                stacklevel=2,
+            )
+
+        # Define all supported parameters
+        supported_params = {
+            "training_config": {
+                "n_epochs",
+                "data_config",
+                "optimizer_config",
+                "log_every_n_steps",
+                "val_check_interval",
+                "sequence_packing_enabled",
+                "hidden_dropout",
+                "attention_dropout",
+                "ffn_dropout",
+            },
+            "data_config": {"dataset_id", "batch_size"},
+            "optimizer_config": {"lr", "weight_decay"},
+            "lora_config": {"type", "alpha"},
+        }
+
+        # Validate all parameters at once
+        warn_unsupported_params(training_config, supported_params["training_config"], "TrainingConfig")
+        warn_unsupported_params(training_config["data_config"], supported_params["data_config"], "DataConfig")
+        warn_unsupported_params(
+            training_config["optimizer_config"], supported_params["optimizer_config"], "OptimizerConfig"
+        )
+
+        output_model = self.config.output_model_dir
+
+        # Prepare base job configuration
+        job_config = {
+            "config": model,
+            "dataset": {
+                "name": training_config["data_config"]["dataset_id"],
+                "namespace": self.config.dataset_namespace,
+            },
+            "hyperparameters": {
+                "training_type": "sft",
+                "finetuning_type": "lora",
+                **{
+                    k: v
+                    for k, v in {
+                        "epochs": training_config.get("n_epochs"),
+                        "batch_size": training_config["data_config"].get("batch_size"),
+                        "learning_rate": training_config["optimizer_config"].get("lr"),
+                        "weight_decay": training_config["optimizer_config"].get("weight_decay"),
+                        "log_every_n_steps": training_config.get("log_every_n_steps"),
+                        "val_check_interval": training_config.get("val_check_interval"),
+                        "sequence_packing_enabled": training_config.get("sequence_packing_enabled"),
+                    }.items()
+                    if v is not None
+                },
+            },
+            "project": self.config.project_id,
+            # TODO: ignored ownership, add it later
+            # "ownership": {"created_by": self.config.user_id, "access_policies": self.config.access_policies},
+            "output_model": output_model,
+        }
+
+        # Handle SFT-specific optional parameters
+        job_config["hyperparameters"]["sft"] = {
+            k: v
+            for k, v in {
+                "ffn_dropout": training_config.get("ffn_dropout"),
+                "hidden_dropout": training_config.get("hidden_dropout"),
+                "attention_dropout": training_config.get("attention_dropout"),
+            }.items()
+            if v is not None
+        }
+
+        # Remove the sft dictionary if it's empty
+        if not job_config["hyperparameters"]["sft"]:
+            job_config["hyperparameters"].pop("sft")
+
+        # Handle LoRA-specific configuration
+        if algorithm_config:
+            if algorithm_config.type == "LoRA":
+                warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config")
+                job_config["hyperparameters"]["lora"] = {
+                    k: v for k, v in {"alpha": algorithm_config.alpha}.items() if v is not None
+                }
+            else:
+                raise NotImplementedError(f"Unsupported algorithm config: {algorithm_config}")
+
+        # Create the customization job
+        response = await self._make_request(
+            method="POST",
+            path="/v1/customization/jobs",
+            headers={"Accept": "application/json"},
+            json=job_config,
+        )
+
+        job_uuid = response["id"]
+        response.pop("status")
+        created_at = datetime.fromisoformat(response.pop("created_at"))
+        updated_at = datetime.fromisoformat(response.pop("updated_at"))
+
+        return NvidiaPostTrainingJob(
+            job_uuid=job_uuid, status=JobStatus.in_progress, created_at=created_at, updated_at=updated_at, **response
+        )
+
+    async def preference_optimize(
+        self,
+        job_uuid: str,
+        finetuned_model: str,
+        algorithm_config: DPOAlignmentConfig,
+        training_config: TrainingConfig,
+        hyperparam_search_config: dict[str, Any],
+        logger_config: dict[str, Any],
+    ) -> PostTrainingJob:
+        """Optimize a model based on preference data."""
+        raise NotImplementedError("Preference optimization is not implemented yet")
+
+    async def get_training_job_container_logs(self, job_uuid: str) -> PostTrainingJobStatusResponse:
+        raise NotImplementedError("Job logs are not implemented yet")
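Despite the long parameter walk-through, a call to supervised_fine_tune reduces to one dict plus a handful of ignored arguments. A minimal sketch under the same assumptions as above (`adapter` is an initialized instance of this provider); the dataset ID is a placeholder and only keys the docstring lists as supported are set:

```python
async def launch_lora_job(adapter) -> str:
    training_config = {
        "n_epochs": 2,
        "data_config": {"dataset_id": "my-sft-dataset", "batch_size": 8},  # placeholder dataset
        "optimizer_config": {"lr": 0.0001, "weight_decay": 0.01},
        "val_check_interval": 0.25,
    }
    job = await adapter.supervised_fine_tune(
        job_uuid="",                  # currently ignored by this provider
        model="meta/llama-3.2-1b-instruct",
        checkpoint_dir=None,
        training_config=training_config,
        hyperparam_search_config={},  # non-empty values only trigger a warning
        logger_config={},             # non-empty values only trigger a warning
        algorithm_config=None,        # or a LoRA AlgorithmConfig (e.g. alpha=16)
    )
    return job.job_uuid
```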
@@ -0,0 +1,63 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import warnings
+from typing import Any
+
+from pydantic import BaseModel
+
+from llama_stack.apis.post_training import TrainingConfig
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
+
+from .config import NvidiaPostTrainingConfig
+
+logger = get_logger(name=__name__, category="post_training::nvidia")
+
+
+def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None:
+    keys = set(config_dict.__annotations__.keys()) if isinstance(config_dict, BaseModel) else config_dict.keys()
+    unsupported_params = [k for k in keys if k not in supported_keys]
+    if unsupported_params:
+        warnings.warn(
+            f"Parameters: {unsupported_params} in `{config_name}` are not supported and will be ignored.", stacklevel=2
+        )
+
+
+def validate_training_params(
+    training_config: dict[str, Any], supported_keys: set[str], config_name: str = "TrainingConfig"
+) -> None:
+    """
+    Validates training parameters against supported keys.
+
+    Args:
+        training_config: Dictionary containing training configuration parameters
+        supported_keys: Set of supported parameter keys
+        config_name: Name of the configuration for warning messages
+    """
+    sft_lora_fields = set(SFTLoRADefaultConfig.__annotations__.keys())
+    training_config_fields = set(TrainingConfig.__annotations__.keys())
+
+    # Flag parameters that are:
+    # - not explicitly supported and not SFT/LoRA default fields
+    # - but declared on TrainingConfig (accepted by the API yet ignored here)
+    unsupported_params = []
+    for key in training_config:
+        if isinstance(key, str) and key not in (supported_keys.union(sft_lora_fields)):
+            if key in training_config_fields:
+                unsupported_params.append(key)
+
+    if unsupported_params:
+        warnings.warn(
+            f"Parameters: {unsupported_params} in `{config_name}` are not supported and will be ignored.", stacklevel=2
+        )
+
+
+# TODO: implement health checks verifying that the Customizer service is enabled
+async def _get_health(url: str) -> tuple[bool, bool]: ...
+
+
+async def check_health(config: NvidiaPostTrainingConfig) -> None: ...
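The first helper above drives the warnings mentioned throughout the adapter's docstrings. Here is a small, self-contained replica of its dict branch (the real helper additionally reads `__annotations__` off pydantic models); the config values are illustrative only:

```python
import warnings


def warn_unsupported(config: dict, supported_keys: set[str], config_name: str) -> None:
    # Same logic as warn_unsupported_params above, restricted to plain dicts
    unsupported = [k for k in config.keys() if k not in supported_keys]
    if unsupported:
        warnings.warn(
            f"Parameters: {unsupported} in `{config_name}` are not supported and will be ignored.",
            stacklevel=2,
        )


warn_unsupported({"lr": 1e-4, "momentum": 0.9}, {"lr", "weight_decay"}, "OptimizerConfig")
# UserWarning: Parameters: ['momentum'] in `OptimizerConfig` are not supported and will be ignored.
```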
@@ -0,0 +1,111 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+from typing import Any
+
+from llama_stack.apis.inference import OpenAIMessageParam
+from llama_stack.apis.safety import (
+    RunShieldResponse,
+    Safety,
+    SafetyViolation,
+    ViolationLevel,
+)
+from llama_stack.apis.shields import Shield
+from llama_stack.log import get_logger
+from llama_stack.providers.datatypes import ShieldsProtocolPrivate
+from llama_stack.providers.utils.bedrock.client import create_bedrock_client
+
+from .config import BedrockSafetyConfig
+
+logger = get_logger(name=__name__, category="safety::bedrock")
+
+
+class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
+    def __init__(self, config: BedrockSafetyConfig) -> None:
+        self.config = config
+        self.registered_shields = []
+
+    async def initialize(self) -> None:
+        try:
+            self.bedrock_runtime_client = create_bedrock_client(self.config)
+            self.bedrock_client = create_bedrock_client(self.config, "bedrock")
+        except Exception as e:
+            raise RuntimeError("Error initializing BedrockSafetyAdapter") from e
+
+    async def shutdown(self) -> None:
+        pass
+
+    async def register_shield(self, shield: Shield) -> None:
+        response = self.bedrock_client.list_guardrails(
+            guardrailIdentifier=shield.provider_resource_id,
+        )
+        if (
+            not response["guardrails"]
+            or len(response["guardrails"]) == 0
+            or response["guardrails"][0]["version"] != shield.params["guardrailVersion"]
+        ):
+            raise ValueError(
+                f"Shield {shield.provider_resource_id} with version {shield.params['guardrailVersion']} not found in Bedrock"
+            )
+
+    async def unregister_shield(self, identifier: str) -> None:
+        pass
+
+    async def run_shield(
+        self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None
+    ) -> RunShieldResponse:
+        shield = await self.shield_store.get_shield(shield_id)
+        if not shield:
+            raise ValueError(f"Shield {shield_id} not found")
+
+        """
+        This is the implementation for the Bedrock guardrails. The input to the guardrails must be of this format:
+        ```content = [
+            {
+                "text": {
+                    "text": "Is the AB503 Product a better investment than the S&P 500?"
+                }
+            }
+        ]```
+        Incoming messages contain content and role. For now we extract only the content and
+        default the "qualifiers" to ["query"].
+        """
+
+        shield_params = shield.params
+        logger.debug(f"run_shield::{shield_params}::messages={messages}")
+
+        # Convert the messages into the format Bedrock expects
+        content_messages = []
+        for message in messages:
+            content_messages.append({"text": {"text": message.content}})
+        logger.debug(f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:")
+
+        response = self.bedrock_runtime_client.apply_guardrail(
+            guardrailIdentifier=shield.provider_resource_id,
+            guardrailVersion=shield_params["guardrailVersion"],
+            source="OUTPUT",  # or "INPUT" depending on your use case
+            content=content_messages,
+        )
+        if response["action"] == "GUARDRAIL_INTERVENED":
+            user_message = ""
+            metadata = {}
+            for output in response["outputs"]:
+                # Guardrails returns a list; this implementation keeps the last value
+                user_message = output["text"]
+            for assessment in response["assessments"]:
+                # Guardrails returns a list; this implementation keeps the last value
+                metadata = dict(assessment)
+
+            return RunShieldResponse(
+                violation=SafetyViolation(
+                    user_message=user_message,
+                    violation_level=ViolationLevel.ERROR,
+                    metadata=metadata,
+                )
+            )
+
+        return RunShieldResponse()
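For reference, the payload translation in run_shield can be reproduced against the Bedrock Runtime API directly with boto3. A standalone sketch; the guardrail identifier, version, and region are placeholders, and the adapter performs the equivalent call through its shared client helper:

```python
import boto3

# Placeholders; the adapter reads these from the registered Shield instead
client = boto3.client("bedrock-runtime", region_name="us-east-1")
guardrail_id, guardrail_version = "gr-example123", "1"

messages = ["Is the AB503 Product a better investment than the S&P 500?"]
content = [{"text": {"text": m}} for m in messages]  # the shape run_shield builds

response = client.apply_guardrail(
    guardrailIdentifier=guardrail_id,
    guardrailVersion=guardrail_version,
    source="OUTPUT",
    content=content,
)
if response["action"] == "GUARDRAIL_INTERVENED":
    # Like the adapter, keep only the last output and assessment
    print(response["outputs"][-1]["text"])
```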
@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class BedrockSafetyConfig(BedrockBaseConfig):
+    pass
@@ -4,14 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+
 from typing import Any
 
-from .config import
+from .config import NVIDIASafetyConfig
 
 
-async def get_adapter_impl(config:
-    from .
+async def get_adapter_impl(config: NVIDIASafetyConfig, _deps) -> Any:
+    from .nvidia import NVIDIASafetyAdapter
 
-    impl =
+    impl = NVIDIASafetyAdapter(config)
     await impl.initialize()
     return impl
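The rebuilt __init__.py follows the stack's adapter-factory convention: the config type is resolved first, and the implementation module is imported lazily inside get_adapter_impl. A hypothetical sketch of how a resolver would consume it; the package path is inferred from the relative imports and is not stated in the diff:

```python
import asyncio

from llama_stack.providers.remote.safety.nvidia import get_adapter_impl  # path assumed
from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig  # path assumed


async def build_safety_impl():
    config = NVIDIASafetyConfig()  # falls back to env-driven defaults (see config below)
    return await get_adapter_impl(config, _deps={})


impl = asyncio.run(build_safety_impl())
```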
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import os
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class NVIDIASafetyConfig(BaseModel):
+    """
+    Configuration for the NVIDIA Guardrail microservice endpoint.
+
+    Attributes:
+        guardrails_service_url (str): The base URL for accessing the NVIDIA guardrail endpoint, e.g. http://0.0.0.0:7331
+        config_id (str | None): The ID of the guardrails configuration to use from the configuration store
+        (https://developer.nvidia.com/docs/nemo-microservices/guardrails/source/guides/configuration-store-guide.html)
+
+    """
+
+    guardrails_service_url: str = Field(
+        default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"),
+        description="The url for accessing the Guardrails service",
+    )
+    config_id: str | None = Field(
+        default_factory=lambda: os.getenv("NVIDIA_GUARDRAILS_CONFIG_ID", "self-check"),
+        description="Guardrails configuration ID to use from the Guardrails configuration store",
+    )
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+        return {
+            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}",
+            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}",
+        }
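The two Field(default_factory=...) declarations read the environment at instantiation time, so defaults can be overridden per-process, while sample_run_config emits the stack's `${env.VAR:=default}` substitution syntax for generated run configs. A self-contained replica of the env-driven default pattern (class name is illustrative, not the real import):

```python
import os

from pydantic import BaseModel, Field


class GuardrailsConfig(BaseModel):
    # Same pattern as NVIDIASafetyConfig above: defaults resolved per instantiation
    guardrails_service_url: str = Field(
        default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331")
    )
    config_id: str | None = Field(
        default_factory=lambda: os.getenv("NVIDIA_GUARDRAILS_CONFIG_ID", "self-check")
    )


os.environ["GUARDRAILS_SERVICE_URL"] = "http://localhost:7331"
print(GuardrailsConfig().guardrails_service_url)  # http://localhost:7331

del os.environ["GUARDRAILS_SERVICE_URL"]
print(GuardrailsConfig().guardrails_service_url)  # http://0.0.0.0:7331
```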