llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CheckpointQuantizationFormat(Enum):
    """Quantization formats a model checkpoint can be distributed in."""

    # default format
    bf16 = "bf16"

    # used for enabling fp8_rowwise inference, some weights are bf16
    fp8_mixed = "fp8-mixed"

    int8 = "int8"

    int4 = "int4"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ModelFamily(Enum):
    """Top-level Llama model families that group the CoreModelId SKUs."""

    llama2 = "llama2"
    llama3 = "llama3"
    llama3_1 = "llama3_1"
    llama3_2 = "llama3_2"
    llama3_3 = "llama3_3"
    llama4 = "llama4"
    # all Llama Guard (safety) models, regardless of base generation
    safety = "safety"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CoreModelId(Enum):
    """Each of these models is a unique "SKU". These root models can be served in various garbs (especially by quantizing them)"""

    # Llama 2 family
    llama2_7b = "Llama-2-7b"
    llama2_13b = "Llama-2-13b"
    llama2_70b = "Llama-2-70b"
    llama2_7b_chat = "Llama-2-7b-chat"
    llama2_13b_chat = "Llama-2-13b-chat"
    llama2_70b_chat = "Llama-2-70b-chat"

    # Llama 3 family
    llama3_8b = "Llama-3-8B"
    llama3_70b = "Llama-3-70B"
    llama3_8b_instruct = "Llama-3-8B-Instruct"
    llama3_70b_instruct = "Llama-3-70B-Instruct"

    # Llama 3.1 family
    llama3_1_8b = "Llama3.1-8B"
    llama3_1_70b = "Llama3.1-70B"
    llama3_1_405b = "Llama3.1-405B"
    llama3_1_8b_instruct = "Llama3.1-8B-Instruct"
    llama3_1_70b_instruct = "Llama3.1-70B-Instruct"
    llama3_1_405b_instruct = "Llama3.1-405B-Instruct"

    # Llama 3.2 family (includes the multimodal "Vision" variants)
    llama3_2_1b = "Llama3.2-1B"
    llama3_2_3b = "Llama3.2-3B"
    llama3_2_1b_instruct = "Llama3.2-1B-Instruct"
    llama3_2_3b_instruct = "Llama3.2-3B-Instruct"
    llama3_2_11b_vision = "Llama3.2-11B-Vision"
    llama3_2_90b_vision = "Llama3.2-90B-Vision"
    llama3_2_11b_vision_instruct = "Llama3.2-11B-Vision-Instruct"
    llama3_2_90b_vision_instruct = "Llama3.2-90B-Vision-Instruct"

    # Llama 3.3 family
    llama3_3_70b_instruct = "Llama3.3-70B-Instruct"

    # Llama 4 family
    llama4_scout_17b_16e = "Llama-4-Scout-17B-16E"
    llama4_scout_17b_16e_instruct = "Llama-4-Scout-17B-16E-Instruct"
    llama4_maverick_17b_128e = "Llama-4-Maverick-17B-128E"
    llama4_maverick_17b_128e_instruct = "Llama-4-Maverick-17B-128E-Instruct"

    # Safety models
    llama_guard_3_8b = "Llama-Guard-3-8B"
    llama_guard_2_8b = "Llama-Guard-2-8B"
    llama_guard_3_11b_vision = "Llama-Guard-3-11B-Vision"
    llama_guard_3_1b = "Llama-Guard-3-1B"
    llama_guard_4_12b = "Llama-Guard-4-12B"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def is_multimodal(model_id) -> bool:
    """Return True if the given CoreModelId is a vision (multimodal) SKU.

    Only the Llama 3.2 Vision variants listed below are multimodal; every
    other SKU returns False.
    """
    # `if cond: return True else: return False` collapsed to a direct
    # membership test — same result, less noise.
    return model_id in [
        CoreModelId.llama3_2_11b_vision,
        CoreModelId.llama3_2_90b_vision,
        CoreModelId.llama3_2_11b_vision_instruct,
        CoreModelId.llama3_2_90b_vision_instruct,
    ]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def model_family(model_id) -> ModelFamily:
    """Return the ModelFamily a CoreModelId belongs to.

    Raises:
        ValueError: if `model_id` does not map to any known family.
    """
    # One lookup table instead of a long if/elif chain; membership is
    # checked family by family, exactly as the chained version did.
    members_by_family = {
        ModelFamily.llama2: [
            CoreModelId.llama2_7b,
            CoreModelId.llama2_13b,
            CoreModelId.llama2_70b,
            CoreModelId.llama2_7b_chat,
            CoreModelId.llama2_13b_chat,
            CoreModelId.llama2_70b_chat,
        ],
        ModelFamily.llama3: [
            CoreModelId.llama3_8b,
            CoreModelId.llama3_70b,
            CoreModelId.llama3_8b_instruct,
            CoreModelId.llama3_70b_instruct,
        ],
        ModelFamily.llama3_1: [
            CoreModelId.llama3_1_8b,
            CoreModelId.llama3_1_70b,
            CoreModelId.llama3_1_405b,
            CoreModelId.llama3_1_8b_instruct,
            CoreModelId.llama3_1_70b_instruct,
            CoreModelId.llama3_1_405b_instruct,
        ],
        ModelFamily.llama3_2: [
            CoreModelId.llama3_2_1b,
            CoreModelId.llama3_2_3b,
            CoreModelId.llama3_2_1b_instruct,
            CoreModelId.llama3_2_3b_instruct,
            CoreModelId.llama3_2_11b_vision,
            CoreModelId.llama3_2_90b_vision,
            CoreModelId.llama3_2_11b_vision_instruct,
            CoreModelId.llama3_2_90b_vision_instruct,
        ],
        ModelFamily.llama3_3: [
            CoreModelId.llama3_3_70b_instruct,
        ],
        ModelFamily.llama4: [
            CoreModelId.llama4_scout_17b_16e,
            CoreModelId.llama4_scout_17b_16e_instruct,
            CoreModelId.llama4_maverick_17b_128e,
            CoreModelId.llama4_maverick_17b_128e_instruct,
        ],
        ModelFamily.safety: [
            CoreModelId.llama_guard_3_8b,
            CoreModelId.llama_guard_2_8b,
            CoreModelId.llama_guard_3_11b_vision,
            CoreModelId.llama_guard_3_1b,
            CoreModelId.llama_guard_4_12b,
        ],
    }
    for family, members in members_by_family.items():
        if model_id in members:
            return family
    raise ValueError(f"Unknown model family for {model_id}")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class Model(BaseModel):
    """Describes one Llama model SKU and its checkpoint metadata.

    A SKU is uniquely identified by the (core_model_id, variant) pair; see
    `descriptor()`.
    """

    core_model_id: CoreModelId
    description: str
    # Hugging Face repo id for this checkpoint, if published there
    huggingface_repo: str | None = None
    # free-form architecture hyperparameters for this checkpoint
    arch_args: dict[str, Any]
    # distinguishes multiple checkpoints of the same core model; "" = default
    variant: str = ""

    quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
    # presumably the number of .pth shard files in the checkpoint — TODO confirm
    pth_file_count: int
    metadata: dict[str, Any] = Field(default_factory=dict)

    # silence pydantic until we remove the `model_` fields
    model_config = ConfigDict(protected_namespaces=())

    @property
    def model_family(self) -> ModelFamily:
        """The ModelFamily this SKU belongs to."""
        return model_family(self.core_model_id)

    # The SKU is uniquely identified by (model_id, variant) combo
    def descriptor(self, shorten_default_variant: bool = True) -> str:
        # NOTE(review): `shorten_default_variant` is currently unused; kept
        # only for backward compatibility with existing callers.
        if not self.variant:
            return self.core_model_id.value
        return f"{self.core_model_id.value}:{self.variant}"

    @property
    def is_instruct_model(self) -> bool:
        # BUGFIX: compare case-insensitively. SKU values spell it "Instruct"
        # (e.g. "Llama3.1-8B-Instruct"), so the previous case-sensitive
        # `"instruct" in value` check never matched any instruct model.
        return "instruct" in self.core_model_id.value.lower()

    # Featured models are shown in the non-exhaustive model list
    @property
    def is_featured(self) -> bool:
        return self.model_family in [
            ModelFamily.llama3_1,
            ModelFamily.llama3_2,
            ModelFamily.llama3_3,
            ModelFamily.llama4,
            ModelFamily.safety,
        ]

    @property
    def max_seq_length(self) -> int:
        """Maximum sequence length (context window) for this SKU.

        Raises:
            ValueError: if no context length is known for the model id.
        """
        if self.model_family == ModelFamily.llama2:
            return 4096
        elif self.core_model_id == CoreModelId.llama_guard_2_8b:
            # Llama Guard 2 is Llama-3 based but keeps the 4k window
            return 4096
        elif self.model_family == ModelFamily.llama3:
            return 8192
        elif self.model_family in [ModelFamily.llama3_1, ModelFamily.llama3_3]:
            return 131072
        elif self.model_family == ModelFamily.llama3_2:
            # int4-quantized 3.2 checkpoints are limited to an 8k window
            if self.quantization_format == CheckpointQuantizationFormat.int4:
                return 8192
            return 131072
        elif self.model_family == ModelFamily.llama4:
            if self.core_model_id in {
                CoreModelId.llama4_scout_17b_16e,
                CoreModelId.llama4_maverick_17b_128e,
            }:
                return 262144
            if self.core_model_id == CoreModelId.llama4_scout_17b_16e_instruct:
                return 10485760
            if self.core_model_id == CoreModelId.llama4_maverick_17b_128e_instruct:
                return 1048576

            raise AssertionError(f"Unexpected core model id: {self.core_model_id}")
        elif self.core_model_id in [
            CoreModelId.llama_guard_3_8b,
            CoreModelId.llama_guard_3_11b_vision,
            CoreModelId.llama_guard_3_1b,
        ]:
            return 131072
        elif self.core_model_id == CoreModelId.llama_guard_4_12b:
            return 8192
        else:
            raise ValueError(f"Unknown max_seq_len for {self.core_model_id}")
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import base64
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from llama_stack.log import get_logger
|
|
11
|
+
|
|
12
|
+
logger = get_logger(__name__, "models")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
    """
    Load BPE file directly and return mergeable ranks.

    Each non-empty line is expected to contain a base64-encoded token and an
    integer rank separated by whitespace; lines that fail to parse are logged
    and skipped instead of aborting the load.

    Args:
        model_path (Path): Path to the BPE model file.

    Returns:
        dict[bytes, int]: Dictionary mapping byte sequences to their ranks.
    """
    ranks: dict[bytes, int] = {}

    with open(model_path, encoding="utf-8") as fh:
        raw = fh.read()

    for line in raw.splitlines():
        # blank lines carry no data
        if not line.strip():
            continue
        try:
            encoded_token, rank_text = line.split()
            ranks[base64.b64decode(encoded_token)] = int(rank_text)
        except Exception as e:
            # best-effort: report the bad line and keep going
            logger.warning(f"Failed to parse line '{line}': {e}")
            continue

    return ranks
|
|
@@ -4,51 +4,96 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from enum import
|
|
8
|
-
from typing import Any,
|
|
7
|
+
from enum import StrEnum
|
|
8
|
+
from typing import Any, Protocol
|
|
9
|
+
from urllib.parse import urlparse
|
|
9
10
|
|
|
10
|
-
from llama_models.schema_utils import json_schema_type
|
|
11
11
|
from pydantic import BaseModel, Field
|
|
12
12
|
|
|
13
|
-
from llama_stack.apis.
|
|
13
|
+
from llama_stack.apis.benchmarks import Benchmark
|
|
14
|
+
from llama_stack.apis.datasets import Dataset
|
|
15
|
+
from llama_stack.apis.datatypes import Api
|
|
16
|
+
from llama_stack.apis.models import Model
|
|
17
|
+
from llama_stack.apis.scoring_functions import ScoringFn
|
|
18
|
+
from llama_stack.apis.shields import Shield
|
|
19
|
+
from llama_stack.apis.tools import ToolGroup
|
|
20
|
+
from llama_stack.apis.vector_stores import VectorStore
|
|
21
|
+
from llama_stack.schema_utils import json_schema_type
|
|
14
22
|
|
|
15
|
-
from llama_stack.apis.models import ModelDef
|
|
16
|
-
from llama_stack.apis.shields import ShieldDef
|
|
17
23
|
|
|
24
|
+
class ModelsProtocolPrivate(Protocol):
    """
    Protocol for model management.

    This allows users to register their preferred model identifiers.

    Model registration requires -
     - a provider, used to route the registration request
     - a model identifier, user's intended name for the model during inference
     - a provider model identifier, a model identifier supported by the provider

    Providers will only accept registration for provider model ids they support.

    Example,
      register: provider x my-model-id x provider-model-id
      -> Error if provider does not support provider-model-id
      -> Error if my-model-id is already registered
      -> Success if provider supports provider-model-id
      inference: my-model-id x ...
      -> Provider uses provider-model-id for inference
    """

    # this should be called `on_model_register` or something like that.
    # the provider should _not_ be able to change the object in this
    # callback
    async def register_model(self, model: Model) -> Model: ...

    async def unregister_model(self, model_id: str) -> None: ...

    # the Stack router will query each provider for their list of models
    # if a `refresh_interval_seconds` is provided, this method will be called
    # periodically to refresh the list of models
    #
    # NOTE: each model returned will be registered with the model registry. this means
    # a callback to the `register_model()` method will be made. this is duplicative and
    # may be removed in the future.
    async def list_models(self) -> list[Model] | None: ...

    async def should_refresh_models(self) -> bool: ...
|
|
40
63
|
|
|
41
64
|
|
|
42
65
|
class ShieldsProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering shields."""

    async def register_shield(self, shield: Shield) -> None: ...

    async def unregister_shield(self, identifier: str) -> None: ...
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class VectorStoresProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering vector stores."""

    async def register_vector_store(self, vector_store: VectorStore) -> None: ...

    async def unregister_vector_store(self, vector_store_id: str) -> None: ...
|
|
75
|
+
|
|
44
76
|
|
|
45
|
-
|
|
77
|
+
class DatasetsProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering datasets."""

    async def register_dataset(self, dataset: Dataset) -> None: ...

    async def unregister_dataset(self, dataset_id: str) -> None: ...
|
|
47
81
|
|
|
48
|
-
class MemoryBanksProtocolPrivate(Protocol):
|
|
49
|
-
async def list_memory_banks(self) -> List[MemoryBankDef]: ...
|
|
50
82
|
|
|
51
|
-
|
|
83
|
+
class ScoringFunctionsProtocolPrivate(Protocol):
    """Provider-private protocol for listing and registering scoring functions."""

    async def list_scoring_functions(self) -> list[ScoringFn]: ...

    async def register_scoring_function(self, scoring_fn: ScoringFn) -> None: ...
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class BenchmarksProtocolPrivate(Protocol):
    """Provider-private protocol for registering benchmarks."""

    async def register_benchmark(self, benchmark: Benchmark) -> None: ...
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ToolGroupsProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering tool groups."""

    async def register_toolgroup(self, toolgroup: ToolGroup) -> None: ...

    async def unregister_toolgroup(self, toolgroup_id: str) -> None: ...
|
|
52
97
|
|
|
53
98
|
|
|
54
99
|
@json_schema_type
|
|
@@ -59,130 +104,114 @@ class ProviderSpec(BaseModel):
|
|
|
59
104
|
...,
|
|
60
105
|
description="Fully-qualified classname of the config for this provider",
|
|
61
106
|
)
|
|
62
|
-
api_dependencies:
|
|
107
|
+
api_dependencies: list[Api] = Field(
|
|
63
108
|
default_factory=list,
|
|
64
109
|
description="Higher-level API surfaces may depend on other providers to provide their functionality",
|
|
65
110
|
)
|
|
111
|
+
optional_api_dependencies: list[Api] = Field(
|
|
112
|
+
default_factory=list,
|
|
113
|
+
)
|
|
114
|
+
deprecation_warning: str | None = Field(
|
|
115
|
+
default=None,
|
|
116
|
+
description="If this provider is deprecated, specify the warning message here",
|
|
117
|
+
)
|
|
118
|
+
deprecation_error: str | None = Field(
|
|
119
|
+
default=None,
|
|
120
|
+
description="If this provider is deprecated and does NOT work, specify the error message here",
|
|
121
|
+
)
|
|
66
122
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class RoutingTable(Protocol):
|
|
72
|
-
def get_provider_impl(self, routing_key: str) -> Any: ...
|
|
123
|
+
module: str | None = Field(
|
|
124
|
+
default=None,
|
|
125
|
+
description="""
|
|
126
|
+
Fully-qualified name of the module to import. The module is expected to have:
|
|
73
127
|
|
|
128
|
+
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
|
74
129
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
adapter_type: str = Field(
|
|
78
|
-
...,
|
|
79
|
-
description="Unique identifier for this adapter",
|
|
130
|
+
Example: `module: ramalama_stack`
|
|
131
|
+
""",
|
|
80
132
|
)
|
|
81
|
-
module: str = Field(
|
|
82
|
-
...,
|
|
83
|
-
description="""
|
|
84
|
-
Fully-qualified name of the module to import. The module is expected to have:
|
|
85
133
|
|
|
86
|
-
|
|
87
|
-
""",
|
|
88
|
-
)
|
|
89
|
-
pip_packages: List[str] = Field(
|
|
134
|
+
pip_packages: list[str] = Field(
|
|
90
135
|
default_factory=list,
|
|
91
136
|
description="The pip dependencies needed for this implementation",
|
|
92
137
|
)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
description="Fully-qualified classname of the config for this provider",
|
|
96
|
-
)
|
|
97
|
-
provider_data_validator: Optional[str] = Field(
|
|
138
|
+
|
|
139
|
+
provider_data_validator: str | None = Field(
|
|
98
140
|
default=None,
|
|
99
141
|
)
|
|
100
142
|
|
|
143
|
+
is_external: bool = Field(default=False, description="Notes whether this provider is an external provider.")
|
|
144
|
+
|
|
145
|
+
# used internally by the resolver; this is a hack for now
|
|
146
|
+
deps__: list[str] = Field(default_factory=list)
|
|
147
|
+
|
|
148
|
+
@property
|
|
149
|
+
def is_sample(self) -> bool:
|
|
150
|
+
return self.provider_type in ("sample", "remote::sample")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class RoutingTable(Protocol):
|
|
154
|
+
async def get_provider_impl(self, routing_key: str) -> Any: ...
|
|
155
|
+
|
|
101
156
|
|
|
102
157
|
@json_schema_type
|
|
103
158
|
class InlineProviderSpec(ProviderSpec):
|
|
104
|
-
|
|
105
|
-
default_factory=list,
|
|
106
|
-
description="The pip dependencies needed for this implementation",
|
|
107
|
-
)
|
|
108
|
-
docker_image: Optional[str] = Field(
|
|
159
|
+
container_image: str | None = Field(
|
|
109
160
|
default=None,
|
|
110
161
|
description="""
|
|
111
|
-
The
|
|
112
|
-
If a provider depends on other providers, the dependencies MUST NOT specify a
|
|
162
|
+
The container image to use for this implementation. If one is provided, pip_packages will be ignored.
|
|
163
|
+
If a provider depends on other providers, the dependencies MUST NOT specify a container image.
|
|
113
164
|
""",
|
|
114
165
|
)
|
|
115
|
-
|
|
116
|
-
|
|
166
|
+
description: str | None = Field(
|
|
167
|
+
default=None,
|
|
117
168
|
description="""
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
- `get_provider_impl(config, deps)`: returns the local implementation
|
|
169
|
+
A description of the provider. This is used to display in the documentation.
|
|
121
170
|
""",
|
|
122
171
|
)
|
|
123
|
-
provider_data_validator: Optional[str] = Field(
|
|
124
|
-
default=None,
|
|
125
|
-
)
|
|
126
172
|
|
|
127
173
|
|
|
128
174
|
class RemoteProviderConfig(BaseModel):
|
|
129
175
|
host: str = "localhost"
|
|
130
|
-
port: int
|
|
176
|
+
port: int | None = None
|
|
177
|
+
protocol: str = "http"
|
|
131
178
|
|
|
132
179
|
@property
|
|
133
180
|
def url(self) -> str:
|
|
134
|
-
|
|
181
|
+
if self.port is None:
|
|
182
|
+
return f"{self.protocol}://{self.host}"
|
|
183
|
+
return f"{self.protocol}://{self.host}:{self.port}"
|
|
184
|
+
|
|
185
|
+
@classmethod
|
|
186
|
+
def from_url(cls, url: str) -> "RemoteProviderConfig":
|
|
187
|
+
parsed = urlparse(url)
|
|
188
|
+
attrs = {k: v for k, v in parsed._asdict().items() if v is not None}
|
|
189
|
+
return cls(**attrs)
|
|
135
190
|
|
|
136
191
|
|
|
137
192
|
@json_schema_type
|
|
138
193
|
class RemoteProviderSpec(ProviderSpec):
|
|
139
|
-
|
|
194
|
+
adapter_type: str = Field(
|
|
195
|
+
...,
|
|
196
|
+
description="Unique identifier for this adapter",
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
description: str | None = Field(
|
|
140
200
|
default=None,
|
|
141
201
|
description="""
|
|
142
|
-
|
|
143
|
-
API responses, specify the adapter here. If not specified, it indicates the remote
|
|
144
|
-
as being "Llama Stack compatible"
|
|
202
|
+
A description of the provider. This is used to display in the documentation.
|
|
145
203
|
""",
|
|
146
204
|
)
|
|
147
205
|
|
|
148
206
|
@property
|
|
149
|
-
def
|
|
207
|
+
def container_image(self) -> str | None:
|
|
150
208
|
return None
|
|
151
209
|
|
|
152
|
-
@property
|
|
153
|
-
def module(self) -> str:
|
|
154
|
-
if self.adapter:
|
|
155
|
-
return self.adapter.module
|
|
156
|
-
return f"llama_stack.apis.{self.api.value}.client"
|
|
157
210
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
return []
|
|
211
|
+
class HealthStatus(StrEnum):
|
|
212
|
+
OK = "OK"
|
|
213
|
+
ERROR = "Error"
|
|
214
|
+
NOT_IMPLEMENTED = "Not Implemented"
|
|
163
215
|
|
|
164
|
-
@property
|
|
165
|
-
def provider_data_validator(self) -> Optional[str]:
|
|
166
|
-
if self.adapter:
|
|
167
|
-
return self.adapter.provider_data_validator
|
|
168
|
-
return None
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def is_passthrough(spec: ProviderSpec) -> bool:
|
|
172
|
-
return isinstance(spec, RemoteProviderSpec) and spec.adapter is None
|
|
173
216
|
|
|
174
|
-
|
|
175
|
-
# Can avoid this by using Pydantic computed_field
|
|
176
|
-
def remote_provider_spec(
|
|
177
|
-
api: Api, adapter: Optional[AdapterSpec] = None
|
|
178
|
-
) -> RemoteProviderSpec:
|
|
179
|
-
config_class = (
|
|
180
|
-
adapter.config_class
|
|
181
|
-
if adapter and adapter.config_class
|
|
182
|
-
else "llama_stack.distribution.datatypes.RemoteProviderConfig"
|
|
183
|
-
)
|
|
184
|
-
provider_type = f"remote::{adapter.adapter_type}" if adapter else "remote"
|
|
185
|
-
|
|
186
|
-
return RemoteProviderSpec(
|
|
187
|
-
api=api, provider_type=provider_type, config_class=config_class, adapter=adapter
|
|
188
|
-
)
|
|
217
|
+
HealthResponse = dict[str, Any]
|
llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py
RENAMED
|
@@ -4,24 +4,31 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.
|
|
9
|
+
from llama_stack.core.datatypes import AccessRule, Api
|
|
10
10
|
|
|
11
11
|
from .config import MetaReferenceAgentsImplConfig
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
async def get_provider_impl(
|
|
15
|
-
config: MetaReferenceAgentsImplConfig,
|
|
15
|
+
config: MetaReferenceAgentsImplConfig,
|
|
16
|
+
deps: dict[Api, Any],
|
|
17
|
+
policy: list[AccessRule],
|
|
18
|
+
telemetry_enabled: bool = False,
|
|
16
19
|
):
|
|
17
20
|
from .agents import MetaReferenceAgentsImpl
|
|
18
21
|
|
|
19
22
|
impl = MetaReferenceAgentsImpl(
|
|
20
23
|
config,
|
|
21
24
|
deps[Api.inference],
|
|
22
|
-
deps[Api.
|
|
25
|
+
deps[Api.vector_io],
|
|
23
26
|
deps[Api.safety],
|
|
24
|
-
deps[Api.
|
|
27
|
+
deps[Api.tool_runtime],
|
|
28
|
+
deps[Api.tool_groups],
|
|
29
|
+
deps[Api.conversations],
|
|
30
|
+
policy,
|
|
31
|
+
telemetry_enabled,
|
|
25
32
|
)
|
|
26
33
|
await impl.initialize()
|
|
27
34
|
return impl
|