llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff compares the contents of two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from llama_stack.apis.common.errors import ModelNotFoundError
|
|
10
|
+
from llama_stack.apis.models import Model
|
|
11
|
+
from llama_stack.apis.resource import ResourceType
|
|
12
|
+
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
|
|
13
|
+
from llama_stack.core.access_control.datatypes import Action
|
|
14
|
+
from llama_stack.core.datatypes import (
|
|
15
|
+
AccessRule,
|
|
16
|
+
RoutableObject,
|
|
17
|
+
RoutableObjectWithProvider,
|
|
18
|
+
RoutedProtocol,
|
|
19
|
+
ScoringFnWithOwner,
|
|
20
|
+
)
|
|
21
|
+
from llama_stack.core.request_headers import get_authenticated_user
|
|
22
|
+
from llama_stack.core.store import DistributionRegistry
|
|
23
|
+
from llama_stack.log import get_logger
|
|
24
|
+
from llama_stack.providers.datatypes import Api, RoutingTable
|
|
25
|
+
|
|
26
|
+
logger = get_logger(name=__name__, category="core::routing_tables")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_impl_api(p: Any) -> Api:
    """Return the API served by the provider implementation ``p``.

    The value is read from the ``api`` field of the ``__provider_spec__``
    attribute attached to the implementation object.
    """
    spec = p.__provider_spec__
    return spec.api
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# TODO: this should return the registered object for all APIs
|
|
34
|
+
async def register_object_with_provider(obj: RoutableObject, p: Any) -> RoutableObject:
|
|
35
|
+
api = get_impl_api(p)
|
|
36
|
+
|
|
37
|
+
assert obj.provider_id != "remote", "Remote provider should not be registered"
|
|
38
|
+
|
|
39
|
+
if api == Api.inference:
|
|
40
|
+
return await p.register_model(obj)
|
|
41
|
+
elif api == Api.safety:
|
|
42
|
+
return await p.register_shield(obj)
|
|
43
|
+
elif api == Api.vector_io:
|
|
44
|
+
return await p.register_vector_store(obj)
|
|
45
|
+
elif api == Api.datasetio:
|
|
46
|
+
return await p.register_dataset(obj)
|
|
47
|
+
elif api == Api.scoring:
|
|
48
|
+
return await p.register_scoring_function(obj)
|
|
49
|
+
elif api == Api.eval:
|
|
50
|
+
return await p.register_benchmark(obj)
|
|
51
|
+
elif api == Api.tool_runtime:
|
|
52
|
+
return await p.register_toolgroup(obj)
|
|
53
|
+
else:
|
|
54
|
+
raise ValueError(f"Unknown API {api} for registering object with provider")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
    """Ask the provider implementation ``p`` to unregister ``obj``.

    The provider's API selects which ``unregister_*`` coroutine is awaited;
    it is called with ``obj.identifier``.

    :param obj: the routable object to remove
    :param p: the provider implementation currently holding the object
    :raises ValueError: if the provider's API has no unregistration hook here
    """
    api = get_impl_api(p)

    # (API, provider method name) pairs, checked in this order.
    hooks = (
        (Api.vector_io, "unregister_vector_store"),
        (Api.inference, "unregister_model"),
        (Api.safety, "unregister_shield"),
        (Api.datasetio, "unregister_dataset"),
        (Api.eval, "unregister_benchmark"),
        (Api.scoring, "unregister_scoring_function"),
        (Api.tool_runtime, "unregister_toolgroup"),
    )
    for candidate, method_name in hooks:
        if api == candidate:
            return await getattr(p, method_name)(obj.identifier)

    raise ValueError(f"Unregister not supported for {api}")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Type alias: a mapping from a string key to a list of routable objects that carry a provider.
# NOTE(review): not referenced by the code visible here — confirm what the key represents with its users.
Registry = dict[str, list[RoutableObjectWithProvider]]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class CommonRoutingTableImpl(RoutingTable):
    """Shared routing-table behaviour for all routable resource types.

    Registered objects live in a ``DistributionRegistry``; each one records the
    provider that serves it. Object lookups, registration and unregistration
    are checked against the configured access-control policy.
    """

    def __init__(
        self,
        impls_by_provider_id: dict[str, RoutedProtocol],
        dist_registry: DistributionRegistry,
        policy: list[AccessRule],
    ) -> None:
        """Store the provider implementations, the backing registry and the access policy.

        :param impls_by_provider_id: provider implementations keyed by provider id
        :param dist_registry: registry used to persist and look up registered objects
        :param policy: access rules passed to ``is_action_allowed``
        """
        self.impls_by_provider_id = impls_by_provider_id
        self.dist_registry = dist_registry
        self.policy = policy

    async def initialize(self) -> None:
        """Attach this table as the object store on every provider.

        For scoring providers, the objects returned by
        ``list_scoring_functions()`` are additionally registered in the
        distribution registry under that provider's id.
        """

        async def add_objects(objs: list[RoutableObjectWithProvider], provider_id: str, cls) -> None:
            # Register each object under ``provider_id``; when ``cls`` is given the
            # object is rebuilt as an instance of ``cls`` first.
            for obj in objs:
                if cls is None:
                    obj.provider_id = provider_id
                else:
                    # Create a copy of the model data and explicitly set provider_id
                    model_data = obj.model_dump()
                    model_data["provider_id"] = provider_id
                    obj = cls(**model_data)
                await self.dist_registry.register(obj)

        # Register all objects from providers
        for pid, p in self.impls_by_provider_id.items():
            api = get_impl_api(p)
            if api == Api.inference:
                p.model_store = self
            elif api == Api.safety:
                p.shield_store = self
            elif api == Api.vector_io:
                p.vector_store_store = self
            elif api == Api.datasetio:
                p.dataset_store = self
            elif api == Api.scoring:
                p.scoring_function_store = self
                scoring_functions = await p.list_scoring_functions()
                await add_objects(scoring_functions, pid, ScoringFnWithOwner)
            elif api == Api.eval:
                p.benchmark_store = self
            elif api == Api.tool_runtime:
                p.tool_store = self

    async def shutdown(self) -> None:
        """Await ``shutdown()`` on every provider implementation, one after another."""
        for p in self.impls_by_provider_id.values():
            await p.shutdown()

    async def refresh(self) -> None:
        """No-op hook; subclasses may override it."""
        pass

    async def get_provider_impl(self, routing_key: str, provider_id: str | None = None) -> Any:
        """Return the provider implementation that serves ``routing_key``.

        :param routing_key: identifier of the registered object to route on
        :param provider_id: optional provider the caller expects; when given it
            must equal the provider recorded on the registered object
        :return: the matching entry of ``impls_by_provider_id``
        :raises ValueError: if this is not a known routing-table subclass, if the
            object is not in the registry cache, or if ``provider_id`` does not
            match the registered provider
        """
        # Imported inside the method rather than at module level.
        # NOTE(review): presumably to avoid a circular import — confirm.
        from .benchmarks import BenchmarksRoutingTable
        from .datasets import DatasetsRoutingTable
        from .models import ModelsRoutingTable
        from .scoring_functions import ScoringFunctionsRoutingTable
        from .shields import ShieldsRoutingTable
        from .toolgroups import ToolGroupsRoutingTable
        from .vector_stores import VectorStoresRoutingTable

        def apiname_object():
            # Map the concrete routing-table subclass to (API display name, registry object type).
            if isinstance(self, ModelsRoutingTable):
                return ("Inference", "model")
            elif isinstance(self, ShieldsRoutingTable):
                return ("Safety", "shield")
            elif isinstance(self, VectorStoresRoutingTable):
                return ("VectorIO", "vector_store")
            elif isinstance(self, DatasetsRoutingTable):
                return ("DatasetIO", "dataset")
            elif isinstance(self, ScoringFunctionsRoutingTable):
                return ("Scoring", "scoring_function")
            elif isinstance(self, BenchmarksRoutingTable):
                return ("Eval", "benchmark")
            elif isinstance(self, ToolGroupsRoutingTable):
                return ("ToolGroups", "tool_group")
            else:
                raise ValueError("Unknown routing table type")

        apiname, objtype = apiname_object()

        # Get objects from disk registry
        obj = self.dist_registry.get_cached(objtype, routing_key)
        if not obj:
            provider_ids = list(self.impls_by_provider_id.keys())
            if len(provider_ids) > 1:
                provider_ids_str = f"any of the providers: {', '.join(provider_ids)}"
            elif provider_ids:
                provider_ids_str = f"provider: `{provider_ids[0]}`"
            else:
                # With no providers configured, indexing provider_ids[0] would raise
                # IndexError and hide the intended ValueError below.
                provider_ids_str = "any provider (none are configured)"
            raise ValueError(
                f"{objtype.capitalize()} `{routing_key}` not served by {provider_ids_str}. Make sure there is an {apiname} provider serving this {objtype}."
            )

        if not provider_id or provider_id == obj.provider_id:
            return self.impls_by_provider_id[obj.provider_id]

        raise ValueError(f"Provider not found for `{routing_key}`")

    async def get_object_by_identifier(self, type: str, identifier: str) -> RoutableObjectWithProvider | None:
        """Fetch a registered object, or ``None`` if it is missing or not readable.

        :param type: registry object type (for example ``"model"``)
        :param identifier: identifier of the object within that type
        :return: the object, or ``None`` when it does not exist or the current
            user is not allowed to read it
        """
        # Get from disk registry
        obj = await self.dist_registry.get(type, identifier)
        if not obj:
            return None

        # Check if user has permission to access this object
        if not is_action_allowed(self.policy, "read", obj, get_authenticated_user()):
            logger.debug(f"Access denied to {type} '{identifier}'")
            return None

        return obj

    async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
        """Delete ``obj`` from the registry, then unregister it from its provider.

        :raises AccessDeniedError: if the current user may not delete ``obj``
        """
        user = get_authenticated_user()
        if not is_action_allowed(self.policy, "delete", obj, user):
            raise AccessDeniedError("delete", obj, user)
        await self.dist_registry.delete(obj.type, obj.identifier)
        await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])

    async def register_object(self, obj: RoutableObjectWithProvider) -> RoutableObjectWithProvider:
        """Register ``obj`` with its provider and persist it in the registry.

        :param obj: the object to register; ``provider_id`` may be unset
        :return: for models, the object returned by the provider; for every
            other type, ``obj`` itself
        :raises ValueError: if the resolved provider id is unknown
        :raises AccessDeniedError: if the current user may not create ``obj``
        """
        # if provider_id is not specified, pick an arbitrary one from existing entries
        if not obj.provider_id and len(self.impls_by_provider_id) > 0:
            obj.provider_id = list(self.impls_by_provider_id.keys())[0]

        if obj.provider_id not in self.impls_by_provider_id:
            raise ValueError(f"Provider `{obj.provider_id}` not found")

        p = self.impls_by_provider_id[obj.provider_id]

        # If object supports access control but no attributes set, use creator's attributes
        creator = get_authenticated_user()
        if not is_action_allowed(self.policy, "create", obj, creator):
            raise AccessDeniedError("create", obj, creator)
        if creator:
            obj.owner = creator
            logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")

        registered_obj = await register_object_with_provider(obj, p)
        # TODO: This needs to be fixed for all APIs once they return the registered object
        if obj.type == ResourceType.model.value:
            await self.dist_registry.register(registered_obj)
            return registered_obj
        else:
            await self.dist_registry.register(obj)
            return obj

    async def assert_action_allowed(
        self,
        action: Action,
        type: str,
        identifier: str,
    ) -> None:
        """Fetch a registered object by type/identifier and enforce the given action permission.

        :raises ValueError: if the object is not found; because the lookup goes
            through ``get_object_by_identifier``, this also covers objects the
            current user is not allowed to read
        :raises AccessDeniedError: if ``action`` is not allowed on the object
        """
        obj = await self.get_object_by_identifier(type, identifier)
        if obj is None:
            raise ValueError(f"{type.capitalize()} '{identifier}' not found")
        user = get_authenticated_user()
        if not is_action_allowed(self.policy, action, obj, user):
            raise AccessDeniedError(action, obj, user)

    async def get_all_with_type(self, type: str) -> list[RoutableObjectWithProvider]:
        """Return every registered object of ``type`` that the current user may read."""
        objs = await self.dist_registry.get_all()

        # The authenticated user is resolved once instead of once per object.
        user = get_authenticated_user()

        # Apply attribute-based access control filtering
        return [obj for obj in objs if obj.type == type and is_action_allowed(self.policy, "read", obj, user)]
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
async def lookup_model(routing_table: CommonRoutingTableImpl, model_id: str) -> Model:
    """Return the model registered as ``model_id`` in ``routing_table``.

    :raises ModelNotFoundError: if the routing table returns no object for it
    """
    found = await routing_table.get_object_by_identifier("model", model_id)
    if found:
        return found
    raise ModelNotFoundError(model_id)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import uuid
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from llama_stack.apis.common.errors import DatasetNotFoundError
|
|
11
|
+
from llama_stack.apis.datasets import (
|
|
12
|
+
Dataset,
|
|
13
|
+
DatasetPurpose,
|
|
14
|
+
Datasets,
|
|
15
|
+
DatasetType,
|
|
16
|
+
DataSource,
|
|
17
|
+
ListDatasetsResponse,
|
|
18
|
+
RowsDataSource,
|
|
19
|
+
URIDataSource,
|
|
20
|
+
)
|
|
21
|
+
from llama_stack.apis.resource import ResourceType
|
|
22
|
+
from llama_stack.core.datatypes import (
|
|
23
|
+
DatasetWithOwner,
|
|
24
|
+
)
|
|
25
|
+
from llama_stack.log import get_logger
|
|
26
|
+
|
|
27
|
+
from .common import CommonRoutingTableImpl
|
|
28
|
+
|
|
29
|
+
logger = get_logger(name=__name__, category="core::routing_tables")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
    """Routing table for dataset resources."""

    async def list_datasets(self) -> ListDatasetsResponse:
        """Return all registered datasets that the current user may read."""
        return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))

    async def get_dataset(self, dataset_id: str) -> Dataset:
        """Return the dataset registered as ``dataset_id``.

        :raises DatasetNotFoundError: if the lookup returns ``None``
        """
        dataset = await self.get_object_by_identifier("dataset", dataset_id)
        if dataset is None:
            raise DatasetNotFoundError(dataset_id)
        return dataset

    async def register_dataset(
        self,
        purpose: DatasetPurpose,
        source: DataSource,
        metadata: dict[str, Any] | None = None,
        dataset_id: str | None = None,
    ) -> Dataset:
        """Register a new dataset and return it.

        :param purpose: the purpose recorded on the dataset
        :param source: the data source, either a data-source model or a plain
            dict with a ``"type"`` of ``"uri"`` or ``"rows"``
        :param metadata: optional metadata; a truthy ``"provider_id"`` entry
            overrides provider inference
        :param dataset_id: optional identifier; a ``dataset-<uuid4>`` id is
            generated when omitted
        :return: the dataset object that was registered
        :raises ValueError: if the source type is missing or not recognised
        """
        if isinstance(source, dict):
            # A dict with a missing or unknown type gets the same ValueError as the
            # model path below, rather than a KeyError or a later AttributeError.
            source_type = source.get("type")
            if source_type == "uri":
                source = URIDataSource.model_validate(source)
            elif source_type == "rows":
                source = RowsDataSource.model_validate(source)
            else:
                raise ValueError(f"Unknown data source type: {source_type}")

        if not dataset_id:
            dataset_id = f"dataset-{uuid.uuid4()}"

        provider_dataset_id = dataset_id

        # infer provider from source
        if metadata and metadata.get("provider_id"):
            provider_id = metadata.get("provider_id")  # pass through from nvidia datasetio
        elif source.type == DatasetType.rows.value:
            provider_id = "localfs"
        elif source.type == DatasetType.uri.value:
            # infer provider from uri
            if source.uri.startswith("huggingface"):
                provider_id = "huggingface"
            else:
                provider_id = "localfs"
        else:
            raise ValueError(f"Unknown data source type: {source.type}")

        if metadata is None:
            metadata = {}

        dataset = DatasetWithOwner(
            identifier=dataset_id,
            provider_resource_id=provider_dataset_id,
            provider_id=provider_id,
            purpose=purpose,
            source=source,
            metadata=metadata,
        )

        await self.register_object(dataset)
        return dataset

    async def unregister_dataset(self, dataset_id: str) -> None:
        """Look up ``dataset_id`` and unregister it.

        :raises DatasetNotFoundError: if the dataset cannot be found
        """
        dataset = await self.get_dataset(dataset_id)
        await self.unregister_object(dataset)
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from llama_stack.apis.common.errors import ModelNotFoundError
|
|
11
|
+
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
|
|
12
|
+
from llama_stack.core.datatypes import (
|
|
13
|
+
ModelWithOwner,
|
|
14
|
+
RegistryEntrySource,
|
|
15
|
+
)
|
|
16
|
+
from llama_stack.log import get_logger
|
|
17
|
+
|
|
18
|
+
from .common import CommonRoutingTableImpl, lookup_model
|
|
19
|
+
|
|
20
|
+
logger = get_logger(name=__name__, category="core::routing_tables")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ModelsRoutingTable(CommonRoutingTableImpl, Models):
    """Routing table for model resources.

    Resolves model identifiers to the provider implementation that serves
    them and keeps the registry in sync with the models each provider lists.
    """

    # Ids of providers whose model listing has succeeded at least once.
    # Only annotated at class level: the set itself is created per instance in
    # ``__init__`` so independent routing tables never share refresh state.
    listed_providers: set[str]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialise the base routing table and this instance's refresh bookkeeping.

        All positional and keyword arguments are forwarded unchanged to the
        parent initialiser.
        """
        super().__init__(*args, **kwargs)
        self.listed_providers = set()

    async def refresh(self) -> None:
        """Re-list models from providers that need it and update the registry.

        A provider is listed when it reports ``should_refresh_models()`` or
        when it has not been listed successfully yet. A provider whose listing
        raises is logged and skipped; it is not marked as listed, so the next
        refresh tries it again.
        """
        for provider_id, provider in self.impls_by_provider_id.items():
            refresh = await provider.should_refresh_models()
            refresh = refresh or provider_id not in self.listed_providers
            if not refresh:
                continue

            try:
                models = await provider.list_models()
            except Exception as e:
                # Best effort: one failing provider must not block the others.
                logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
                continue

            self.listed_providers.add(provider_id)
            if models is None:
                # The provider returned no listing; leave its registry entries untouched.
                continue

            await self.update_registered_models(provider_id, models)

    async def list_models(self) -> ListModelsResponse:
        """Return every registered object of type ``"model"``."""
        return ListModelsResponse(data=await self.get_all_with_type("model"))

    async def openai_list_models(self) -> OpenAIListModelsResponse:
        """Return the registered models in the OpenAI list-models response shape."""
        models = await self.get_all_with_type("model")
        # Read the clock once so every entry of a response carries the same
        # ``created`` value instead of one clock read per model.
        created = int(time.time())
        openai_models = [
            OpenAIModel(
                id=model.identifier,
                object="model",
                created=created,
                owned_by="llama_stack",
            )
            for model in models
        ]
        return OpenAIListModelsResponse(data=openai_models)

    async def get_model(self, model_id: str) -> Model:
        """Look up a model by id via ``lookup_model``."""
        return await lookup_model(self, model_id)

    async def get_provider_impl(self, model_id: str) -> Any:
        """Return the provider implementation that serves ``model_id``.

        :param model_id: The model identifier to resolve
        :return: The provider implementation registered under the model's provider id
        :raises ValueError: If the model's provider is not part of this routing table
        """
        model = await lookup_model(self, model_id)
        if model.provider_id not in self.impls_by_provider_id:
            raise ValueError(f"Provider {model.provider_id} not found in the routing table")
        return self.impls_by_provider_id[model.provider_id]

    async def has_model(self, model_id: str) -> bool:
        """
        Check if a model exists in the routing table.

        :param model_id: The model identifier to check
        :return: True if the model exists, False otherwise
        """
        try:
            await lookup_model(self, model_id)
            return True
        except ModelNotFoundError:
            return False

    async def register_model(
        self,
        model_id: str,
        provider_model_id: str | None = None,
        provider_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        model_type: ModelType | None = None,
    ) -> Model:
        """Register a model through the public registration API.

        The stored identifier is always ``"<provider_id>/<provider_model_id>"``.

        :param model_id: Model id supplied by the caller; also used as the provider model id when none is given
        :param provider_model_id: Id of the model on the provider side (defaults to ``model_id``)
        :param provider_id: Provider to register with; may be omitted only when exactly one provider is configured
        :param metadata: Optional model metadata (defaults to an empty dict)
        :param model_type: Optional model type (defaults to ``ModelType.llm``)
        :return: The object returned by ``register_object``
        :raises ValueError: If no provider is given and the choice is ambiguous, or if an
            embedding model has no ``embedding_dimension`` in its metadata
        """
        if provider_id is None:
            # Fall back to the sole configured provider; no check is made here
            # that this provider actually serves the model.
            if len(self.impls_by_provider_id) == 1:
                provider_id = next(iter(self.impls_by_provider_id))
            else:
                raise ValueError(
                    f"Please specify a provider_id for model {model_id} since multiple providers are available: {self.impls_by_provider_id.keys()}.\n\n"
                    "Use the provider_id as a prefix to disambiguate, e.g. 'provider_id/model_id'."
                )

        provider_model_id = provider_model_id or model_id
        metadata = metadata or {}
        model_type = model_type or ModelType.llm
        if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
            raise ValueError("Embedding model must have an embedding dimension in its metadata")

        identifier = f"{provider_id}/{provider_model_id}"
        model = ModelWithOwner(
            identifier=identifier,
            provider_resource_id=provider_model_id,
            provider_id=provider_id,
            metadata=metadata,
            model_type=model_type,
            source=RegistryEntrySource.via_register_api,
        )
        registered_model = await self.register_object(model)
        return registered_model

    async def unregister_model(self, model_id: str) -> None:
        """Remove a model from the routing table.

        :param model_id: The model identifier to unregister
        :raises ModelNotFoundError: If the model is not registered
        """
        existing_model = await self.get_model(model_id)
        # NOTE(review): ``lookup_model`` appears to raise ModelNotFoundError itself
        # (see ``has_model``), so this guard is presumably defensive only - confirm.
        if existing_model is None:
            raise ModelNotFoundError(model_id)
        await self.unregister_object(existing_model)

    async def update_registered_models(
        self,
        provider_id: str,
        models: list[Model],
    ) -> None:
        """Replace the provider-listed registry entries of one provider.

        Entries of ``provider_id`` that were registered via the register API
        are kept as they are; every other entry of that provider is
        unregistered and the freshly listed ``models`` are registered in its
        place with source ``listed_from_provider``.

        :param provider_id: Provider whose entries are being refreshed
        :param models: Models currently reported by that provider
        """
        existing_models = await self.get_all_with_type("model")

        # we may have an alias for the model registered by the user (or during initialization
        # from run.yaml) that we need to keep track of
        user_registered_resource_ids: set[str] = set()
        for model in existing_models:
            if model.provider_id != provider_id:
                continue
            if model.source == RegistryEntrySource.via_register_api:
                user_registered_resource_ids.add(model.provider_resource_id)
                continue

            logger.debug(f"unregistering model {model.identifier}")
            await self.unregister_object(model)

        for model in models:
            if model.provider_resource_id in user_registered_resource_ids:
                # avoid overwriting a non-provider-registered model entry
                continue

            if model.identifier == model.provider_resource_id:
                # Namespace bare provider ids as "<provider_id>/<resource_id>".
                # NOTE: this updates the caller's model object in place.
                model.identifier = f"{provider_id}/{model.provider_resource_id}"

            logger.debug(f"registering model {model.identifier} ({model.provider_resource_id})")
            await self.register_object(
                ModelWithOwner(
                    identifier=model.identifier,
                    provider_resource_id=model.provider_resource_id,
                    provider_id=provider_id,
                    metadata=model.metadata,
                    model_type=model.model_type,
                    source=RegistryEntrySource.listed_from_provider,
                )
            )
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from llama_stack.apis.common.type_system import ParamType
|
|
8
|
+
from llama_stack.apis.resource import ResourceType
|
|
9
|
+
from llama_stack.apis.scoring_functions import (
|
|
10
|
+
ListScoringFunctionsResponse,
|
|
11
|
+
ScoringFn,
|
|
12
|
+
ScoringFnParams,
|
|
13
|
+
ScoringFunctions,
|
|
14
|
+
)
|
|
15
|
+
from llama_stack.core.datatypes import (
|
|
16
|
+
ScoringFnWithOwner,
|
|
17
|
+
)
|
|
18
|
+
from llama_stack.log import get_logger
|
|
19
|
+
|
|
20
|
+
from .common import CommonRoutingTableImpl
|
|
21
|
+
|
|
22
|
+
# Module-level logger for the scoring-functions routing table, created under
# the "core::routing_tables" category.
logger = get_logger(name=__name__, category="core::routing_tables")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
    """Routing table for scoring-function resources."""

    async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
        """Return every registered scoring function."""
        return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value))

    async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn:
        """Return the scoring function registered under ``scoring_fn_id``.

        :param scoring_fn_id: Identifier of the scoring function
        :raises ValueError: If no scoring function with that identifier is registered
        """
        # Use the same resource-type constant as list_scoring_functions so the
        # two lookups cannot drift apart.
        scoring_fn = await self.get_object_by_identifier(ResourceType.scoring_function.value, scoring_fn_id)
        if scoring_fn is None:
            raise ValueError(f"Scoring function '{scoring_fn_id}' not found")
        return scoring_fn

    async def register_scoring_function(
        self,
        scoring_fn_id: str,
        description: str,
        return_type: ParamType,
        provider_scoring_fn_id: str | None = None,
        provider_id: str | None = None,
        params: ScoringFnParams | None = None,
    ) -> None:
        """Register a scoring function with the routing table.

        :param scoring_fn_id: Identifier to register the scoring function under
        :param description: Description stored with the scoring function
        :param return_type: Declared return type of the scoring function
        :param provider_scoring_fn_id: Id on the provider side (defaults to ``scoring_fn_id``)
        :param provider_id: Provider to register with; may be omitted only when exactly one provider is configured
        :param params: Optional scoring-function parameters
        :raises ValueError: If no provider is given and the number of configured providers is not exactly one
        """
        if provider_scoring_fn_id is None:
            provider_scoring_fn_id = scoring_fn_id
        if provider_id is None:
            if len(self.impls_by_provider_id) == 1:
                provider_id = next(iter(self.impls_by_provider_id))
            else:
                raise ValueError(
                    "No provider specified and multiple providers available. Please specify a provider_id."
                )
        # provider_id is passed to the constructor, so no separate assignment
        # on the created object is needed afterwards.
        scoring_fn = ScoringFnWithOwner(
            identifier=scoring_fn_id,
            description=description,
            return_type=return_type,
            provider_resource_id=provider_scoring_fn_id,
            provider_id=provider_id,
            params=params,
        )
        await self.register_object(scoring_fn)

    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
        """Remove a registered scoring function.

        :param scoring_fn_id: Identifier of the scoring function to remove
        :raises ValueError: If no scoring function with that identifier is registered
        """
        existing_scoring_fn = await self.get_scoring_function(scoring_fn_id)
        await self.unregister_object(existing_scoring_fn)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from llama_stack.apis.resource import ResourceType
|
|
10
|
+
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
|
|
11
|
+
from llama_stack.core.datatypes import (
|
|
12
|
+
ShieldWithOwner,
|
|
13
|
+
)
|
|
14
|
+
from llama_stack.log import get_logger
|
|
15
|
+
|
|
16
|
+
from .common import CommonRoutingTableImpl
|
|
17
|
+
|
|
18
|
+
# Module-level logger for the shields routing table, created under the
# "core::routing_tables" category.
logger = get_logger(name=__name__, category="core::routing_tables")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
    """Routing table for shield resources."""

    async def list_shields(self) -> ListShieldsResponse:
        """Return every registered shield."""
        registered = await self.get_all_with_type(ResourceType.shield.value)
        return ListShieldsResponse(data=registered)

    async def get_shield(self, identifier: str) -> Shield:
        """Return the shield registered under ``identifier``.

        :param identifier: Identifier of the shield
        :raises ValueError: If no shield with that identifier is registered
        """
        found = await self.get_object_by_identifier("shield", identifier)
        if found is not None:
            return found
        raise ValueError(f"Shield '{identifier}' not found")

    async def register_shield(
        self,
        shield_id: str,
        provider_shield_id: str | None = None,
        provider_id: str | None = None,
        params: dict[str, Any] | None = None,
    ) -> Shield:
        """Register a shield and return the registered object.

        :param shield_id: Identifier to register the shield under
        :param provider_shield_id: Id on the provider side (defaults to ``shield_id``)
        :param provider_id: Provider to register with; may be omitted only when exactly one provider is configured
        :param params: Optional shield parameters (defaults to an empty dict)
        :raises ValueError: If no provider is given and the number of configured providers is not exactly one
        """
        if provider_id is None:
            # Without an explicit provider the choice must be unambiguous.
            candidate_ids = list(self.impls_by_provider_id.keys())
            if len(candidate_ids) != 1:
                raise ValueError(
                    "No provider specified and multiple providers available. Please specify a provider_id."
                )
            provider_id = candidate_ids[0]

        new_shield = ShieldWithOwner(
            identifier=shield_id,
            provider_resource_id=shield_id if provider_shield_id is None else provider_shield_id,
            provider_id=provider_id,
            params={} if params is None else params,
        )
        await self.register_object(new_shield)
        return new_shield

    async def unregister_shield(self, identifier: str) -> None:
        """Remove a registered shield.

        :param identifier: Identifier of the shield to remove
        :raises ValueError: If no shield with that identifier is registered
        """
        await self.unregister_object(await self.get_shield(identifier))
|