llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import UTC, datetime
|
|
9
|
+
from typing import Annotated, Any
|
|
10
|
+
|
|
11
|
+
import boto3
|
|
12
|
+
from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
|
|
13
|
+
from fastapi import Depends, File, Form, Response, UploadFile
|
|
14
|
+
|
|
15
|
+
from llama_stack.apis.common.errors import ResourceNotFoundError
|
|
16
|
+
from llama_stack.apis.common.responses import Order
|
|
17
|
+
from llama_stack.apis.files import (
|
|
18
|
+
ExpiresAfter,
|
|
19
|
+
Files,
|
|
20
|
+
ListOpenAIFileResponse,
|
|
21
|
+
OpenAIFileDeleteResponse,
|
|
22
|
+
OpenAIFileObject,
|
|
23
|
+
OpenAIFilePurpose,
|
|
24
|
+
)
|
|
25
|
+
from llama_stack.core.datatypes import AccessRule
|
|
26
|
+
from llama_stack.core.id_generation import generate_object_id
|
|
27
|
+
from llama_stack.providers.utils.files.form_data import parse_expires_after
|
|
28
|
+
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
|
|
29
|
+
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
|
30
|
+
from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
|
|
31
|
+
|
|
32
|
+
from .config import S3FilesImplConfig
|
|
33
|
+
|
|
34
|
+
# TODO: provider data for S3 credentials
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _create_s3_client(config: S3FilesImplConfig) -> boto3.client:
    """Build a boto3 S3 client from the provider configuration.

    Honors an optional custom endpoint (MinIO, LocalStack, etc.) and optional
    static credentials; when credentials are not configured, boto3's default
    credential resolution chain applies.

    :param config: S3 files provider configuration.
    :returns: a configured boto3 S3 client.
    :raises RuntimeError: if the client cannot be constructed.
    """
    try:
        client_kwargs: dict = {"region_name": config.region}

        # endpoint URL if specified (for MinIO, LocalStack, etc.)
        if config.endpoint_url:
            client_kwargs["endpoint_url"] = config.endpoint_url

        # Only pass explicit credentials when both halves are present.
        if config.aws_access_key_id and config.aws_secret_access_key:
            client_kwargs["aws_access_key_id"] = config.aws_access_key_id
            client_kwargs["aws_secret_access_key"] = config.aws_secret_access_key

        return boto3.client("s3", **client_kwargs)

    except (BotoCoreError, NoCredentialsError) as e:
        raise RuntimeError(f"Failed to initialize S3 client: {e}") from e
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImplConfig) -> None:
    """Ensure the configured S3 bucket exists, creating it when allowed.

    A 404 from head_bucket triggers creation (only if ``auto_create_bucket``
    is enabled); a 403 or any other error is surfaced as ``RuntimeError``.

    :raises RuntimeError: if the bucket is missing and auto-creation is
        disabled, access is denied, or any S3 call fails.
    """
    try:
        client.head_bucket(Bucket=config.bucket_name)
        return
    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        if error_code == "403":
            raise RuntimeError(f"Access denied to S3 bucket '{config.bucket_name}'") from e
        if error_code != "404":
            raise RuntimeError(f"Failed to access S3 bucket '{config.bucket_name}': {e}") from e

        # Bucket does not exist.
        if not config.auto_create_bucket:
            raise RuntimeError(
                f"S3 bucket '{config.bucket_name}' does not exist. "
                f"Either create the bucket manually or set 'auto_create_bucket: true' in your configuration."
            ) from e
        try:
            # For us-east-1, we can't specify LocationConstraint
            create_kwargs: dict = {"Bucket": config.bucket_name}
            if config.region != "us-east-1":
                create_kwargs["CreateBucketConfiguration"] = {"LocationConstraint": config.region}
            client.create_bucket(**create_kwargs)
        except ClientError as create_error:
            raise RuntimeError(
                f"Failed to create S3 bucket '{config.bucket_name}': {create_error}"
            ) from create_error
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _make_file_object(
    *,
    id: str,
    filename: str,
    purpose: str,
    bytes: int,
    created_at: int,
    expires_at: int,
    **kwargs: Any,  # here to ignore any additional fields, e.g. extra fields from AuthorizedSqlStore
) -> OpenAIFileObject:
    """
    Construct an OpenAIFileObject and normalize expires_at.

    If expires_at is greater than the max we treat it as no-expiration and
    return None for expires_at.

    The OpenAI spec says expires_at type is Integer, but the implementation
    will return None for no expiration.
    """
    file_object = OpenAIFileObject(
        id=id,
        filename=filename,
        purpose=OpenAIFilePurpose(purpose),
        bytes=bytes,
        created_at=created_at,
        expires_at=expires_at,
    )

    # Timestamps beyond created_at + MAX encode "never expires"; callers see None.
    no_expiration_cutoff = file_object.created_at + ExpiresAfter.MAX
    if file_object.expires_at is not None and file_object.expires_at > no_expiration_cutoff:
        file_object.expires_at = None  # type: ignore

    return file_object
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class S3FilesImpl(Files):
    """S3-based implementation of the Files API.

    File bytes are stored in an S3 bucket keyed by file id; metadata
    (filename, purpose, timestamps) lives in an AuthorizedSqlStore table.
    """

    def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None:
        self._config = config
        self.policy = policy
        # Populated by initialize(); the client/sql_store properties assert
        # that initialization has happened before use.
        self._client: boto3.client | None = None
        self._sql_store: AuthorizedSqlStore | None = None

    def _now(self) -> int:
        """Return current UTC timestamp as int seconds."""
        return int(datetime.now(UTC).timestamp())

    async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]:
        """Fetch a file's metadata row, raising ResourceNotFoundError if absent.

        Unless return_expired is True, rows whose expiration has passed are
        treated as nonexistent.
        """
        where: dict[str, str | dict] = {"id": file_id}
        if not return_expired:
            where["expires_at"] = {">": self._now()}
        row = await self.sql_store.fetch_one("openai_files", where=where)
        if not row:
            raise ResourceNotFoundError(file_id, "File", "files.list()")
        return row

    async def _delete_file(self, file_id: str) -> None:
        """Delete a file from S3 and the database."""
        try:
            self.client.delete_object(
                Bucket=self._config.bucket_name,
                Key=file_id,
            )
        except ClientError as e:
            # A missing S3 object is acceptable; the metadata row is still removed.
            if e.response["Error"]["Code"] != "NoSuchKey":
                raise RuntimeError(f"Failed to delete file from S3: {e}") from e

        await self.sql_store.delete("openai_files", where={"id": file_id})

    async def _delete_if_expired(self, file_id: str) -> None:
        """If the file exists and is expired, delete it."""
        row = await self._get_file(file_id, return_expired=True)
        if row:
            expires_at = row.get("expires_at")
            if expires_at and expires_at <= self._now():
                await self._delete_file(file_id)

    async def initialize(self) -> None:
        """Create the S3 client, ensure the bucket, and set up the metadata table."""
        self._client = _create_s3_client(self._config)
        await _create_bucket_if_not_exists(self._client, self._config)

        self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy)
        await self._sql_store.create_table(
            "openai_files",
            {
                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                "filename": ColumnType.STRING,
                "purpose": ColumnType.STRING,
                "bytes": ColumnType.INTEGER,
                "created_at": ColumnType.INTEGER,
                "expires_at": ColumnType.INTEGER,
                # TODO: add s3_etag field for integrity checking
            },
        )

    async def shutdown(self) -> None:
        pass

    @property
    def client(self) -> boto3.client:
        assert self._client is not None, "Provider not initialized"
        return self._client

    @property
    def sql_store(self) -> AuthorizedSqlStore:
        assert self._sql_store is not None, "Provider not initialized"
        return self._sql_store

    async def openai_upload_file(
        self,
        file: Annotated[UploadFile, File()],
        purpose: Annotated[OpenAIFilePurpose, Form()],
        expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None,
    ) -> OpenAIFileObject:
        """Store an uploaded file's bytes in S3 and its metadata in the store."""
        file_id = generate_object_id("file", lambda: f"file-{uuid.uuid4().hex}")

        filename = getattr(file, "filename", None) or "uploaded_file"

        created_at = self._now()

        # the default is no expiration.
        # to implement no expiration we set an expiration beyond the max.
        # we'll hide this fact from users when returning the file object.
        expires_at = created_at + ExpiresAfter.MAX * 42
        # the default for BATCH files is 30 days, which happens to be the expiration max.
        if purpose == OpenAIFilePurpose.BATCH:
            expires_at = created_at + ExpiresAfter.MAX

        if expires_after is not None:
            expires_at = created_at + expires_after.seconds

        content = await file.read()

        entry: dict[str, Any] = {
            "id": file_id,
            "filename": filename,
            "purpose": purpose.value,
            "bytes": len(content),
            "created_at": created_at,
            "expires_at": expires_at,
        }

        # Metadata first, then bytes; a failed S3 put rolls the row back.
        await self.sql_store.insert("openai_files", entry)

        try:
            self.client.put_object(
                Bucket=self._config.bucket_name,
                Key=file_id,
                Body=content,
                # TODO: enable server-side encryption
            )
        except ClientError as e:
            await self.sql_store.delete("openai_files", where={"id": file_id})

            raise RuntimeError(f"Failed to upload file to S3: {e}") from e

        return _make_file_object(**entry)

    async def openai_list_files(
        self,
        after: str | None = None,
        limit: int | None = 10000,
        order: Order | None = Order.desc,
        purpose: OpenAIFilePurpose | None = None,
    ) -> ListOpenAIFileResponse:
        """List unexpired files, optionally filtered by purpose, with pagination."""
        # this purely defensive. it should not happen because the router also default to Order.desc.
        order = order or Order.desc

        where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}}
        if purpose:
            where_conditions["purpose"] = purpose.value

        paginated_result = await self.sql_store.fetch_all(
            table="openai_files",
            where=where_conditions,
            order_by=[("created_at", order.value)],
            cursor=("id", after) if after else None,
            limit=limit,
        )

        files = [_make_file_object(**row) for row in paginated_result.data]

        return ListOpenAIFileResponse(
            data=files,
            has_more=paginated_result.has_more,
            # empty string or None? spec says str, ref impl returns str | None, we go with spec
            first_id=files[0].id if files else "",
            last_id=files[-1].id if files else "",
        )

    async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
        """Return metadata for a single file, expiring it lazily first."""
        await self._delete_if_expired(file_id)
        row = await self._get_file(file_id)
        return _make_file_object(**row)

    async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
        """Delete a file's bytes and metadata, expiring it lazily first."""
        await self._delete_if_expired(file_id)
        _ = await self._get_file(file_id)  # raises if not found
        await self._delete_file(file_id)
        return OpenAIFileDeleteResponse(id=file_id, deleted=True)

    async def openai_retrieve_file_content(self, file_id: str) -> Response:
        """Return the raw file bytes as an attachment download."""
        await self._delete_if_expired(file_id)

        row = await self._get_file(file_id)

        try:
            s3_response = self.client.get_object(
                Bucket=self._config.bucket_name,
                Key=row["id"],
            )
            # TODO: can we stream this instead of loading it into memory
            content = s3_response["Body"].read()
        except ClientError as e:
            if e.response["Error"]["Code"] == "NoSuchKey":
                # Bytes are gone; drop the stale metadata row too.
                await self._delete_file(file_id)
                raise ResourceNotFoundError(file_id, "File", "files.list()") from e
            raise RuntimeError(f"Failed to download file from S3: {e}") from e

        return Response(
            content=content,
            media_type="application/octet-stream",
            headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'},
        )
|
|
@@ -4,14 +4,12 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from .config import AnthropicConfig
|
|
8
8
|
|
|
9
|
-
from .config import SampleConfig
|
|
10
9
|
|
|
10
|
+
async def get_adapter_impl(config: AnthropicConfig, _deps):
    """Instantiate and initialize the Anthropic inference adapter."""
    from .anthropic import AnthropicInferenceAdapter

    adapter = AnthropicInferenceAdapter(config=config)
    await adapter.initialize()
    return adapter
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
|
|
9
|
+
from anthropic import AsyncAnthropic
|
|
10
|
+
|
|
11
|
+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
12
|
+
|
|
13
|
+
from .config import AnthropicConfig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AnthropicInferenceAdapter(OpenAIMixin):
    """Inference adapter for Anthropic's OpenAI-compatible API."""

    config: AnthropicConfig

    # Per-request API keys may be supplied via provider data under this field.
    provider_data_api_key_field: str = "anthropic_api_key"
    # source: https://docs.claude.com/en/docs/build-with-claude/embeddings
    # TODO: add support for voyageai, which is where these models are hosted
    # embedding_model_metadata = {
    #     "voyage-3-large": {"embedding_dimension": 1024, "context_length": 32000},  # supports dimensions 256, 512, 1024, 2048
    #     "voyage-3.5": {"embedding_dimension": 1024, "context_length": 32000},  # supports dimensions 256, 512, 1024, 2048
    #     "voyage-3.5-lite": {"embedding_dimension": 1024, "context_length": 32000},  # supports dimensions 256, 512, 1024, 2048
    #     "voyage-code-3": {"embedding_dimension": 1024, "context_length": 32000},  # supports dimensions 256, 512, 1024, 2048
    #     "voyage-finance-2": {"embedding_dimension": 1024, "context_length": 32000},
    #     "voyage-law-2": {"embedding_dimension": 1024, "context_length": 16000},
    #     "voyage-multimodal-3": {"embedding_dimension": 1024, "context_length": 32000},
    # }

    # -> str annotation added for consistency with AzureInferenceAdapter.get_base_url
    def get_base_url(self) -> str:
        """Return the Anthropic API base URL."""
        return "https://api.anthropic.com/v1"

    async def list_provider_model_ids(self) -> Iterable[str]:
        """Return the model ids available from Anthropic's models endpoint."""
        return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
12
|
+
from llama_stack.schema_utils import json_schema_type
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Validates per-request provider data (e.g. X-LlamaStack-Provider-Data header)
# for the Anthropic provider. NOTE: no docstring on purpose — pydantic copies a
# class docstring into the generated JSON schema description.
class AnthropicProviderDataValidator(BaseModel):
    # Optional per-request key; falls back to the statically configured key when absent.
    anthropic_api_key: str | None = Field(
        default=None,
        description="API key for Anthropic models",
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@json_schema_type
class AnthropicConfig(RemoteInferenceProviderConfig):
    # Static configuration for the Anthropic remote inference provider.

    @classmethod
    def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY:=}", **kwargs) -> dict[str, Any]:
        # Template config; the ${env.*} placeholder is resolved when the run config loads.
        return dict(api_key=api_key)
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from .config import
|
|
7
|
+
from .config import AzureConfig
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
async def
|
|
11
|
-
from .
|
|
10
|
+
async def get_adapter_impl(config: AzureConfig, _deps):
    """Instantiate and initialize the Azure inference adapter.

    :param config: provider configuration; must be an ``AzureConfig``.
    :param _deps: resolved API dependencies (unused by this provider).
    :returns: an initialized ``AzureInferenceAdapter``.
    """
    from .azure import AzureInferenceAdapter

    # Guard against registry wiring errors, mirroring the Bedrock adapter factory.
    if not isinstance(config, AzureConfig):
        raise TypeError(f"Unexpected config type: {type(config)}")

    impl = AzureInferenceAdapter(config=config)
    await impl.initialize()
    return impl
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from urllib.parse import urljoin
|
|
8
|
+
|
|
9
|
+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
10
|
+
|
|
11
|
+
from .config import AzureConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AzureInferenceAdapter(OpenAIMixin):
    # Adapter that speaks to Azure's OpenAI-compatible endpoint via OpenAIMixin.
    config: AzureConfig

    # Per-request API keys arrive via provider data under this field name.
    provider_data_api_key_field: str = "azure_api_key"

    def get_base_url(self) -> str:
        """Return the Azure OpenAI-compatible base URL.

        Derived from the configured ``api_base`` by replacing any path with
        ``/openai/v1`` (an absolute path in ``urljoin`` keeps only scheme/host).
        """
        configured_base = str(self.config.api_base)
        return urljoin(configured_base, "/openai/v1")
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field, HttpUrl, SecretStr
|
|
11
|
+
|
|
12
|
+
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
13
|
+
from llama_stack.schema_utils import json_schema_type
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Validates per-request provider data for the Azure provider. NOTE: no docstring
# on purpose — pydantic copies a class docstring into the generated JSON schema.
class AzureProviderDataValidator(BaseModel):
    # Required per-request credential for the Azure OpenAI resource.
    azure_api_key: SecretStr = Field(
        description="Azure API key for Azure",
    )
    # Required resource endpoint, e.g. https://<resource>.openai.azure.com
    azure_api_base: HttpUrl = Field(
        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
    )
    # Optional API version; service default is used when omitted.
    azure_api_version: str | None = Field(
        default=None,
        description="Azure API version for Azure (e.g., 2024-06-01)",
    )
    # Optional API flavor; defaults to "azure".
    azure_api_type: str | None = Field(
        default="azure",
        description="Azure API type for Azure (e.g., azure)",
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@json_schema_type
class AzureConfig(RemoteInferenceProviderConfig):
    # Static configuration for the Azure remote inference provider.
    # Field declarations are schema-bearing and intentionally left as-is.
    api_base: HttpUrl = Field(
        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
    )
    api_version: str | None = Field(
        default_factory=lambda: os.getenv("AZURE_API_VERSION"),
        description="Azure API version for Azure (e.g., 2024-12-01-preview)",
    )
    api_type: str | None = Field(
        default_factory=lambda: os.getenv("AZURE_API_TYPE", "azure"),
        description="Azure API type for Azure (e.g., azure)",
    )

    @classmethod
    def sample_run_config(
        cls,
        api_key: str = "${env.AZURE_API_KEY:=}",
        api_base: str = "${env.AZURE_API_BASE:=}",
        api_version: str = "${env.AZURE_API_VERSION:=}",
        api_type: str = "${env.AZURE_API_TYPE:=}",
        **kwargs,
    ) -> dict[str, Any]:
        # Template config; each ${env.*} placeholder is resolved at run-config load time.
        sample = dict(
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            api_type=api_type,
        )
        return sample
|
|
@@ -1,17 +1,18 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
from .
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
from .config import BedrockConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def get_adapter_impl(config: BedrockConfig, _deps):
    """Instantiate and initialize the Bedrock inference adapter.

    :param config: provider configuration; must be a ``BedrockConfig``.
    :param _deps: resolved API dependencies (unused by this provider).
    :returns: an initialized ``BedrockInferenceAdapter``.
    """
    from .bedrock import BedrockInferenceAdapter

    # Explicit raise instead of `assert`: asserts are stripped under `python -O`.
    # AssertionError is kept so the raised exception type is unchanged for callers.
    if not isinstance(config, BedrockConfig):
        raise AssertionError(f"Unexpected config type: {type(config)}")

    impl = BedrockInferenceAdapter(config)

    await impl.initialize()

    return impl
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from collections.abc import AsyncIterator
|
|
9
|
+
|
|
10
|
+
from botocore.client import BaseClient
|
|
11
|
+
|
|
12
|
+
from llama_stack.apis.inference import (
|
|
13
|
+
ChatCompletionRequest,
|
|
14
|
+
Inference,
|
|
15
|
+
OpenAIChatCompletionRequestWithExtraBody,
|
|
16
|
+
OpenAICompletionRequestWithExtraBody,
|
|
17
|
+
OpenAIEmbeddingsRequestWithExtraBody,
|
|
18
|
+
OpenAIEmbeddingsResponse,
|
|
19
|
+
)
|
|
20
|
+
from llama_stack.apis.inference.inference import (
|
|
21
|
+
OpenAIChatCompletion,
|
|
22
|
+
OpenAIChatCompletionChunk,
|
|
23
|
+
OpenAICompletion,
|
|
24
|
+
)
|
|
25
|
+
from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
|
|
26
|
+
from llama_stack.providers.utils.bedrock.client import create_bedrock_client
|
|
27
|
+
from llama_stack.providers.utils.inference.model_registry import (
|
|
28
|
+
ModelRegistryHelper,
|
|
29
|
+
)
|
|
30
|
+
from llama_stack.providers.utils.inference.openai_compat import (
|
|
31
|
+
get_sampling_strategy_options,
|
|
32
|
+
)
|
|
33
|
+
from llama_stack.providers.utils.inference.prompt_adapter import (
|
|
34
|
+
chat_completion_request_to_prompt,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
from .models import MODEL_ENTRIES
|
|
38
|
+
|
|
39
|
+
REGION_PREFIX_MAP = {
|
|
40
|
+
"us": "us.",
|
|
41
|
+
"eu": "eu.",
|
|
42
|
+
"ap": "ap.",
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _get_region_prefix(region: str | None) -> str:
|
|
47
|
+
# AWS requires region prefixes for inference profiles
|
|
48
|
+
if region is None:
|
|
49
|
+
return "us." # default to US when we don't know
|
|
50
|
+
|
|
51
|
+
# Handle case insensitive region matching
|
|
52
|
+
region_lower = region.lower()
|
|
53
|
+
for prefix in REGION_PREFIX_MAP:
|
|
54
|
+
if region_lower.startswith(f"{prefix}-"):
|
|
55
|
+
return REGION_PREFIX_MAP[prefix]
|
|
56
|
+
|
|
57
|
+
# Fallback to US for anything we don't recognize
|
|
58
|
+
return "us."
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _to_inference_profile_id(model_id: str, region: str | None = None) -> str:
    """Convert a Bedrock foundation-model ID into a region-prefixed inference-profile ID.

    :param model_id: foundation model ID, inference-profile ID, or full ARN.
    :param region: AWS region used to pick the prefix; defaults to ``us-east-1``.
        (Annotation fixed from ``str`` to ``str | None`` to match the ``None`` default.)
    :returns: the ID unchanged for ARNs and already-prefixed profile IDs,
        otherwise the ID with the regional prefix prepended.
    """
    # Return ARNs unchanged
    if model_id.startswith("arn:"):
        return model_id

    # Return inference profile IDs that already have regional prefixes
    if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
        return model_id

    # Default to US East when no region is provided
    if region is None:
        region = "us-east-1"

    return _get_region_prefix(region) + model_id
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class BedrockInferenceAdapter(
    ModelRegistryHelper,
    Inference,
):
    """AWS Bedrock inference adapter built on the legacy InvokeModel-style API.

    The OpenAI-compatible entry points are declared but not supported by this
    provider and raise ``NotImplementedError`` with an explanatory message.
    """

    def __init__(self, config: BedrockConfig) -> None:
        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
        self._config = config
        # boto3 client is created lazily on first access (see `client`).
        self._client = None

    @property
    def client(self) -> BaseClient:
        """Lazily construct and cache the Bedrock boto3 client."""
        if self._client is None:
            self._client = create_bedrock_client(self._config)
        return self._client

    async def initialize(self) -> None:
        # Nothing to set up eagerly; the client is created on demand.
        pass

    async def shutdown(self) -> None:
        # Close the boto3 client only if it was ever created.
        if self._client is not None:
            self._client.close()

    async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) -> dict:
        """Build the InvokeModel request parameters for a chat-completion request.

        :returns: dict with ``modelId`` (region-prefixed inference profile ID)
            and a JSON ``body`` containing the rendered prompt plus sampling options.
        """
        bedrock_model = request.model

        sampling_params = request.sampling_params
        options = get_sampling_strategy_options(sampling_params)

        if sampling_params.max_tokens:
            options["max_gen_len"] = sampling_params.max_tokens
        if sampling_params.repetition_penalty > 0:
            options["repetition_penalty"] = sampling_params.repetition_penalty

        prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))

        # Convert foundation model ID to inference profile ID for the client's region.
        region_name = self.client.meta.region_name
        inference_profile_id = _to_inference_profile_id(bedrock_model, region_name)

        return {
            "modelId": inference_profile_id,
            "body": json.dumps(
                {
                    "prompt": prompt,
                    **options,
                }
            ),
        }

    async def openai_embeddings(
        self,
        params: OpenAIEmbeddingsRequestWithExtraBody,
    ) -> OpenAIEmbeddingsResponse:
        # Message added for consistency with the other unsupported endpoints below.
        raise NotImplementedError("OpenAI embeddings not supported by the Bedrock provider")

    async def openai_completion(
        self,
        params: OpenAICompletionRequestWithExtraBody,
    ) -> OpenAICompletion:
        raise NotImplementedError("OpenAI completion not supported by the Bedrock provider")

    async def openai_chat_completion(
        self,
        params: OpenAIChatCompletionRequestWithExtraBody,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
        raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider")