llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
import json
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import weaviate
|
|
10
|
+
import weaviate.classes as wvc
|
|
11
|
+
from numpy.typing import NDArray
|
|
12
|
+
from weaviate.classes.init import Auth
|
|
13
|
+
from weaviate.classes.query import Filter, HybridFusion
|
|
14
|
+
|
|
15
|
+
from llama_stack.apis.common.content_types import InterleavedContent
|
|
16
|
+
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
|
17
|
+
from llama_stack.apis.files import Files
|
|
18
|
+
from llama_stack.apis.inference import Inference
|
|
19
|
+
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
20
|
+
from llama_stack.apis.vector_stores import VectorStore
|
|
21
|
+
from llama_stack.core.request_headers import NeedsRequestProviderData
|
|
22
|
+
from llama_stack.log import get_logger
|
|
23
|
+
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
24
|
+
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
25
|
+
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
26
|
+
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
27
|
+
from llama_stack.providers.utils.memory.vector_store import (
|
|
28
|
+
RERANKER_TYPE_RRF,
|
|
29
|
+
ChunkForDeletion,
|
|
30
|
+
EmbeddingIndex,
|
|
31
|
+
VectorStoreWithIndex,
|
|
32
|
+
)
|
|
33
|
+
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
|
|
34
|
+
|
|
35
|
+
from .config import WeaviateVectorIOConfig
|
|
36
|
+
|
|
37
|
+
# Module-level logger, namespaced under the vector_io::weaviate category.
log = get_logger(name=__name__, category="vector_io::weaviate")

# Schema version embedded in every KV key below; changing it effectively
# starts a fresh key namespace for persisted registry entries.
VERSION = "v3"
# Key prefixes for the provider's KV-store persistence of vector store
# definitions, index metadata, and OpenAI-compatible vector store records.
VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:weaviate:{VERSION}::"
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class WeaviateIndex(EmbeddingIndex):
    """EmbeddingIndex backed by a single Weaviate collection.

    Each chunk is stored as one Weaviate object with two properties —
    ``chunk_id`` and ``chunk_content`` (the full chunk serialized as JSON) —
    and the embedding attached as the object vector.
    """

    def __init__(self, client: weaviate.WeaviateClient, collection_name: str, kvstore: KVStore | None = None):
        self.client = client
        # Weaviate restricts collection names, so always keep the sanitized form.
        self.collection_name = sanitize_collection_name(collection_name, weaviate_format=True)
        self.kvstore = kvstore

    async def initialize(self):
        # Collection creation is handled by the adapter; nothing to do here.
        pass

    @staticmethod
    def _chunk_from_doc(doc) -> Chunk | None:
        """Deserialize a Weaviate object back into a Chunk; None if the JSON is unparseable."""
        chunk_json = doc.properties["chunk_content"]
        try:
            return Chunk(**json.loads(chunk_json))
        except Exception:
            log.exception(f"Failed to parse document: {chunk_json}")
            return None

    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
        """Insert chunks with their embeddings into the collection.

        Raises if the number of chunks and embeddings differ.
        """
        assert len(chunks) == len(embeddings), (
            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
        )

        # strict=True keeps the length guarantee even when asserts are
        # stripped under `python -O`.
        data_objects = [
            wvc.data.DataObject(
                properties={
                    "chunk_id": chunk.chunk_id,
                    "chunk_content": chunk.model_dump_json(),
                },
                vector=embedding.tolist(),
            )
            for chunk, embedding in zip(chunks, embeddings, strict=True)
        ]

        # Inserting chunks into a prespecified Weaviate collection
        collection = self.client.collections.get(self.collection_name)

        # TODO: make this async friendly
        collection.data.insert_many(data_objects)

    async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete the given chunks from the collection by their chunk_id property."""
        sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True)
        collection = self.client.collections.get(sanitized_collection_name)
        chunk_ids = [chunk.chunk_id for chunk in chunks_for_deletion]
        collection.data.delete_many(where=Filter.by_property("chunk_id").contains_any(chunk_ids))

    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
        """
        Performs vector search using Weaviate's built-in vector search.
        Args:
            embedding: The query embedding vector
            k: Limit of number of results to return
            score_threshold: Minimum similarity score threshold
        Returns:
            QueryChunksResponse with chunks and scores.
        """
        log.debug(
            f"WEAVIATE VECTOR SEARCH CALLED: embedding_shape={embedding.shape}, k={k}, threshold={score_threshold}"
        )
        sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True)
        collection = self.client.collections.get(sanitized_collection_name)

        try:
            results = collection.query.near_vector(
                near_vector=embedding.tolist(), limit=k, return_metadata=wvc.query.MetadataQuery(distance=True)
            )
        except Exception as e:
            log.error(f"Weaviate client vector search failed: {e}")
            raise

        chunks = []
        scores = []
        for doc in results.objects:
            chunk = self._chunk_from_doc(doc)
            if chunk is None:
                continue
            # Objects without a distance cannot be scored; skip them.
            if doc.metadata.distance is None:
                continue
            # Convert cosine distance ∈ [0,2] -> normalized cosine similarity ∈ [0,1]
            score = 1.0 - (float(doc.metadata.distance) / 2.0)
            if score < score_threshold:
                continue

            chunks.append(chunk)
            scores.append(score)

        log.debug(f"WEAVIATE VECTOR SEARCH RESULTS: Found {len(chunks)} chunks with scores {scores}")
        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def delete(self, chunk_ids: list[str] | None = None) -> None:
        """
        Delete chunks by IDs if provided, otherwise drop the entire collection.
        """
        sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True)
        if chunk_ids is None:
            # Drop entire collection if it exists
            if self.client.collections.exists(sanitized_collection_name):
                self.client.collections.delete(sanitized_collection_name)
            return
        collection = self.client.collections.get(sanitized_collection_name)
        # FIX: objects are stored with a "chunk_id" property (see add_chunks);
        # the previous filter on "id" could never match anything.
        collection.data.delete_many(where=Filter.by_property("chunk_id").contains_any(chunk_ids))

    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
        """
        Performs BM25-based keyword search using Weaviate's built-in full-text search.
        Args:
            query_string: The text query for keyword search
            k: Limit of number of results to return
            score_threshold: Minimum similarity score threshold
        Returns:
            QueryChunksResponse with chunks and scores
        """
        log.debug(f"WEAVIATE KEYWORD SEARCH CALLED: query='{query_string}', k={k}, threshold={score_threshold}")
        sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True)
        collection = self.client.collections.get(sanitized_collection_name)

        # Perform BM25 keyword search on chunk_content field
        try:
            results = collection.query.bm25(
                query=query_string, limit=k, return_metadata=wvc.query.MetadataQuery(score=True)
            )
        except Exception as e:
            log.error(f"Weaviate client keyword search failed: {e}")
            raise

        chunks = []
        scores = []
        for doc in results.objects:
            chunk = self._chunk_from_doc(doc)
            if chunk is None:
                continue

            score = doc.metadata.score if doc.metadata.score is not None else 0.0
            if score < score_threshold:
                continue

            chunks.append(chunk)
            scores.append(score)

        log.debug(f"WEAVIATE KEYWORD SEARCH RESULTS: Found {len(chunks)} chunks with scores {scores}.")
        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def query_hybrid(
        self,
        embedding: NDArray,
        query_string: str,
        k: int,
        score_threshold: float,
        reranker_type: str,
        reranker_params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """
        Hybrid search combining vector similarity and keyword search using Weaviate's native hybrid search.
        Args:
            embedding: The query embedding vector
            query_string: The text query for keyword search
            k: Limit of number of results to return
            score_threshold: Minimum similarity score threshold
            reranker_type: Type of reranker to use ("rrf" or "normalized")
            reranker_params: Parameters for the reranker (currently unused)
        Returns:
            QueryChunksResponse with combined results
        """
        log.debug(
            f"WEAVIATE HYBRID SEARCH CALLED: query='{query_string}', embedding_shape={embedding.shape}, k={k}, threshold={score_threshold}, reranker={reranker_type}"
        )
        sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True)
        collection = self.client.collections.get(sanitized_collection_name)

        # Map our reranker names onto Weaviate's two fusion strategies:
        # RRF -> RANKED, anything else -> RELATIVE_SCORE (normalized).
        if reranker_type == RERANKER_TYPE_RRF:
            rerank = HybridFusion.RANKED
        else:
            rerank = HybridFusion.RELATIVE_SCORE

        # Perform hybrid search using Weaviate's native hybrid search
        try:
            results = collection.query.hybrid(
                query=query_string,
                alpha=0.5,  # Range <0, 1>, where 0.5 will equally favor vector and keyword search
                vector=embedding.tolist(),
                limit=k,
                fusion_type=rerank,
                return_metadata=wvc.query.MetadataQuery(score=True),
            )
        except Exception as e:
            log.error(f"Weaviate client hybrid search failed: {e}")
            raise

        chunks = []
        scores = []
        for doc in results.objects:
            chunk = self._chunk_from_doc(doc)
            if chunk is None:
                continue

            score = doc.metadata.score if doc.metadata.score is not None else 0.0
            if score < score_threshold:
                continue

            chunks.append(chunk)
            scores.append(score)

        log.debug(f"WEAVIATE HYBRID SEARCH RESULTS: Found {len(chunks)} chunks with scores {scores}")
        return QueryChunksResponse(chunks=chunks, scores=scores)
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
|
|
261
|
+
def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
    """Record config and API handles; connections and KV store are set up in initialize()."""
    super().__init__(files_api=files_api, kvstore=None)
    self.config = config
    self.inference_api = inference_api
    self.vector_store_table = None
    self.metadata_collection_name = "openai_vector_stores_metadata"
    # Open Weaviate clients, keyed per cluster (see _get_client).
    self.client_cache = {}
    # vector_store_id -> VectorStoreWithIndex, populated lazily and by initialize().
    self.cache = {}
|
269
|
+
|
|
270
|
+
def _get_client(self) -> weaviate.WeaviateClient:
    """Return a cached Weaviate client for the configured cluster, connecting on first use.

    URLs containing "localhost" are treated as a local container deployment
    (no auth); anything else is treated as a Weaviate Cloud cluster reached
    with the configured API key.

    FIX: the cache is now consulted *before* connecting in both branches.
    Previously the localhost branch opened a fresh connection on every call
    and overwrote the cached client without closing the old one, leaking
    connections.
    """
    if "localhost" in self.config.weaviate_cluster_url:
        log.info("Using Weaviate locally in container")
        key = "local_test"
        if key in self.client_cache:
            return self.client_cache[key]
        # NOTE(review): assumes the URL is bare "host:port" (no scheme) — a
        # "http://" prefix would break this split. Confirm against config docs.
        host, port = self.config.weaviate_cluster_url.split(":")
        client = weaviate.connect_to_local(host=host, port=port)
    else:
        log.info("Using Weaviate remote cluster with URL")
        key = f"{self.config.weaviate_cluster_url}::{self.config.weaviate_api_key}"
        if key in self.client_cache:
            return self.client_cache[key]
        client = weaviate.connect_to_weaviate_cloud(
            cluster_url=self.config.weaviate_cluster_url,
            auth_credentials=Auth.api_key(self.config.weaviate_api_key),
        )
    self.client_cache[key] = client
    return client
|
287
|
+
|
|
288
|
+
async def initialize(self) -> None:
    """Set up KV store and load existing vector DBs and OpenAI vector stores."""
    # Initialize KV store for metadata if configured.
    if self.config.persistence is None:
        self.kvstore = None
        log.info("No kvstore configured, registry will not persist across restarts")
    else:
        self.kvstore = await kvstore_impl(self.config.persistence)

    # Re-hydrate every persisted vector store definition into the in-memory cache.
    if self.kvstore is not None:
        stored_values = await self.kvstore.values_in_range(VECTOR_DBS_PREFIX, f"{VECTOR_DBS_PREFIX}\xff")
        for raw in stored_values:
            vector_store = VectorStore.model_validate_json(raw)
            weaviate_client = self._get_client()
            index = WeaviateIndex(
                client=weaviate_client,
                collection_name=vector_store.identifier,
                kvstore=self.kvstore,
            )
            self.cache[vector_store.identifier] = VectorStoreWithIndex(
                vector_store=vector_store,
                index=index,
                inference_api=self.inference_api,
            )

    # Load OpenAI vector stores metadata into cache.
    await self.initialize_openai_vector_stores()
|
312
|
+
|
|
313
|
+
async def shutdown(self) -> None:
    """Close every open Weaviate client, then let the mixin release its resources."""
    for cached_client in self.client_cache.values():
        cached_client.close()
    # Clean up mixin resources (file batch tasks).
    await super().shutdown()
|
318
|
+
|
|
319
|
+
async def register_vector_store(self, vector_store: VectorStore) -> None:
    """Ensure a Weaviate collection exists for this store and cache its index."""
    client = self._get_client()
    collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)

    # Create the collection on first registration. Embeddings are supplied by
    # us, so Weaviate's own vectorizer is disabled.
    if not client.collections.exists(collection_name):
        chunk_content_prop = wvc.config.Property(name="chunk_content", data_type=wvc.config.DataType.TEXT)
        client.collections.create(
            name=collection_name,
            vectorizer_config=wvc.config.Configure.Vectorizer.none(),
            properties=[chunk_content_prop],
        )

    index = WeaviateIndex(client=client, collection_name=collection_name)
    self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
|
335
|
+
|
|
336
|
+
async def unregister_vector_store(self, vector_store_id: str) -> None:
    """Drop the backing Weaviate collection and evict the cached index, if registered."""
    client = self._get_client()
    collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True)

    # Guard: nothing to do when the store is unknown here or the collection is already gone.
    if vector_store_id not in self.cache or not client.collections.exists(collection_name):
        return

    client.collections.delete(collection_name)
    await self.cache[vector_store_id].index.delete()
    del self.cache[vector_store_id]
|
344
|
+
|
|
345
|
+
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
    """Return the index for ``vector_store_id``, loading it from the kvstore on a cache miss.

    Raises:
        RuntimeError: if ``initialize()`` has not been called (kvstore missing).
        VectorStoreNotFoundError: if the store is unknown to the kvstore.
        ValueError: if the backing Weaviate collection no longer exists.
    """
    if vector_store_id in self.cache:
        return self.cache[vector_store_id]

    # Try to load from kvstore
    if self.kvstore is None:
        raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")

    key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
    vector_store_data = await self.kvstore.get(key)
    if not vector_store_data:
        raise VectorStoreNotFoundError(vector_store_id)

    vector_store = VectorStore.model_validate_json(vector_store_data)
    client = self._get_client()
    sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
    if not client.collections.exists(sanitized_collection_name):
        raise ValueError(f"Collection with name `{sanitized_collection_name}` not found")

    # BUG FIX: point the index at the *sanitized* collection name -- that is the name
    # the collection is created under in register_vector_store() (and the name whose
    # existence we just verified). Using the raw identifier targeted a collection
    # that may not exist in Weaviate.
    index = VectorStoreWithIndex(
        vector_store=vector_store,
        index=WeaviateIndex(client=client, collection_name=sanitized_collection_name),
        inference_api=self.inference_api,
    )
    self.cache[vector_store_id] = index
    return index
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
    """Insert ``chunks`` into the vector store identified by ``vector_db_id``.

    ``ttl_seconds`` is accepted for interface parity; this implementation does not use it.
    """
    store_index = await self._get_and_cache_vector_store_index(vector_db_id)
    if not store_index:
        raise VectorStoreNotFoundError(vector_db_id)
    await store_index.insert_chunks(chunks)
async def query_chunks(
    self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
    """Run ``query`` against the vector store identified by ``vector_db_id``."""
    store_index = await self._get_and_cache_vector_store_index(vector_db_id)
    if not store_index:
        raise VectorStoreNotFoundError(vector_db_id)
    return await store_index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
    """Delete the given chunks from the vector store identified by ``store_id``.

    Raises:
        VectorStoreNotFoundError: if ``store_id`` does not resolve to a known store.
    """
    index = await self._get_and_cache_vector_store_index(store_id)
    if not index:
        # CONSISTENCY FIX: the sibling chunk APIs (insert_chunks / query_chunks) raise
        # VectorStoreNotFoundError here; a bare ValueError was the odd one out.
        # VectorStoreNotFoundError subclasses ValueError, so existing handlers still match.
        raise VectorStoreNotFoundError(store_id)

    await index.index.delete_chunks(chunks_for_deletion)
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import boto3
|
|
9
|
+
from botocore.client import BaseClient
|
|
10
|
+
from botocore.config import Config
|
|
11
|
+
|
|
12
|
+
from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
|
|
13
|
+
from llama_stack.providers.utils.bedrock.refreshable_boto_session import (
|
|
14
|
+
RefreshableBotoSession,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def create_bedrock_client(config: BedrockBaseConfig, service_name: str = "bedrock-runtime") -> BaseClient:
    """Creates a boto3 client for Bedrock services with the given configuration.

    Args:
        config: The Bedrock configuration containing AWS credentials and settings
        service_name: The AWS service name to create client for (default: "bedrock-runtime")

    Returns:
        A configured boto3 client
    """
    if config.aws_access_key_id and config.aws_secret_access_key:
        # Keep only the retry settings that were actually configured.
        retries_config = {
            k: v
            for k, v in dict(
                total_max_attempts=config.total_max_attempts,
                mode=config.retry_mode,
            ).items()
            if v is not None
        }

        config_args = {
            k: v
            for k, v in dict(
                region_name=config.region_name,
                retries=retries_config if retries_config else None,
                connect_timeout=config.connect_timeout,
                read_timeout=config.read_timeout,
            ).items()
            if v is not None
        }

        boto3_config = Config(**config_args)

        # BUG FIX: ``session_ttl`` is not a valid boto3.session.Session keyword argument,
        # and BedrockBaseConfig always supplies an int for it, so including it made this
        # branch raise TypeError. The TTL only applies to the RefreshableBotoSession path.
        session_args = {
            "aws_access_key_id": config.aws_access_key_id,
            "aws_secret_access_key": config.aws_secret_access_key,
            "aws_session_token": config.aws_session_token,
            "region_name": config.region_name,
            "profile_name": config.profile_name,
        }

        # Remove None values
        session_args = {k: v for k, v in session_args.items() if v is not None}

        boto3_session = boto3.session.Session(**session_args)
        return boto3_session.client(service_name, config=boto3_config)
    else:
        # No static credentials: use a self-refreshing session so a cached client
        # keeps working after the underlying credentials expire.
        return (
            RefreshableBotoSession(
                region_name=config.region_name,
                profile_name=config.profile_name,
                session_ttl=config.session_ttl,
            )
            .refreshable_session()
            .client(service_name)
        )
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
|
|
11
|
+
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BedrockBaseConfig(RemoteInferenceProviderConfig):
    """Shared AWS Bedrock configuration: credentials, region, retry and timeout knobs.

    Every field defaults from the matching AWS environment variable, so an empty
    config works in an environment already set up for the AWS CLI/SDK.
    """

    # Bedrock authenticates via AWS credentials below, not a provider API key;
    # pinned to None and excluded from serialization.
    auth_credential: None = Field(default=None, exclude=True)
    aws_access_key_id: str | None = Field(
        default_factory=lambda: os.getenv("AWS_ACCESS_KEY_ID"),
        description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID",
    )
    aws_secret_access_key: str | None = Field(
        default_factory=lambda: os.getenv("AWS_SECRET_ACCESS_KEY"),
        description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY",
    )
    aws_session_token: str | None = Field(
        default_factory=lambda: os.getenv("AWS_SESSION_TOKEN"),
        description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
    )
    region_name: str | None = Field(
        default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"),
        description="The default AWS Region to use, for example, us-west-1 or us-west-2."
        "Default use environment variable: AWS_DEFAULT_REGION",
    )
    profile_name: str | None = Field(
        default_factory=lambda: os.getenv("AWS_PROFILE"),
        description="The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE",
    )
    # Boto retry settings; None means "let botocore use its own defaults".
    total_max_attempts: int | None = Field(
        default_factory=lambda: int(val) if (val := os.getenv("AWS_MAX_ATTEMPTS")) else None,
        description="An integer representing the maximum number of attempts that will be made for a single request, "
        "including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS",
    )
    retry_mode: str | None = Field(
        default_factory=lambda: os.getenv("AWS_RETRY_MODE"),
        description="A string representing the type of retries Boto3 will perform."
        "Default use environment variable: AWS_RETRY_MODE",
    )
    # Unlike the fields above, the timeouts and TTL fall back to hard defaults
    # (60s / 60s / 3600s) when their environment variables are unset.
    connect_timeout: float | None = Field(
        default_factory=lambda: float(os.getenv("AWS_CONNECT_TIMEOUT", "60")),
        description="The time in seconds till a timeout exception is thrown when attempting to make a connection. "
        "The default is 60 seconds.",
    )
    read_timeout: float | None = Field(
        default_factory=lambda: float(os.getenv("AWS_READ_TIMEOUT", "60")),
        description="The time in seconds till a timeout exception is thrown when attempting to read from a connection."
        "The default is 60 seconds.",
    )
    session_ttl: int | None = Field(
        default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", "3600")),
        description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).",
    )

    @classmethod
    def sample_run_config(cls, **kwargs):
        # No required settings: everything can come from the environment at runtime.
        return {}
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import datetime
|
|
8
|
+
from time import time
|
|
9
|
+
from uuid import uuid4
|
|
10
|
+
|
|
11
|
+
from boto3 import Session
|
|
12
|
+
from botocore.credentials import RefreshableCredentials
|
|
13
|
+
from botocore.session import get_session
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RefreshableBotoSession:
    """
    Boto Helper class which lets us create a refreshable session so that we can cache the client or resource.

    Usage
    -----
    session = RefreshableBotoSession().refreshable_session()

    client = session.client("s3") # we now can cache this client object without worrying about expiring credentials
    """

    def __init__(
        self,
        region_name: str | None = None,
        profile_name: str | None = None,
        sts_arn: str | None = None,
        session_name: str | None = None,
        session_ttl: int = 30000,
    ):
        """
        Initialize `RefreshableBotoSession`

        Parameters
        ----------
        region_name : str (optional)
            Default region when creating a new connection.

        profile_name : str (optional)
            The name of a profile to use.

        sts_arn : str (optional)
            The role arn to sts before creating a session.

        session_name : str (optional)
            An identifier for the assumed role session. (required when `sts_arn` is given)

        session_ttl : int (optional)
            An integer number to set the TTL for each session. Beyond this session, it will renew the token.
            Defaults to 30000 seconds, which is well within the maximum role session duration AWS allows.
        """
        # NOTE: annotations were fixed to `str | None` -- all of these accept None
        # (and default to it); runtime behavior is unchanged.
        self.region_name = region_name
        self.profile_name = profile_name
        self.sts_arn = sts_arn
        # A random session name keeps assumed-role sessions distinguishable.
        self.session_name = session_name or uuid4().hex
        self.session_ttl = session_ttl

    def __get_session_credentials(self):
        """
        Get session credentials
        """
        session = Session(region_name=self.region_name, profile_name=self.profile_name)

        # if sts_arn is given, get credential by assuming the given role
        if self.sts_arn:
            sts_client = session.client(service_name="sts", region_name=self.region_name)
            response = sts_client.assume_role(
                RoleArn=self.sts_arn,
                RoleSessionName=self.session_name,
                DurationSeconds=self.session_ttl,
            ).get("Credentials")

            credentials = {
                "access_key": response.get("AccessKeyId"),
                "secret_key": response.get("SecretAccessKey"),
                "token": response.get("SessionToken"),
                "expiry_time": response.get("Expiration").isoformat(),
            }
        else:
            session_credentials = session.get_credentials().get_frozen_credentials()
            # Frozen credentials carry no expiry, so synthesize one session_ttl from now
            # to force a periodic refresh.
            credentials = {
                "access_key": session_credentials.access_key,
                "secret_key": session_credentials.secret_key,
                "token": session_credentials.token,
                "expiry_time": datetime.datetime.fromtimestamp(time() + self.session_ttl, datetime.UTC).isoformat(),
            }

        return credentials

    def refreshable_session(self) -> Session:
        """
        Get refreshable boto3 session.
        """
        # Get refreshable credentials
        # NOTE(review): the "sts-assume-role" method label is used even on the
        # non-STS path; botocore treats it as informational only.
        refreshable_credentials = RefreshableCredentials.create_from_metadata(
            metadata=self.__get_session_credentials(),
            refresh_using=self.__get_session_credentials,
            method="sts-assume-role",
        )

        # attach refreshable credentials current session
        session = get_session()
        session._credentials = refreshable_credentials
        session.set_config_variable("region", self.region_name)
        autorefresh_session = Session(botocore_session=session)

        return autorefresh_session