llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import re
|
|
9
|
+
import sqlite3
|
|
10
|
+
import struct
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import sqlite_vec
|
|
15
|
+
from numpy.typing import NDArray
|
|
16
|
+
|
|
17
|
+
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
|
18
|
+
from llama_stack.apis.files import Files
|
|
19
|
+
from llama_stack.apis.inference import Inference
|
|
20
|
+
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
21
|
+
from llama_stack.apis.vector_stores import VectorStore
|
|
22
|
+
from llama_stack.log import get_logger
|
|
23
|
+
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
24
|
+
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
25
|
+
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
26
|
+
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
27
|
+
from llama_stack.providers.utils.memory.vector_store import (
|
|
28
|
+
RERANKER_TYPE_RRF,
|
|
29
|
+
ChunkForDeletion,
|
|
30
|
+
EmbeddingIndex,
|
|
31
|
+
VectorStoreWithIndex,
|
|
32
|
+
)
|
|
33
|
+
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
|
|
34
|
+
|
|
35
|
+
logger = get_logger(name=__name__, category="vector_io")
|
|
36
|
+
|
|
37
|
+
# Specifying search mode is dependent on the VectorIO provider.
|
|
38
|
+
VECTOR_SEARCH = "vector"
|
|
39
|
+
KEYWORD_SEARCH = "keyword"
|
|
40
|
+
HYBRID_SEARCH = "hybrid"
|
|
41
|
+
SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH}
|
|
42
|
+
|
|
43
|
+
VERSION = "v3"
|
|
44
|
+
VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::"
|
|
45
|
+
VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::"
|
|
46
|
+
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::"
|
|
47
|
+
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::"
|
|
48
|
+
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:sqlite_vec:{VERSION}::"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def serialize_vector(vector: list[float]) -> bytes:
    """Pack *vector* into a compact binary blob of native-order 32-bit floats."""
    fmt = f"{len(vector)}f"
    return struct.pack(fmt, *vector)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _create_sqlite_connection(db_path):
    """Create a SQLite connection with the sqlite_vec extension loaded.

    Extension loading is enabled only for the duration of the ``sqlite_vec.load``
    call and is re-disabled even if loading fails, so a connection is never left
    in a state where arbitrary extensions could be loaded.
    """
    connection = sqlite3.connect(db_path)
    connection.enable_load_extension(True)
    try:
        sqlite_vec.load(connection)
    finally:
        # Always turn extension loading back off, even when load() raises.
        connection.enable_load_extension(False)
    return connection
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _make_sql_identifier(name: str) -> str:
    """Sanitize *name* for use as a SQL identifier.

    Every character outside ``[A-Za-z0-9_]`` is replaced with an underscore.
    """
    return "".join(ch if ((ch.isalnum() and ch.isascii()) or ch == "_") else "_" for ch in name)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class SQLiteVecIndex(EmbeddingIndex):
    """
    An index implementation that stores embeddings in a SQLite virtual table using sqlite-vec.

    Three tables are used, all named after the (sanitized) bank id:
    - A metadata table (chunks_{bank_id}) that holds the chunk JSON.
    - A virtual table (vec_chunks_{bank_id}) that holds the serialized vector.
    - An FTS5 table (fts_chunks_{bank_id}) for full-text keyword search.

    All database work runs via ``asyncio.to_thread`` so the event loop is never
    blocked by sqlite3's synchronous API.
    """

    def __init__(self, dimension: int, db_path: str, bank_id: str, kvstore: KVStore | None = None):
        self.dimension = dimension
        self.db_path = db_path
        self.bank_id = bank_id
        # Table names are derived from the sanitized bank id so each bank gets its own tables.
        self.metadata_table = _make_sql_identifier(f"chunks_{bank_id}")
        self.vector_table = _make_sql_identifier(f"vec_chunks_{bank_id}")
        self.fts_table = _make_sql_identifier(f"fts_chunks_{bank_id}")
        self.kvstore = kvstore

    @classmethod
    async def create(cls, dimension: int, db_path: str, bank_id: str):
        """Construct an index and create its backing tables."""
        instance = cls(dimension, db_path, bank_id)
        await instance.initialize()
        return instance

    async def initialize(self) -> None:
        """Create the metadata, vector, and FTS5 tables if they do not already exist."""

        def _init_tables():
            connection = _create_sqlite_connection(self.db_path)
            cur = connection.cursor()
            try:
                # Create the table to store chunk metadata.
                cur.execute(f"""
                    CREATE TABLE IF NOT EXISTS [{self.metadata_table}] (
                        id TEXT PRIMARY KEY,
                        chunk TEXT
                    );
                """)
                # Create the virtual table for embeddings.
                cur.execute(f"""
                    CREATE VIRTUAL TABLE IF NOT EXISTS [{self.vector_table}]
                    USING vec0(embedding FLOAT[{self.dimension}], id TEXT);
                """)
                connection.commit()
                # FTS5 table (for keyword search) - creating both the tables by default. Will use the relevant one
                # based on query. Implementation of the change on client side will allow passing the search_mode option
                # during initialization to make it easier to create the table that is required.
                cur.execute(f"""
                    CREATE VIRTUAL TABLE IF NOT EXISTS [{self.fts_table}]
                    USING fts5(id, content);
                """)
                connection.commit()
            finally:
                cur.close()
                connection.close()

        await asyncio.to_thread(_init_tables)

    async def delete(self) -> None:
        """Drop all three backing tables for this bank."""

        def _drop_tables():
            connection = _create_sqlite_connection(self.db_path)
            cur = connection.cursor()
            try:
                cur.execute(f"DROP TABLE IF EXISTS [{self.metadata_table}];")
                cur.execute(f"DROP TABLE IF EXISTS [{self.vector_table}];")
                cur.execute(f"DROP TABLE IF EXISTS [{self.fts_table}];")
                connection.commit()
            finally:
                cur.close()
                connection.close()

        await asyncio.to_thread(_drop_tables)

    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray, batch_size: int = 500):
        """
        Add new chunks along with their embeddings using batch inserts.

        For each chunk, we insert its JSON into the metadata table and then insert its
        embedding (serialized to raw bytes) into the virtual table using the assigned rowid.
        If any insert fails, the transaction is rolled back to maintain consistency.
        Also inserts chunk content into FTS table for keyword search support.
        """
        assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks"

        def _execute_all_batch_inserts():
            connection = _create_sqlite_connection(self.db_path)
            cur = connection.cursor()

            try:
                cur.execute("BEGIN TRANSACTION")
                for i in range(0, len(chunks), batch_size):
                    batch_chunks = chunks[i : i + batch_size]
                    batch_embeddings = embeddings[i : i + batch_size]

                    # Insert metadata (upsert so re-adding a chunk refreshes its JSON).
                    metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks]
                    cur.executemany(
                        f"""
                        INSERT INTO [{self.metadata_table}] (id, chunk)
                        VALUES (?, ?)
                        ON CONFLICT(id) DO UPDATE SET chunk = excluded.chunk;
                        """,
                        metadata_data,
                    )

                    # Insert vector embeddings
                    embedding_data = [
                        ((chunk.chunk_id, serialize_vector(emb.tolist())))
                        for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
                    ]
                    cur.executemany(f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", embedding_data)

                    # Insert FTS content
                    fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks]
                    # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT)
                    cur.executemany(f"DELETE FROM [{self.fts_table}] WHERE id = ?;", [(row[0],) for row in fts_data])

                    # INSERT new entries
                    cur.executemany(f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", fts_data)

                connection.commit()

            except sqlite3.Error as e:
                connection.rollback()
                logger.error(f"Error inserting into {self.vector_table}: {e}")
                raise

            finally:
                cur.close()
                connection.close()

        # Run batch insertion in a background thread
        await asyncio.to_thread(_execute_all_batch_inserts)

    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
        """
        Performs vector-based search using a virtual table for vector similarity.

        Distance is converted to a similarity score as ``1 / distance`` (``inf``
        for an exact match); results below *score_threshold* are dropped.
        """

        def _execute_query():
            connection = _create_sqlite_connection(self.db_path)
            cur = connection.cursor()
            try:
                emb_list = embedding.tolist() if isinstance(embedding, np.ndarray) else list(embedding)
                emb_blob = serialize_vector(emb_list)
                # sqlite-vec KNN query: `embedding MATCH ?` with `k = ?` selects the k nearest rows.
                query_sql = f"""
                    SELECT m.id, m.chunk, v.distance
                    FROM [{self.vector_table}] AS v
                    JOIN [{self.metadata_table}] AS m ON m.id = v.id
                    WHERE v.embedding MATCH ? AND k = ?
                    ORDER BY v.distance;
                """
                cur.execute(query_sql, (emb_blob, k))
                return cur.fetchall()
            finally:
                cur.close()
                connection.close()

        rows = await asyncio.to_thread(_execute_query)
        chunks, scores = [], []
        for row in rows:
            _id, chunk_json, distance = row
            score = 1.0 / distance if distance != 0 else float("inf")
            if score < score_threshold:
                continue
            try:
                chunk = Chunk.model_validate_json(chunk_json)
            except Exception as e:
                logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                continue
            chunks.append(chunk)
            scores.append(score)
        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
        """
        Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search.
        """

        def _execute_query():
            connection = _create_sqlite_connection(self.db_path)
            cur = connection.cursor()
            try:
                query_sql = f"""
                    SELECT DISTINCT m.id, m.chunk, bm25([{self.fts_table}]) AS score
                    FROM [{self.fts_table}] AS f
                    JOIN [{self.metadata_table}] AS m ON m.id = f.id
                    WHERE f.content MATCH ?
                    ORDER BY score ASC
                    LIMIT ?;
                """
                cur.execute(query_sql, (query_string, k))
                return cur.fetchall()
            finally:
                cur.close()
                connection.close()

        rows = await asyncio.to_thread(_execute_query)
        chunks, scores = [], []
        for row in rows:
            _id, chunk_json, score = row
            # BM25 scores returned by sqlite-vec are NEGATED (i.e., more relevant = more negative).
            # This design is intentional to simplify sorting by ascending score.
            # Reference: https://alexgarcia.xyz/blog/2024/sqlite-vec-hybrid-search/index.html
            if score > -score_threshold:
                continue
            try:
                chunk = Chunk.model_validate_json(chunk_json)
            except Exception as e:
                logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                continue
            chunks.append(chunk)
            scores.append(score)
        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def query_hybrid(
        self,
        embedding: NDArray,
        query_string: str,
        k: int,
        score_threshold: float,
        reranker_type: str = RERANKER_TYPE_RRF,
        reranker_params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """
        Hybrid search using a configurable re-ranking strategy.

        Args:
            embedding: The query embedding vector
            query_string: The text query for keyword search
            k: Number of results to return
            score_threshold: Minimum similarity score threshold
            reranker_type: Type of reranker to use ("rrf" or "weighted")
            reranker_params: Parameters for the reranker

        Returns:
            QueryChunksResponse with combined results
        """
        if reranker_params is None:
            reranker_params = {}

        # Get results from both search methods
        vector_response = await self.query_vector(embedding, k, score_threshold)
        keyword_response = await self.query_keyword(query_string, k, score_threshold)

        # Convert responses to score dictionaries using chunk_id
        vector_scores = {
            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
        }
        keyword_scores = {
            chunk.chunk_id: score
            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
        }

        # Combine scores using the reranking utility
        combined_scores = WeightedInMemoryAggregator.combine_search_results(
            vector_scores, keyword_scores, reranker_type, reranker_params
        )

        # Sort by combined score and get top k results
        sorted_items = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
        top_k_items = sorted_items[:k]

        # Filter by score threshold
        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]

        # Create a map of chunk_id to chunk for both responses
        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}

        # Use the map to look up chunks by their IDs
        chunks = []
        scores = []
        for doc_id, score in filtered_items:
            if doc_id in chunk_map:
                chunks.append(chunk_map[doc_id])
                scores.append(score)

        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Remove chunks (metadata, embedding, and FTS rows) from the SQLite vector store."""
        chunk_ids = [c.chunk_id for c in chunks_for_deletion]
        if not chunk_ids:
            # An empty id list would render as "IN ()", which is a SQLite syntax error;
            # there is nothing to delete anyway.
            return

        def _delete_chunks():
            connection = _create_sqlite_connection(self.db_path)
            cur = connection.cursor()
            try:
                cur.execute("BEGIN TRANSACTION")

                placeholders = ",".join("?" * len(chunk_ids))
                # Identifiers are bracket-quoted for consistency with every other query in this class.
                # Delete from metadata table
                cur.execute(f"DELETE FROM [{self.metadata_table}] WHERE id IN ({placeholders})", chunk_ids)

                # Delete from vector table
                cur.execute(f"DELETE FROM [{self.vector_table}] WHERE id IN ({placeholders})", chunk_ids)

                # Delete from FTS table
                cur.execute(f"DELETE FROM [{self.fts_table}] WHERE id IN ({placeholders})", chunk_ids)

                connection.commit()
            except Exception as e:
                connection.rollback()
                logger.error(f"Error deleting chunks: {e}")
                raise
            finally:
                cur.close()
                connection.close()

        await asyncio.to_thread(_delete_chunks)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    """
    A VectorIO implementation using SQLite + sqlite_vec.

    This class handles vector database registration (with metadata stored in a table named `vector_stores`)
    and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
    """

    def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
        super().__init__(files_api=files_api, kvstore=None)
        self.config = config
        self.inference_api = inference_api
        # store-id -> VectorStoreWithIndex; filled eagerly by initialize() and
        # lazily by _get_and_cache_vector_store_index() on cache misses.
        self.cache: dict[str, VectorStoreWithIndex] = {}
        self.vector_store_table = None

    async def initialize(self) -> None:
        """Open the kvstore and rebuild the in-memory cache from persisted stores."""
        self.kvstore = await kvstore_impl(self.config.persistence)

        start_key = VECTOR_DBS_PREFIX
        end_key = f"{VECTOR_DBS_PREFIX}\xff"
        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
        for db_json in stored_vector_stores:
            vector_store = VectorStore.model_validate_json(db_json)
            index = await SQLiteVecIndex.create(
                vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
            )
            self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)

        # Load existing OpenAI vector stores into the in-memory cache
        await self.initialize_openai_vector_stores()

    async def shutdown(self) -> None:
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

    async def list_vector_stores(self) -> list[VectorStore]:
        """Return the VectorStore records for every cached store."""
        return [v.vector_store for v in self.cache.values()]

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        """Persist a vector store record and create its sqlite_vec index.

        :raises RuntimeError: if called before initialize().
        """
        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")

        # Save to kvstore for persistence
        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())

        # Create and cache the index
        index = await SQLiteVecIndex.create(
            vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
        )
        self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)

    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
        """Return the index for `vector_store_id`, loading it from the kvstore on a cache miss.

        :raises RuntimeError: if called before initialize().
        :raises VectorStoreNotFoundError: if the store is neither cached nor persisted.
        """
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

        # Try to load from kvstore
        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")

        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)

        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=SQLiteVecIndex(
                dimension=vector_store.embedding_dimension,
                db_path=self.config.db_path,
                bank_id=vector_store.identifier,
                kvstore=self.kvstore,
            ),
            inference_api=self.inference_api,
        )
        self.cache[vector_store_id] = index
        return index

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        """Delete a store's index and drop it from both the cache and the kvstore."""
        if vector_store_id not in self.cache:
            return
        await self.cache[vector_store_id].index.delete()
        del self.cache[vector_store_id]
        # Fix: also remove the persisted record. register_vector_store() writes the
        # store to the kvstore and initialize() reloads everything under
        # VECTOR_DBS_PREFIX, so without this delete an unregistered store would be
        # resurrected on the next startup.
        if self.kvstore is not None:
            await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
        # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
        # and then call our index's add_chunks.
        await index.insert_chunks(chunks)

    async def query_chunks(
        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
        return await index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from a sqlite_vec index."""
        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise VectorStoreNotFoundError(store_id)

        await index.index.delete_chunks(chunks_for_deletion)
|
|
@@ -4,40 +4,38 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from typing import List
|
|
8
7
|
|
|
9
|
-
from llama_stack.
|
|
8
|
+
from llama_stack.providers.datatypes import (
|
|
9
|
+
Api,
|
|
10
|
+
InlineProviderSpec,
|
|
11
|
+
ProviderSpec,
|
|
12
|
+
)
|
|
10
13
|
from llama_stack.providers.utils.kvstore import kvstore_dependencies
|
|
11
14
|
|
|
12
15
|
|
|
def available_providers() -> list[ProviderSpec]:
    """Provider specs for the agents API."""
    agent_pip_packages = [
        "matplotlib",
        "pillow",
        "pandas",
        "scikit-learn",
        "mcp>=1.23.0",
    ]
    # TODO make this dynamic based on the kvstore config
    agent_pip_packages += kvstore_dependencies()

    meta_reference = InlineProviderSpec(
        api=Api.agents,
        provider_type="inline::meta-reference",
        pip_packages=agent_pip_packages,
        module="llama_stack.providers.inline.agents.meta_reference",
        config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig",
        api_dependencies=[
            Api.inference,
            Api.safety,
            Api.vector_io,
            Api.tool_runtime,
            Api.tool_groups,
            Api.conversations,
        ],
        description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
    )
    return [meta_reference]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def available_providers() -> list[ProviderSpec]:
    """Provider specs for the batches API."""
    reference = InlineProviderSpec(
        api=Api.batches,
        provider_type="inline::reference",
        pip_packages=[],
        module="llama_stack.providers.inline.batches.reference",
        config_class="llama_stack.providers.inline.batches.reference.config.ReferenceBatchesImplConfig",
        api_dependencies=[
            Api.inference,
            Api.files,
            Api.models,
        ],
        description="Reference implementation of batches API with KVStore persistence.",
    )
    return [reference]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
from llama_stack.providers.datatypes import (
|
|
9
|
+
Api,
|
|
10
|
+
InlineProviderSpec,
|
|
11
|
+
ProviderSpec,
|
|
12
|
+
RemoteProviderSpec,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def available_providers() -> list[ProviderSpec]:
    """Provider specs for the datasetio API."""
    localfs = InlineProviderSpec(
        api=Api.datasetio,
        provider_type="inline::localfs",
        pip_packages=["pandas"],
        module="llama_stack.providers.inline.datasetio.localfs",
        config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig",
        api_dependencies=[],
        description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.",
    )
    huggingface = RemoteProviderSpec(
        api=Api.datasetio,
        adapter_type="huggingface",
        provider_type="remote::huggingface",
        pip_packages=[
            "datasets>=4.0.0",
        ],
        module="llama_stack.providers.remote.datasetio.huggingface",
        config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
        description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.",
    )
    nvidia = RemoteProviderSpec(
        api=Api.datasetio,
        adapter_type="nvidia",
        provider_type="remote::nvidia",
        module="llama_stack.providers.remote.datasetio.nvidia",
        config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",
        pip_packages=[
            "datasets>=4.0.0",
        ],
        description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.",
    )
    return [localfs, huggingface, nvidia]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def available_providers() -> list[ProviderSpec]:
    """Provider specs for the eval API."""
    # Both providers depend on the same set of APIs; each spec gets its own
    # copy so the lists are never shared between spec instances.
    eval_api_dependencies = [
        Api.datasetio,
        Api.datasets,
        Api.scoring,
        Api.inference,
        Api.agents,
    ]

    meta_reference = InlineProviderSpec(
        api=Api.eval,
        provider_type="inline::meta-reference",
        pip_packages=["tree_sitter", "pythainlp", "langdetect", "emoji", "nltk"],
        module="llama_stack.providers.inline.eval.meta_reference",
        config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig",
        api_dependencies=list(eval_api_dependencies),
        description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.",
    )
    nvidia = RemoteProviderSpec(
        api=Api.eval,
        adapter_type="nvidia",
        pip_packages=[
            "requests",
        ],
        provider_type="remote::nvidia",
        module="llama_stack.providers.remote.eval.nvidia",
        config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig",
        description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.",
        api_dependencies=list(eval_api_dependencies),
    )
    return [meta_reference, nvidia]
|