llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py (new file, @@ -0,0 +1,15 @@):

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import MCPProviderConfig


async def get_adapter_impl(config: MCPProviderConfig, _deps):
    from .model_context_protocol import ModelContextProtocolToolRuntimeImpl

    impl = ModelContextProtocolToolRuntimeImpl(config, _deps)
    await impl.initialize()
    return impl
```
llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py (new file, @@ -0,0 +1,20 @@):

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from pydantic import BaseModel


class MCPProviderDataValidator(BaseModel):
    # mcp_endpoint => dict of headers to send
    mcp_headers: dict[str, dict[str, str]] | None = None


class MCPProviderConfig(BaseModel):
    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        return {}
```
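The `mcp_headers` field maps an MCP endpoint URI to the headers that should be forwarded to it. A minimal sketch of how a client might supply this per-request provider data — the endpoint URL is hypothetical, and the JSON-in-header wire format is inferred from the `X-LlamaStack-Provider-Data` error message in the tavily_search hunk below:

```python
import json

# Hypothetical MCP endpoint; the shape mirrors MCPProviderDataValidator:
# endpoint URI -> dict of headers to forward to that MCP server.
provider_data = {
    "mcp_headers": {
        "https://mcp.example.com/sse": {"Authorization": "Bearer <token>"},
    }
}

# Attached to a llama-stack API request as provider data.
request_headers = {"X-LlamaStack-Provider-Data": json.dumps(provider_data)}
```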
llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py (new file, @@ -0,0 +1,73 @@):

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any
from urllib.parse import urlparse

from llama_stack.apis.common.content_types import URL
from llama_stack.apis.datatypes import Api
from llama_stack.apis.tools import (
    ListToolDefsResponse,
    ToolGroup,
    ToolInvocationResult,
    ToolRuntime,
)
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools

from .config import MCPProviderConfig

logger = get_logger(__name__, category="tools")


class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData):
    def __init__(self, config: MCPProviderConfig, _deps: dict[Api, Any]):
        self.config = config

    async def initialize(self):
        pass

    async def register_toolgroup(self, toolgroup: ToolGroup) -> None:
        pass

    async def unregister_toolgroup(self, toolgroup_id: str) -> None:
        return

    async def list_runtime_tools(
        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
    ) -> ListToolDefsResponse:
        # this endpoint should be retrieved by getting the tool group right?
        if mcp_endpoint is None:
            raise ValueError("mcp_endpoint is required")
        headers = await self.get_headers_from_request(mcp_endpoint.uri)
        return await list_mcp_tools(mcp_endpoint.uri, headers)

    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
        tool = await self.tool_store.get_tool(tool_name)
        if tool.metadata is None or tool.metadata.get("endpoint") is None:
            raise ValueError(f"Tool {tool_name} does not have metadata")
        endpoint = tool.metadata.get("endpoint")
        if urlparse(endpoint).scheme not in ("http", "https"):
            raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL")

        headers = await self.get_headers_from_request(endpoint)
        return await invoke_mcp_tool(endpoint, headers, tool_name, kwargs)

    async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]:
        def canonicalize_uri(uri: str) -> str:
            return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}"

        headers = {}

        provider_data = self.get_request_provider_data()
        if provider_data and provider_data.mcp_headers:
            for uri, values in provider_data.mcp_headers.items():
                if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri):
                    continue
                headers.update(values)
        return headers
```
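Note that `get_headers_from_request` only forwards headers whose configured URI matches the target endpoint, and `canonicalize_uri` compares just the network location and path, so scheme, query-string, and fragment differences are ignored. A small standalone check of that matching rule, mirroring the inner helper above:

```python
from urllib.parse import urlparse


def canonicalize_uri(uri: str) -> str:
    # Same logic as the inner helper: netloc + path only;
    # scheme, query, and fragment are dropped.
    return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}"


# http vs https, and a trailing query string, canonicalize to the same key.
assert canonicalize_uri("http://mcp.example.com/sse") == canonicalize_uri("https://mcp.example.com/sse")
assert canonicalize_uri("https://mcp.example.com/sse?session=1") == canonicalize_uri("https://mcp.example.com/sse")
```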
llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py (new file, @@ -0,0 +1,20 @@):

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pydantic import BaseModel

from .config import TavilySearchToolConfig
from .tavily_search import TavilySearchToolRuntimeImpl


class TavilySearchToolProviderDataValidator(BaseModel):
    tavily_search_api_key: str


async def get_adapter_impl(config: TavilySearchToolConfig, _deps):
    impl = TavilySearchToolRuntimeImpl(config)
    await impl.initialize()
    return impl
```
llama_stack/providers/remote/tool_runtime/tavily_search/config.py (new file, @@ -0,0 +1,27 @@):

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from pydantic import BaseModel, Field


class TavilySearchToolConfig(BaseModel):
    api_key: str | None = Field(
        default=None,
        description="The Tavily Search API Key",
    )
    max_results: int = Field(
        default=3,
        description="The maximum number of results to return",
    )

    @classmethod
    def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
        return {
            "api_key": "${env.TAVILY_SEARCH_API_KEY:=}",
            "max_results": 3,
        }
```
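A quick sketch of what this `sample_run_config` returns (import path taken from the file list above); the `${env.TAVILY_SEARCH_API_KEY:=}` placeholder is presumably substituted from the environment when the run config is loaded:

```python
from llama_stack.providers.remote.tool_runtime.tavily_search.config import (
    TavilySearchToolConfig,
)

cfg = TavilySearchToolConfig.sample_run_config(__distro_dir__="/tmp/distro")
print(cfg)  # {'api_key': '${env.TAVILY_SEARCH_API_KEY:=}', 'max_results': 3}
```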
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+from typing import Any
+
+import httpx
+
+from llama_stack.apis.common.content_types import URL
+from llama_stack.apis.tools import (
+    ListToolDefsResponse,
+    ToolDef,
+    ToolGroup,
+    ToolInvocationResult,
+    ToolRuntime,
+)
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
+
+from .config import TavilySearchToolConfig
+
+
+class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData):
+    def __init__(self, config: TavilySearchToolConfig):
+        self.config = config
+
+    async def initialize(self):
+        pass
+
+    async def register_toolgroup(self, toolgroup: ToolGroup) -> None:
+        pass
+
+    async def unregister_toolgroup(self, toolgroup_id: str) -> None:
+        return
+
+    def _get_api_key(self) -> str:
+        # Prefer a statically configured key; fall back to per-request provider data.
+        if self.config.api_key:
+            return self.config.api_key
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None or not provider_data.tavily_search_api_key:
+            raise ValueError(
+                'Pass Search provider\'s API Key in the header X-LlamaStack-Provider-Data as { "tavily_search_api_key": <your api key>}'
+            )
+        return provider_data.tavily_search_api_key
+
+    async def list_runtime_tools(
+        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+    ) -> ListToolDefsResponse:
+        return ListToolDefsResponse(
+            data=[
+                ToolDef(
+                    name="web_search",
+                    description="Search the web for information",
+                    input_schema={
+                        "type": "object",
+                        "properties": {
+                            "query": {
+                                "type": "string",
+                                "description": "The query to search for",
+                            }
+                        },
+                        "required": ["query"],
+                    },
+                )
+            ]
+        )
+
+    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
+        api_key = self._get_api_key()
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                "https://api.tavily.com/search",
+                json={"api_key": api_key, "query": kwargs["query"]},
+            )
+            response.raise_for_status()
+
+        return ToolInvocationResult(content=json.dumps(self._clean_tavily_response(response.json())))
+
+    def _clean_tavily_response(self, search_response, top_k=3):
+        # Keep only the query and the top_k results from Tavily's response.
+        return {"query": search_response["query"], "top_k": search_response["results"][:top_k]}
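A hedged driver sketch, not part of the diff, exercising the runtime above with a config-supplied key. It needs a real Tavily key and network access, and the import paths are assumptions based on the provider layout.

    import asyncio

    from llama_stack.providers.remote.tool_runtime.tavily_search.config import TavilySearchToolConfig
    from llama_stack.providers.remote.tool_runtime.tavily_search.tavily_search import (
        TavilySearchToolRuntimeImpl,  # assumed module path
    )

    async def main():
        impl = TavilySearchToolRuntimeImpl(TavilySearchToolConfig(api_key="tvly-example-key"))
        await impl.initialize()
        tools = await impl.list_runtime_tools()
        print([t.name for t in tools.data])  # ['web_search']
        result = await impl.invoke_tool("web_search", {"query": "llama stack"})
        print(result.content)  # JSON string: {"query": ..., "top_k": [...]}

    asyncio.run(main())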
@@ -0,0 +1,22 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pydantic import BaseModel
+
+from .config import WolframAlphaToolConfig
+from .wolfram_alpha import WolframAlphaToolRuntimeImpl
+
+__all__ = ["WolframAlphaToolConfig", "WolframAlphaToolRuntimeImpl"]
+
+
+class WolframAlphaToolProviderDataValidator(BaseModel):
+    wolfram_alpha_api_key: str
+
+
+async def get_adapter_impl(config: WolframAlphaToolConfig, _deps):
+    impl = WolframAlphaToolRuntimeImpl(config)
+    await impl.initialize()
+    return impl
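The validator above defines the per-request alternative to a config-time key: callers may send the key in the X-LlamaStack-Provider-Data header, whose name and JSON shape match the error message raised by the runtime implementation. A minimal client-side sketch:

    import json

    # Value shape matches WolframAlphaToolProviderDataValidator above.
    headers = {
        "X-LlamaStack-Provider-Data": json.dumps({"wolfram_alpha_api_key": "XXXX-XXXXXXXXXX"})
    }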
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import BaseModel
+
+
+class WolframAlphaToolConfig(BaseModel):
+    """Configuration for WolframAlpha Tool Runtime"""
+
+    api_key: str | None = None
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
+        return {
+            "api_key": "${env.WOLFRAM_ALPHA_API_KEY:=}",
+        }
@@ -0,0 +1,140 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+from typing import Any
+
+import httpx
+
+from llama_stack.apis.common.content_types import URL
+from llama_stack.apis.tools import (
+    ListToolDefsResponse,
+    ToolDef,
+    ToolGroup,
+    ToolInvocationResult,
+    ToolRuntime,
+)
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
+
+from .config import WolframAlphaToolConfig
+
+
+class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData):
+    def __init__(self, config: WolframAlphaToolConfig):
+        self.config = config
+        self.url = "https://api.wolframalpha.com/v2/query"
+
+    async def initialize(self):
+        pass
+
+    async def register_toolgroup(self, toolgroup: ToolGroup) -> None:
+        pass
+
+    async def unregister_toolgroup(self, toolgroup_id: str) -> None:
+        return
+
+    def _get_api_key(self) -> str:
+        # Prefer a statically configured key; fall back to per-request provider data.
+        if self.config.api_key:
+            return self.config.api_key
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None or not provider_data.wolfram_alpha_api_key:
+            raise ValueError(
+                'Pass WolframAlpha API Key in the header X-LlamaStack-Provider-Data as { "wolfram_alpha_api_key": <your api key>}'
+            )
+        return provider_data.wolfram_alpha_api_key
+
+    async def list_runtime_tools(
+        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+    ) -> ListToolDefsResponse:
+        return ListToolDefsResponse(
+            data=[
+                ToolDef(
+                    name="wolfram_alpha",
+                    description="Query WolframAlpha for computational knowledge",
+                    input_schema={
+                        "type": "object",
+                        "properties": {
+                            "query": {
+                                "type": "string",
+                                "description": "The query to compute",
+                            }
+                        },
+                        "required": ["query"],
+                    },
+                )
+            ]
+        )
+
+    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
+        api_key = self._get_api_key()
+        params = {
+            "input": kwargs["query"],
+            "appid": api_key,
+            "format": "plaintext",
+            "output": "json",
+        }
+        async with httpx.AsyncClient() as client:
+            response = await client.get(self.url, params=params)
+            response.raise_for_status()
+            return ToolInvocationResult(content=json.dumps(self._clean_wolfram_alpha_response(response.json())))
+
+    def _clean_wolfram_alpha_response(self, wa_response):
+        # Strip noisy bookkeeping fields from the raw API payload, keeping only
+        # the pods up to and including the "Result" pod.
+        remove = {
+            "queryresult": [
+                "datatypes",
+                "error",
+                "timedout",
+                "timedoutpods",
+                "numpods",
+                "timing",
+                "parsetiming",
+                "parsetimedout",
+                "recalculate",
+                "id",
+                "host",
+                "server",
+                "related",
+                "version",
+                {
+                    "pods": [
+                        "scanner",
+                        "id",
+                        "error",
+                        "expressiontypes",
+                        "states",
+                        "infos",
+                        "position",
+                        "numsubpods",
+                    ]
+                },
+                "assumptions",
+            ],
+        }
+        for main_key in remove:
+            for key_to_remove in remove[main_key]:
+                try:
+                    if key_to_remove == "assumptions":
+                        if "assumptions" in wa_response[main_key]:
+                            del wa_response[main_key][key_to_remove]
+                    if isinstance(key_to_remove, dict):
+                        for sub_key in key_to_remove:
+                            if sub_key == "pods":
+                                # Drop every pod after the "Result" pod.
+                                for i in range(len(wa_response[main_key][sub_key])):
+                                    if wa_response[main_key][sub_key][i]["title"] == "Result":
+                                        del wa_response[main_key][sub_key][i + 1 :]
+                                        break
+                            # Strip the listed subkeys from each remaining pod.
+                            sub_items = wa_response[main_key][sub_key]
+                            for i in range(len(sub_items)):
+                                for sub_key_to_remove in key_to_remove[sub_key]:
+                                    if sub_key_to_remove in sub_items[i]:
+                                        del sub_items[i][sub_key_to_remove]
+                    elif key_to_remove in wa_response[main_key]:
+                        del wa_response[main_key][key_to_remove]
+                except KeyError:
+                    pass
+        return wa_response
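A usage sketch, not part of the diff, assuming only the classes above plus an assumed import path; it needs a valid WolframAlpha AppID and network access.

    import asyncio

    from llama_stack.providers.remote.tool_runtime.wolfram_alpha import (  # assumed module path
        WolframAlphaToolConfig,
        WolframAlphaToolRuntimeImpl,
    )

    async def main():
        # "DEMO-APPID" is a placeholder, not a working key.
        impl = WolframAlphaToolRuntimeImpl(WolframAlphaToolConfig(api_key="DEMO-APPID"))
        await impl.initialize()
        result = await impl.invoke_tool("wolfram_alpha", {"query": "integrate x^2"})
        print(result.content)  # cleaned queryresult JSON; pods truncated after "Result"

    asyncio.run(main())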
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.datatypes import Api, ProviderSpec
+
+from .config import ChromaVectorIOConfig
+
+
+async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]):
+    from .chroma import ChromaVectorIOAdapter
+
+    impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
+    await impl.initialize()
+    return impl
@@ -0,0 +1,215 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import asyncio
+import json
+from typing import Any
+from urllib.parse import urlparse
+
+import chromadb
+from numpy.typing import NDArray
+
+from llama_stack.apis.files import Files
+from llama_stack.apis.inference import Inference, InterleavedContent
+from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.log import get_logger
+from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
+from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
+from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.api import KVStore
+from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+
+from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
+
+log = get_logger(name=__name__, category="vector_io::chroma")
+
+ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI
+
+VERSION = "v3"
+VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::"
+VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::"
+OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::"
+OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::"
+OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:chroma:{VERSION}::"
+
+
+# this is a helper to allow us to use async and non-async chroma clients interchangeably
+async def maybe_await(result):
+    if asyncio.iscoroutine(result):
+        return await result
+    return result
+
+
+class ChromaIndex(EmbeddingIndex):
+    def __init__(self, client: ChromaClientType, collection, kvstore: KVStore | None = None):
+        self.client = client
+        self.collection = collection
+        self.kvstore = kvstore
+
+    async def initialize(self):
+        pass
+
+    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
+        assert len(chunks) == len(embeddings), (
+            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
+        )
+
+        ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
+        await maybe_await(
+            self.collection.add(documents=[chunk.model_dump_json() for chunk in chunks], embeddings=embeddings, ids=ids)
+        )
+
+    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
+        results = await maybe_await(
+            self.collection.query(
+                query_embeddings=[embedding.tolist()], n_results=k, include=["documents", "distances"]
+            )
+        )
+        distances = results["distances"][0]
+        documents = results["documents"][0]
+
+        chunks = []
+        scores = []
+        for dist, doc in zip(distances, documents, strict=False):
+            try:
+                doc = json.loads(doc)
+                chunk = Chunk(**doc)
+            except Exception:
+                log.exception(f"Failed to parse document: {doc}")
+                continue
+
+            # Convert Chroma's distance into a similarity score; identical vectors score infinity.
+            score = 1.0 / float(dist) if dist != 0 else float("inf")
+            if score < score_threshold:
+                continue
+
+            chunks.append(chunk)
+            scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)
+
+    async def delete(self):
+        await maybe_await(self.client.delete_collection(self.collection.name))
+
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
+        raise NotImplementedError("Keyword search is not supported in Chroma")
+
+    async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
+        """Delete the given chunks from the Chroma collection by their IDs."""
+        ids = [f"{chunk.document_id}:{chunk.chunk_id}" for chunk in chunks_for_deletion]
+        await maybe_await(self.collection.delete(ids=ids))
+
+    async def query_hybrid(
+        self,
+        embedding: NDArray,
+        query_string: str,
+        k: int,
+        score_threshold: float,
+        reranker_type: str,
+        reranker_params: dict[str, Any] | None = None,
+    ) -> QueryChunksResponse:
+        raise NotImplementedError("Hybrid search is not supported in Chroma")
+
+
+class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
+    def __init__(
+        self,
+        config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
+        inference_api: Inference,
+        files_api: Files | None,
+    ) -> None:
+        super().__init__(files_api=files_api, kvstore=None)
+        log.info(f"Initializing ChromaVectorIOAdapter with config: {config}")
+        self.config = config
+        self.inference_api = inference_api
+        self.client = None
+        self.cache = {}
+        self.vector_store_table = None
+
+    async def initialize(self) -> None:
+        self.kvstore = await kvstore_impl(self.config.persistence)
+
+        if isinstance(self.config, RemoteChromaVectorIOConfig):
+            log.info(f"Connecting to Chroma server at: {self.config.url}")
+            url = self.config.url.rstrip("/")
+            parsed = urlparse(url)
+
+            if parsed.path and parsed.path != "/":
+                raise ValueError("URL should not contain a path")
+
+            self.client = await chromadb.AsyncHttpClient(host=parsed.hostname, port=parsed.port)
+        else:
+            log.info(f"Connecting to Chroma local db at: {self.config.db_path}")
+            self.client = chromadb.PersistentClient(path=self.config.db_path)
+        self.openai_vector_stores = await self._load_openai_vector_stores()
+
+    async def shutdown(self) -> None:
+        # Clean up mixin resources (file batch tasks)
+        await super().shutdown()
+
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
+        collection = await maybe_await(
+            self.client.get_or_create_collection(
+                name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()}
+            )
+        )
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(
+            vector_store, ChromaIndex(self.client, collection), self.inference_api
+        )
+
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
+        if vector_store_id not in self.cache:
+            log.warning(f"Vector DB {vector_store_id} not found")
+            return
+
+        await self.cache[vector_store_id].index.delete()
+        del self.cache[vector_store_id]
+
+    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        if index is None:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+
+        await index.insert_chunks(chunks)
+
+    async def query_chunks(
+        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+    ) -> QueryChunksResponse:
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
+
+        if index is None:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+
+        return await index.query_chunks(query, params)
+
+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]
+
+        # Try to load from kvstore
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
+
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
+            raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
+
+        vector_store = VectorStore.model_validate_json(vector_store_data)
+        collection = await maybe_await(self.client.get_collection(vector_store_id))
+        if not collection:
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
+        index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api)
+        self.cache[vector_store_id] = index
+        return index
+
+    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
+        """Delete chunks from a Chroma vector store."""
+        index = await self._get_and_cache_vector_store_index(store_id)
+        if not index:
+            raise ValueError(f"Vector DB {store_id} not found")
+
+        await index.index.delete_chunks(chunks_for_deletion)
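Worth noting from the adapter above: Chroma document IDs follow the `{document_id}:{chunk_id}` convention in both add_chunks and delete_chunks, and maybe_await lets the same index code run against Chroma's synchronous PersistentClient or the asynchronous AsyncHttpClient. A rough sketch of ChromaIndex against an in-process client follows; the Chunk constructor fields and the embedding width are assumptions, not taken from the diff.

    import asyncio

    import chromadb
    import numpy as np

    async def demo():
        client = chromadb.EphemeralClient()  # sync client; maybe_await bridges the gap
        collection = client.get_or_create_collection("demo")
        index = ChromaIndex(client, collection)

        # Chunk fields here are assumptions about the llama_stack.apis.vector_io model.
        chunks = [Chunk(content="hello world", metadata={"document_id": "doc-1"})]
        embeddings = np.random.rand(1, 384).astype(np.float32)  # assumed embedding width
        await index.add_chunks(chunks, embeddings)

        resp = await index.query_vector(np.random.rand(384).astype(np.float32), k=1, score_threshold=0.0)
        print(len(resp.chunks), resp.scores)

    asyncio.run(demo())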