llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from llama_stack.apis.inference import OpenAIMessageParam
|
|
12
|
+
from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel
|
|
13
|
+
from llama_stack.apis.shields import Shield
|
|
14
|
+
from llama_stack.log import get_logger
|
|
15
|
+
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
|
|
16
|
+
|
|
17
|
+
from .config import NVIDIASafetyConfig
|
|
18
|
+
|
|
19
|
+
logger = get_logger(name=__name__, category="safety::nvidia")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
    """Safety provider that hands shield checks to a ``NeMoGuardrails`` helper."""

    def __init__(self, config: NVIDIASafetyConfig) -> None:
        """
        Keep the safety configuration for later shield runs.

        Args:
            config (NVIDIASafetyConfig): Configuration forwarded to ``NeMoGuardrails`` on every ``run_shield`` call.
        """
        self.config = config

    async def initialize(self) -> None:
        """No start-up work is performed."""
        return None

    async def shutdown(self) -> None:
        """No tear-down work is performed."""
        return None

    async def register_shield(self, shield: Shield) -> None:
        """
        Validate a shield at registration time.

        Raises:
            ValueError: If ``shield.provider_resource_id`` is empty or missing.
        """
        if shield.provider_resource_id:
            return
        raise ValueError("Shield model not provided.")

    async def unregister_shield(self, identifier: str) -> None:
        """Nothing is stored per shield, so there is nothing to remove."""
        return None

    async def run_shield(
        self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None
    ) -> RunShieldResponse:
        """
        Look up a registered shield and run the guardrails check on ``messages``.

        Args:
            shield_id (str): Identifier used to fetch the shield from ``self.shield_store``.
            messages (list[OpenAIMessageParam]): Conversation messages to be checked.
            params (dict[str, Any] | None): Accepted for interface compatibility; not read here.

        Returns:
            RunShieldResponse: Whatever ``NeMoGuardrails.run`` returns for ``messages``.

        Raises:
            ValueError: If the shield store has no shield for ``shield_id``.
        """
        # NOTE(review): shield_store is not assigned in this class; presumably injected by the framework.
        registered = await self.shield_store.get_shield(shield_id)
        if registered:
            # The helper is rebuilt per call and kept on the instance, exactly as before.
            self.shield = NeMoGuardrails(self.config, registered.shield_id)
            return await self.shield.run(messages)
        raise ValueError(f"Shield {shield_id} not found")

    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
        """Moderation is not supported by this provider; always raises ``NotImplementedError``."""
        raise NotImplementedError("NVIDIA safety provider currently does not implement run_moderation")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class NeMoGuardrails:
    """
    A class that encapsulates NVIDIA's guardrails safety logic.

    Sends messages to the guardrails service and interprets the response to determine
    if a safety violation has occurred.
    """

    def __init__(
        self,
        config: NVIDIASafetyConfig,
        model: str,
        threshold: float = 0.9,
        temperature: float = 1.0,
    ):
        """
        Initialize a NeMoGuardrails instance with the provided parameters.

        Args:
            config (NVIDIASafetyConfig): The safety configuration containing the config ID and guardrails URL.
            model (str): The identifier or name of the model to be used for safety checks.
            threshold (float, optional): The threshold for flagging violations. Defaults to 0.9.
            temperature (float, optional): The temperature setting for the underlying model. Must be greater than 0. Defaults to 1.0.

        Raises:
            ValueError: If temperature is less than or equal to 0.
            AssertionError: If config_id is not provided in the configuration.
        """
        self.config_id = config.config_id
        self.model = model
        assert self.config_id is not None, "Must provide config id"
        if temperature <= 0:
            raise ValueError("Temperature must be greater than 0")

        self.temperature = temperature
        # Stored on the instance; no request built in this class reads it.
        self.threshold = threshold
        self.guardrails_service_url = config.guardrails_service_url

    async def _guardrails_post(self, path: str, data: Any | None):
        """
        Helper for making POST requests to the guardrails service.

        ``requests`` is a blocking client, so the call is pushed to a worker thread;
        awaiting it keeps the event loop free to serve other requests meanwhile.

        Args:
            path (str): Path appended verbatim to ``guardrails_service_url``.
            data (Any | None): JSON-serializable request body.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: If the service answers with an error status.
        """
        # Function-scope import so this block does not depend on a file-level import.
        import asyncio

        headers = {
            "Accept": "application/json",
        }
        response = await asyncio.to_thread(
            requests.post,
            url=f"{self.guardrails_service_url}{path}",
            headers=headers,
            json=data,
        )
        response.raise_for_status()
        return response.json()

    async def run(self, messages: list[OpenAIMessageParam]) -> RunShieldResponse:
        """
        Queries the /v1/guardrail/checks endpoint of the NeMo guardrails deployed API.

        Args:
            messages (list[OpenAIMessageParam]): Messages to be checked for safety violations.

        Returns:
            RunShieldResponse: If the response indicates a violation ("blocked" status), returns a
                RunShieldResponse with a SafetyViolation; otherwise, returns a RunShieldResponse with violation set to None.

        Raises:
            requests.HTTPError: If the POST request fails.
        """
        request_data = {
            "model": self.model,
            "messages": [{"role": message.role, "content": message.content} for message in messages],
            "temperature": self.temperature,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
            "max_tokens": 160,
            "stream": False,
            "guardrails": {
                "config_id": self.config_id,
            },
        }
        response = await self._guardrails_post(path="/v1/guardrail/checks", data=request_data)

        # "status" is read strictly: a malformed reply must fail loudly rather than pass as safe.
        if response["status"] == "blocked":
            user_message = "Sorry I cannot do this."
            # A blocked verdict is still reported when the service omits rail details.
            metadata = response.get("rails_status") or {}

            return RunShieldResponse(
                violation=SafetyViolation(
                    user_message=user_message,
                    violation_level=ViolationLevel.ERROR,
                    metadata=metadata,
                )
            )

        return RunShieldResponse(violation=None)
|
|
@@ -4,14 +4,15 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
|
-
from .config import
|
|
10
|
+
from .config import SambaNovaSafetyConfig
|
|
10
11
|
|
|
11
12
|
|
|
12
|
-
async def get_adapter_impl(config: SambaNovaSafetyConfig, _deps) -> Any:
    """Build a ``SambaNovaSafetyAdapter`` from ``config``, await its ``initialize()``, and return it.

    ``_deps`` is accepted but not used.
    """
    # Imported here, at call time, as in the original.
    from .sambanova import SambaNovaSafetyAdapter

    adapter = SambaNovaSafetyAdapter(config)
    await adapter.initialize()
    return adapter
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field, SecretStr
|
|
10
|
+
|
|
11
|
+
from llama_stack.schema_utils import json_schema_type
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SambaNovaProviderDataValidator(BaseModel):
    """Pydantic model with a single optional ``sambanova_api_key`` field."""

    sambanova_api_key: str | None = Field(description="Sambanova Cloud API key", default=None)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@json_schema_type
class SambaNovaSafetyConfig(BaseModel):
    """Configuration for the SambaNova safety provider: server URL and optional API key."""

    url: str = Field(description="The URL for the SambaNova AI server", default="https://api.sambanova.ai/v1")
    api_key: SecretStr | None = Field(description="The SambaNova cloud API Key", default=None)

    @classmethod
    def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
        """Return a sample run configuration mapping.

        Args:
            api_key: Value placed under ``"api_key"``; defaults to an environment placeholder string.
            **kwargs: Accepted and ignored.

        Returns:
            A dict with the fixed SambaNova URL and the given ``api_key``.
        """
        sample: dict[str, Any] = {}
        sample["url"] = "https://api.sambanova.ai/v1"
        sample["api_key"] = api_key
        return sample
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import litellm
|
|
10
|
+
import requests
|
|
11
|
+
|
|
12
|
+
from llama_stack.apis.inference import OpenAIMessageParam
|
|
13
|
+
from llama_stack.apis.safety import (
|
|
14
|
+
RunShieldResponse,
|
|
15
|
+
Safety,
|
|
16
|
+
SafetyViolation,
|
|
17
|
+
ViolationLevel,
|
|
18
|
+
)
|
|
19
|
+
from llama_stack.apis.shields import Shield
|
|
20
|
+
from llama_stack.core.request_headers import NeedsRequestProviderData
|
|
21
|
+
from llama_stack.log import get_logger
|
|
22
|
+
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
|
|
23
|
+
|
|
24
|
+
from .config import SambaNovaSafetyConfig
|
|
25
|
+
|
|
26
|
+
logger = get_logger(name=__name__, category="safety::sambanova")
|
|
27
|
+
|
|
28
|
+
CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProviderData):
    """Safety provider that asks a guard model, called through litellm, to classify messages."""

    def __init__(self, config: SambaNovaSafetyConfig) -> None:
        """Store the configuration and start with an empty cache of available model ids."""
        self.config = config
        # Filled on the first register_shield call and reused afterwards.
        self.environment_available_models = []

    async def initialize(self) -> None:
        """No start-up work is performed."""
        pass

    async def shutdown(self) -> None:
        """No tear-down work is performed."""
        pass

    def _get_api_key(self) -> str:
        """
        Resolve the API key to use for a request.

        The key configured on the provider wins; otherwise the key is taken from the
        per-request provider data.

        Raises:
            ValueError: If neither source supplies a key.
        """
        configured_key = self.config.api_key
        if configured_key:
            return configured_key.get_secret_value()

        provider_data = self.get_request_provider_data()
        if provider_data is None or not provider_data.sambanova_api_key:
            raise ValueError(
                'Pass Sambanova API Key in the header X-LlamaStack-Provider-Data as { "sambanova_api_key": <your api key> }'
            )
        return provider_data.sambanova_api_key

    async def register_shield(self, shield: Shield) -> None:
        """
        Check a shield against the models listed by the server and warn if it looks unavailable.

        The model list is fetched once and cached on the instance. An unavailable shield
        only produces a warning; registration is not rejected.

        Raises:
            RuntimeError: If the model-list request fails.
        """
        # Function-scope import so this block does not depend on a file-level import.
        import asyncio

        list_models_url = self.config.url + "/models"
        if not self.environment_available_models:
            try:
                # requests is blocking; run it in a worker thread so the event loop stays responsive.
                response = await asyncio.to_thread(requests.get, list_models_url)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                raise RuntimeError(f"Request to {list_models_url} failed") from e
            self.environment_available_models = [model.get("id") for model in response.json().get("data", [])]
        if (
            "guard" not in shield.provider_resource_id.lower()
            or shield.provider_resource_id.split("sambanova/")[-1] not in self.environment_available_models
        ):
            logger.warning(f"Shield {shield.provider_resource_id} not available in {list_models_url}")

    async def unregister_shield(self, identifier: str) -> None:
        """Nothing is stored per shield, so there is nothing to remove."""
        pass

    async def run_shield(
        self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None
    ) -> RunShieldResponse:
        """
        Run the shield's guard model on ``messages`` and translate its verdict.

        Args:
            shield_id: Identifier used to fetch the shield from ``self.shield_store``.
            messages: Conversation messages passed to the guard model unchanged.
            params: Accepted for interface compatibility; not read here.

        Returns:
            A response carrying a ``SafetyViolation`` when the model's reply contains
            "unsafe" (case-insensitive), otherwise an empty ``RunShieldResponse``.

        Raises:
            ValueError: If the shield is unknown or no API key can be resolved.
        """
        shield = await self.shield_store.get_shield(shield_id)
        if not shield:
            raise ValueError(f"Shield {shield_id} not found")

        shield_params = shield.params
        logger.debug(f"run_shield::{shield_params}::messages={messages}")

        # Use the async client: the synchronous completion call would block the event loop
        # for the full duration of the model request.
        response = await litellm.acompletion(
            model=shield.provider_resource_id, messages=messages, api_key=self._get_api_key()
        )
        shield_message = response.choices[0].message.content

        if "unsafe" in shield_message.lower():
            user_message = CANNED_RESPONSE_TEXT
            # The last line of the model's reply is reported as the violation type.
            violation_type = shield_message.split("\n")[-1]
            metadata = {"violation_type": violation_type}

            return RunShieldResponse(
                violation=SafetyViolation(
                    user_message=user_message,
                    violation_level=ViolationLevel.ERROR,
                    metadata=metadata,
                )
            )

        return RunShieldResponse()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from .bing_search import BingSearchToolRuntimeImpl
|
|
8
|
+
from .config import BingSearchToolConfig
|
|
9
|
+
|
|
10
|
+
__all__ = ["BingSearchToolConfig", "BingSearchToolRuntimeImpl"]
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BingSearchToolProviderDataValidator(BaseModel):
    """Pydantic model with one required string field, ``bing_search_api_key``."""

    bing_search_api_key: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def get_adapter_impl(config: BingSearchToolConfig, _deps):
    """Build a ``BingSearchToolRuntimeImpl`` from ``config``, await its ``initialize()``, and return it.

    ``_deps`` is accepted but not used.
    """
    runtime = BingSearchToolRuntimeImpl(config)
    await runtime.initialize()
    return runtime
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from llama_stack.apis.common.content_types import URL
|
|
13
|
+
from llama_stack.apis.tools import (
|
|
14
|
+
ListToolDefsResponse,
|
|
15
|
+
ToolDef,
|
|
16
|
+
ToolGroup,
|
|
17
|
+
ToolInvocationResult,
|
|
18
|
+
ToolRuntime,
|
|
19
|
+
)
|
|
20
|
+
from llama_stack.core.request_headers import NeedsRequestProviderData
|
|
21
|
+
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
|
|
22
|
+
|
|
23
|
+
from .config import BingSearchToolConfig
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData):
|
|
27
|
+
def __init__(self, config: BingSearchToolConfig):
    """Remember the tool configuration and the fixed Bing search endpoint URL."""
    self.url = "https://api.bing.microsoft.com/v7.0/search"
    self.config = config
|
|
30
|
+
|
|
31
|
+
async def initialize(self):
    """No start-up work is performed."""
    return None
|
|
33
|
+
|
|
34
|
+
async def register_toolgroup(self, toolgroup: ToolGroup) -> None:
    """Accept any tool group; nothing is recorded."""
    return None
|
|
36
|
+
|
|
37
|
+
async def unregister_toolgroup(self, toolgroup_id: str) -> None:
    """Nothing is recorded per tool group, so there is nothing to remove."""
    return None
|
|
39
|
+
|
|
40
|
+
def _get_api_key(self) -> str:
    """Return the Bing API key, preferring the configured key over per-request provider data.

    Raises:
        ValueError: If the config has no key and the request provider data has none either.
    """
    configured_key = self.config.api_key
    if configured_key:
        return configured_key

    request_data = self.get_request_provider_data()
    if request_data is not None and request_data.bing_search_api_key:
        return request_data.bing_search_api_key
    raise ValueError(
        'Pass Bing Search API Key in the header X-LlamaStack-Provider-Data as { "bing_search_api_key": <your api key>}'
    )
|
|
50
|
+
|
|
51
|
+
async def list_runtime_tools(
|
|
52
|
+
self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
|
|
53
|
+
) -> ListToolDefsResponse:
|
|
54
|
+
return ListToolDefsResponse(
|
|
55
|
+
data=[
|
|
56
|
+
ToolDef(
|
|
57
|
+
name="web_search",
|
|
58
|
+
description="Search the web using Bing Search API",
|
|
59
|
+
input_schema={
|
|
60
|
+
"type": "object",
|
|
61
|
+
"properties": {
|
|
62
|
+
"query": {
|
|
63
|
+
"type": "string",
|
|
64
|
+
"description": "The query to search for",
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
"required": ["query"],
|
|
68
|
+
},
|
|
69
|
+
)
|
|
70
|
+
]
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
|
|
74
|
+
api_key = self._get_api_key()
|
|
75
|
+
headers = {
|
|
76
|
+
"Ocp-Apim-Subscription-Key": api_key,
|
|
77
|
+
}
|
|
78
|
+
params = {
|
|
79
|
+
"count": self.config.top_k,
|
|
80
|
+
"textDecorations": True,
|
|
81
|
+
"textFormat": "HTML",
|
|
82
|
+
"q": kwargs["query"],
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
async with httpx.AsyncClient() as client:
|
|
86
|
+
response = await client.get(
|
|
87
|
+
url=self.url,
|
|
88
|
+
params=params,
|
|
89
|
+
headers=headers,
|
|
90
|
+
)
|
|
91
|
+
response.raise_for_status()
|
|
92
|
+
|
|
93
|
+
return ToolInvocationResult(content=json.dumps(self._clean_response(response.json())))
|
|
94
|
+
|
|
95
|
+
def _clean_response(self, search_response):
|
|
96
|
+
clean_response = []
|
|
97
|
+
query = search_response["queryContext"]["originalQuery"]
|
|
98
|
+
if "webPages" in search_response:
|
|
99
|
+
pages = search_response["webPages"]["value"]
|
|
100
|
+
for p in pages:
|
|
101
|
+
selected_keys = {"name", "url", "snippet"}
|
|
102
|
+
clean_response.append({k: v for k, v in p.items() if k in selected_keys})
|
|
103
|
+
if "news" in search_response:
|
|
104
|
+
clean_news = []
|
|
105
|
+
news = search_response["news"]["value"]
|
|
106
|
+
for n in news:
|
|
107
|
+
selected_keys = {"name", "url", "description"}
|
|
108
|
+
clean_news.append({k: v for k, v in n.items() if k in selected_keys})
|
|
109
|
+
|
|
110
|
+
clean_response.append(clean_news)
|
|
111
|
+
|
|
112
|
+
return {"query": query, "top_k": clean_response}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BingSearchToolConfig(BaseModel):
    """Configuration for Bing Search Tool Runtime"""

    # Bing Search API key; None means no key is configured statically.
    api_key: str | None = None
    # Number of results to request per search.
    top_k: int = 3

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        """Return a sample run-config fragment for this provider.

        Args:
            __distro_dir__: distribution directory; not used by this provider.
            **kwargs: accepted for signature compatibility and ignored.

        Returns:
            A mapping whose ``api_key`` is an environment placeholder that
            defaults to an empty value when ``BING_API_KEY`` is unset.
        """
        return {
            # Use the explicit ``:=`` default operator, the same form the Brave
            # Search sample config uses, so an unset variable yields an empty
            # key rather than an unresolved placeholder string.
            "api_key": "${env.BING_API_KEY:=}",
        }
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from .brave_search import BraveSearchToolRuntimeImpl
|
|
10
|
+
from .config import BraveSearchToolConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Schema for the provider data that carries a Brave Search key.
# NOTE(review): presumably validated against the X-LlamaStack-Provider-Data
# request header payload - confirm against the provider registry.
class BraveSearchToolProviderDataValidator(BaseModel):
    # Brave Search API key; required (no default).
    brave_search_api_key: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def get_adapter_impl(config: BraveSearchToolConfig, _deps):
    """Create the Brave Search tool runtime for ``config`` and initialize it.

    ``_deps`` is accepted but not used. The initialized runtime is returned.
    """
    runtime = BraveSearchToolRuntimeImpl(config)
    await runtime.initialize()
    return runtime
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from llama_stack.apis.common.content_types import URL
|
|
12
|
+
from llama_stack.apis.tools import (
|
|
13
|
+
ListToolDefsResponse,
|
|
14
|
+
ToolDef,
|
|
15
|
+
ToolGroup,
|
|
16
|
+
ToolInvocationResult,
|
|
17
|
+
ToolRuntime,
|
|
18
|
+
)
|
|
19
|
+
from llama_stack.core.request_headers import NeedsRequestProviderData
|
|
20
|
+
from llama_stack.models.llama.datatypes import BuiltinTool
|
|
21
|
+
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
|
|
22
|
+
|
|
23
|
+
from .config import BraveSearchToolConfig
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData):
    """Tool runtime that serves a single ``web_search`` tool via the Brave web search endpoint."""

    def __init__(self, config: BraveSearchToolConfig):
        """Keep the provider config (API key and ``max_results``)."""
        self.config = config

    async def initialize(self):
        """No start-up work is performed."""
        pass

    async def register_toolgroup(self, toolgroup: ToolGroup) -> None:
        """Accept a tool group registration; nothing is recorded."""
        pass

    async def unregister_toolgroup(self, toolgroup_id: str) -> None:
        """Accept a tool group removal; nothing is recorded."""
        return

    def _get_api_key(self) -> str:
        """Return the Brave API key, preferring the static config over request data.

        Raises:
            ValueError: if the config has no key and the request provider data
                is missing or carries no ``brave_search_api_key``.
        """
        if self.config.api_key:
            return self.config.api_key

        provider_data = self.get_request_provider_data()
        if provider_data is None or not provider_data.brave_search_api_key:
            raise ValueError(
                'Pass Search provider\'s API Key in the header X-LlamaStack-Provider-Data as { "brave_search_api_key": <your api key>}'
            )
        return provider_data.brave_search_api_key

    async def list_runtime_tools(
        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
    ) -> ListToolDefsResponse:
        """Describe the one tool offered here; both arguments are ignored."""
        return ListToolDefsResponse(
            data=[
                ToolDef(
                    name="web_search",
                    description="Search the web for information",
                    input_schema={
                        "type": "object",
                        "properties": {
                            "query": {
                                "type": "string",
                                "description": "The query to search for",
                            }
                        },
                        "required": ["query"],
                    },
                    built_in_type=BuiltinTool.brave_search,
                )
            ]
        )

    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
        """Run a Brave search for ``kwargs["query"]`` and return the cleaned results as text.

        ``tool_name`` is not consulted. Each cleaned result is already a
        string; they are joined with newlines into the result content.

        Raises:
            ValueError: if no API key can be resolved.
            KeyError: if ``kwargs`` has no ``"query"`` entry.
        """
        api_key = self._get_api_key()
        url = "https://api.search.brave.com/res/v1/web/search"
        headers = {
            "X-Subscription-Token": api_key,
            "Accept-Encoding": "gzip",
            "Accept": "application/json",
        }
        payload = {"q": kwargs["query"]}
        async with httpx.AsyncClient() as client:
            response = await client.get(
                url=url,
                params=payload,
                headers=headers,
            )
        # Error status codes surface as exceptions instead of being parsed.
        response.raise_for_status()
        results = self._clean_brave_response(response.json())
        return ToolInvocationResult(
            content="\n".join(results),
        )

    def _clean_brave_response(self, search_response):
        """Turn the ``mixed.main`` ranking of a Brave payload into cleaned result strings.

        Only the first ``config.max_results`` ranking entries are considered.
        Entries are skipped (rather than raising or yielding blank lines) when
        their type is unsupported or when the section they point to is absent
        from the payload or has no ``results``.

        Returns:
            A list of strings, one per usable ranking entry; empty when the
            payload has no ``mixed`` section.
        """
        mixed_results = search_response.get("mixed")
        if not mixed_results:
            return []

        clean_response = []
        for entry in mixed_results.get("main", [])[: self.config.max_results]:
            r_type = entry.get("type")
            section = search_response.get(r_type)
            # The ranking can name a section the payload does not carry.
            if not section or "results" not in section:
                continue
            cleaned = self._clean_result_by_type(r_type, section["results"], entry.get("index"))
            # An empty string marks an unsupported type or a stale index.
            if cleaned:
                clean_response.append(cleaned)

        return clean_response

    def _clean_result_by_type(self, r_type, results, idx=None):
        """Keep only the relevant fields of one section's results and stringify them.

        Args:
            r_type: section name (``web``, ``faq``, ``infobox``, ``videos``,
                ``locations`` or ``news``).
            results: that section's ``results`` value.
            idx: for ``web`` and ``infobox``, the position of the single item
                to keep. When ``None`` the whole ``results`` value is cleaned
                instead of failing on ``results[None]``. Ignored for the
                other types.

        Returns:
            ``str()`` of the cleaned dict or list of dicts, or ``""`` when the
            type is unsupported or ``idx`` is out of range.
        """
        # type -> (fields to keep, whether ``idx`` selects a single item)
        type_cleaners = {
            "web": (("type", "title", "url", "description", "date", "extra_snippets"), True),
            "faq": (("type", "question", "answer", "title", "url"), False),
            "infobox": (("type", "title", "url", "description", "long_desc"), True),
            "videos": (("type", "url", "title", "description", "date"), False),
            "locations": (
                (
                    "type",
                    "title",
                    "url",
                    "description",
                    "coordinates",
                    "postal_address",
                    "contact",
                    "rating",
                    "distance",
                    "zoom_level",
                ),
                False,
            ),
            "news": (("type", "title", "url", "description"), False),
        }

        if r_type not in type_cleaners:
            return ""

        selected_keys, pick_by_index = type_cleaners[r_type]
        if pick_by_index and idx is not None:
            try:
                results = results[idx]
            except IndexError:
                # The ranking points past the end of the section's results.
                return ""

        if isinstance(results, list):
            cleaned = [{k: v for k, v in item.items() if k in selected_keys} for item in results]
        else:
            cleaned = {k: v for k, v in results.items() if k in selected_keys}

        return str(cleaned)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BraveSearchToolConfig(BaseModel):
    """Configuration for the Brave Search tool runtime."""

    api_key: str | None = Field(
        default=None,
        description="The Brave Search API Key",
    )
    max_results: int = Field(
        default=3,
        description="The maximum number of results to return",
    )

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        """Return a sample run-config fragment for this provider.

        Args:
            __distro_dir__: distribution directory; not used by this provider.
            **kwargs: accepted and ignored, so this signature matches the
                Bing Search config's ``sample_run_config``.

        Returns:
            A mapping with an environment placeholder for ``api_key`` (empty
            when ``BRAVE_SEARCH_API_KEY`` is unset) and ``max_results`` of 3.
        """
        return {
            "api_key": "${env.BRAVE_SEARCH_API_KEY:=}",
            "max_results": 3,
        }
|