llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
from typing import List, Tuple
|
|
8
|
-
|
|
9
|
-
import psycopg2
|
|
10
|
-
from numpy.typing import NDArray
|
|
11
|
-
from psycopg2 import sql
|
|
12
|
-
from psycopg2.extras import execute_values, Json
|
|
13
|
-
|
|
14
|
-
from pydantic import BaseModel, parse_obj_as
|
|
15
|
-
|
|
16
|
-
from llama_stack.apis.memory import * # noqa: F403
|
|
17
|
-
|
|
18
|
-
from llama_stack.providers.datatypes import MemoryBanksProtocolPrivate
|
|
19
|
-
from llama_stack.providers.utils.memory.vector_store import (
|
|
20
|
-
ALL_MINILM_L6_V2_DIMENSION,
|
|
21
|
-
BankWithIndex,
|
|
22
|
-
EmbeddingIndex,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
from .config import PGVectorConfig
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def check_extension_version(cur):
    """Return the installed version of the ``vector`` extension, or ``None``.

    Runs a lookup against ``pg_extension`` on the given cursor and returns
    the first column of the first row; ``None`` when no row comes back.
    """
    cur.execute("SELECT extversion FROM pg_extension WHERE extname = 'vector'")
    row = cur.fetchone()
    if not row:
        return None
    return row[0]
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def upsert_models(cur, keys_models: List[Tuple[str, BaseModel]]):
    """Insert or overwrite models in the ``metadata_store`` table.

    Every ``(key, model)`` pair becomes one row. When a row with the same
    key already exists, its ``data`` column is replaced by the new model.
    """
    upsert_stmt = sql.SQL(
        """
        INSERT INTO metadata_store (key, data)
        VALUES %s
        ON CONFLICT (key) DO UPDATE
        SET data = EXCLUDED.data
    """
    )

    # Each model is dumped to a dict and wrapped in Json for the ``data`` column.
    rows = [(row_key, Json(item.dict())) for row_key, item in keys_models]
    execute_values(cur, upsert_stmt, rows, template="(%s, %s)")
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def load_models(cur, cls):
    """Load every row of ``metadata_store`` and parse its ``data`` column.

    Args:
        cur: An open database cursor. Rows may be mapping-style (indexable
            by column name) or plain sequences such as tuples.
        cls: The target type handed to ``parse_obj_as`` for each row.

    Returns:
        A list with one parsed object per stored row (empty when the table
        has no rows).
    """
    cur.execute("SELECT key, data FROM metadata_store")

    models = []
    for row in cur.fetchall():
        try:
            payload = row["data"]
        except TypeError:
            # Tuple rows cannot be indexed by column name; ``data`` is the
            # second column of the SELECT above.
            payload = row[1]
        models.append(parse_obj_as(cls, payload))
    return models
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class PGVectorIndex(EmbeddingIndex):
    """Embedding index that keeps one PostgreSQL table per memory bank."""

    def __init__(self, bank: MemoryBankDef, dimension: int, cursor):
        """Create the bank's backing table if it does not exist yet.

        Args:
            bank: Memory bank definition; its ``identifier`` names the table.
            dimension: Length of the vectors stored in the ``embedding`` column.
            cursor: Open psycopg2 cursor used for every statement of this index.
        """
        self.cursor = cursor
        self.table_name = f"vector_store_{bank.identifier}"
        # Compose the table name as a quoted identifier instead of splicing it
        # into the SQL text: identifiers containing characters such as "-"
        # would otherwise be a syntax error, and hostile ones an injection.
        # Lower-casing mirrors PostgreSQL's folding of unquoted names, so a
        # table created by the earlier unquoted form is still found.
        self._table = sql.Identifier(self.table_name.lower())

        self.cursor.execute(
            sql.SQL(
                """
                CREATE TABLE IF NOT EXISTS {table} (
                    id TEXT PRIMARY KEY,
                    document JSONB,
                    embedding vector({dimension})
                )
                """
            ).format(table=self._table, dimension=sql.Literal(int(dimension)))
        )

    async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray):
        """Upsert ``chunks`` together with their embeddings.

        Row ids are ``"<document_id>:chunk-<position>"``; a row with the same
        id has its document and embedding overwritten.

        Raises:
            AssertionError: If the number of chunks and embeddings differ.
        """
        assert len(chunks) == len(
            embeddings
        ), f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"

        if not chunks:
            # Nothing to write.
            return

        values = [
            (
                f"{chunk.document_id}:chunk-{i}",
                Json(chunk.dict()),
                embeddings[i].tolist(),
            )
            for i, chunk in enumerate(chunks)
        ]

        query = sql.SQL(
            """
            INSERT INTO {table} (id, document, embedding)
            VALUES %s
            ON CONFLICT (id) DO UPDATE SET embedding = EXCLUDED.embedding, document = EXCLUDED.document
            """
        ).format(table=self._table)
        execute_values(self.cursor, query, values, template="(%s, %s, %s::vector)")

    async def query(self, embedding: NDArray, k: int) -> QueryDocumentsResponse:
        """Return up to ``k`` stored chunks ordered by distance to ``embedding``.

        Scores are the reciprocal of the distance reported by the ``<->``
        operator, so closer chunks score higher.
        """
        self.cursor.execute(
            sql.SQL(
                """
                SELECT document, embedding <-> %s::vector AS distance
                FROM {table}
                ORDER BY distance
                LIMIT %s
                """
            ).format(table=self._table),
            (embedding.tolist(), k),
        )
        results = self.cursor.fetchall()

        chunks = []
        scores = []
        for doc, dist in results:
            chunks.append(Chunk(**doc))
            distance = float(dist)
            # An exact match has distance 0; report it as the best possible
            # score instead of failing with ZeroDivisionError.
            scores.append(1.0 / distance if distance != 0 else float("inf"))

        return QueryDocumentsResponse(chunks=chunks, scores=scores)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
class PGVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
    """Memory provider that stores banks and chunk embeddings in PostgreSQL.

    Bank definitions are persisted in the ``metadata_store`` table. Each bank
    gets a ``PGVectorIndex`` wrapped in a ``BankWithIndex`` and cached in
    ``self.cache`` under the bank identifier.
    """

    def __init__(self, config: PGVectorConfig) -> None:
        print(f"Initializing PGVectorMemoryAdapter -> {config.host}:{config.port}")
        self.config = config
        self.cursor = None
        self.conn = None
        self.cache = {}

    async def initialize(self) -> None:
        """Connect, verify the ``vector`` extension and create ``metadata_store``.

        Raises:
            RuntimeError: If connecting, the extension check, or the table
                creation fails; the original exception is chained as the cause.
        """
        try:
            self.conn = psycopg2.connect(
                host=self.config.host,
                port=self.config.port,
                database=self.config.db,
                user=self.config.user,
                password=self.config.password,
            )
            # Nothing in this adapter ever calls commit(); without autocommit
            # every CREATE/INSERT would stay in an open transaction and be
            # discarded when the connection goes away.
            self.conn.autocommit = True
            # load_models() reads rows by column name, which the default
            # tuple-returning cursor does not support.
            self.cursor = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

            version = check_extension_version(self.cursor)
            if version:
                print(f"Vector extension version: {version}")
            else:
                raise RuntimeError("Vector extension is not installed.")

            self.cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS metadata_store (
                    key TEXT PRIMARY KEY,
                    data JSONB
                )
                """
            )
        except Exception as e:
            import traceback

            traceback.print_exc()
            raise RuntimeError("Could not connect to PGVector database server") from e

    async def shutdown(self) -> None:
        """Close the cursor and the connection if they were opened."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _build_and_cache_index(self, bank: MemoryBankDef) -> BankWithIndex:
        """Create the index for ``bank`` and store it in the cache."""
        index = BankWithIndex(
            bank=bank,
            index=PGVectorIndex(bank, ALL_MINILM_L6_V2_DIMENSION, self.cursor),
        )
        self.cache[bank.identifier] = index
        return index

    async def register_memory_bank(
        self,
        memory_bank: MemoryBankDef,
    ) -> None:
        """Persist a vector memory bank definition and create its index.

        Raises:
            AssertionError: If the bank is not of the vector type.
        """
        assert (
            memory_bank.type == MemoryBankType.vector.value
        ), f"Only vector banks are supported {memory_bank.type}"

        upsert_models(
            self.cursor,
            [
                (memory_bank.identifier, memory_bank),
            ],
        )
        self._build_and_cache_index(memory_bank)

    async def list_memory_banks(self) -> List[MemoryBankDef]:
        """Return all persisted banks, creating indexes for uncached ones."""
        banks = load_models(self.cursor, MemoryBankDef)
        for bank in banks:
            if bank.identifier not in self.cache:
                self._build_and_cache_index(bank)
        return banks

    async def insert_documents(
        self,
        bank_id: str,
        documents: List[MemoryBankDocument],
        ttl_seconds: Optional[int] = None,
    ) -> None:
        """Insert ``documents`` into the cached bank ``bank_id``.

        ``ttl_seconds`` is accepted but not used by this implementation.

        Raises:
            ValueError: If no index is cached for ``bank_id``.
        """
        index = self.cache.get(bank_id, None)
        if not index:
            raise ValueError(f"Bank {bank_id} not found")

        await index.insert_documents(documents)

    async def query_documents(
        self,
        bank_id: str,
        query: InterleavedTextMedia,
        params: Optional[Dict[str, Any]] = None,
    ) -> QueryDocumentsResponse:
        """Run ``query`` against the cached bank ``bank_id``.

        Raises:
            ValueError: If no index is cached for ``bank_id``.
        """
        index = self.cache.get(bank_id, None)
        if not index:
            raise ValueError(f"Bank {bank_id} not found")

        return await index.query_documents(query, params)
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
from .config import SampleConfig
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
from llama_stack.apis.memory import * # noqa: F403
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class SampleMemoryImpl(Memory):
    """Skeleton ``Memory`` implementation that only stores its configuration."""

    def __init__(self, config: SampleConfig):
        self.config = config

    async def initialize(self):
        """Nothing to set up."""

    async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None:
        """Accept a memory bank without doing anything.

        These are the memory banks the Llama Stack will use to route requests
        to this provider; perform validation here if necessary.
        """
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
from .config import WeaviateConfig, WeaviateRequestProviderData # noqa: F401
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
async def get_adapter_impl(config: WeaviateConfig, _deps):
    """Create and initialize the Weaviate memory adapter for ``config``.

    ``_deps`` is accepted but not used here.
    """
    # Imported inside the function rather than at module import time --
    # presumably so the weaviate client is only required when this provider
    # is actually instantiated.
    from .weaviate import WeaviateMemoryAdapter

    adapter = WeaviateMemoryAdapter(config)
    await adapter.initialize()
    return adapter
|
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
import json
|
|
7
|
-
|
|
8
|
-
from typing import Any, Dict, List, Optional
|
|
9
|
-
|
|
10
|
-
import weaviate
|
|
11
|
-
import weaviate.classes as wvc
|
|
12
|
-
from numpy.typing import NDArray
|
|
13
|
-
from weaviate.classes.init import Auth
|
|
14
|
-
|
|
15
|
-
from llama_stack.apis.memory import * # noqa: F403
|
|
16
|
-
from llama_stack.distribution.request_headers import NeedsRequestProviderData
|
|
17
|
-
from llama_stack.providers.datatypes import MemoryBanksProtocolPrivate
|
|
18
|
-
from llama_stack.providers.utils.memory.vector_store import (
|
|
19
|
-
BankWithIndex,
|
|
20
|
-
EmbeddingIndex,
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
from .config import WeaviateConfig, WeaviateRequestProviderData
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class WeaviateIndex(EmbeddingIndex):
    """Embedding index backed by a single Weaviate collection.

    Each chunk is stored as one object whose ``chunk_content`` property holds
    the chunk serialized as JSON, with the embedding attached as its vector.
    """

    def __init__(self, client: weaviate.Client, collection_name: str):
        self.client = client
        self.collection_name = collection_name

    async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray):
        """Insert ``chunks`` with their embeddings into the collection.

        Raises:
            AssertionError: If the number of chunks and embeddings differ.
        """
        assert len(chunks) == len(
            embeddings
        ), f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"

        if not chunks:
            # Nothing to insert; skip the call to the server.
            return

        data_objects = [
            wvc.data.DataObject(
                properties={
                    "chunk_content": chunk.json(),
                },
                vector=embeddings[i].tolist(),
            )
            for i, chunk in enumerate(chunks)
        ]

        # Inserting chunks into a prespecified Weaviate collection
        collection = self.client.collections.get(self.collection_name)

        # TODO: make this async friendly
        collection.data.insert_many(data_objects)

    async def query(self, embedding: NDArray, k: int) -> QueryDocumentsResponse:
        """Return up to ``k`` chunks nearest to ``embedding``.

        Objects whose stored JSON cannot be parsed back into a ``Chunk`` are
        reported on stdout and skipped. Scores are the reciprocal of the
        distance returned by Weaviate.
        """
        collection = self.client.collections.get(self.collection_name)

        results = collection.query.near_vector(
            near_vector=embedding.tolist(),
            limit=k,
            return_metadata=wvc.query.MetadataQuery(distance=True),
        )

        chunks = []
        scores = []
        for doc in results.objects:
            chunk_json = doc.properties["chunk_content"]
            try:
                chunk_dict = json.loads(chunk_json)
                chunk = Chunk(**chunk_dict)
            except Exception:
                # Deliberate best effort: one bad object must not fail the
                # whole query.
                import traceback

                traceback.print_exc()
                print(f"Failed to parse document: {chunk_json}")
                continue

            distance = doc.metadata.distance
            chunks.append(chunk)
            # An exact match has distance 0; report it as the best possible
            # score instead of failing with ZeroDivisionError.
            scores.append(float("inf") if distance == 0 else 1.0 / distance)

        return QueryDocumentsResponse(chunks=chunks, scores=scores)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
class WeaviateMemoryAdapter(
    Memory, NeedsRequestProviderData, MemoryBanksProtocolPrivate
):
    """Memory provider that stores bank contents in Weaviate collections.

    Clients are created from per-request provider data and cached by
    cluster URL and API key; bank indexes are cached by bank identifier.
    """

    def __init__(self, config: WeaviateConfig) -> None:
        self.config = config
        self.client_cache = {}
        self.cache = {}

    def _get_client(self) -> weaviate.Client:
        """Return the client for the current request's credentials."""
        provider_data = self.get_request_provider_data()
        assert provider_data is not None, "Request provider data must be set"
        assert isinstance(provider_data, WeaviateRequestProviderData)

        cache_key = f"{provider_data.weaviate_cluster_url}::{provider_data.weaviate_api_key}"
        try:
            return self.client_cache[cache_key]
        except KeyError:
            pass

        # First request with these credentials: connect and remember the client.
        new_client = weaviate.connect_to_weaviate_cloud(
            cluster_url=provider_data.weaviate_cluster_url,
            auth_credentials=Auth.api_key(provider_data.weaviate_api_key),
        )
        self.client_cache[cache_key] = new_client
        return new_client

    async def initialize(self) -> None:
        """Nothing to set up; clients are created on first use."""

    async def shutdown(self) -> None:
        """Close every cached client."""
        for cached_client in self.client_cache.values():
            cached_client.close()

    async def register_memory_bank(
        self,
        memory_bank: MemoryBankDef,
    ) -> None:
        """Ensure a collection exists for the bank and cache its index."""
        assert (
            memory_bank.type == MemoryBankType.vector.value
        ), f"Only vector banks are supported {memory_bank.type}"

        client = self._get_client()
        bank_name = memory_bank.identifier

        # Create collection if it doesn't exist
        already_there = client.collections.exists(bank_name)
        if not already_there:
            client.collections.create(
                name=bank_name,
                vectorizer_config=wvc.config.Configure.Vectorizer.none(),
                properties=[
                    wvc.config.Property(
                        name="chunk_content",
                        data_type=wvc.config.DataType.TEXT,
                    ),
                ],
            )

        self.cache[bank_name] = BankWithIndex(
            bank=memory_bank,
            index=WeaviateIndex(client=client, collection_name=bank_name),
        )

    async def list_memory_banks(self) -> List[MemoryBankDef]:
        """Return the banks of all currently cached indexes."""
        # TODO: right now the Llama Stack is the source of truth for these banks. That is
        # not ideal. It should be Weaviate which is the source of truth. Unfortunately,
        # list() happens at Stack startup when the Weaviate client (credentials) is not
        # yet available. We need to figure out a way to make this work.
        banks = []
        for entry in self.cache.values():
            banks.append(entry.bank)
        return banks

    async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]:
        """Return the index for ``bank_id``, building and caching it on a miss.

        Raises:
            ValueError: If the bank is unknown to the bank store or its
                collection does not exist in Weaviate.
        """
        if bank_id in self.cache:
            return self.cache[bank_id]

        bank = await self.memory_bank_store.get_memory_bank(bank_id)
        if not bank:
            raise ValueError(f"Bank {bank_id} not found")

        client = self._get_client()
        if not client.collections.exists(bank_id):
            raise ValueError(f"Collection with name `{bank_id}` not found")

        built = BankWithIndex(
            bank=bank,
            index=WeaviateIndex(client=client, collection_name=bank_id),
        )
        self.cache[bank_id] = built
        return built

    async def insert_documents(
        self,
        bank_id: str,
        documents: List[MemoryBankDocument],
        ttl_seconds: Optional[int] = None,
    ) -> None:
        """Insert ``documents`` into bank ``bank_id``.

        ``ttl_seconds`` is accepted but not used by this implementation.
        """
        bank_index = await self._get_and_cache_bank_index(bank_id)
        if not bank_index:
            raise ValueError(f"Bank {bank_id} not found")

        await bank_index.insert_documents(documents)

    async def query_documents(
        self,
        bank_id: str,
        query: InterleavedTextMedia,
        params: Optional[Dict[str, Any]] = None,
    ) -> QueryDocumentsResponse:
        """Run ``query`` against bank ``bank_id`` and return the response."""
        bank_index = await self._get_and_cache_bank_index(bank_id)
        if not bank_index:
            raise ValueError(f"Bank {bank_id} not found")

        response = await bank_index.query_documents(query, params)
        return response
|
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
import json
|
|
8
|
-
import logging
|
|
9
|
-
|
|
10
|
-
from typing import Any, Dict, List
|
|
11
|
-
|
|
12
|
-
import boto3
|
|
13
|
-
|
|
14
|
-
from llama_stack.apis.safety import * # noqa
|
|
15
|
-
from llama_models.llama3.api.datatypes import * # noqa: F403
|
|
16
|
-
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
|
|
17
|
-
|
|
18
|
-
from .config import BedrockSafetyConfig
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Shield type values this module lists as supported for Bedrock.
BEDROCK_SUPPORTED_SHIELDS = [ShieldType.generic_content_shield.value]
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
    """Safety provider that checks messages with Amazon Bedrock guardrails.

    The adapter talks to Bedrock through a boto3 ``bedrock-runtime`` client
    created in ``initialize`` from the AWS profile named in the config.
    """

    def __init__(self, config: BedrockSafetyConfig) -> None:
        """Store the adapter configuration.

        Raises:
            ValueError: if ``config.aws_profile`` is empty.
        """
        if not config.aws_profile:
            raise ValueError(f"Missing boto_client aws_profile in model info::{config}")
        self.config = config
        # NOTE(review): never read inside this class; presumably kept for
        # bookkeeping by callers -- confirm before removing.
        self.registered_shields = []

    async def initialize(self) -> None:
        """Create the ``bedrock-runtime`` client for the configured profile.

        Raises:
            RuntimeError: if the boto3 session or client cannot be created.
        """
        # Use the module logger rather than print() so the message honours
        # the application's logging configuration.
        logger.info("initializing with profile --- > %s", self.config)
        try:
            self.boto_client = boto3.Session(
                profile_name=self.config.aws_profile
            ).client("bedrock-runtime")
        except Exception as e:
            raise RuntimeError("Error initializing BedrockSafetyAdapter") from e

    async def shutdown(self) -> None:
        """No-op; nothing is released on shutdown."""
        pass

    async def register_shield(self, shield: ShieldDef) -> None:
        """Reject dynamic shield registration.

        Raises:
            ValueError: always.
        """
        raise ValueError("Registering dynamic shields is not supported")

    async def list_shields(self) -> List[ShieldDef]:
        """Not implemented yet.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            """
            `list_shields` not implemented; this should read all guardrails from
            bedrock and populate guardrailId and guardrailVersion in the ShieldDef.
            """
        )

    async def run_shield(
        self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None
    ) -> RunShieldResponse:
        """Apply the Bedrock guardrail registered for ``shield_type``.

        Args:
            shield_type: identifier looked up in ``self.shield_store``.
            messages: messages whose ``content`` is sent to the guardrail.
            params: accepted for interface compatibility; not used here.

        Returns:
            A ``RunShieldResponse`` whose ``violation`` is a
            ``SafetyViolation`` when the guardrail intervened and ``None``
            otherwise.

        Raises:
            ValueError: if the shield store has no entry for ``shield_type``.
        """
        # NOTE(review): ``shield_store`` is not assigned anywhere in this
        # class; presumably injected by the framework -- confirm.
        shield_def = await self.shield_store.get_shield(shield_type)
        if not shield_def:
            raise ValueError(f"Unknown shield {shield_type}")

        # This is the implementation for the bedrock guardrails. The input to
        # the guardrails is to be of this format:
        #
        #   content = [
        #       {
        #           "text": {
        #               "text": "Is the AB503 Product a better investment than the S&P 500?"
        #           }
        #       }
        #   ]
        #
        # However the incoming messages are of type UserMessage(content=....)
        # coming from
        # https://github.com/meta-llama/llama-models/blob/main/models/llama3/api/datatypes.py
        #
        # They contain content and role. For now we extract the content and
        # default the "qualifiers": ["query"].
        shield_params = shield_def.params
        logger.debug("run_shield::%s::messages=%s", shield_params, messages)

        # Convert the messages into the format Bedrock expects.
        content_messages = [
            {"text": {"text": message.content}} for message in messages
        ]
        # Serialize only when debug logging is on: an eagerly evaluated
        # json.dumps could otherwise fail the whole call on content that is
        # not JSON-serializable. ``default=str`` is deliberate for the same
        # reason; this string is only ever used for the log line.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "run_shield::final:messages::%s:",
                json.dumps(content_messages, indent=2, default=str),
            )

        response = self.boto_client.apply_guardrail(
            guardrailIdentifier=shield_params["guardrailIdentifier"],
            guardrailVersion=shield_params["guardrailVersion"],
            source="OUTPUT",  # or 'INPUT' depending on your use case
            content=content_messages,
        )
        if response["action"] != "GUARDRAIL_INTERVENED":
            # Always hand back the declared response type, never a bare None.
            return RunShieldResponse(violation=None)

        # Guardrails returns lists; this implementation uses the last entry
        # of each.
        outputs = response["outputs"]
        assessments = response["assessments"]
        user_message = outputs[-1]["text"] if outputs else ""
        metadata = dict(assessments[-1]) if assessments else {}

        return RunShieldResponse(
            violation=SafetyViolation(
                user_message=user_message,
                violation_level=ViolationLevel.ERROR,
                metadata=metadata,
            )
        )
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
from pydantic import BaseModel, Field
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class BedrockSafetyConfig(BaseModel):
    """Configuration information for a guardrail that you want to use in the request."""

    # Name of the local AWS profile to use; falls back to "default".
    aws_profile: str = Field(
        default="default",
        description=(
            "The profile on the machine having valid aws credentials. "
            "This will ensure separation of creation to invocation"
        ),
    )
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
from .config import SampleConfig
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
from llama_stack.apis.safety import * # noqa: F403
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class SampleSafetyImpl(Safety):
    """Sample ``Safety`` implementation that only stores its config."""

    def __init__(self, config: SampleConfig):
        """Keep a reference to the provider configuration."""
        self.config = config

    async def register_shield(self, shield: ShieldDef) -> None:
        """Accept a shield registration without doing anything.

        These are the safety shields the Llama Stack will use to route
        requests to this provider; perform validation here if necessary.
        """
        return None

    async def initialize(self):
        """No-op initialization hook."""
        return None
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
from .config import TogetherProviderDataValidator, TogetherSafetyConfig # noqa: F401
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
async def get_adapter_impl(config: TogetherSafetyConfig, _deps):
    """Build and initialize the Together safety adapter for ``config``.

    The implementation module is imported inside the function, so it is only
    loaded when an adapter is actually requested. ``_deps`` is accepted but
    not used.

    Returns:
        The ``TogetherSafetyImpl`` instance after ``initialize()`` has run.
    """
    from .together import TogetherSafetyImpl

    assert isinstance(config, TogetherSafetyConfig), f"Unexpected config type: {type(config)}"

    adapter = TogetherSafetyImpl(config)
    await adapter.initialize()
    return adapter
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
-
# the root directory of this source tree.
|
|
6
|
-
|
|
7
|
-
from typing import Optional
|
|
8
|
-
|
|
9
|
-
from llama_models.schema_utils import json_schema_type
|
|
10
|
-
from pydantic import BaseModel, Field
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class TogetherProviderDataValidator(BaseModel):
    # Required Together API key field of the provider data.
    # NOTE(review): presumably supplied per request -- confirm against caller.
    together_api_key: str
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@json_schema_type
class TogetherSafetyConfig(BaseModel):
    # Base URL of the Together AI API; defaults to the public v1 endpoint.
    url: str = Field(
        default="https://api.together.xyz/v1",
        description="The URL for the Together AI server",
    )

    # Optional distribution-wide API key; None when no default key is set.
    api_key: Optional[str] = Field(
        default=None,
        description="The Together AI API Key (default for the distribution, if any)",
    )
|