llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -6,40 +6,83 @@
|
|
|
6
6
|
|
|
7
7
|
from datetime import datetime
|
|
8
8
|
from enum import Enum
|
|
9
|
-
from typing import
|
|
9
|
+
from typing import (
|
|
10
|
+
Annotated,
|
|
11
|
+
Any,
|
|
12
|
+
Literal,
|
|
13
|
+
Protocol,
|
|
14
|
+
runtime_checkable,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
|
-
from llama_models.schema_utils import json_schema_type, webmethod
|
|
12
17
|
from pydantic import BaseModel, Field
|
|
13
|
-
|
|
18
|
+
|
|
19
|
+
from llama_stack.models.llama.datatypes import Primitive
|
|
20
|
+
from llama_stack.schema_utils import json_schema_type, register_schema
|
|
21
|
+
|
|
22
|
+
# Add this constant near the top of the file, after the imports
|
|
23
|
+
DEFAULT_TTL_DAYS = 7
|
|
14
24
|
|
|
15
25
|
|
|
16
26
|
@json_schema_type
|
|
17
27
|
class SpanStatus(Enum):
|
|
28
|
+
"""The status of a span indicating whether it completed successfully or with an error.
|
|
29
|
+
:cvar OK: Span completed successfully without errors
|
|
30
|
+
:cvar ERROR: Span completed with an error or failure
|
|
31
|
+
"""
|
|
32
|
+
|
|
18
33
|
OK = "ok"
|
|
19
34
|
ERROR = "error"
|
|
20
35
|
|
|
21
36
|
|
|
22
37
|
@json_schema_type
|
|
23
38
|
class Span(BaseModel):
|
|
39
|
+
"""A span representing a single operation within a trace.
|
|
40
|
+
:param span_id: Unique identifier for the span
|
|
41
|
+
:param trace_id: Unique identifier for the trace this span belongs to
|
|
42
|
+
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
|
|
43
|
+
:param name: Human-readable name describing the operation this span represents
|
|
44
|
+
:param start_time: Timestamp when the operation began
|
|
45
|
+
:param end_time: (Optional) Timestamp when the operation finished, if completed
|
|
46
|
+
:param attributes: (Optional) Key-value pairs containing additional metadata about the span
|
|
47
|
+
"""
|
|
48
|
+
|
|
24
49
|
span_id: str
|
|
25
50
|
trace_id: str
|
|
26
|
-
parent_span_id:
|
|
51
|
+
parent_span_id: str | None = None
|
|
27
52
|
name: str
|
|
28
53
|
start_time: datetime
|
|
29
|
-
end_time:
|
|
30
|
-
attributes:
|
|
54
|
+
end_time: datetime | None = None
|
|
55
|
+
attributes: dict[str, Any] | None = Field(default_factory=lambda: {})
|
|
56
|
+
|
|
57
|
+
def set_attribute(self, key: str, value: Any):
|
|
58
|
+
if self.attributes is None:
|
|
59
|
+
self.attributes = {}
|
|
60
|
+
self.attributes[key] = value
|
|
31
61
|
|
|
32
62
|
|
|
33
63
|
@json_schema_type
|
|
34
64
|
class Trace(BaseModel):
|
|
65
|
+
"""A trace representing the complete execution path of a request across multiple operations.
|
|
66
|
+
:param trace_id: Unique identifier for the trace
|
|
67
|
+
:param root_span_id: Unique identifier for the root span that started this trace
|
|
68
|
+
:param start_time: Timestamp when the trace began
|
|
69
|
+
:param end_time: (Optional) Timestamp when the trace finished, if completed
|
|
70
|
+
"""
|
|
71
|
+
|
|
35
72
|
trace_id: str
|
|
36
73
|
root_span_id: str
|
|
37
74
|
start_time: datetime
|
|
38
|
-
end_time:
|
|
75
|
+
end_time: datetime | None = None
|
|
39
76
|
|
|
40
77
|
|
|
41
78
|
@json_schema_type
|
|
42
79
|
class EventType(Enum):
|
|
80
|
+
"""The type of telemetry event being logged.
|
|
81
|
+
:cvar UNSTRUCTURED_LOG: A simple log message with severity level
|
|
82
|
+
:cvar STRUCTURED_LOG: A structured log event with typed payload data
|
|
83
|
+
:cvar METRIC: A metric measurement with value and unit
|
|
84
|
+
"""
|
|
85
|
+
|
|
43
86
|
UNSTRUCTURED_LOG = "unstructured_log"
|
|
44
87
|
STRUCTURED_LOG = "structured_log"
|
|
45
88
|
METRIC = "metric"
|
|
@@ -47,6 +90,15 @@ class EventType(Enum):
|
|
|
47
90
|
|
|
48
91
|
@json_schema_type
|
|
49
92
|
class LogSeverity(Enum):
|
|
93
|
+
"""The severity level of a log message.
|
|
94
|
+
:cvar VERBOSE: Detailed diagnostic information for troubleshooting
|
|
95
|
+
:cvar DEBUG: Debug information useful during development
|
|
96
|
+
:cvar INFO: General informational messages about normal operation
|
|
97
|
+
:cvar WARN: Warning messages about potentially problematic situations
|
|
98
|
+
:cvar ERROR: Error messages indicating failures that don't stop execution
|
|
99
|
+
:cvar CRITICAL: Critical error messages indicating severe failures
|
|
100
|
+
"""
|
|
101
|
+
|
|
50
102
|
VERBOSE = "verbose"
|
|
51
103
|
DEBUG = "debug"
|
|
52
104
|
INFO = "info"
|
|
@@ -56,77 +108,316 @@ class LogSeverity(Enum):
|
|
|
56
108
|
|
|
57
109
|
|
|
58
110
|
class EventCommon(BaseModel):
|
|
111
|
+
"""Common fields shared by all telemetry events.
|
|
112
|
+
:param trace_id: Unique identifier for the trace this event belongs to
|
|
113
|
+
:param span_id: Unique identifier for the span this event belongs to
|
|
114
|
+
:param timestamp: Timestamp when the event occurred
|
|
115
|
+
:param attributes: (Optional) Key-value pairs containing additional metadata about the event
|
|
116
|
+
"""
|
|
117
|
+
|
|
59
118
|
trace_id: str
|
|
60
119
|
span_id: str
|
|
61
120
|
timestamp: datetime
|
|
62
|
-
attributes:
|
|
121
|
+
attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {})
|
|
63
122
|
|
|
64
123
|
|
|
65
124
|
@json_schema_type
|
|
66
125
|
class UnstructuredLogEvent(EventCommon):
|
|
67
|
-
|
|
126
|
+
"""An unstructured log event containing a simple text message.
|
|
127
|
+
:param type: Event type identifier set to UNSTRUCTURED_LOG
|
|
128
|
+
:param message: The log message text
|
|
129
|
+
:param severity: The severity level of the log message
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
|
|
68
133
|
message: str
|
|
69
134
|
severity: LogSeverity
|
|
70
135
|
|
|
71
136
|
|
|
72
137
|
@json_schema_type
|
|
73
138
|
class MetricEvent(EventCommon):
|
|
74
|
-
|
|
139
|
+
"""A metric event containing a measured value.
|
|
140
|
+
:param type: Event type identifier set to METRIC
|
|
141
|
+
:param metric: The name of the metric being measured
|
|
142
|
+
:param value: The numeric value of the metric measurement
|
|
143
|
+
:param unit: The unit of measurement for the metric value
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
type: Literal[EventType.METRIC] = EventType.METRIC
|
|
75
147
|
metric: str # this would be an enum
|
|
76
|
-
value:
|
|
148
|
+
value: int | float
|
|
77
149
|
unit: str
|
|
78
150
|
|
|
79
151
|
|
|
152
|
+
@json_schema_type
|
|
153
|
+
class MetricInResponse(BaseModel):
|
|
154
|
+
"""A metric value included in API responses.
|
|
155
|
+
:param metric: The name of the metric
|
|
156
|
+
:param value: The numeric value of the metric
|
|
157
|
+
:param unit: (Optional) The unit of measurement for the metric value
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
metric: str
|
|
161
|
+
value: int | float
|
|
162
|
+
unit: str | None = None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# This is a short term solution to allow inference API to return metrics
|
|
166
|
+
# The ideal way to do this is to have a way for all response types to include metrics
|
|
167
|
+
# and all metric events logged to the telemetry API to be included with the response
|
|
168
|
+
# To do this, we will need to augment all response types with a metrics field.
|
|
169
|
+
# We have hit a blocker from stainless SDK that prevents us from doing this.
|
|
170
|
+
# The blocker is that if we were to augment the response types that have a data field
|
|
171
|
+
# in them like so
|
|
172
|
+
# class ListModelsResponse(BaseModel):
|
|
173
|
+
# metrics: Optional[List[MetricEvent]] = None
|
|
174
|
+
# data: List[Models]
|
|
175
|
+
# ...
|
|
176
|
+
# The client SDK will need to access the data by using a .data field, which is not
|
|
177
|
+
# ergonomic. Stainless SDK does support unwrapping the response type, but it
|
|
178
|
+
# requires that the response type to only have a single field.
|
|
179
|
+
|
|
180
|
+
# We will need a way in the client SDK to signal that the metrics are needed
|
|
181
|
+
# and if they are needed, the client SDK has to return the full response type
|
|
182
|
+
# without unwrapping it.
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class MetricResponseMixin(BaseModel):
|
|
186
|
+
"""Mixin class for API responses that can include metrics.
|
|
187
|
+
:param metrics: (Optional) List of metrics associated with the API response
|
|
188
|
+
"""
|
|
189
|
+
|
|
190
|
+
metrics: list[MetricInResponse] | None = None
|
|
191
|
+
|
|
192
|
+
|
|
80
193
|
@json_schema_type
|
|
81
194
|
class StructuredLogType(Enum):
|
|
195
|
+
"""The type of structured log event payload.
|
|
196
|
+
:cvar SPAN_START: Event indicating the start of a new span
|
|
197
|
+
:cvar SPAN_END: Event indicating the completion of a span
|
|
198
|
+
"""
|
|
199
|
+
|
|
82
200
|
SPAN_START = "span_start"
|
|
83
201
|
SPAN_END = "span_end"
|
|
84
202
|
|
|
85
203
|
|
|
86
204
|
@json_schema_type
|
|
87
205
|
class SpanStartPayload(BaseModel):
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
206
|
+
"""Payload for a span start event.
|
|
207
|
+
:param type: Payload type identifier set to SPAN_START
|
|
208
|
+
:param name: Human-readable name describing the operation this span represents
|
|
209
|
+
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
|
|
210
|
+
"""
|
|
211
|
+
|
|
212
|
+
type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
|
|
91
213
|
name: str
|
|
92
|
-
parent_span_id:
|
|
214
|
+
parent_span_id: str | None = None
|
|
93
215
|
|
|
94
216
|
|
|
95
217
|
@json_schema_type
|
|
96
218
|
class SpanEndPayload(BaseModel):
|
|
97
|
-
|
|
219
|
+
"""Payload for a span end event.
|
|
220
|
+
:param type: Payload type identifier set to SPAN_END
|
|
221
|
+
:param status: The final status of the span indicating success or failure
|
|
222
|
+
"""
|
|
223
|
+
|
|
224
|
+
type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
|
|
98
225
|
status: SpanStatus
|
|
99
226
|
|
|
100
227
|
|
|
101
228
|
StructuredLogPayload = Annotated[
|
|
102
|
-
|
|
103
|
-
SpanStartPayload,
|
|
104
|
-
SpanEndPayload,
|
|
105
|
-
],
|
|
229
|
+
SpanStartPayload | SpanEndPayload,
|
|
106
230
|
Field(discriminator="type"),
|
|
107
231
|
]
|
|
232
|
+
register_schema(StructuredLogPayload, name="StructuredLogPayload")
|
|
108
233
|
|
|
109
234
|
|
|
110
235
|
@json_schema_type
|
|
111
236
|
class StructuredLogEvent(EventCommon):
|
|
112
|
-
|
|
237
|
+
"""A structured log event containing typed payload data.
|
|
238
|
+
:param type: Event type identifier set to STRUCTURED_LOG
|
|
239
|
+
:param payload: The structured payload data for the log event
|
|
240
|
+
"""
|
|
241
|
+
|
|
242
|
+
type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
|
|
113
243
|
payload: StructuredLogPayload
|
|
114
244
|
|
|
115
245
|
|
|
116
246
|
Event = Annotated[
|
|
117
|
-
|
|
118
|
-
UnstructuredLogEvent,
|
|
119
|
-
MetricEvent,
|
|
120
|
-
StructuredLogEvent,
|
|
121
|
-
],
|
|
247
|
+
UnstructuredLogEvent | MetricEvent | StructuredLogEvent,
|
|
122
248
|
Field(discriminator="type"),
|
|
123
249
|
]
|
|
250
|
+
register_schema(Event, name="Event")
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
@json_schema_type
|
|
254
|
+
class EvalTrace(BaseModel):
|
|
255
|
+
"""A trace record for evaluation purposes.
|
|
256
|
+
:param session_id: Unique identifier for the evaluation session
|
|
257
|
+
:param step: The evaluation step or phase identifier
|
|
258
|
+
:param input: The input data for the evaluation
|
|
259
|
+
:param output: The actual output produced during evaluation
|
|
260
|
+
:param expected_output: The expected output for comparison during evaluation
|
|
261
|
+
"""
|
|
262
|
+
|
|
263
|
+
session_id: str
|
|
264
|
+
step: str
|
|
265
|
+
input: str
|
|
266
|
+
output: str
|
|
267
|
+
expected_output: str
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@json_schema_type
|
|
271
|
+
class SpanWithStatus(Span):
|
|
272
|
+
"""A span that includes status information.
|
|
273
|
+
:param status: (Optional) The current status of the span
|
|
274
|
+
"""
|
|
275
|
+
|
|
276
|
+
status: SpanStatus | None = None
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@json_schema_type
|
|
280
|
+
class QueryConditionOp(Enum):
|
|
281
|
+
"""Comparison operators for query conditions.
|
|
282
|
+
:cvar EQ: Equal to comparison
|
|
283
|
+
:cvar NE: Not equal to comparison
|
|
284
|
+
:cvar GT: Greater than comparison
|
|
285
|
+
:cvar LT: Less than comparison
|
|
286
|
+
"""
|
|
287
|
+
|
|
288
|
+
EQ = "eq"
|
|
289
|
+
NE = "ne"
|
|
290
|
+
GT = "gt"
|
|
291
|
+
LT = "lt"
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@json_schema_type
|
|
295
|
+
class QueryCondition(BaseModel):
|
|
296
|
+
"""A condition for filtering query results.
|
|
297
|
+
:param key: The attribute key to filter on
|
|
298
|
+
:param op: The comparison operator to apply
|
|
299
|
+
:param value: The value to compare against
|
|
300
|
+
"""
|
|
301
|
+
|
|
302
|
+
key: str
|
|
303
|
+
op: QueryConditionOp
|
|
304
|
+
value: Any
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class QueryTracesResponse(BaseModel):
|
|
308
|
+
"""Response containing a list of traces.
|
|
309
|
+
:param data: List of traces matching the query criteria
|
|
310
|
+
"""
|
|
311
|
+
|
|
312
|
+
data: list[Trace]
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class QuerySpansResponse(BaseModel):
|
|
316
|
+
"""Response containing a list of spans.
|
|
317
|
+
:param data: List of spans matching the query criteria
|
|
318
|
+
"""
|
|
319
|
+
|
|
320
|
+
data: list[Span]
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class QuerySpanTreeResponse(BaseModel):
|
|
324
|
+
"""Response containing a tree structure of spans.
|
|
325
|
+
:param data: Dictionary mapping span IDs to spans with status information
|
|
326
|
+
"""
|
|
327
|
+
|
|
328
|
+
data: dict[str, SpanWithStatus]
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class MetricQueryType(Enum):
|
|
332
|
+
"""The type of metric query to perform.
|
|
333
|
+
:cvar RANGE: Query metrics over a time range
|
|
334
|
+
:cvar INSTANT: Query metrics at a specific point in time
|
|
335
|
+
"""
|
|
336
|
+
|
|
337
|
+
RANGE = "range"
|
|
338
|
+
INSTANT = "instant"
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
class MetricLabelOperator(Enum):
|
|
342
|
+
"""Operators for matching metric labels.
|
|
343
|
+
:cvar EQUALS: Label value must equal the specified value
|
|
344
|
+
:cvar NOT_EQUALS: Label value must not equal the specified value
|
|
345
|
+
:cvar REGEX_MATCH: Label value must match the specified regular expression
|
|
346
|
+
:cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
|
|
347
|
+
"""
|
|
348
|
+
|
|
349
|
+
EQUALS = "="
|
|
350
|
+
NOT_EQUALS = "!="
|
|
351
|
+
REGEX_MATCH = "=~"
|
|
352
|
+
REGEX_NOT_MATCH = "!~"
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
class MetricLabelMatcher(BaseModel):
|
|
356
|
+
"""A matcher for filtering metrics by label values.
|
|
357
|
+
:param name: The name of the label to match
|
|
358
|
+
:param value: The value to match against
|
|
359
|
+
:param operator: The comparison operator to use for matching
|
|
360
|
+
"""
|
|
361
|
+
|
|
362
|
+
name: str
|
|
363
|
+
value: str
|
|
364
|
+
operator: MetricLabelOperator = MetricLabelOperator.EQUALS
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
@json_schema_type
|
|
368
|
+
class MetricLabel(BaseModel):
|
|
369
|
+
"""A label associated with a metric.
|
|
370
|
+
:param name: The name of the label
|
|
371
|
+
:param value: The value of the label
|
|
372
|
+
"""
|
|
373
|
+
|
|
374
|
+
name: str
|
|
375
|
+
value: str
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@json_schema_type
|
|
379
|
+
class MetricDataPoint(BaseModel):
|
|
380
|
+
"""A single data point in a metric time series.
|
|
381
|
+
:param timestamp: Unix timestamp when the metric value was recorded
|
|
382
|
+
:param value: The numeric value of the metric at this timestamp
|
|
383
|
+
"""
|
|
384
|
+
|
|
385
|
+
timestamp: int
|
|
386
|
+
value: float
|
|
387
|
+
unit: str
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@json_schema_type
|
|
391
|
+
class MetricSeries(BaseModel):
|
|
392
|
+
"""A time series of metric data points.
|
|
393
|
+
:param metric: The name of the metric
|
|
394
|
+
:param labels: List of labels associated with this metric series
|
|
395
|
+
:param values: List of data points in chronological order
|
|
396
|
+
"""
|
|
397
|
+
|
|
398
|
+
metric: str
|
|
399
|
+
labels: list[MetricLabel]
|
|
400
|
+
values: list[MetricDataPoint]
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# NOTE(review): not decorated with @json_schema_type, unlike the metric models
# above — confirm this response type is intentionally excluded from the schema.
class QueryMetricsResponse(BaseModel):
    """Response containing metric time series data.

    :param data: List of metric series matching the query criteria
    """

    data: list[MetricSeries]
|
|
124
409
|
|
|
125
410
|
|
|
126
411
|
@runtime_checkable
class Telemetry(Protocol):
    """Protocol for telemetry backends that ingest logged events."""

    async def log_event(
        self,
        event: Event,
        ttl_seconds: int = DEFAULT_TTL_DAYS * 86400,  # default retention, expressed in seconds
    ) -> None:
        """Log an event.

        :param event: The event to log.
        :param ttl_seconds: The time to live of the event, in seconds.
        """
        ...
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from enum import Enum, StrEnum
|
|
8
|
+
from typing import Annotated, Any, Literal, Protocol
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field, field_validator
|
|
11
|
+
from typing_extensions import runtime_checkable
|
|
12
|
+
|
|
13
|
+
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
|
14
|
+
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
|
15
|
+
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
|
16
|
+
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@json_schema_type
class RRFRanker(BaseModel):
    """
    Reciprocal Rank Fusion (RRF) ranker configuration.

    :param type: The type of ranker, always "rrf" (discriminator for the Ranker union)
    :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
        Must be greater than 0
    """

    type: Literal["rrf"] = "rrf"
    impact_factor: float = Field(default=60.0, gt=0.0)  # default of 60 for optimal performance
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@json_schema_type
class WeightedRanker(BaseModel):
    """
    Weighted ranker configuration that combines vector and keyword scores.

    :param type: The type of ranker, always "weighted" (discriminator for the Ranker union)
    :param alpha: Weight factor between 0 and 1.
        0 means only use keyword scores,
        1 means only use vector scores,
        values in between blend both scores.
    """

    type: Literal["weighted"] = "weighted"
    # Bounds are enforced by pydantic (ge/le); the description also surfaces in the schema.
    alpha: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.",
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Discriminated union: the "type" field selects the concrete ranker model
# (RRFRanker or WeightedRanker) during validation.
Ranker = Annotated[
    RRFRanker | WeightedRanker,
    Field(discriminator="type"),
]
# Register the union under a stable name in the generated API schema.
register_schema(Ranker, name="Ranker")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@json_schema_type
class RAGDocument(BaseModel):
    """
    A document to be used for document ingestion in the RAG Tool.

    :param document_id: The unique identifier for the document.
    :param content: The content of the document — either inline interleaved content or a URL to fetch it from.
    :param mime_type: The MIME type of the document.
    :param metadata: Additional metadata for the document.
    """

    document_id: str
    content: InterleavedContent | URL
    mime_type: str | None = None
    # default_factory gives each document its own dict rather than a shared default.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@json_schema_type
class RAGQueryResult(BaseModel):
    """Result of a RAG query containing retrieved content and metadata.

    :param content: (Optional) The retrieved content from the query; None when nothing was retrieved
    :param metadata: Additional metadata about the query result
    """

    content: InterleavedContent | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@json_schema_type
class RAGQueryGenerator(Enum):
    """Types of query generators for RAG systems.

    :cvar default: Default query generator using simple text processing
    :cvar llm: LLM-based query generator for enhanced query understanding
    :cvar custom: Custom query generator implementation
    """

    # Lowercase member names match the wire values used in the API schema.
    default = "default"
    llm = "llm"
    custom = "custom"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@json_schema_type
class RAGSearchMode(StrEnum):
    """
    Search modes for RAG query retrieval:
    - VECTOR: Uses vector similarity search for semantic matching
    - KEYWORD: Uses keyword-based search for exact matching
    - HYBRID: Combines both vector and keyword search for better results

    As a StrEnum, members compare equal to their string values
    (e.g. RAGSearchMode.VECTOR == "vector").
    """

    VECTOR = "vector"
    KEYWORD = "keyword"
    HYBRID = "hybrid"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@json_schema_type
class DefaultRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the default RAG query generator.

    :param type: Type of query generator, always 'default' (discriminator for RAGQueryGeneratorConfig)
    :param separator: String separator used to join query terms
    """

    type: Literal["default"] = "default"
    separator: str = " "
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@json_schema_type
class LLMRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the LLM-based RAG query generator.

    :param type: Type of query generator, always 'llm' (discriminator for RAGQueryGeneratorConfig)
    :param model: Name of the language model to use for query generation
    :param template: Template string for formatting the query generation prompt
    """

    type: Literal["llm"] = "llm"
    model: str
    template: str
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Discriminated union over the query-generator configs; the "type" field
# selects the concrete model during validation.
RAGQueryGeneratorConfig = Annotated[
    DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
    Field(discriminator="type"),
]
# Register the union under a stable name in the generated API schema.
register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@json_schema_type
class RAGQueryConfig(BaseModel):
    """
    Configuration for the RAG query generation.

    :param query_generator_config: Configuration for the query generator.
    :param max_tokens_in_context: Maximum number of tokens in the context.
    :param max_chunks: Maximum number of chunks to retrieve.
    :param chunk_template: Template for formatting each retrieved chunk in the context.
        Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
        Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
    :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector".
    :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.
    """

    # This config defines how a query is generated using the messages
    # for memory bank retrieval.
    # default_factory (rather than a shared default instance) so each model
    # instance gets its own config object.
    query_generator_config: RAGQueryGeneratorConfig = Field(default_factory=DefaultRAGQueryGeneratorConfig)
    max_tokens_in_context: int = 4096
    max_chunks: int = 5
    chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
    mode: RAGSearchMode | None = RAGSearchMode.VECTOR
    ranker: Ranker | None = Field(default=None)  # Only used for hybrid mode

    @field_validator("chunk_template")
    @classmethod
    def validate_chunk_template(cls, v: str) -> str:
        """Validate that the chunk template is non-empty and contains the required placeholders.

        The emptiness check runs first: in the previous ordering it was
        unreachable, because an empty string already failed the placeholder
        checks (and produced a misleading error message).
        """
        if len(v) == 0:
            raise ValueError("chunk_template must not be empty")
        if "{chunk.content}" not in v:
            raise ValueError("chunk_template must contain {chunk.content}")
        if "{index}" not in v:
            raise ValueError("chunk_template must contain {index}")
        return v
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
    """Runtime interface for the RAG tool: document ingestion and retrieval."""

    @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
    async def insert(
        self,
        documents: list[RAGDocument],
        vector_db_id: str,
        chunk_size_in_tokens: int = 512,
    ) -> None:
        """Index documents so they can be used by the RAG system.

        :param documents: List of documents to index in the RAG system
        :param vector_db_id: ID of the vector database to store the document embeddings
        :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
        """
        ...

    @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
    async def query(
        self,
        content: InterleavedContent,
        vector_db_ids: list[str],
        query_config: RAGQueryConfig | None = None,
    ) -> RAGQueryResult:
        """Query the RAG system for context; typically invoked by the agent.

        :param content: The query content to search for in the indexed documents
        :param vector_db_ids: List of vector database IDs to search within
        :param query_config: (Optional) Configuration parameters for the query operation
        :returns: RAGQueryResult containing the retrieved content and metadata
        """
        ...
|