llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -5,49 +5,130 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
import asyncio
|
|
8
|
-
import
|
|
9
|
-
import logging
|
|
8
|
+
import contextvars
|
|
9
|
+
import logging # allow-direct-logging
|
|
10
10
|
import queue
|
|
11
|
+
import secrets
|
|
12
|
+
import sys
|
|
11
13
|
import threading
|
|
12
|
-
import
|
|
13
|
-
from
|
|
14
|
+
import time
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
from datetime import UTC, datetime
|
|
14
17
|
from functools import wraps
|
|
15
|
-
from typing import Any
|
|
18
|
+
from typing import Any
|
|
16
19
|
|
|
20
|
+
from llama_stack.apis.telemetry import (
|
|
21
|
+
Event,
|
|
22
|
+
LogSeverity,
|
|
23
|
+
Span,
|
|
24
|
+
SpanEndPayload,
|
|
25
|
+
SpanStartPayload,
|
|
26
|
+
SpanStatus,
|
|
27
|
+
StructuredLogEvent,
|
|
28
|
+
Telemetry,
|
|
29
|
+
UnstructuredLogEvent,
|
|
30
|
+
)
|
|
31
|
+
from llama_stack.log import get_logger
|
|
32
|
+
from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value
|
|
17
33
|
|
|
18
|
-
|
|
34
|
+
logger = get_logger(__name__, category="core")
|
|
19
35
|
|
|
36
|
+
# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion
|
|
37
|
+
_fallback_logger = logging.getLogger("llama_stack.telemetry.background")
|
|
38
|
+
if not _fallback_logger.handlers:
|
|
39
|
+
_fallback_logger.propagate = False
|
|
40
|
+
_fallback_logger.setLevel(logging.ERROR)
|
|
41
|
+
_fallback_handler = logging.StreamHandler(sys.stderr)
|
|
42
|
+
_fallback_handler.setLevel(logging.ERROR)
|
|
43
|
+
_fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
|
|
44
|
+
_fallback_logger.addHandler(_fallback_handler)
|
|
20
45
|
|
|
21
|
-
def generate_short_uuid(len: int = 12):
|
|
22
|
-
full_uuid = uuid.uuid4()
|
|
23
|
-
uuid_bytes = full_uuid.bytes
|
|
24
|
-
encoded = base64.urlsafe_b64encode(uuid_bytes)
|
|
25
|
-
return encoded.rstrip(b"=").decode("ascii")[:len]
|
|
26
46
|
|
|
47
|
+
INVALID_SPAN_ID = 0x0000000000000000
|
|
48
|
+
INVALID_TRACE_ID = 0x00000000000000000000000000000000
|
|
27
49
|
|
|
28
|
-
|
|
50
|
+
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
|
|
51
|
+
# The logical root span may not be visible to this process if a parent context
|
|
52
|
+
# is passed in. The local root span is the first local span in a trace.
|
|
53
|
+
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def trace_id_to_str(trace_id: int) -> str:
|
|
57
|
+
"""Convenience trace ID formatting method
|
|
58
|
+
Args:
|
|
59
|
+
trace_id: Trace ID int
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
The trace ID as 32-byte hexadecimal string
|
|
63
|
+
"""
|
|
64
|
+
return format(trace_id, "032x")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def span_id_to_str(span_id: int) -> str:
|
|
68
|
+
"""Convenience span ID formatting method
|
|
69
|
+
Args:
|
|
70
|
+
span_id: Span ID int
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
The span ID as 16-byte hexadecimal string
|
|
74
|
+
"""
|
|
75
|
+
return format(span_id, "016x")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def generate_span_id() -> str:
|
|
79
|
+
span_id = secrets.randbits(64)
|
|
80
|
+
while span_id == INVALID_SPAN_ID:
|
|
81
|
+
span_id = secrets.randbits(64)
|
|
82
|
+
return span_id_to_str(span_id)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def generate_trace_id() -> str:
|
|
86
|
+
trace_id = secrets.randbits(128)
|
|
87
|
+
while trace_id == INVALID_TRACE_ID:
|
|
88
|
+
trace_id = secrets.randbits(128)
|
|
89
|
+
return trace_id_to_str(trace_id)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
CURRENT_TRACE_CONTEXT = contextvars.ContextVar("trace_context", default=None)
|
|
29
93
|
BACKGROUND_LOGGER = None
|
|
30
94
|
|
|
95
|
+
LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0
|
|
96
|
+
|
|
31
97
|
|
|
32
98
|
class BackgroundLogger:
|
|
33
|
-
def __init__(self, api: Telemetry, capacity: int =
|
|
99
|
+
def __init__(self, api: Telemetry, capacity: int = 100000):
|
|
34
100
|
self.api = api
|
|
35
|
-
self.log_queue = queue.Queue(maxsize=capacity)
|
|
36
|
-
self.worker_thread = threading.Thread(target=self.
|
|
101
|
+
self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
|
|
102
|
+
self.worker_thread = threading.Thread(target=self._worker, daemon=True)
|
|
37
103
|
self.worker_thread.start()
|
|
104
|
+
self._last_queue_full_log_time: float = 0.0
|
|
105
|
+
self._dropped_since_last_notice: int = 0
|
|
38
106
|
|
|
39
107
|
def log_event(self, event):
|
|
40
108
|
try:
|
|
41
109
|
self.log_queue.put_nowait(event)
|
|
42
110
|
except queue.Full:
|
|
43
|
-
|
|
111
|
+
# Aggregate drops and emit at most once per interval via fallback logger
|
|
112
|
+
self._dropped_since_last_notice += 1
|
|
113
|
+
current_time = time.time()
|
|
114
|
+
if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS:
|
|
115
|
+
_fallback_logger.error(
|
|
116
|
+
"Log queue is full; dropped %d events since last notice",
|
|
117
|
+
self._dropped_since_last_notice,
|
|
118
|
+
)
|
|
119
|
+
self._last_queue_full_log_time = current_time
|
|
120
|
+
self._dropped_since_last_notice = 0
|
|
121
|
+
|
|
122
|
+
def _worker(self):
|
|
123
|
+
loop = asyncio.new_event_loop()
|
|
124
|
+
asyncio.set_event_loop(loop)
|
|
125
|
+
loop.run_until_complete(self._process_logs())
|
|
44
126
|
|
|
45
|
-
def _process_logs(self):
|
|
127
|
+
async def _process_logs(self):
|
|
46
128
|
while True:
|
|
47
129
|
try:
|
|
48
130
|
event = self.log_queue.get()
|
|
49
|
-
|
|
50
|
-
asyncio.run(self.api.log_event(event))
|
|
131
|
+
await self.api.log_event(event)
|
|
51
132
|
except Exception:
|
|
52
133
|
import traceback
|
|
53
134
|
|
|
@@ -60,20 +141,33 @@ class BackgroundLogger:
|
|
|
60
141
|
self.log_queue.join()
|
|
61
142
|
|
|
62
143
|
|
|
144
|
+
def enqueue_event(event: Event) -> None:
|
|
145
|
+
"""Enqueue a telemetry event to the background logger if available.
|
|
146
|
+
|
|
147
|
+
This provides a non-blocking path for routers and other hot paths to
|
|
148
|
+
submit telemetry without awaiting the Telemetry API, reducing contention
|
|
149
|
+
with the main event loop.
|
|
150
|
+
"""
|
|
151
|
+
global BACKGROUND_LOGGER
|
|
152
|
+
if BACKGROUND_LOGGER is None:
|
|
153
|
+
raise RuntimeError("Telemetry API not initialized")
|
|
154
|
+
BACKGROUND_LOGGER.log_event(event)
|
|
155
|
+
|
|
156
|
+
|
|
63
157
|
class TraceContext:
|
|
64
|
-
spans:
|
|
158
|
+
spans: list[Span] = []
|
|
65
159
|
|
|
66
160
|
def __init__(self, logger: BackgroundLogger, trace_id: str):
|
|
67
161
|
self.logger = logger
|
|
68
162
|
self.trace_id = trace_id
|
|
69
163
|
|
|
70
|
-
def push_span(self, name: str, attributes:
|
|
164
|
+
def push_span(self, name: str, attributes: dict[str, Any] = None) -> Span:
|
|
71
165
|
current_span = self.get_current_span()
|
|
72
166
|
span = Span(
|
|
73
|
-
span_id=
|
|
167
|
+
span_id=generate_span_id(),
|
|
74
168
|
trace_id=self.trace_id,
|
|
75
169
|
name=name,
|
|
76
|
-
start_time=datetime.now(),
|
|
170
|
+
start_time=datetime.now(UTC),
|
|
77
171
|
parent_span_id=current_span.span_id if current_span else None,
|
|
78
172
|
attributes=attributes,
|
|
79
173
|
)
|
|
@@ -92,6 +186,7 @@ class TraceContext:
|
|
|
92
186
|
)
|
|
93
187
|
|
|
94
188
|
self.spans.append(span)
|
|
189
|
+
return span
|
|
95
190
|
|
|
96
191
|
def pop_span(self, status: SpanStatus = SpanStatus.OK):
|
|
97
192
|
span = self.spans.pop()
|
|
@@ -115,35 +210,45 @@ class TraceContext:
|
|
|
115
210
|
def setup_logger(api: Telemetry, level: int = logging.INFO):
|
|
116
211
|
global BACKGROUND_LOGGER
|
|
117
212
|
|
|
118
|
-
BACKGROUND_LOGGER
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
213
|
+
if BACKGROUND_LOGGER is None:
|
|
214
|
+
BACKGROUND_LOGGER = BackgroundLogger(api)
|
|
215
|
+
root_logger = logging.getLogger()
|
|
216
|
+
root_logger.setLevel(level)
|
|
217
|
+
root_logger.addHandler(TelemetryHandler())
|
|
122
218
|
|
|
123
219
|
|
|
124
|
-
async def start_trace(name: str, attributes:
|
|
220
|
+
async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceContext:
|
|
125
221
|
global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
|
|
126
222
|
|
|
127
223
|
if BACKGROUND_LOGGER is None:
|
|
128
|
-
|
|
224
|
+
logger.debug("No Telemetry implementation set. Skipping trace initialization...")
|
|
129
225
|
return
|
|
130
226
|
|
|
131
|
-
trace_id =
|
|
227
|
+
trace_id = generate_trace_id()
|
|
132
228
|
context = TraceContext(BACKGROUND_LOGGER, trace_id)
|
|
133
|
-
|
|
229
|
+
# Mark this span as the root for the trace for now. The processing of
|
|
230
|
+
# traceparent context if supplied comes later and will result in the
|
|
231
|
+
# ROOT_SPAN_MARKERS being removed. Also mark this is the 'local' root,
|
|
232
|
+
# i.e. the root of the spans originating in this process as this is
|
|
233
|
+
# needed to ensure that we insert this 'local' root span's id into
|
|
234
|
+
# the trace record in sqlite store.
|
|
235
|
+
attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {})
|
|
236
|
+
context.push_span(name, attributes)
|
|
134
237
|
|
|
135
|
-
CURRENT_TRACE_CONTEXT
|
|
238
|
+
CURRENT_TRACE_CONTEXT.set(context)
|
|
239
|
+
return context
|
|
136
240
|
|
|
137
241
|
|
|
138
242
|
async def end_trace(status: SpanStatus = SpanStatus.OK):
|
|
139
243
|
global CURRENT_TRACE_CONTEXT
|
|
140
244
|
|
|
141
|
-
context = CURRENT_TRACE_CONTEXT
|
|
245
|
+
context = CURRENT_TRACE_CONTEXT.get()
|
|
142
246
|
if context is None:
|
|
247
|
+
logger.debug("No trace context to end")
|
|
143
248
|
return
|
|
144
249
|
|
|
145
250
|
context.pop_span(status)
|
|
146
|
-
CURRENT_TRACE_CONTEXT
|
|
251
|
+
CURRENT_TRACE_CONTEXT.set(None)
|
|
147
252
|
|
|
148
253
|
|
|
149
254
|
def severity(levelname: str) -> LogSeverity:
|
|
@@ -152,7 +257,7 @@ def severity(levelname: str) -> LogSeverity:
|
|
|
152
257
|
elif levelname == "INFO":
|
|
153
258
|
return LogSeverity.INFO
|
|
154
259
|
elif levelname == "WARNING":
|
|
155
|
-
return LogSeverity.
|
|
260
|
+
return LogSeverity.WARN
|
|
156
261
|
elif levelname == "ERROR":
|
|
157
262
|
return LogSeverity.ERROR
|
|
158
263
|
elif levelname == "CRITICAL":
|
|
@@ -169,12 +274,8 @@ class TelemetryHandler(logging.Handler):
|
|
|
169
274
|
if record.module in ("asyncio", "selector_events"):
|
|
170
275
|
return
|
|
171
276
|
|
|
172
|
-
global CURRENT_TRACE_CONTEXT
|
|
173
|
-
|
|
174
|
-
if BACKGROUND_LOGGER is None:
|
|
175
|
-
raise RuntimeError("Telemetry API not initialized")
|
|
176
|
-
|
|
177
|
-
context = CURRENT_TRACE_CONTEXT
|
|
277
|
+
global CURRENT_TRACE_CONTEXT
|
|
278
|
+
context = CURRENT_TRACE_CONTEXT.get()
|
|
178
279
|
if context is None:
|
|
179
280
|
return
|
|
180
281
|
|
|
@@ -182,11 +283,11 @@ class TelemetryHandler(logging.Handler):
|
|
|
182
283
|
if span is None:
|
|
183
284
|
return
|
|
184
285
|
|
|
185
|
-
|
|
286
|
+
enqueue_event(
|
|
186
287
|
UnstructuredLogEvent(
|
|
187
288
|
trace_id=span.trace_id,
|
|
188
289
|
span_id=span.span_id,
|
|
189
|
-
timestamp=datetime.now(),
|
|
290
|
+
timestamp=datetime.now(UTC),
|
|
190
291
|
message=self.format(record),
|
|
191
292
|
severity=severity(record.levelname),
|
|
192
293
|
)
|
|
@@ -197,28 +298,54 @@ class TelemetryHandler(logging.Handler):
|
|
|
197
298
|
|
|
198
299
|
|
|
199
300
|
class SpanContextManager:
|
|
200
|
-
def __init__(self, name: str, attributes:
|
|
301
|
+
def __init__(self, name: str, attributes: dict[str, Any] = None):
|
|
201
302
|
self.name = name
|
|
202
303
|
self.attributes = attributes
|
|
304
|
+
self.span = None
|
|
203
305
|
|
|
204
306
|
def __enter__(self):
|
|
205
307
|
global CURRENT_TRACE_CONTEXT
|
|
206
|
-
context = CURRENT_TRACE_CONTEXT
|
|
207
|
-
if context:
|
|
208
|
-
|
|
308
|
+
context = CURRENT_TRACE_CONTEXT.get()
|
|
309
|
+
if not context:
|
|
310
|
+
logger.debug("No trace context to push span")
|
|
311
|
+
return self
|
|
312
|
+
|
|
313
|
+
self.span = context.push_span(self.name, self.attributes)
|
|
209
314
|
return self
|
|
210
315
|
|
|
211
316
|
def __exit__(self, exc_type, exc_value, traceback):
|
|
212
317
|
global CURRENT_TRACE_CONTEXT
|
|
213
|
-
context = CURRENT_TRACE_CONTEXT
|
|
214
|
-
if context:
|
|
215
|
-
|
|
318
|
+
context = CURRENT_TRACE_CONTEXT.get()
|
|
319
|
+
if not context:
|
|
320
|
+
logger.debug("No trace context to pop span")
|
|
321
|
+
return
|
|
322
|
+
|
|
323
|
+
context.pop_span()
|
|
324
|
+
|
|
325
|
+
def set_attribute(self, key: str, value: Any):
|
|
326
|
+
if self.span:
|
|
327
|
+
if self.span.attributes is None:
|
|
328
|
+
self.span.attributes = {}
|
|
329
|
+
self.span.attributes[key] = serialize_value(value)
|
|
216
330
|
|
|
217
331
|
async def __aenter__(self):
|
|
218
|
-
|
|
332
|
+
global CURRENT_TRACE_CONTEXT
|
|
333
|
+
context = CURRENT_TRACE_CONTEXT.get()
|
|
334
|
+
if not context:
|
|
335
|
+
logger.debug("No trace context to push span")
|
|
336
|
+
return self
|
|
337
|
+
|
|
338
|
+
self.span = context.push_span(self.name, self.attributes)
|
|
339
|
+
return self
|
|
219
340
|
|
|
220
341
|
async def __aexit__(self, exc_type, exc_value, traceback):
|
|
221
|
-
|
|
342
|
+
global CURRENT_TRACE_CONTEXT
|
|
343
|
+
context = CURRENT_TRACE_CONTEXT.get()
|
|
344
|
+
if not context:
|
|
345
|
+
logger.debug("No trace context to pop span")
|
|
346
|
+
return
|
|
347
|
+
|
|
348
|
+
context.pop_span()
|
|
222
349
|
|
|
223
350
|
def __call__(self, func: Callable):
|
|
224
351
|
@wraps(func)
|
|
@@ -241,5 +368,17 @@ class SpanContextManager:
|
|
|
241
368
|
return wrapper
|
|
242
369
|
|
|
243
370
|
|
|
244
|
-
def span(name: str, attributes:
|
|
371
|
+
def span(name: str, attributes: dict[str, Any] = None):
|
|
245
372
|
return SpanContextManager(name, attributes)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def get_current_span() -> Span | None:
|
|
376
|
+
global CURRENT_TRACE_CONTEXT
|
|
377
|
+
if CURRENT_TRACE_CONTEXT is None:
|
|
378
|
+
logger.debug("No trace context to get current span")
|
|
379
|
+
return None
|
|
380
|
+
|
|
381
|
+
context = CURRENT_TRACE_CONTEXT.get()
|
|
382
|
+
if context:
|
|
383
|
+
return context.get_current_span()
|
|
384
|
+
return None
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from collections.abc import AsyncGenerator
|
|
8
|
+
from contextlib import asynccontextmanager
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Any, cast
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
from mcp import ClientSession, McpError
|
|
14
|
+
from mcp import types as mcp_types
|
|
15
|
+
from mcp.client.sse import sse_client
|
|
16
|
+
from mcp.client.streamable_http import streamablehttp_client
|
|
17
|
+
|
|
18
|
+
from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem
|
|
19
|
+
from llama_stack.apis.tools import (
|
|
20
|
+
ListToolDefsResponse,
|
|
21
|
+
ToolDef,
|
|
22
|
+
ToolInvocationResult,
|
|
23
|
+
)
|
|
24
|
+
from llama_stack.core.datatypes import AuthenticationRequiredError
|
|
25
|
+
from llama_stack.log import get_logger
|
|
26
|
+
from llama_stack.providers.utils.tools.ttl_dict import TTLDict
|
|
27
|
+
|
|
28
|
+
logger = get_logger(__name__, category="tools")
|
|
29
|
+
|
|
30
|
+
protocol_cache = TTLDict(ttl_seconds=3600)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class MCPProtol(Enum):
|
|
34
|
+
UNKNOWN = 0
|
|
35
|
+
STREAMABLE_HTTP = 1
|
|
36
|
+
SSE = 2
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@asynccontextmanager
async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]:
    """Yield an initialized MCP ``ClientSession`` for *endpoint*.

    Tries the streamable-HTTP transport first and falls back to SSE — or the
    reverse order, if SSE is the cached known-good protocol for this endpoint.
    Whichever protocol initializes successfully is remembered in
    ``protocol_cache`` so the next call tries it first.

    Raises:
        AuthenticationRequiredError: the server answered HTTP 401.
        ConnectionError: all transports failed with connection/network errors.
        TimeoutError: all transports timed out.
        McpError / httpx.HTTPStatusError: re-raised when the last transport fails.
    """
    # we use a ttl'd dict to cache the happy path protocol for each endpoint
    # but, we always fall back to trying the other protocol if we cannot initialize the session
    connection_strategies = [MCPProtol.STREAMABLE_HTTP, MCPProtol.SSE]
    mcp_protocol = protocol_cache.get(endpoint, default=MCPProtol.UNKNOWN)
    if mcp_protocol == MCPProtol.SSE:
        connection_strategies = [MCPProtol.SSE, MCPProtol.STREAMABLE_HTTP]

    for i, strategy in enumerate(connection_strategies):
        try:
            client = streamablehttp_client
            if strategy == MCPProtol.SSE:
                client = sse_client
            async with client(endpoint, headers=headers) as client_streams:
                async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
                    await session.initialize()
                    # Remember the protocol that worked so the next call tries it first.
                    protocol_cache[endpoint] = strategy
                    # NOTE(review): because the `yield` is inside this try, httpx/MCP
                    # errors raised while the caller *uses* the session are also routed
                    # through the handlers below (and may trigger a fallback retry) —
                    # confirm that behavior is intended.
                    yield session
                    return
        except* httpx.HTTPStatusError as eg:
            for exc in eg.exceptions:
                # mypy does not currently narrow the type of `eg.exceptions` based on the `except*` filter,
                # so we explicitly cast each item to httpx.HTTPStatusError. This is safe because
                # `except* httpx.HTTPStatusError` guarantees all exceptions in `eg.exceptions` are of that type.
                err = cast(httpx.HTTPStatusError, exc)
                if err.response.status_code == 401:
                    raise AuthenticationRequiredError(exc) from exc
            # Non-401 HTTP error: silently retry with the next transport, or
            # re-raise the group if this was the last one.
            if i == len(connection_strategies) - 1:
                raise
        except* httpx.ConnectError as eg:
            # Connection refused, server down, network unreachable
            if i == len(connection_strategies) - 1:
                error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused"
                logger.error(f"MCP connection error: {error_msg}")
                raise ConnectionError(error_msg) from eg
            else:
                logger.warning(
                    f"failed to connect to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
                )
        except* httpx.TimeoutException as eg:
            # Request timeout, server too slow
            if i == len(connection_strategies) - 1:
                error_msg = f"MCP server at {endpoint} timed out"
                logger.error(f"MCP timeout error: {error_msg}")
                raise TimeoutError(error_msg) from eg
            else:
                logger.warning(
                    f"MCP server at {endpoint} timed out via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
                )
        except* httpx.RequestError as eg:
            # DNS resolution failures, network errors, invalid URLs
            if i == len(connection_strategies) - 1:
                # Get the first exception's message for the error string
                exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error"
                error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}"
                logger.error(f"MCP network error: {error_msg}")
                raise ConnectionError(error_msg) from eg
            else:
                logger.warning(
                    f"network error connecting to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
                )
        except* McpError:
            # MCP protocol-level failure during initialization: try the other
            # transport if one remains, otherwise propagate.
            if i < len(connection_strategies) - 1:
                logger.warning(
                    f"failed to connect via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
                )
            else:
                raise
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse:
    """Fetch the tool definitions exposed by the MCP server at *endpoint*.

    Opens a session via ``client_wrapper``, lists the server's tools, and
    converts each entry into a ``ToolDef`` tagged with the originating
    endpoint in its metadata.
    """
    async with client_wrapper(endpoint, headers) as session:
        listing = await session.list_tools()
        # One ToolDef per advertised tool; outputSchema is optional on older
        # MCP tool records, hence the getattr with a None fallback.
        tool_defs = [
            ToolDef(
                name=entry.name,
                description=entry.description,
                input_schema=entry.inputSchema,
                output_schema=getattr(entry, "outputSchema", None),
                metadata={
                    "endpoint": endpoint,
                },
            )
            for entry in listing.tools
        ]
    return ListToolDefsResponse(data=tool_defs)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
async def invoke_mcp_tool(
|
|
130
|
+
endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any]
|
|
131
|
+
) -> ToolInvocationResult:
|
|
132
|
+
async with client_wrapper(endpoint, headers) as session:
|
|
133
|
+
result = await session.call_tool(tool_name, kwargs)
|
|
134
|
+
|
|
135
|
+
content: list[InterleavedContentItem] = []
|
|
136
|
+
for item in result.content:
|
|
137
|
+
if isinstance(item, mcp_types.TextContent):
|
|
138
|
+
content.append(TextContentItem(text=item.text))
|
|
139
|
+
elif isinstance(item, mcp_types.ImageContent):
|
|
140
|
+
content.append(ImageContentItem(image=item.data))
|
|
141
|
+
elif isinstance(item, mcp_types.EmbeddedResource):
|
|
142
|
+
logger.warning(f"EmbeddedResource is not supported: {item}")
|
|
143
|
+
else:
|
|
144
|
+
raise ValueError(f"Unknown content type: {type(item)}")
|
|
145
|
+
return ToolInvocationResult(
|
|
146
|
+
content=content,
|
|
147
|
+
error_code=1 if result.isError else 0,
|
|
148
|
+
)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
from threading import RLock
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TTLDict(dict):
    """
    A dict whose entries expire ``ttl_seconds`` after they are (re)written.

    Expiry deadlines are tracked per key against ``time.monotonic()``; expired
    entries are purged lazily on access. All mutating/reading overrides are
    guarded by an RLock, so individual operations are thread-safe.

    NOTE: only ``__setitem__`` stamps an expiry — keys inserted through plain
    dict paths (``dict.update``, ``setdefault``, …) never expire.
    """

    def __init__(self, ttl_seconds: float, *args, **kwargs):
        """Create the dict; *args*/**kwargs* seed initial items as ``dict`` does.

        Args:
            ttl_seconds: lifetime of each entry, measured from its last write.
        """
        super().__init__(*args, **kwargs)
        self.ttl_seconds = ttl_seconds
        self._expires: dict[Any, float] = {}  # key -> monotonic deadline
        self._lock = RLock()

        # Stamp an expiry deadline on any items supplied at construction time
        # (re-assigning existing keys is safe during items() iteration).
        if args or kwargs:
            for k, v in self.items():
                self.__setitem__(k, v)

    def __delitem__(self, key):
        with self._lock:
            # pop() rather than `del`: keys inserted via plain dict methods
            # (e.g. dict.update) never went through __setitem__ and have no
            # expiry entry, but deleting them must still succeed.
            self._expires.pop(key, None)
            super().__delitem__(key)

    def __setitem__(self, key, value):
        with self._lock:
            # Every write (including overwrite) restarts the key's TTL clock.
            self._expires[key] = time.monotonic() + self.ttl_seconds
            super().__setitem__(key, value)

    def _is_expired(self, key):
        # Keys without a recorded deadline never expire (see class note).
        if key not in self._expires:
            return False
        return time.monotonic() > self._expires[key]

    def __getitem__(self, key):
        with self._lock:
            if self._is_expired(key):
                # Purge lazily, then surface the expiry as a missing key.
                del self._expires[key]
                super().__delitem__(key)
                raise KeyError(f"{key} has expired and was removed")

            return super().__getitem__(key)

    def get(self, key, default=None):
        """Like ``dict.get`` but treats an expired key as missing."""
        try:
            return self[key]
        except KeyError:
            return default

    def __contains__(self, key):
        # Route through __getitem__ so expired keys read as absent.
        try:
            _ = self[key]
            return True
        except KeyError:
            return False

    def __repr__(self):
        with self._lock:
            # Snapshot the keys first: we delete expired entries while
            # scanning, and mutating a dict during direct iteration raises
            # RuntimeError ("dictionary changed size during iteration").
            for key in list(self.keys()):
                if self._is_expired(key):
                    del self._expires[key]
                    super().__delitem__(key)
            return f"TTLDict({self.ttl_seconds}, {super().__repr__()})"
|