llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/core/ui/page/playground/tools.py
@@ -0,0 +1,352 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import enum
+import json
+import uuid
+
+import streamlit as st
+from llama_stack_client import Agent
+from llama_stack_client.lib.agents.react.agent import ReActAgent
+from llama_stack_client.lib.agents.react.tool_parser import ReActOutput
+
+from llama_stack.core.ui.modules.api import llama_stack_api
+
+
+class AgentType(enum.Enum):
+    REGULAR = "Regular"
+    REACT = "ReAct"
+
+
+def tool_chat_page():
+    st.title("🛠 Tools")
+
+    client = llama_stack_api.client
+    models = client.models.list()
+    model_list = [model.identifier for model in models if model.api_model_type == "llm"]
+
+    tool_groups = client.toolgroups.list()
+    tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
+    mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
+    builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
+    selected_vector_stores = []
+
+    def reset_agent():
+        st.session_state.clear()
+        st.cache_resource.clear()
+
+    with st.sidebar:
+        st.title("Configuration")
+        st.subheader("Model")
+        model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed")
+
+        st.subheader("Available ToolGroups")
+
+        toolgroup_selection = st.pills(
+            label="Built-in tools",
+            options=builtin_tools_list,
+            selection_mode="multi",
+            on_change=reset_agent,
+            format_func=lambda tool: "".join(tool.split("::")[1:]),
+            help="List of built-in tools from your llama stack server.",
+        )
+
+        if "builtin::rag" in toolgroup_selection:
+            vector_stores = llama_stack_api.client.vector_stores.list() or []
+            if not vector_stores:
+                st.info("No vector databases available for selection.")
+            vector_stores = [vector_store.identifier for vector_store in vector_stores]
+            selected_vector_stores = st.multiselect(
+                label="Select Document Collections to use in RAG queries",
+                options=vector_stores,
+                on_change=reset_agent,
+            )
+
+        mcp_selection = st.pills(
+            label="MCP Servers",
+            options=mcp_tools_list,
+            selection_mode="multi",
+            on_change=reset_agent,
+            format_func=lambda tool: "".join(tool.split("::")[1:]),
+            help="List of MCP servers registered to your llama stack server.",
+        )
+
+        toolgroup_selection.extend(mcp_selection)
+
+        grouped_tools = {}
+        total_tools = 0
+
+        for toolgroup_id in toolgroup_selection:
+            tools = client.tools.list(toolgroup_id=toolgroup_id)
+            grouped_tools[toolgroup_id] = [tool.name for tool in tools]
+            total_tools += len(tools)
+
+        st.markdown(f"Active Tools: 🛠 {total_tools}")
+
+        for group_id, tools in grouped_tools.items():
+            with st.expander(f"🔧 Tools from `{group_id}`"):
+                for idx, tool in enumerate(tools, start=1):
+                    st.markdown(f"{idx}. `{tool.split(':')[-1]}`")
+
+        st.subheader("Agent Configurations")
+        st.subheader("Agent Type")
+        agent_type = st.radio(
+            label="Select Agent Type",
+            options=["Regular", "ReAct"],
+            on_change=reset_agent,
+        )
+
+        if agent_type == "ReAct":
+            agent_type = AgentType.REACT
+        else:
+            agent_type = AgentType.REGULAR
+
+        max_tokens = st.slider(
+            "Max Tokens",
+            min_value=0,
+            max_value=4096,
+            value=512,
+            step=64,
+            help="The maximum number of tokens to generate",
+            on_change=reset_agent,
+        )
+
+    for i, tool_name in enumerate(toolgroup_selection):
+        if tool_name == "builtin::rag":
+            tool_dict = dict(
+                name="builtin::rag",
+                args={
+                    "vector_store_ids": list(selected_vector_stores),
+                },
+            )
+            toolgroup_selection[i] = tool_dict
+
+    @st.cache_resource
+    def create_agent():
+        if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT:
+            return ReActAgent(
+                client=client,
+                model=model,
+                tools=toolgroup_selection,
+                response_format={
+                    "type": "json_schema",
+                    "json_schema": ReActOutput.model_json_schema(),
+                },
+                sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
+            )
+        else:
+            return Agent(
+                client,
+                model=model,
+                instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
+                tools=toolgroup_selection,
+                sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
+            )
+
+    st.session_state.agent_type = agent_type
+
+    agent = create_agent()
+
+    if "agent_session_id" not in st.session_state:
+        st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}")
+
+    session_id = st.session_state["agent_session_id"]
+
+    if "messages" not in st.session_state:
+        st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
+
+    for msg in st.session_state.messages:
+        with st.chat_message(msg["role"]):
+            st.markdown(msg["content"])
+
+    if prompt := st.chat_input(placeholder=""):
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        st.session_state.messages.append({"role": "user", "content": prompt})
+
+        turn_response = agent.create_turn(
+            session_id=session_id,
+            messages=[{"role": "user", "content": prompt}],
+            stream=True,
+        )
+
+        def response_generator(turn_response):
+            if st.session_state.get("agent_type") == AgentType.REACT:
+                return _handle_react_response(turn_response)
+            else:
+                return _handle_regular_response(turn_response)
+
+        def _handle_react_response(turn_response):
+            current_step_content = ""
+            final_answer = None
+            tool_results = []
+
+            for response in turn_response:
+                if not hasattr(response.event, "payload"):
+                    yield (
+                        "\n\n🚨 :red[_Llama Stack server Error:_]\n"
+                        "The response received is missing an expected `payload` attribute.\n"
+                        "This could indicate a malformed response or an internal issue within the server.\n\n"
+                        f"Error details: {response}"
+                    )
+                    return
+
+                payload = response.event.payload
+
+                if payload.event_type == "step_progress" and hasattr(payload.delta, "text"):
+                    current_step_content += payload.delta.text
+                    continue
+
+                if payload.event_type == "step_complete":
+                    step_details = payload.step_details
+
+                    if step_details.step_type == "inference":
+                        yield from _process_inference_step(current_step_content, tool_results, final_answer)
+                        current_step_content = ""
+                    elif step_details.step_type == "tool_execution":
+                        tool_results = _process_tool_execution(step_details, tool_results)
+                        current_step_content = ""
+                    else:
+                        current_step_content = ""
+
+            if not final_answer and tool_results:
+                yield from _format_tool_results_summary(tool_results)
+
+        def _process_inference_step(current_step_content, tool_results, final_answer):
+            try:
+                react_output_data = json.loads(current_step_content)
+                thought = react_output_data.get("thought")
+                action = react_output_data.get("action")
+                answer = react_output_data.get("answer")
+
+                if answer and answer != "null" and answer is not None:
+                    final_answer = answer
+
+                if thought:
+                    with st.expander("🤔 Thinking...", expanded=False):
+                        st.markdown(f":grey[__{thought}__]")
+
+                if action and isinstance(action, dict):
+                    tool_name = action.get("tool_name")
+                    tool_params = action.get("tool_params")
+                    with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False):
+                        st.json(tool_params)
+
+                if answer and answer != "null" and answer is not None:
+                    yield f"\n\n✅ **Final Answer:**\n{answer}"
+
+            except json.JSONDecodeError:
+                yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```"
+            except Exception as e:
+                yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```"
+
+            return final_answer
+
+        def _process_tool_execution(step_details, tool_results):
+            try:
+                if hasattr(step_details, "tool_responses") and step_details.tool_responses:
+                    for tool_response in step_details.tool_responses:
+                        tool_name = tool_response.tool_name
+                        content = tool_response.content
+                        tool_results.append((tool_name, content))
+                        with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False):
+                            try:
+                                parsed_content = json.loads(content)
+                                st.json(parsed_content)
+                            except json.JSONDecodeError:
+                                st.code(content, language=None)
+                else:
+                    with st.expander("⚙️ Observation", expanded=False):
+                        st.markdown(":grey[_Tool execution step completed, but no response data found._]")
+            except Exception as e:
+                with st.expander("⚙️ Error in Tool Execution", expanded=False):
+                    st.markdown(f":red[_Error processing tool execution: {str(e)}_]")
+
+            return tool_results
+
+        def _format_tool_results_summary(tool_results):
+            yield "\n\n**Here's what I found:**\n"
+            for tool_name, content in tool_results:
+                try:
+                    parsed_content = json.loads(content)
+
+                    if tool_name == "web_search" and "top_k" in parsed_content:
+                        yield from _format_web_search_results(parsed_content)
+                    elif "results" in parsed_content and isinstance(parsed_content["results"], list):
+                        yield from _format_results_list(parsed_content["results"])
+                    elif isinstance(parsed_content, dict) and len(parsed_content) > 0:
+                        yield from _format_dict_results(parsed_content)
+                    elif isinstance(parsed_content, list) and len(parsed_content) > 0:
+                        yield from _format_list_results(parsed_content)
+                except json.JSONDecodeError:
+                    yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n"
+                except (TypeError, AttributeError, KeyError, IndexError) as e:
+                    print(f"Error processing {tool_name} result: {type(e).__name__}: {e}")
+
+        def _format_web_search_results(parsed_content):
+            for i, result in enumerate(parsed_content["top_k"], 1):
+                if i <= 3:
+                    title = result.get("title", "Untitled")
+                    url = result.get("url", "")
+                    content_text = result.get("content", "").strip()
+                    yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n"
+
+        def _format_results_list(results):
+            for i, result in enumerate(results, 1):
+                if i <= 3:
+                    if isinstance(result, dict):
+                        name = result.get("name", result.get("title", "Result " + str(i)))
|
|
303
|
+
description = result.get("description", result.get("content", result.get("summary", "")))
|
|
304
|
+
yield f"\n- **{name}**\n {description}\n"
|
|
305
|
+
else:
|
|
306
|
+
yield f"\n- {result}\n"
|
|
307
|
+
|
|
308
|
+
def _format_dict_results(parsed_content):
|
|
309
|
+
yield "\n```\n"
|
|
310
|
+
for key, value in list(parsed_content.items())[:5]:
|
|
311
|
+
if isinstance(value, str) and len(value) < 100:
|
|
312
|
+
yield f"{key}: {value}\n"
|
|
313
|
+
else:
|
|
314
|
+
yield f"{key}: [Complex data]\n"
|
|
315
|
+
yield "```\n"
|
|
316
|
+
|
|
317
|
+
def _format_list_results(parsed_content):
|
|
318
|
+
yield "\n"
|
|
319
|
+
for _, item in enumerate(parsed_content[:3], 1):
|
|
320
|
+
if isinstance(item, str):
|
|
321
|
+
yield f"- {item}\n"
|
|
322
|
+
elif isinstance(item, dict) and "text" in item:
|
|
323
|
+
yield f"- {item['text']}\n"
|
|
324
|
+
elif isinstance(item, dict) and len(item) > 0:
|
|
325
|
+
first_value = next(iter(item.values()))
|
|
326
|
+
if isinstance(first_value, str) and len(first_value) < 100:
|
|
327
|
+
yield f"- {first_value}\n"
|
|
328
|
+
|
|
329
|
+
def _handle_regular_response(turn_response):
|
|
330
|
+
for response in turn_response:
|
|
331
|
+
if hasattr(response.event, "payload"):
|
|
332
|
+
print(response.event.payload)
|
|
333
|
+
if response.event.payload.event_type == "step_progress":
|
|
334
|
+
if hasattr(response.event.payload.delta, "text"):
|
|
335
|
+
yield response.event.payload.delta.text
|
|
336
|
+
if response.event.payload.event_type == "step_complete":
|
|
337
|
+
if response.event.payload.step_details.step_type == "tool_execution":
|
|
338
|
+
if response.event.payload.step_details.tool_calls:
|
|
339
|
+
tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name)
|
|
340
|
+
yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n'
|
|
341
|
+
else:
|
|
342
|
+
yield "No tool_calls present in step_details"
|
|
343
|
+
else:
|
|
344
|
+
yield f"Error occurred in the Llama Stack Cluster: {response}"
|
|
345
|
+
|
|
346
|
+
with st.chat_message("assistant"):
|
|
347
|
+
response_content = st.write_stream(response_generator(turn_response))
|
|
348
|
+
|
|
349
|
+
st.session_state.messages.append({"role": "assistant", "content": response_content})
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
tool_chat_page()
|
|
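The block above is the new ReAct-aware streaming handler for the playground tools page. One detail worth noting: `_process_inference_step` ends with `return final_answer`, but the caller invokes it via a bare `yield from` statement, so that return value is discarded and the `final_answer` local in `_handle_react_response` stays `None`; the closing summary therefore fires whenever `tool_results` is non-empty. For readers who want to exercise the same event stream outside Streamlit, a minimal console sketch follows; the client wiring (`base_url`, model id) is an assumption, while the session/turn calls and event shapes mirror the code above.

```python
# Minimal console sketch of the streaming pattern used above (not part of the diff).
# Assumptions: a Llama Stack server on localhost:8321, llama-stack-client installed,
# and a placeholder model id -- substitute one registered on your server.
import uuid

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent

client = LlamaStackClient(base_url="http://localhost:8321")
agent = Agent(client, model="<your-model-id>", instructions="You are a helpful assistant.")

session_id = agent.create_session(session_name=f"console_demo_{uuid.uuid4()}")
turn_response = agent.create_turn(
    session_id=session_id,
    messages=[{"role": "user", "content": "What is Llama Stack?"}],
    stream=True,
)

for response in turn_response:
    if not hasattr(response.event, "payload"):
        print(f"server error: {response}")
        break
    payload = response.event.payload
    # step_progress events carry incremental text deltas, exactly as consumed
    # by _handle_regular_response above.
    if payload.event_type == "step_progress" and hasattr(payload.delta, "text"):
        print(payload.delta.text, end="", flush=True)
print()
```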
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+
+def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
+    """Redact sensitive information from config before printing."""
+    sensitive_patterns = ["api_key", "api_token", "password", "secret"]
+
+    def _redact_value(v: Any) -> Any:
+        if isinstance(v, dict):
+            return _redact_dict(v)
+        elif isinstance(v, list):
+            return [_redact_value(i) for i in v]
+        return v
+
+    def _redact_dict(d: dict[str, Any]) -> dict[str, Any]:
+        result = {}
+        for k, v in d.items():
+            if any(pattern in k.lower() for pattern in sensitive_patterns):
+                result[k] = "********"
+            else:
+                result[k] = _redact_value(v)
+        return result
+
+    return _redact_dict(data)
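`redact_sensitive_fields` matches key substrings case-insensitively and recurses through nested dicts and lists, so secrets buried inside provider arrays are covered too. A quick sanity check (the config dict below is invented for illustration, and the function is assumed importable from wherever this module lands in the package):

```python
# Invented example input; behavior follows directly from the function above.
config = {
    "providers": {
        "inference": [
            {"provider_id": "remote::example", "config": {"api_key": "sk-very-secret"}},
        ],
    },
    "telemetry": {"password": "hunter2", "endpoint": "http://localhost:4318"},
}

redacted = redact_sensitive_fields(config)
assert redacted["providers"]["inference"][0]["config"]["api_key"] == "********"
assert redacted["telemetry"]["password"] == "********"
assert redacted["telemetry"]["endpoint"] == "http://localhost:4318"  # untouched
```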
@@ -7,15 +7,12 @@
 import os
 from pathlib import Path
 
-
-LLAMA_STACK_CONFIG_DIR = Path(
-    os.getenv("LLAMA_STACK_CONFIG_DIR", os.path.expanduser("~/.llama/"))
-)
+LLAMA_STACK_CONFIG_DIR = Path(os.getenv("LLAMA_STACK_CONFIG_DIR", os.path.expanduser("~/.llama/")))
 
 DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions"
 
 DEFAULT_CHECKPOINT_DIR = LLAMA_STACK_CONFIG_DIR / "checkpoints"
 
-BUILDS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "builds"
-
 RUNTIME_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "runtime"
+
+EXTERNAL_PROVIDERS_DIR = LLAMA_STACK_CONFIG_DIR / "providers.d"
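All of these locations hang off `LLAMA_STACK_CONFIG_DIR`, which is resolved once from the environment at module import time, so relocating the whole tree is a matter of setting one variable before the module is first imported. A small sketch (the target directory is hypothetical; the module path matches the import used elsewhere in this diff):

```python
# Sketch only: the override must happen before llama_stack.core.utils.config_dirs
# is first imported, because the Path objects are built at import time.
import os

os.environ["LLAMA_STACK_CONFIG_DIR"] = "/srv/llama"  # hypothetical location

from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR

print(DISTRIBS_BASE_DIR)       # /srv/llama/distributions
print(EXTERNAL_PROVIDERS_DIR)  # /srv/llama/providers.d
```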
@@ -0,0 +1,125 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from enum import StrEnum
+from pathlib import Path
+
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.log import get_logger
+
+logger = get_logger(name=__name__, category="core")
+
+
+DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions"
+
+
+class Mode(StrEnum):
+    RUN = "run"
+    BUILD = "build"
+
+
+def resolve_config_or_distro(
+    config_or_distro: str,
+    mode: Mode = Mode.RUN,
+) -> Path:
+    """
+    Resolve a config/distro argument to a concrete config file path.
+
+    Args:
+        config_or_distro: User input (file path, distribution name, or built distribution)
+        mode: Mode resolving for ("run", "build", "server")
+
+    Returns:
+        Path to the resolved config file
+
+    Raises:
+        ValueError: If resolution fails
+    """
+
+    # Strategy 1: Try as file path first
+    config_path = Path(config_or_distro)
+    if config_path.exists() and config_path.is_file():
+        logger.debug(f"Using file path: {config_path}")
+        return config_path.resolve()
+
+    # Strategy 2: Try as distribution name (if no .yaml extension)
+    if not config_or_distro.endswith(".yaml"):
+        distro_config = _get_distro_config_path(config_or_distro, mode)
+        if distro_config.exists():
+            logger.debug(f"Using distribution: {distro_config}")
+            return distro_config
+
+    # Strategy 3: Try as built distribution name
+    distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    if distrib_config.exists():
+        logger.debug(f"Using built distribution: {distrib_config}")
+        return distrib_config
+
+    distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    if distrib_config.exists():
+        logger.debug(f"Using built distribution: {distrib_config}")
+        return distrib_config
+
+    # Strategy 4: Failed - provide helpful error
+    raise ValueError(_format_resolution_error(config_or_distro, mode))
+
+
+def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
+    """Get the config file path for a distro."""
+    return DISTRO_DIR / distro_name / f"{mode}.yaml"
+
+
+def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
+    """Format a helpful error message for resolution failures."""
+    from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+
+    distro_path = _get_distro_config_path(config_or_distro, mode)
+    distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+
+    available_distros = _get_available_distros()
+    distros_str = ", ".join(available_distros) if available_distros else "none found"
+
+    return f"""Could not resolve config or distribution '{config_or_distro}'.
+
+Tried the following locations:
+1. As file path: {Path(config_or_distro).resolve()}
+2. As distribution: {distro_path}
+3. As built distribution: ({distrib_path}, {distrib_path2})
+
+Available distributions: {distros_str}
+
+Did you mean one of these distributions?
+{_format_distro_suggestions(available_distros, config_or_distro)}
+"""
+
+
+def _get_available_distros() -> list[str]:
+    """Get list of available distro names."""
+    if not DISTRO_DIR.exists() and not DISTRIBS_BASE_DIR.exists():
+        return []
+
+    return list(
+        set(
+            [d.name for d in DISTRO_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
+            + [d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
+        )
+    )
+
+
+def _format_distro_suggestions(distros: list[str], user_input: str) -> str:
+    """Format distro suggestions for error messages, showing closest matches first."""
+    if not distros:
+        return " (no distros found)"
+
+    import difflib
+
+    # Get up to 3 closest matches with similarity threshold of 0.3 (lower = more permissive)
+    close_matches = difflib.get_close_matches(user_input, distros, n=3, cutoff=0.3)
+    display_distros = close_matches if close_matches else distros[:3]
+
+    suggestions = [f" - {d}" for d in display_distros]
+    return "\n".join(suggestions)
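Callers get a single entry point that tries, in order: a literal file path, a bundled distribution name, and the two built-distribution layouts, raising a `ValueError` whose message already embeds every location tried plus `difflib`-based close-match suggestions. A usage sketch (the module path for this resolver is an assumption, and the distribution name is a placeholder):

```python
# Sketch; adjust the import to wherever this resolver module lives in the package.
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro

try:
    # "starter" is a placeholder distribution name; Mode.RUN selects run.yaml
    # because Mode is a StrEnum, so f"{mode}.yaml" renders as "run.yaml".
    config_path = resolve_config_or_distro("starter", mode=Mode.RUN)
    print(f"launching with {config_path}")
except ValueError as err:
    # The formatted message lists all tried locations and suggestions.
    print(err)
```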
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncGenerator
+from contextvars import ContextVar
+
+from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT
+
+_MISSING = object()
+
+
+def preserve_contexts_async_generator[T](
+    gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
+) -> AsyncGenerator[T, None]:
+    """
+    Wraps an async generator to preserve context variables across iterations.
+    This is needed because we start a new asyncio event loop for each streaming request,
+    and we need to preserve the context across the event loop boundary.
+    """
+    # Capture initial context values
+    initial_context_values = {context_var.name: context_var.get() for context_var in context_vars}
+
+    async def wrapper() -> AsyncGenerator[T, None]:
+        while True:
+            previous_values: dict[ContextVar, object] = {}
+            tokens: dict[ContextVar, object] = {}
+
+            # Restore ALL context values before any await and capture previous state
+            # This is needed to propagate context across async generator boundaries
+            for context_var in context_vars:
+                try:
+                    previous_values[context_var] = context_var.get()
+                except LookupError:
+                    previous_values[context_var] = _MISSING
+                tokens[context_var] = context_var.set(initial_context_values[context_var.name])
+
+            def _restore_context_var(context_var: ContextVar, *, _tokens=tokens, _prev=previous_values) -> None:
+                token = _tokens.get(context_var)
+                previous_value = _prev.get(context_var, _MISSING)
+                if token is not None:
+                    try:
+                        context_var.reset(token)
+                        return
+                    except (RuntimeError, ValueError):
+                        pass
+
+                if previous_value is _MISSING:
+                    context_var.set(None)
+                else:
+                    context_var.set(previous_value)
+
+            try:
+                item = await gen.__anext__()
+            except StopAsyncIteration:
+                # Restore all context vars before exiting to prevent leaks
+                # Use _restore_context_var for all vars to properly restore to previous values
+                for context_var in context_vars:
+                    _restore_context_var(context_var)
+                break
+            except Exception:
+                # Restore all context vars on exception
+                for context_var in context_vars:
+                    _restore_context_var(context_var)
+                raise
+
+            try:
+                yield item
+                # Update our tracked values with any changes made during this iteration
+                # Only for non-trace context vars - trace context must persist across yields
+                # to allow nested span tracking for telemetry
+                for context_var in context_vars:
+                    if context_var is not CURRENT_TRACE_CONTEXT:
+                        initial_context_values[context_var.name] = context_var.get()
+            finally:
+                # Restore non-trace context vars after each yield to prevent leaks between requests
+                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                for context_var in context_vars:
+                    if context_var is not CURRENT_TRACE_CONTEXT:
+                        _restore_context_var(context_var)

+    return wrapper()
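The wrapper re-installs the captured `ContextVar` values before every `__anext__()` call, which is what lets a streaming response keep its request-scoped context even when the generator is driven from a different task or event loop; `CURRENT_TRACE_CONTEXT` is deliberately exempted from the per-yield restore so telemetry spans can nest across yields. A minimal sketch of the intended usage (the import path is an assumption, and the context variable is invented for the demo):

```python
# Demo only: REQUEST_ID is a made-up ContextVar, and the import path below is
# assumed -- point it at wherever preserve_contexts_async_generator lives.
import asyncio
from contextvars import ContextVar

from llama_stack.core.utils.context import preserve_contexts_async_generator

REQUEST_ID: ContextVar[str] = ContextVar("REQUEST_ID", default="unset")


async def stream_chunks():
    for i in range(3):
        # Without the wrapper, a generator resumed from another task or event
        # loop may observe a stale or missing REQUEST_ID.
        yield f"{REQUEST_ID.get()}:{i}"


async def main():
    REQUEST_ID.set("req-42")  # captured when the wrapper is constructed below
    wrapped = preserve_contexts_async_generator(stream_chunks(), [REQUEST_ID])
    async for chunk in wrapped:
        print(chunk)  # req-42:0, req-42:1, req-42:2


asyncio.run(main())
```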