llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
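Taken together, the renames above describe one large reorganization rather than many independent changes: `llama_stack/distribution/...` becomes `llama_stack/core/...`, the `providers/adapters/...` and `providers/impls/...` trees become `providers/remote/...` and `providers/inline/...`, and the `memory`/`memory_banks` APIs give way to `vector_io`/`vector_stores`. As a minimal sketch of what that means for downstream code, the probe below distinguishes the two layouts at runtime; the helper name is ours, not part of llama-stack:

```python
import importlib.util


def llama_stack_layout() -> str:
    """Best-effort probe for which llama-stack package layout is installed."""
    try:
        # 0.3.x ships llama_stack.core (see the {distribution -> core} renames above).
        if importlib.util.find_spec("llama_stack.core") is not None:
            return "0.3.x layout (llama_stack.core)"
        # 0.0.x ships llama_stack.distribution instead.
        if importlib.util.find_spec("llama_stack.distribution") is not None:
            return "pre-0.3 layout (llama_stack.distribution)"
    except ModuleNotFoundError:
        pass  # llama_stack itself is not importable
    return "llama-stack not found or unrecognized layout"


if __name__ == "__main__":
    print(llama_stack_layout())
```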
The remainder of this section shows the start of the diff for `llama_stack/apis/agents/agents.py`:

```diff
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -4,196 +4,104 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from collections.abc import AsyncIterator
 from datetime import datetime
-from enum import Enum
-from typing import (
-    Any,
-    Dict,
-    List,
-    Literal,
-    Optional,
-    Protocol,
-    runtime_checkable,
-    Union,
-)
-
-from llama_models.schema_utils import json_schema_type, webmethod
+from enum import StrEnum
+from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, ConfigDict, Field
-from typing_extensions import Annotated
-
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_stack.apis.common.deployment_types import *  # noqa: F403
-from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.apis.safety import *  # noqa: F403
-from llama_stack.apis.memory import *  # noqa: F403
-
-
-@json_schema_type
-class Attachment(BaseModel):
-    content: InterleavedTextMedia | URL
-    mime_type: str
-
-
-class AgentTool(Enum):
-    brave_search = "brave_search"
-    wolfram_alpha = "wolfram_alpha"
-    photogen = "photogen"
-    code_interpreter = "code_interpreter"
-
-    function_call = "function_call"
-    memory = "memory"
-
-
-class ToolDefinitionCommon(BaseModel):
-    input_shields: Optional[List[str]] = Field(default_factory=list)
-    output_shields: Optional[List[str]] = Field(default_factory=list)
-
-
-class SearchEngineType(Enum):
-    bing = "bing"
-    brave = "brave"
-
-
-@json_schema_type
-class SearchToolDefinition(ToolDefinitionCommon):
-    # NOTE: brave_search is just a placeholder since model always uses
-    # brave_search as tool call name
-    type: Literal[AgentTool.brave_search.value] = AgentTool.brave_search.value
-    api_key: str
-    engine: SearchEngineType = SearchEngineType.brave
-    remote_execution: Optional[RestAPIExecutionConfig] = None
-
-
-@json_schema_type
-class WolframAlphaToolDefinition(ToolDefinitionCommon):
-    type: Literal[AgentTool.wolfram_alpha.value] = AgentTool.wolfram_alpha.value
-    api_key: str
-    remote_execution: Optional[RestAPIExecutionConfig] = None
 
-
-@json_schema_type
-class PhotogenToolDefinition(ToolDefinitionCommon):
-    type: Literal[AgentTool.photogen.value] = AgentTool.photogen.value
-    remote_execution: Optional[RestAPIExecutionConfig] = None
-
-
-@json_schema_type
-class CodeInterpreterToolDefinition(ToolDefinitionCommon):
-    type: Literal[AgentTool.code_interpreter.value] = AgentTool.code_interpreter.value
-    enable_inline_code_execution: bool = True
-    remote_execution: Optional[RestAPIExecutionConfig] = None
+from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent
+from llama_stack.apis.common.responses import Order, PaginatedResponse
+from llama_stack.apis.inference import (
+    CompletionMessage,
+    ResponseFormat,
+    SamplingParams,
+    ToolCall,
+    ToolChoice,
+    ToolConfig,
+    ToolPromptFormat,
+    ToolResponse,
+    ToolResponseMessage,
+    UserMessage,
+)
+from llama_stack.apis.safety import SafetyViolation
+from llama_stack.apis.tools import ToolDef
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod
+
+from .openai_responses import (
+    ListOpenAIResponseInputItem,
+    ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
+    OpenAIResponseInput,
+    OpenAIResponseInputTool,
+    OpenAIResponseObject,
+    OpenAIResponseObjectStream,
+    OpenAIResponseText,
+)
 
 
 @json_schema_type
-class FunctionCallToolDefinition(ToolDefinitionCommon):
-    type: Literal[AgentTool.function_call.value] = AgentTool.function_call.value
-    function_name: str
-    description: str
-    parameters: Dict[str, ToolParamDefinition]
-    remote_execution: Optional[RestAPIExecutionConfig] = None
-
-
-class _MemoryBankConfigCommon(BaseModel):
-    bank_id: str
-
-
-class AgentVectorMemoryBankConfig(_MemoryBankConfigCommon):
-    type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
+class ResponseGuardrailSpec(BaseModel):
+    """Specification for a guardrail to apply during response generation.
 
+    :param type: The type/identifier of the guardrail.
+    """
 
-class AgentKeyValueMemoryBankConfig(_MemoryBankConfigCommon):
-    type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value
-    keys: List[str]  # what keys to focus on
+    type: str
+    # TODO: more fields to be added for guardrail configuration
 
 
-class AgentKeywordMemoryBankConfig(_MemoryBankConfigCommon):
-    type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value
+ResponseGuardrail = str | ResponseGuardrailSpec
 
 
-class AgentGraphMemoryBankConfig(_MemoryBankConfigCommon):
-    type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
-    entities: List[str]  # what entities to focus on
-
-
-MemoryBankConfig = Annotated[
-    Union[
-        AgentVectorMemoryBankConfig,
-        AgentKeyValueMemoryBankConfig,
-        AgentKeywordMemoryBankConfig,
-        AgentGraphMemoryBankConfig,
-    ],
-    Field(discriminator="type"),
-]
-
-
-class MemoryQueryGenerator(Enum):
-    default = "default"
-    llm = "llm"
-    custom = "custom"
-
-
-class DefaultMemoryQueryGeneratorConfig(BaseModel):
-    type: Literal[MemoryQueryGenerator.default.value] = (
-        MemoryQueryGenerator.default.value
-    )
-    sep: str = " "
+class Attachment(BaseModel):
+    """An attachment to an agent turn.
 
+    :param content: The content of the attachment.
+    :param mime_type: The MIME type of the attachment.
+    """
 
-class LLMMemoryQueryGeneratorConfig(BaseModel):
-    type: Literal[MemoryQueryGenerator.llm.value] = MemoryQueryGenerator.llm.value
-    model: str
-    template: str
+    content: InterleavedContent | URL
+    mime_type: str
 
 
-class CustomMemoryQueryGeneratorConfig(BaseModel):
-    type: Literal[MemoryQueryGenerator.custom.value] = MemoryQueryGenerator.custom.value
+class Document(BaseModel):
+    """A document to be used by an agent.
 
+    :param content: The content of the document.
+    :param mime_type: The MIME type of the document.
+    """
 
-MemoryQueryGeneratorConfig = Annotated[
-    Union[
-        DefaultMemoryQueryGeneratorConfig,
-        LLMMemoryQueryGeneratorConfig,
-        CustomMemoryQueryGeneratorConfig,
-    ],
-    Field(discriminator="type"),
-]
+    content: InterleavedContent | URL
+    mime_type: str
 
 
-@json_schema_type
-class MemoryToolDefinition(ToolDefinitionCommon):
-    type: Literal[AgentTool.memory.value] = AgentTool.memory.value
-    memory_bank_configs: List[MemoryBankConfig] = Field(default_factory=list)
-    # This config defines how a query is generated using the messages
-    # for memory bank retrieval.
-    query_generator_config: MemoryQueryGeneratorConfig = Field(
-        default=DefaultMemoryQueryGeneratorConfig()
-    )
-    max_tokens_in_context: int = 4096
-    max_chunks: int = 10
-
-
-AgentToolDefinition = Annotated[
-    Union[
-        SearchToolDefinition,
-        WolframAlphaToolDefinition,
-        PhotogenToolDefinition,
-        CodeInterpreterToolDefinition,
-        FunctionCallToolDefinition,
-        MemoryToolDefinition,
-    ],
-    Field(discriminator="type"),
-]
+class StepCommon(BaseModel):
+    """A common step in an agent turn.
 
+    :param turn_id: The ID of the turn.
+    :param step_id: The ID of the step.
+    :param started_at: The time the step started.
+    :param completed_at: The time the step completed.
+    """
 
-class StepCommon(BaseModel):
     turn_id: str
     step_id: str
-    started_at: Optional[datetime] = None
-    completed_at: Optional[datetime] = None
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+
 
+class StepType(StrEnum):
+    """Type of the step in an agent turn.
+
+    :cvar inference: The step is an inference step that calls an LLM.
+    :cvar tool_execution: The step is a tool execution step that executes a tool call.
+    :cvar shield_call: The step is a shield call step that checks for safety violations.
+    :cvar memory_retrieval: The step is a memory retrieval step that retrieves context for vector dbs.
+    """
 
-class StepType(Enum):
     inference = "inference"
     tool_execution = "tool_execution"
    shield_call = "shield_call"
```
@@ -202,274 +110,785 @@ class StepType(Enum):
 
 @json_schema_type
 class InferenceStep(StepCommon):
+    """An inference step in an agent turn.
+
+    :param model_response: The response from the LLM.
+    """
+
     model_config = ConfigDict(protected_namespaces=())
 
-    step_type: Literal[StepType.inference.value] = StepType.inference.value
+    step_type: Literal[StepType.inference] = StepType.inference
     model_response: CompletionMessage
 
 
 @json_schema_type
 class ToolExecutionStep(StepCommon):
-    step_type: Literal[StepType.tool_execution.value] = StepType.tool_execution.value
-    tool_calls: List[ToolCall]
-    tool_responses: List[ToolResponse]
+    """A tool execution step in an agent turn.
+
+    :param tool_calls: The tool calls to execute.
+    :param tool_responses: The tool responses from the tool calls.
+    """
+
+    step_type: Literal[StepType.tool_execution] = StepType.tool_execution
+    tool_calls: list[ToolCall]
+    tool_responses: list[ToolResponse]
 
 
 @json_schema_type
 class ShieldCallStep(StepCommon):
-    step_type: Literal[StepType.shield_call.value] = StepType.shield_call.value
-    violation: Optional[SafetyViolation]
+    """A shield call step in an agent turn.
+
+    :param violation: The violation from the shield call.
+    """
+
+    step_type: Literal[StepType.shield_call] = StepType.shield_call
+    violation: SafetyViolation | None
 
 
 @json_schema_type
 class MemoryRetrievalStep(StepCommon):
-    step_type: Literal[StepType.memory_retrieval.value] = (
-        StepType.memory_retrieval.value
-    )
-    memory_bank_ids: List[str]
-    inserted_context: InterleavedTextMedia
+    """A memory retrieval step in an agent turn.
+
+    :param vector_db_ids: The IDs of the vector databases to retrieve context from.
+    :param inserted_context: The context retrieved from the vector databases.
+    """
+
+    step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
+    # TODO: should this be List[str]?
+    vector_db_ids: str
+    inserted_context: InterleavedContent
 
 
 Step = Annotated[
-    Union[
-        InferenceStep,
-        ToolExecutionStep,
-        ShieldCallStep,
-        MemoryRetrievalStep,
-    ],
+    InferenceStep | ToolExecutionStep | ShieldCallStep | MemoryRetrievalStep,
     Field(discriminator="step_type"),
 ]
 
 
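Collapsing the old `Union[...]` into PEP 604 syntax leaves the discriminator behavior unchanged: the `step_type` literal still selects the concrete model during validation. A sketch with Pydantic v2's `TypeAdapter` (the `Step` import path is an assumption):

```python
from pydantic import TypeAdapter

from llama_stack.apis.agents import Step  # assumed re-export

# "step_type" routes the payload to ShieldCallStep.
step = TypeAdapter(Step).validate_python(
    {
        "turn_id": "turn-1",
        "step_id": "step-1",
        "step_type": "shield_call",
        "violation": None,  # required on ShieldCallStep, but nullable
    }
)
assert type(step).__name__ == "ShieldCallStep"
```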
 @json_schema_type
 class Turn(BaseModel):
-    """A single turn in an interaction with an Agentic System."""
+    """A single turn in an interaction with an Agentic System.
+
+    :param turn_id: Unique identifier for the turn within a session
+    :param session_id: Unique identifier for the conversation session
+    :param input_messages: List of messages that initiated this turn
+    :param steps: Ordered list of processing steps executed during this turn
+    :param output_message: The model's generated response containing content and metadata
+    :param output_attachments: (Optional) Files or media attached to the agent's response
+    :param started_at: Timestamp when the turn began
+    :param completed_at: (Optional) Timestamp when the turn finished, if completed
+    """
 
     turn_id: str
     session_id: str
-    input_messages: List[
-        Union[
-            UserMessage,
-            ToolResponseMessage,
-        ]
-    ]
-    steps: List[Step]
+    input_messages: list[UserMessage | ToolResponseMessage]
+    steps: list[Step]
     output_message: CompletionMessage
-    output_attachments: Optional[List[Attachment]] = Field(default_factory=list)
+    output_attachments: list[Attachment] | None = Field(default_factory=lambda: [])
 
     started_at: datetime
-    completed_at: Optional[datetime] = None
+    completed_at: datetime | None = None
 
 
 @json_schema_type
 class Session(BaseModel):
-    """A single session of an interaction with an Agentic System."""
+    """A single session of an interaction with an Agentic System.
+
+    :param session_id: Unique identifier for the conversation session
+    :param session_name: Human-readable name for the session
+    :param turns: List of all turns that have occurred in this session
+    :param started_at: Timestamp when the session was created
+    """
 
     session_id: str
     session_name: str
-    turns: List[Turn]
+    turns: list[Turn]
     started_at: datetime
 
-    memory_bank: Optional[MemoryBankDef] = None
 
+class AgentToolGroupWithArgs(BaseModel):
+    name: str
+    args: dict[str, Any]
+
+
+AgentToolGroup = str | AgentToolGroupWithArgs
+register_schema(AgentToolGroup, name="AgentTool")
 
 
-class AgentConfigCommon(BaseModel):
-    sampling_params: Optional[SamplingParams] = SamplingParams()
-
-
-
-    tools: Optional[List[AgentToolDefinition]] = Field(default_factory=list)
-    tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
-    tool_prompt_format: Optional[ToolPromptFormat] = Field(
-        default=ToolPromptFormat.json
-    )
-
+class AgentConfigCommon(BaseModel):
+    sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
+
+    input_shields: list[str] | None = Field(default_factory=lambda: [])
+    output_shields: list[str] | None = Field(default_factory=lambda: [])
+    toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
+    client_tools: list[ToolDef] | None = Field(default_factory=lambda: [])
+    tool_choice: ToolChoice | None = Field(default=None, deprecated="use tool_config instead")
+    tool_prompt_format: ToolPromptFormat | None = Field(default=None, deprecated="use tool_config instead")
+    tool_config: ToolConfig | None = Field(default=None)
+
+    max_infer_iters: int | None = 10
+
+    def model_post_init(self, __context):
+        if self.tool_config:
+            if self.tool_choice and self.tool_config.tool_choice != self.tool_choice:
+                raise ValueError("tool_choice is deprecated. Use tool_choice in tool_config instead.")
+            if self.tool_prompt_format and self.tool_config.tool_prompt_format != self.tool_prompt_format:
+                raise ValueError("tool_prompt_format is deprecated. Use tool_prompt_format in tool_config instead.")
+        else:
+            params = {}
+            if self.tool_choice:
+                params["tool_choice"] = self.tool_choice
+            if self.tool_prompt_format:
+                params["tool_prompt_format"] = self.tool_prompt_format
+            self.tool_config = ToolConfig(**params)
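The `model_post_init` hook above gives the deprecated fields a migration path: a lone `tool_choice` or `tool_prompt_format` is folded into `tool_config`, while a conflicting pair raises. A sketch of both behaviors (the `ToolChoice`/`ToolConfig` import path is an assumption):

```python
from llama_stack.apis.agents import AgentConfig
from llama_stack.apis.inference import ToolChoice, ToolConfig  # assumed import path

# Legacy field alone: migrated into tool_config at validation time.
cfg = AgentConfig(model="my-model", instructions="Be brief.", tool_choice=ToolChoice.required)
assert cfg.tool_config.tool_choice == ToolChoice.required

# Legacy field conflicting with an explicit tool_config: rejected.
try:
    AgentConfig(
        model="my-model",
        instructions="Be brief.",
        tool_choice=ToolChoice.auto,
        tool_config=ToolConfig(tool_choice=ToolChoice.required),
    )
except ValueError as err:
    print(err)  # tool_choice is deprecated. Use tool_choice in tool_config instead.
```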
 
 
 @json_schema_type
 class AgentConfig(AgentConfigCommon):
+    """Configuration for an agent.
+
+    :param model: The model identifier to use for the agent
+    :param instructions: The system instructions for the agent
+    :param name: Optional name for the agent, used in telemetry and identification
+    :param enable_session_persistence: Optional flag indicating whether session data has to be persisted
+    :param response_format: Optional response format configuration
+    """
+
     model: str
     instructions: str
-
+    name: str | None = None
+    enable_session_persistence: bool | None = False
+    response_format: ResponseFormat | None = None
+
+
+@json_schema_type
+class Agent(BaseModel):
+    """An agent instance with configuration and metadata.
+
+    :param agent_id: Unique identifier for the agent
+    :param agent_config: Configuration settings for the agent
+    :param created_at: Timestamp when the agent was created
+    """
+
+    agent_id: str
+    agent_config: AgentConfig
+    created_at: datetime
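With `AgentToolGroup` being `str | AgentToolGroupWithArgs`, a toolgroup can be referenced by bare name or parameterized per agent. A sketch; the group names and args are illustrative, not part of this API surface:

```python
from llama_stack.apis.agents import AgentConfig, AgentToolGroupWithArgs

config = AgentConfig(
    model="my-model",
    instructions="You are a helpful assistant.",
    enable_session_persistence=True,
    toolgroups=[
        "builtin::websearch",                 # plain string reference
        AgentToolGroupWithArgs(               # parameterized reference
            name="builtin::rag",
            args={"vector_db_ids": ["docs-db"]},
        ),
    ],
)
```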
 
 
 class AgentConfigOverridablePerTurn(AgentConfigCommon):
-    instructions: Optional[str] = None
+    instructions: str | None = None
 
 
-class AgentTurnResponseEventType(Enum):
+class AgentTurnResponseEventType(StrEnum):
     step_start = "step_start"
     step_complete = "step_complete"
     step_progress = "step_progress"
 
     turn_start = "turn_start"
     turn_complete = "turn_complete"
+    turn_awaiting_input = "turn_awaiting_input"
 
 
 @json_schema_type
 class AgentTurnResponseStepStartPayload(BaseModel):
-    event_type: Literal[AgentTurnResponseEventType.step_start.value] = (
-        AgentTurnResponseEventType.step_start.value
-    )
+    """Payload for step start events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param step_type: Type of step being executed
+    :param step_id: Unique identifier for the step within a turn
+    :param metadata: (Optional) Additional metadata for the step
+    """
+
+    event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start
     step_type: StepType
     step_id: str
-    metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
+    metadata: dict[str, Any] | None = Field(default_factory=lambda: {})
 
 
 @json_schema_type
 class AgentTurnResponseStepCompletePayload(BaseModel):
-    event_type: Literal[AgentTurnResponseEventType.step_complete.value] = (
-        AgentTurnResponseEventType.step_complete.value
-    )
+    """Payload for step completion events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param step_type: Type of step being executed
+    :param step_id: Unique identifier for the step within a turn
+    :param step_details: Complete details of the executed step
+    """
+
+    event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete
     step_type: StepType
+    step_id: str
     step_details: Step
 
 
 @json_schema_type
 class AgentTurnResponseStepProgressPayload(BaseModel):
+    """Payload for step progress events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param step_type: Type of step being executed
+    :param step_id: Unique identifier for the step within a turn
+    :param delta: Incremental content changes during step execution
+    """
+
     model_config = ConfigDict(protected_namespaces=())
 
-    event_type: Literal[AgentTurnResponseEventType.step_progress.value] = (
-        AgentTurnResponseEventType.step_progress.value
-    )
+    event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress
     step_type: StepType
     step_id: str
 
-    model_response_text_delta: Optional[str] = None
-    tool_call_delta: Optional[ToolCallDelta] = None
-    tool_response_text_delta: Optional[str] = None
+    delta: ContentDelta
 
 
 @json_schema_type
 class AgentTurnResponseTurnStartPayload(BaseModel):
-    event_type: Literal[AgentTurnResponseEventType.turn_start.value] = (
-        AgentTurnResponseEventType.turn_start.value
-    )
+    """Payload for turn start events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param turn_id: Unique identifier for the turn within a session
+    """
+
+    event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start
     turn_id: str
 
 
 @json_schema_type
 class AgentTurnResponseTurnCompletePayload(BaseModel):
-    event_type: Literal[AgentTurnResponseEventType.turn_complete.value] = (
-        AgentTurnResponseEventType.turn_complete.value
-    )
+    """Payload for turn completion events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param turn: Complete turn data including all steps and results
+    """
+
+    event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete
     turn: Turn
 
 
+@json_schema_type
+class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
+    """Payload for turn awaiting input events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param turn: Turn data when waiting for external tool responses
+    """
+
+    event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input
+    turn: Turn
+
+
+AgentTurnResponseEventPayload = Annotated[
+    AgentTurnResponseStepStartPayload
+    | AgentTurnResponseStepProgressPayload
+    | AgentTurnResponseStepCompletePayload
+    | AgentTurnResponseTurnStartPayload
+    | AgentTurnResponseTurnCompletePayload
+    | AgentTurnResponseTurnAwaitingInputPayload,
+    Field(discriminator="event_type"),
+]
+register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPayload")
+
+
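Because every payload carries a literal `event_type`, a stream consumer can branch on that one field without isinstance checks (the `StrEnum` values compare equal to their strings). A sketch of a consumer for whatever `create_agent_turn(..., stream=True)` yields:

```python
async def consume(stream):
    # stream yields AgentTurnResponseStreamChunk objects
    async for chunk in stream:
        payload = chunk.event.payload
        if payload.event_type == "step_progress":
            print(payload.delta)  # incremental ContentDelta
        elif payload.event_type == "turn_awaiting_input":
            return payload.turn   # client-side tool calls are pending
        elif payload.event_type == "turn_complete":
            return payload.turn   # finished turn with all steps attached
```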
 @json_schema_type
 class AgentTurnResponseEvent(BaseModel):
-    """Streamed agent execution response."""
+    """An event in an agent turn response stream.
+
+    :param payload: Event-specific payload containing event data
+    """
 
-    payload: Annotated[
-        Union[
-            AgentTurnResponseStepStartPayload,
-            AgentTurnResponseStepProgressPayload,
-            AgentTurnResponseStepCompletePayload,
-            AgentTurnResponseTurnStartPayload,
-            AgentTurnResponseTurnCompletePayload,
-        ],
-        Field(discriminator="event_type"),
-    ]
+    payload: AgentTurnResponseEventPayload
 
 
 @json_schema_type
 class AgentCreateResponse(BaseModel):
+    """Response returned when creating a new agent.
+
+    :param agent_id: Unique identifier for the created agent
+    """
+
     agent_id: str
 
 
 @json_schema_type
 class AgentSessionCreateResponse(BaseModel):
+    """Response returned when creating a new agent session.
+
+    :param session_id: Unique identifier for the created session
+    """
+
     session_id: str
 
 
 @json_schema_type
 class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
+    """Request to create a new turn for an agent.
+
+    :param agent_id: Unique identifier for the agent
+    :param session_id: Unique identifier for the conversation session
+    :param messages: List of messages to start the turn with
+    :param documents: (Optional) List of documents to provide to the agent
+    :param toolgroups: (Optional) List of tool groups to make available for this turn
+    :param stream: (Optional) Whether to stream the response
+    :param tool_config: (Optional) Tool configuration to override agent defaults
+    """
+
     agent_id: str
     session_id: str
 
     # TODO: figure out how we can simplify this and make why
     # ToolResponseMessage needs to be here (it is function call
     # execution from outside the system)
-    messages: List[
-        Union[
-            UserMessage,
-            ToolResponseMessage,
-        ]
-    ]
-    attachments: Optional[List[Attachment]] = None
+    messages: list[UserMessage | ToolResponseMessage]
+
+    documents: list[Document] | None = None
+    toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
+
+    stream: bool | None = False
+    tool_config: ToolConfig | None = None
+
+
+@json_schema_type
+class AgentTurnResumeRequest(BaseModel):
+    """Request to resume an agent turn with tool responses.
+
+    :param agent_id: Unique identifier for the agent
+    :param session_id: Unique identifier for the conversation session
+    :param turn_id: Unique identifier for the turn within a session
+    :param tool_responses: List of tool responses to submit to continue the turn
+    :param stream: (Optional) Whether to stream the response
+    """
 
-
+    agent_id: str
+    session_id: str
+    turn_id: str
+    tool_responses: list[ToolResponse]
+    stream: bool | None = False
 
 
 @json_schema_type
 class AgentTurnResponseStreamChunk(BaseModel):
+    """Streamed agent turn completion response.
+
+    :param event: Individual event in the agent turn response stream
+    """
+
     event: AgentTurnResponseEvent
 
 
 @json_schema_type
 class AgentStepResponse(BaseModel):
+    """Response containing details of a specific agent step.
+
+    :param step: The complete step data and execution details
+    """
+
     step: Step
 
 
 @runtime_checkable
 class Agents(Protocol):
-
+    """Agents
+
+    APIs for creating and interacting with agentic systems."""
+
+    @webmethod(
+        route="/agents",
+        method="POST",
+        descriptive_name="create_agent",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents",
+        method="POST",
+        descriptive_name="create_agent",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def create_agent(
         self,
         agent_config: AgentConfig,
-    ) -> AgentCreateResponse: ...
-
-
-
-
-
+    ) -> AgentCreateResponse:
+        """Create an agent with the given configuration.
+
+        :param agent_config: The configuration for the agent.
+        :returns: An AgentCreateResponse with the agent ID.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn",
+        method="POST",
+        descriptive_name="create_agent_turn",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn",
+        method="POST",
+        descriptive_name="create_agent_turn",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
+    async def create_agent_turn(
+        self,
+        agent_id: str,
+        session_id: str,
+        messages: list[UserMessage | ToolResponseMessage],
+        stream: bool | None = False,
+        documents: list[Document] | None = None,
+        toolgroups: list[AgentToolGroup] | None = None,
+        tool_config: ToolConfig | None = None,
+    ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
+        """Create a new turn for an agent.
+
+        :param agent_id: The ID of the agent to create the turn for.
+        :param session_id: The ID of the session to create the turn for.
+        :param messages: List of messages to start the turn with.
+        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
+        :param documents: (Optional) List of documents to create the turn with.
+        :param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request.
+        :param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
+        :returns: If stream=False, returns a Turn object.
+                  If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.
+        """
+        ...
+
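A hypothetical end-to-end sketch against any `Agents` implementation: only the method and field names come from the protocol above; the handle `agents`, the model name, and the message text are illustrative:

```python
from llama_stack.apis.inference import UserMessage  # assumed import path

async def run_one_turn(agents, agent_config):
    agent = await agents.create_agent(agent_config=agent_config)
    session = await agents.create_agent_session(
        agent_id=agent.agent_id,
        session_name="demo-session",
    )
    turn = await agents.create_agent_turn(
        agent_id=agent.agent_id,
        session_id=session.session_id,
        messages=[UserMessage(content="Summarize yesterday's deploy logs.")],
        stream=False,  # request a completed Turn instead of an SSE stream
    )
    return turn.output_message.content
```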
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+        method="POST",
+        descriptive_name="resume_agent_turn",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+        method="POST",
+        descriptive_name="resume_agent_turn",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
+    async def resume_agent_turn(
         self,
         agent_id: str,
         session_id: str,
-        messages: List[
-            Union[
-                UserMessage,
-                ToolResponseMessage,
-            ]
-        ],
-        attachments: Optional[List[Attachment]] = None,
-        stream: Optional[bool] = False,
-    ) -> AgentTurnResponseStreamChunk: ...
-
-
+        turn_id: str,
+        tool_responses: list[ToolResponse],
+        stream: bool | None = False,
+    ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the status `awaiting_input` due to pending input from client side tool calls, this endpoint can be used to submit the outputs from the tool calls once they are ready.
+
+        :param agent_id: The ID of the agent to resume.
+        :param session_id: The ID of the session to resume.
+        :param turn_id: The ID of the turn to resume.
+        :param tool_responses: The tool call responses to resume the turn with.
+        :param stream: Whether to stream the response.
+        :returns: A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk objects.
+        """
+        ...
+
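The awaiting-input loop the docstring describes, sketched end to end: stream a turn, execute pending client-side tool calls locally, then resume. `run_local_tool` is a stand-in executor, and the `ToolResponse` field names are assumptions:

```python
from llama_stack.apis.tools import ToolResponse  # assumed import path

def run_local_tool(call) -> str:
    # Stand-in executor; real code would dispatch on call.tool_name.
    return f"executed {call.tool_name}"

async def drive_turn(agents, agent_id, session_id, stream):
    async for chunk in stream:
        payload = chunk.event.payload
        if payload.event_type != "turn_awaiting_input":
            continue
        # The final step of an awaiting-input turn holds the pending tool calls.
        pending = payload.turn.steps[-1].tool_calls
        responses = [
            ToolResponse(
                call_id=call.call_id,
                tool_name=call.tool_name,
                content=run_local_tool(call),
            )
            for call in pending
        ]
        return await agents.resume_agent_turn(
            agent_id=agent_id,
            session_id=session_id,
            turn_id=payload.turn.turn_id,
            tool_responses=responses,
            stream=False,
        )
```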
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def get_agents_turn(
         self,
         agent_id: str,
+        session_id: str,
         turn_id: str,
-    ) -> Turn: ...
-
-
+    ) -> Turn:
+        """Retrieve an agent turn by its ID.
+
+        :param agent_id: The ID of the agent to get the turn for.
+        :param session_id: The ID of the session to get the turn for.
+        :param turn_id: The ID of the turn to get.
+        :returns: A Turn.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def get_agents_step(
-        self,
-        agent_id: str,
-        turn_id: str,
-        step_id: str,
+        self,
+        agent_id: str,
+        session_id: str,
+        turn_id: str,
+        step_id: str,
+    ) -> AgentStepResponse:
+        """Retrieve an agent step by its ID.
+
+        :param agent_id: The ID of the agent to get the step for.
+        :param session_id: The ID of the session to get the step for.
+        :param turn_id: The ID of the turn to get the step for.
+        :param step_id: The ID of the step to get.
+        :returns: An AgentStepResponse.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}/session",
+        method="POST",
+        descriptive_name="create_agent_session",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session",
+        method="POST",
+        descriptive_name="create_agent_session",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def create_agent_session(
         self,
         agent_id: str,
         session_name: str,
-    ) -> AgentSessionCreateResponse: ...
-
-
+    ) -> AgentSessionCreateResponse:
+        """Create a new session for an agent.
+
+        :param agent_id: The ID of the agent to create the session for.
+        :param session_name: The name of the session to create.
+        :returns: An AgentSessionCreateResponse.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def get_agents_session(
         self,
+        session_id: str,
         agent_id: str,
+        turn_ids: list[str] | None = None,
+    ) -> Session:
+        """Retrieve an agent session by its ID.
+
+        :param session_id: The ID of the session to get.
+        :param agent_id: The ID of the agent to get the session for.
+        :param turn_ids: (Optional) List of turn IDs to filter the session by.
+        :returns: A Session.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}",
+        method="DELETE",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
+    async def delete_agents_session(
+        self,
         session_id: str,
-
-    ) -> Session: ...
-
-
-
-
-
-
+        agent_id: str,
+    ) -> None:
+        """Delete an agent session by its ID and its associated turns.
+
+        :param session_id: The ID of the session to delete.
+        :param agent_id: The ID of the agent to delete the session for.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}",
+        method="DELETE",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
+    async def delete_agent(
         self,
         agent_id: str,
-    ) -> None: ...
+    ) -> None:
+        """Delete an agent by its ID and its associated sessions and turns.
+
+        :param agent_id: The ID of the agent to delete.
+        """
+        ...
+
+    @webmethod(route="/agents", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA)
+    async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
+        """List all agents.
+
+        :param start_index: The index to start the pagination from.
+        :param limit: The number of agents to return.
+        :returns: A PaginatedResponse.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
+    async def get_agent(self, agent_id: str) -> Agent:
+        """Describe an agent by its ID.
+
+        :param agent_id: ID of the agent.
+        :returns: An Agent of the agent.
+        """
+        ...
+
+    @webmethod(
+        route="/agents/{agent_id}/sessions",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
+    async def list_agent_sessions(
+        self,
+        agent_id: str,
+        start_index: int | None = None,
+        limit: int | None = None,
+    ) -> PaginatedResponse:
+        """List all session(s) of a given agent.
+
+        :param agent_id: The ID of the agent to list sessions for.
+        :param start_index: The index to start the pagination from.
+        :param limit: The number of sessions to return.
+        :returns: A PaginatedResponse.
+        """
+        ...
+
+    # We situate the OpenAI Responses API in the Agents API just like we did things
+    # for Inference. The Responses API, in its intent, serves the same purpose as
+    # the Agents API above -- it is essentially a lightweight "agentic loop" with
+    # integrated tool calling.
+    #
+    # Both of these APIs are inherently stateful.
+
+    @webmethod(
+        route="/openai/v1/responses/{response_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
+    async def get_openai_response(
+        self,
+        response_id: str,
+    ) -> OpenAIResponseObject:
+        """Get a model response.
+
+        :param response_id: The ID of the OpenAI response to retrieve.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
+    async def create_openai_response(
+        self,
+        input: str | list[OpenAIResponseInput],
+        model: str,
+        instructions: str | None = None,
+        previous_response_id: str | None = None,
+        conversation: str | None = None,
+        store: bool | None = True,
+        stream: bool | None = False,
+        temperature: float | None = None,
+        text: OpenAIResponseText | None = None,
+        tools: list[OpenAIResponseInputTool] | None = None,
+        include: list[str] | None = None,
+        max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
+        guardrails: Annotated[
+            list[ResponseGuardrail] | None,
+            ExtraBodyField(
+                "List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
+            ),
+        ] = None,
+    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
+        """Create a model response.
+
+        :param input: Input message(s) to create the response.
+        :param model: The underlying LLM used for completions.
+        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
+        :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
+        :param include: (Optional) Additional fields to include in the response.
+        :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
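A sketch of the same agentic loop through the OpenAI-compatible surface, using the `previous_response_id` continuation described in the docstring; the model name and inputs are illustrative:

```python
async def ask_twice(agents):
    first = await agents.create_openai_response(
        input="What changed in the last deploy?",
        model="my-model",
        instructions="Answer in one sentence.",
        store=True,  # stored responses can be continued later
    )
    # Fork a follow-up off the stored response instead of resending history.
    follow_up = await agents.create_openai_response(
        input="And who approved it?",
        model="my-model",
        previous_response_id=first.id,
    )
    return follow_up
```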
+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_openai_responses(
+        self,
+        after: str | None = None,
+        limit: int | None = 50,
+        model: str | None = None,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIResponseObject:
+        """List all responses.
+
+        :param after: The ID of the last response to return.
+        :param limit: The number of responses to return.
+        :param model: The model to filter responses by.
+        :param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
+        :returns: A ListOpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(
+        route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_openai_response_input_items(
+        self,
+        response_id: str,
+        after: str | None = None,
+        before: str | None = None,
+        include: list[str] | None = None,
+        limit: int | None = 20,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIResponseInputItem:
+        """List input items.
+
+        :param response_id: The ID of the response to retrieve input items for.
+        :param after: An item ID to list items after, used for pagination.
+        :param before: An item ID to list items before, used for pagination.
+        :param include: Additional fields to include in the response.
+        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
+        :param order: The order to return the input items in. Default is desc.
+        :returns: An ListOpenAIResponseInputItem.
+        """
+        ...
+
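Cursor pagination over input items, assuming `ListOpenAIResponseInputItem` mirrors the OpenAI list shape (a `data` list of items with `id` fields, plus `has_more`); treat those field names as assumptions:

```python
async def all_input_items(agents, response_id: str) -> list:
    items: list = []
    after = None
    while True:
        page = await agents.list_openai_response_input_items(
            response_id=response_id,
            after=after,
            limit=100,  # documented per-page maximum
        )
        items.extend(page.data)
        if not page.data or not getattr(page, "has_more", False):
            return items
        after = page.data[-1].id  # cursor for the next page
```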
+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
+        """Delete a response.
+
+        :param response_id: The ID of the OpenAI response to delete.
+        :returns: An OpenAIDeleteResponseObject
+        """
+        ...