llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
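Two structural changes dominate this file list: the public API surface moves out of llama_stack/apis/* into a new top-level llama_stack_api package (see llama_stack_api/__init__.py +945 -0 and the many {llama_stack/apis → llama_stack_api} renames), and the kvstore/sqlstore utilities move from llama_stack/providers/utils/ into llama_stack/core/storage/. As an illustrative sketch only, the import migration implied for downstream code looks like this (the names shown are taken from the tool_executor.py hunks below):

# Illustrative sketch of the 0.3.4 -> 0.4.0 import migration implied by the
# renames above; these exact names appear in the hunks that follow.

# 0.3.4 style (modules deleted in 0.4.0):
#   from llama_stack.apis.inference import OpenAIChatCompletionToolCall
#   from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
#   from llama_stack.apis.vector_io import VectorIO

# 0.4.0 style (single flat package):
from llama_stack_api import (
    OpenAIChatCompletionToolCall,
    ToolGroups,
    ToolInvocationResult,
    ToolRuntime,
    VectorIO,
)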
llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py

@@ -7,8 +7,18 @@
 import asyncio
 import json
 from collections.abc import AsyncIterator
+from typing import Any

-from llama_stack.apis.agents.openai_responses import (
+from opentelemetry import trace
+
+from llama_stack.core.datatypes import VectorStoresConfig
+from llama_stack.log import get_logger
+from llama_stack_api import (
+    ImageContentItem,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionToolCall,
+    OpenAIImageURL,
     OpenAIResponseInputToolFileSearch,
     OpenAIResponseInputToolMCP,
     OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
@@ -23,26 +33,18 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFileSearchToolCallResults,
     OpenAIResponseOutputMessageWebSearchToolCall,
-)
-from llama_stack.apis.common.content_types import (
-    ImageContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIChatCompletionToolCall,
-    OpenAIImageURL,
     OpenAIToolMessageParam,
+    TextContentItem,
+    ToolGroups,
+    ToolInvocationResult,
+    ToolRuntime,
+    VectorIO,
 )
-from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry import tracing

 from .types import ChatCompletionContext, ToolExecutionResult

 logger = get_logger(name=__name__, category="agents::meta_reference")
+tracer = trace.get_tracer(__name__)


 class ToolExecutor:
@@ -51,10 +53,12 @@ class ToolExecutor:
         tool_groups_api: ToolGroups,
         tool_runtime_api: ToolRuntime,
         vector_io_api: VectorIO,
+        vector_stores_config=None,
     ):
         self.tool_groups_api = tool_groups_api
         self.tool_runtime_api = tool_runtime_api
         self.vector_io_api = vector_io_api
+        self.vector_stores_config = vector_stores_config

     async def execute_tool_call(
         self,
@@ -67,7 +71,7 @@ class ToolExecutor:
     ) -> AsyncIterator[ToolExecutionResult]:
         tool_call_id = tool_call.id
         function = tool_call.function
-        tool_kwargs = json.loads(function.arguments) if function.arguments else {}
+        tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}

         if not function or not tool_call_id or not function.name:
             yield ToolExecutionResult(sequence_number=sequence_number)
@@ -84,7 +88,16 @@
         error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server)

         # Emit completion events for tool execution
-        has_error =
+        has_error = bool(
+            error_exc
+            or (
+                result
+                and (
+                    ((error_code := getattr(result, "error_code", None)) and error_code > 0)
+                    or getattr(result, "error_message", None)
+                )
+            )
+        )
         async for event_result in self._emit_completion_events(
             function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server
         ):
@@ -101,7 +114,9 @@
             sequence_number=sequence_number,
             final_output_message=output_message,
             final_input_message=input_message,
-            citation_files=
+            citation_files=(
+                metadata.get("citation_files") if result and (metadata := getattr(result, "metadata", None)) else None
+            ),
         )

     async def _execute_knowledge_search_via_vector_store(
@@ -136,13 +151,35 @@
         for results in all_results:
             search_results.extend(results)

-
-
-
-
-
+        # Get templates from vector stores config, fallback to constants
+
+        # Check if annotations are enabled
+        enable_annotations = (
+            self.vector_stores_config
+            and self.vector_stores_config.annotation_prompt_params
+            and self.vector_stores_config.annotation_prompt_params.enable_annotations
        )

+        # Get templates
+        header_template = self.vector_stores_config.file_search_params.header_template
+        footer_template = self.vector_stores_config.file_search_params.footer_template
+        context_template = self.vector_stores_config.context_prompt_params.context_template
+
+        # Get annotation templates (use defaults if annotations disabled)
+        if enable_annotations:
+            chunk_annotation_template = self.vector_stores_config.annotation_prompt_params.chunk_annotation_template
+            annotation_instruction_template = (
+                self.vector_stores_config.annotation_prompt_params.annotation_instruction_template
+            )
+        else:
+            # Use defaults from VectorStoresConfig when annotations disabled
+            default_config = VectorStoresConfig()
+            chunk_annotation_template = default_config.annotation_prompt_params.chunk_annotation_template
+            annotation_instruction_template = default_config.annotation_prompt_params.annotation_instruction_template
+
+        content_items = []
+        content_items.append(TextContentItem(text=header_template.format(num_chunks=len(search_results))))
+
         unique_files = set()
         for i, result_item in enumerate(search_results):
             chunk_text = result_item.content[0].text if result_item.content else ""
@@ -154,22 +191,23 @@
             if result_item.attributes:
                 metadata_text += f", attributes: {result_item.attributes}"

-            text_content =
+            text_content = chunk_annotation_template.format(
+                index=i + 1, metadata_text=metadata_text, file_id=file_id, chunk_text=chunk_text
+            )
             content_items.append(TextContentItem(text=text_content))
             unique_files.add(file_id)

-        content_items.append(TextContentItem(text=
+        content_items.append(TextContentItem(text=footer_template))

-
+        annotation_instruction = ""
         if unique_files:
-
-                " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
-                "Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
-            )
+            annotation_instruction = annotation_instruction_template

         content_items.append(
             TextContentItem(
-                text=
+                text=context_template.format(
+                    query=query, num_chunks=len(search_results), annotation_instruction=annotation_instruction
+                )
             )
         )

@@ -188,8 +226,9 @@

             citation_files[file_id] = filename

+        # Cast to proper InterleavedContent type (list invariance)
         return ToolInvocationResult(
-            content=content_items,
+            content=content_items,  # type: ignore[arg-type]
             metadata={
                 "document_ids": [r.file_id for r in search_results],
                 "chunks": [r.content[0].text if r.content else "" for r in search_results],
@@ -209,51 +248,60 @@
     ) -> AsyncIterator[ToolExecutionResult]:
         """Emit progress events for tool execution start."""
         # Emit in_progress event based on tool type (only for tools with specific streaming events)
-        progress_event = None
         if mcp_tool_to_server and function_name in mcp_tool_to_server:
             sequence_number += 1
-
-
-
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseMcpCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
         elif function_name == "web_search":
             sequence_number += 1
-
-
-
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
         elif function_name == "knowledge_search":
             sequence_number += 1
-
-
-
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )

-        if progress_event:
-            yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number)
-
         # For web search, emit searching event
         if function_name == "web_search":
             sequence_number += 1
-
-
-
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

         # For file search, emit searching event
         if function_name == "knowledge_search":
             sequence_number += 1
-
-
-
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

     async def _execute_tool(
         self,
@@ -261,7 +309,7 @@
         tool_kwargs: dict,
         ctx: ChatCompletionContext,
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
-    ) -> tuple[Exception | None,
+    ) -> tuple[Exception | None, Any]:
         """Execute the tool and return error exception and result."""
         error_exc = None
         result = None
@@ -276,23 +324,30 @@
                 "server_url": mcp_tool.server_url,
                 "tool_name": function_name,
             }
-
+            # TODO: follow semantic conventions for Open Telemetry tool spans
+            # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+            with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
                 result = await invoke_mcp_tool(
                     endpoint=mcp_tool.server_url,
-                    headers=mcp_tool.headers or {},
                     tool_name=function_name,
                     kwargs=tool_kwargs,
+                    headers=mcp_tool.headers,
+                    authorization=mcp_tool.authorization,
                 )
         elif function_name == "knowledge_search":
-            response_file_search_tool =
-                (
-
+            response_file_search_tool = (
+                next(
+                    (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
+                    None,
+                )
+                if ctx.response_tools
+                else None
             )
             if response_file_search_tool:
                 # Use vector_stores.search API instead of knowledge_search tool
                 # to support filters and ranking_options
                 query = tool_kwargs.get("query", "")
-
+                with tracer.start_as_current_span("knowledge_search"):
                     result = await self._execute_knowledge_search_via_vector_store(
                         query=query,
                         response_file_search_tool=response_file_search_tool,
@@ -301,7 +356,9 @@
             attributes = {
                 "tool_name": function_name,
             }
-
+            # TODO: follow semantic conventions for Open Telemetry tool spans
+            # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+            with tracer.start_as_current_span("invoke_tool", attributes=attributes):
                 result = await self.tool_runtime_api.invoke_tool(
                     tool_name=function_name,
                     kwargs=tool_kwargs,
@@ -322,35 +379,34 @@
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
     ) -> AsyncIterator[ToolExecutionResult]:
         """Emit completion or failure events for tool execution."""
-        completion_event = None
-
         if mcp_tool_to_server and function_name in mcp_tool_to_server:
             sequence_number += 1
             if has_error:
-
+                mcp_failed_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
                     sequence_number=sequence_number,
                 )
+                yield ToolExecutionResult(stream_event=mcp_failed_event, sequence_number=sequence_number)
             else:
-
+                mcp_completed_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
                     sequence_number=sequence_number,
                 )
+                yield ToolExecutionResult(stream_event=mcp_completed_event, sequence_number=sequence_number)
         elif function_name == "web_search":
             sequence_number += 1
-
+            web_completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
                 item_id=item_id,
                 output_index=output_index,
                 sequence_number=sequence_number,
             )
+            yield ToolExecutionResult(stream_event=web_completion_event, sequence_number=sequence_number)
         elif function_name == "knowledge_search":
             sequence_number += 1
-
+            file_completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
                 item_id=item_id,
                 output_index=output_index,
                 sequence_number=sequence_number,
             )
-
-        if completion_event:
-            yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number)
+            yield ToolExecutionResult(stream_event=file_completion_event, sequence_number=sequence_number)

     async def _build_result_messages(
         self,
@@ -360,18 +416,19 @@
         tool_kwargs: dict,
         ctx: ChatCompletionContext,
         error_exc: Exception | None,
-        result:
+        result: Any,
         has_error: bool,
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
-    ) -> tuple[
+    ) -> tuple[Any, Any]:
         """Build output and input messages from tool execution results."""
         from llama_stack.providers.utils.inference.prompt_adapter import (
             interleaved_content_as_str,
         )

         # Build output message
+        message: Any
         if mcp_tool_to_server and function.name in mcp_tool_to_server:
-            from
+            from llama_stack_api import (
                 OpenAIResponseOutputMessageMCPCall,
             )

@@ -383,10 +440,14 @@
             )
             if error_exc:
                 message.error = str(error_exc)
-            elif (result and result
-
-
-
+            elif (result and (error_code := getattr(result, "error_code", None)) and error_code > 0) or (
+                result and getattr(result, "error_message", None)
+            ):
+                ec = getattr(result, "error_code", "unknown")
+                em = getattr(result, "error_message", "")
+                message.error = f"Error (code {ec}): {em}"
+            elif result and (content := getattr(result, "content", None)):
+                message.output = interleaved_content_as_str(content)
             else:
                 if function.name == "web_search":
                     message = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -401,17 +462,17 @@
                         queries=[tool_kwargs.get("query", "")],
                         status="completed",
                     )
-                    if result and "document_ids" in
+                    if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
                         message.results = []
-                        for i, doc_id in enumerate(
-                            text =
-                            score =
+                        for i, doc_id in enumerate(metadata["document_ids"]):
+                            text = metadata["chunks"][i] if "chunks" in metadata else None
+                            score = metadata["scores"][i] if "scores" in metadata else None
                             message.results.append(
                                 OpenAIResponseOutputMessageFileSearchToolCallResults(
                                     file_id=doc_id,
                                     filename=doc_id,
-                                    text=text,
-                                    score=score,
+                                    text=text if text is not None else "",
+                                    score=score if score is not None else 0.0,
                                     attributes={},
                                 )
                             )
@@ -421,27 +482,32 @@
             raise ValueError(f"Unknown tool {function.name} called")

         # Build input message
-        input_message = None
-        if result and result
-
-
-
-
-
+        input_message: OpenAIToolMessageParam | None = None
+        if result and (result_content := getattr(result, "content", None)):
+            # all the mypy contortions here are still unsatisfactory with random Any typing
+            if isinstance(result_content, str):
+                msg_content: str | list[Any] = result_content
+            elif isinstance(result_content, list):
+                content_list: list[Any] = []
+                for item in result_content:
+                    part: Any
                     if isinstance(item, TextContentItem):
                         part = OpenAIChatCompletionContentPartTextParam(text=item.text)
                     elif isinstance(item, ImageContentItem):
                         if item.image.data:
-
+                            url_value = f"data:image;base64,{item.image.data}"
                        else:
-
-                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=
+                            url_value = str(item.image.url) if item.image.url else ""
+                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url_value))
                    else:
                        raise ValueError(f"Unknown result content type: {type(item)}")
-
+                    content_list.append(part)
+                msg_content = content_list
             else:
-                raise ValueError(f"Unknown result content type: {type(
-
+                raise ValueError(f"Unknown result content type: {type(result_content)}")
+            # OpenAIToolMessageParam accepts str | list[TextParam] but we may have images
+            # This is runtime-safe as the API accepts it, but mypy complains
+            input_message = OpenAIToolMessageParam(content=msg_content, tool_call_id=tool_call_id)  # type: ignore[arg-type]
         else:
             text = str(error_exc) if error_exc else "Tool execution failed"
             input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
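One pattern worth noting in the hunks above: the internal tracing helper (llama_stack.providers.utils.telemetry.tracing, deleted in this release per the summary list) is replaced with direct OpenTelemetry spans. A minimal runnable sketch of that pattern, assuming only the opentelemetry-api package; the invoke_tool wrapper below is a hypothetical stand-in for illustration, not code from the diff:

# Without a configured OpenTelemetry SDK, trace.get_tracer returns a no-op
# tracer, so this sketch is safe to run as-is; spans are only exported once
# an SDK and exporter are installed.
from opentelemetry import trace

tracer = trace.get_tracer(__name__)


def invoke_tool(tool_name: str) -> str:
    # Mirrors the spans added above; attribute keys follow the diff, which
    # notes in TODOs that gen-ai semantic conventions are not yet applied.
    attributes = {"tool_name": tool_name}
    with tracer.start_as_current_span("invoke_tool", attributes=attributes):
        return f"executed {tool_name}"


print(invoke_tool("knowledge_search"))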
llama_stack/providers/inline/agents/meta_reference/responses/types.py

@@ -5,13 +5,18 @@
 # the root directory of this source tree.

 from dataclasses import dataclass
+from typing import cast

 from openai.types.chat import ChatCompletionToolParam
 from pydantic import BaseModel

-from llama_stack.apis.agents.openai_responses import (
+from llama_stack_api import (
+    OpenAIChatCompletionToolCall,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     OpenAIResponseInput,
     OpenAIResponseInputTool,
+    OpenAIResponseInputToolChoice,
     OpenAIResponseInputToolFileSearch,
     OpenAIResponseInputToolFunction,
     OpenAIResponseInputToolMCP,
@@ -24,8 +29,8 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageMCPListTools,
     OpenAIResponseTool,
     OpenAIResponseToolMCP,
+    OpenAITokenLogProb,
 )
-from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam


 class ToolExecutionResult(BaseModel):
@@ -51,6 +56,7 @@ class ChatCompletionResult:
     message_item_id: str  # For streaming events
     tool_call_item_ids: dict[int, str]  # For streaming events
     content_part_emitted: bool  # Tracking state
+    logprobs: list[OpenAITokenLogProb] | None = None

     @property
     def content_text(self) -> str:
@@ -100,17 +106,19 @@ class ToolContext(BaseModel):
             if isinstance(tool, OpenAIResponseToolMCP):
                 previous_tools_by_label[tool.server_label] = tool
         # collect tool definitions which are the same in current and previous requests:
-        tools_to_process = []
+        tools_to_process: list[OpenAIResponseInputTool] = []
         matched: dict[str, OpenAIResponseInputToolMCP] = {}
-
+        # Mypy confuses OpenAIResponseInputTool (Input union) with OpenAIResponseTool (output union)
+        # which differ only in MCP type (InputToolMCP vs ToolMCP). Code is correct.
+        for tool in cast(list[OpenAIResponseInputTool], self.current_tools):  # type: ignore[assignment]
             if isinstance(tool, OpenAIResponseInputToolMCP) and tool.server_label in previous_tools_by_label:
                 previous_tool = previous_tools_by_label[tool.server_label]
                 if previous_tool.allowed_tools == tool.allowed_tools:
                     matched[tool.server_label] = tool
                 else:
-                    tools_to_process.append(tool)
+                    tools_to_process.append(tool)  # type: ignore[arg-type]
             else:
-                tools_to_process.append(tool)
+                tools_to_process.append(tool)  # type: ignore[arg-type]
         # tools that are not the same or were not previously defined need to be processed:
         self.tools_to_process = tools_to_process
         # for all matched definitions, get the mcp_list_tools objects from the previous output:
@@ -119,9 +127,11 @@ class ToolContext(BaseModel):
         ]
         # reconstruct the tool to server mappings that can be reused:
         for listing in self.previous_tool_listings:
+            # listing is OpenAIResponseOutputMessageMCPListTools which has tools: list[MCPListToolsTool]
             definition = matched[listing.server_label]
-            for
-
+            for mcp_tool in listing.tools:
+                # mcp_tool is MCPListToolsTool which has a name: str field
+                self.previous_tools[mcp_tool.name] = definition

     def available_tools(self) -> list[OpenAIResponseTool]:
         if not self.current_tools:
@@ -139,6 +149,8 @@ class ToolContext(BaseModel):
                     server_label=tool.server_label,
                     allowed_tools=tool.allowed_tools,
                 )
+            # Exhaustive check - all tool types should be handled above
+            raise AssertionError(f"Unexpected tool type: {type(tool)}")

         return [convert_tool(tool) for tool in self.current_tools]

@@ -151,6 +163,7 @@ class ChatCompletionContext(BaseModel):
     temperature: float | None
     response_format: OpenAIResponseFormatParam
     tool_context: ToolContext | None
+    tool_choice: OpenAIResponseInputToolChoice | None = None
     approval_requests: list[OpenAIResponseMCPApprovalRequest] = []
     approval_responses: dict[str, OpenAIResponseMCPApprovalResponse] = {}

@@ -163,6 +176,7 @@ class ChatCompletionContext(BaseModel):
         response_format: OpenAIResponseFormatParam,
         tool_context: ToolContext,
         inputs: list[OpenAIResponseInput] | str,
+        tool_choice: OpenAIResponseInputToolChoice | None = None,
     ):
         super().__init__(
             model=model,
@@ -171,6 +185,7 @@ class ChatCompletionContext(BaseModel):
             temperature=temperature,
             response_format=response_format,
             tool_context=tool_context,
+            tool_choice=tool_choice,
         )
         if not isinstance(inputs, str):
             self.approval_requests = [input for input in inputs if input.type == "mcp_approval_request"]