llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0

llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py

@@ -9,16 +9,16 @@ from typing import Any
 
 import httpx
 
-from llama_stack.
-from
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import BingSearchToolConfig
 
@@ -49,7 +49,10 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
         return provider_data.bing_search_api_key
 
     async def list_runtime_tools(
-        self,
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -70,7 +73,9 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
             ]
         )
 
-    async def invoke_tool(
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         headers = {
             "Ocp-Apim-Subscription-Key": api_key,
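The same widening recurs verbatim in the Brave, Tavily, and Wolfram Alpha runtimes below: `list_runtime_tools` gains optional `tool_group_id`, `mcp_endpoint`, and `authorization` keywords, and `invoke_tool` accepts `authorization` directly. A minimal caller-side sketch of the new surface (the `runtime` object and the tool/group names are illustrative assumptions, not values from the diff):

# Hypothetical usage sketch of the 0.4.0 ToolRuntime surface; `runtime` is any
# implementation such as BingSearchToolRuntimeImpl; names are illustrative.
from llama_stack_api import ToolInvocationResult


async def run_search(runtime) -> ToolInvocationResult:
    # All three new parameters on list_runtime_tools are optional keywords.
    defs = await runtime.list_runtime_tools(tool_group_id="builtin::websearch")
    print([t.name for t in defs.data])

    # The bearer token is now a first-class argument rather than a provider-data header.
    return await runtime.invoke_tool(
        tool_name="web_search",
        kwargs={"query": "llama-stack 0.4.0"},
        authorization=None,  # supply a token when the backend requires one
    )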
llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py

@@ -8,17 +8,17 @@ from typing import Any
 
 import httpx
 
-from llama_stack.
-from llama_stack.
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.models.llama.datatypes import BuiltinTool
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import BuiltinTool
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import BraveSearchToolConfig
 
@@ -48,7 +48,10 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
         return provider_data.brave_search_api_key
 
     async def list_runtime_tools(
-        self,
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -70,7 +73,9 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
             ]
         )
 
-    async def invoke_tool(
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         url = "https://api.search.brave.com/res/v1/web/search"
         headers = {
llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py

@@ -10,8 +10,14 @@ from pydantic import BaseModel
 
 
 class MCPProviderDataValidator(BaseModel):
-
-
+    """
+    Validator for MCP provider-specific data passed via request headers.
+
+    Phase 1: Support old header-based authentication for backward compatibility.
+    In Phase 2, this will be deprecated in favor of the authorization parameter.
+    """
+
+    mcp_headers: dict[str, dict[str, str]] | None = None  # Map of URI -> headers dict
 
 
 class MCPProviderConfig(BaseModel):
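Per the new docstring, `mcp_headers` survives only for backward-compatible custom headers. A hedged sketch of provider data that still validates (the endpoint URI and header values are placeholders):

# Illustrative only: per-endpoint custom headers still pass validation,
# but an Authorization entry is rejected at call time (see the runtime change below).
data = MCPProviderDataValidator(
    mcp_headers={"https://mcp.example.com/sse": {"X-Tenant-Id": "acme"}}
)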
llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py

@@ -7,18 +7,18 @@
 from typing import Any
 from urllib.parse import urlparse
 
-from llama_stack.
-from llama_stack.
-from llama_stack.
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
+from llama_stack_api import (
+    URL,
+    Api,
     ListToolDefsResponse,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
-from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
 
 from .config import MCPProviderConfig
 
@@ -39,15 +39,23 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
         return
 
     async def list_runtime_tools(
-        self,
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
    ) -> ListToolDefsResponse:
         # this endpoint should be retrieved by getting the tool group right?
         if mcp_endpoint is None:
             raise ValueError("mcp_endpoint is required")
-        headers = await self.get_headers_from_request(mcp_endpoint.uri)
-        return await list_mcp_tools(mcp_endpoint.uri, headers)
 
-
+        # Get other headers from provider data (but NOT authorization)
+        provider_headers = await self.get_headers_from_request(mcp_endpoint.uri)
+
+        return await list_mcp_tools(endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=authorization)
+
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         tool = await self.tool_store.get_tool(tool_name)
         if tool.metadata is None or tool.metadata.get("endpoint") is None:
             raise ValueError(f"Tool {tool_name} does not have metadata")
@@ -55,19 +63,53 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
         if urlparse(endpoint).scheme not in ("http", "https"):
             raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL")
 
-        headers
-
+        # Get other headers from provider data (but NOT authorization)
+        provider_headers = await self.get_headers_from_request(endpoint)
+
+        return await invoke_mcp_tool(
+            endpoint=endpoint,
+            tool_name=tool_name,
+            kwargs=kwargs,
+            headers=provider_headers,
+            authorization=authorization,
+        )
 
     async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]:
+        """
+        Extract headers from request provider data, excluding authorization.
+
+        Authorization must be provided via the dedicated authorization parameter.
+        If Authorization is found in mcp_headers, raise an error to guide users to the correct approach.
+
+        Args:
+            mcp_endpoint_uri: The MCP endpoint URI to match against provider data
+
+        Returns:
+            dict[str, str]: Headers dictionary (without Authorization)
+
+        Raises:
+            ValueError: If Authorization header is found in mcp_headers
+        """
+
         def canonicalize_uri(uri: str) -> str:
             return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}"
 
         headers = {}
 
         provider_data = self.get_request_provider_data()
-        if provider_data and provider_data.mcp_headers:
+        if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers:
             for uri, values in provider_data.mcp_headers.items():
                 if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri):
                     continue
-
+
+                # Reject Authorization in mcp_headers - must use authorization parameter
+                for key in values.keys():
+                    if key.lower() == "authorization":
+                        raise ValueError(
+                            "Authorization cannot be provided via mcp_headers in provider_data. "
+                            "Please use the dedicated 'authorization' parameter instead. "
+                            "Example: tool_runtime.invoke_tool(..., authorization='your-token')"
+                        )
+                    headers[key] = values[key]
+
         return headers
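The caller-side migration follows directly: remove `Authorization` from `mcp_headers` and pass the token through the new parameter instead. A sketch under assumed placeholder values (the endpoint, token, and tool name are not from the diff):

# Before (0.3.5): Authorization smuggled through provider data -- now raises ValueError.
# provider_data = {"mcp_headers": {"https://mcp.example.com/sse": {"Authorization": "Bearer <token>"}}}

# After (0.4.0): the token travels as a dedicated argument on both entry points.
tools = await impl.list_runtime_tools(
    mcp_endpoint=URL(uri="https://mcp.example.com/sse"),
    authorization="<token>",
)
result = await impl.invoke_tool(
    tool_name="get_forecast",
    kwargs={"city": "Lisbon"},
    authorization="<token>",
)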
llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py

@@ -9,16 +9,16 @@ from typing import Any
 
 import httpx
 
-from llama_stack.
-from
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import TavilySearchToolConfig
 
@@ -48,7 +48,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
         return provider_data.tavily_search_api_key
 
     async def list_runtime_tools(
-        self,
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -69,7 +72,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
             ]
         )
 
-    async def invoke_tool(
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         async with httpx.AsyncClient() as client:
             response = await client.post(
llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py

@@ -9,16 +9,16 @@ from typing import Any
 
 import httpx
 
-from llama_stack.
-from
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import WolframAlphaToolConfig
 
@@ -49,7 +49,10 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
         return provider_data.wolfram_alpha_api_key
 
     async def list_runtime_tools(
-        self,
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -70,7 +73,9 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
             ]
         )
 
-    async def invoke_tool(
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         params = {
             "input": kwargs["query"],
llama_stack/providers/remote/vector_io/chroma/__init__.py

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from
+from llama_stack_api import Api, ProviderSpec
 
 from .config import ChromaVectorIOConfig
 
llama_stack/providers/remote/vector_io/chroma/chroma.py

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import asyncio
+import heapq
 import json
 from typing import Any
 from urllib.parse import urlparse
@@ -11,17 +12,23 @@ from urllib.parse import urlparse
 import chromadb
 from numpy.typing import NDArray
 
-from llama_stack.
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 
 from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
 
@@ -53,7 +60,7 @@ class ChromaIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[
+    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
         assert len(chunks) == len(embeddings), (
             f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
         )
@@ -77,7 +84,7 @@ class ChromaIndex(EmbeddingIndex):
         for dist, doc in zip(distances, documents, strict=False):
             try:
                 doc = json.loads(doc)
-                chunk =
+                chunk = EmbeddedChunk(**doc)
             except Exception:
                 log.exception(f"Failed to parse document: {doc}")
                 continue
@@ -94,8 +101,55 @@ class ChromaIndex(EmbeddingIndex):
     async def delete(self):
         await maybe_await(self.client.delete_collection(self.collection.name))
 
-    async def query_keyword(
-
+    async def query_keyword(
+        self,
+        query_string: str,
+        k: int,
+        score_threshold: float,
+    ) -> QueryChunksResponse:
+        """
+        Perform keyword search using Chroma's built-in where_document feature.
+
+        Args:
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        try:
+            results = await maybe_await(
+                self.collection.query(
+                    query_texts=[query_string],
+                    where_document={"$contains": query_string},
+                    n_results=k,
+                    include=["documents", "distances"],
+                )
+            )
+        except Exception as e:
+            log.error(f"Chroma client keyword search failed: {e}")
+            raise
+
+        distances = results["distances"][0] if results["distances"] else []
+        documents = results["documents"][0] if results["documents"] else []
+
+        chunks = []
+        scores = []
+
+        for dist, doc in zip(distances, documents, strict=False):
+            doc_data = json.loads(doc)
+            chunk = EmbeddedChunk(**doc_data)
+
+            score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
+
+            if score < score_threshold:
+                continue
+
+            chunks.append(chunk)
+            scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)
 
     async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
         """Delete a single chunk from the Chroma collection by its ID."""
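The `1.0 / (1.0 + dist)` conversion maps Chroma's non-negative distances onto (0, 1] scores: distance 0 becomes 1.0, distance 1 becomes 0.5, distance 4 becomes 0.2, so a `score_threshold` of 0.5 admits only matches within distance 1. A standalone sanity check of the same mapping:

# Sanity-check sketch of the distance-to-score mapping used in query_keyword above.
def to_score(dist: float | None) -> float:
    return 1.0 / (1.0 + float(dist)) if dist is not None else 1.0


assert to_score(0.0) == 1.0
assert to_score(1.0) == 0.5
assert abs(to_score(4.0) - 0.2) < 1e-9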
@@ -111,7 +165,57 @@ class ChromaIndex(EmbeddingIndex):
         reranker_type: str,
         reranker_params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-
+        """
+        Hybrid search combining vector similarity and keyword search using configurable reranking.
+        Args:
+            embedding: The query embedding vector
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+            reranker_type: Type of reranker to use ("rrf" or "weighted")
+            reranker_params: Parameters for the reranker
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        if reranker_params is None:
+            reranker_params = {}
+
+        # Get results from both search methods
+        vector_response = await self.query_vector(embedding, k, score_threshold)
+        keyword_response = await self.query_keyword(query_string, k, score_threshold)
+
+        # Convert responses to score dictionaries using chunk_id
+        vector_scores = {
+            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+        }
+        keyword_scores = {
+            chunk.chunk_id: score
+            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+        }
+
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )
+
+        # Efficient top-k selection because it only tracks the k best candidates it's seen so far
+        top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
+
+        # Filter by score threshold
+        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
+
+        # Create a map of chunk_id to chunk for both responses
+        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+
+        # Use the map to look up chunks by their IDs
+        chunks = []
+        scores = []
+        for doc_id, score in filtered_items:
+            if doc_id in chunk_map:
+                chunks.append(chunk_map[doc_id])
+                scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)
 
 
 class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
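`WeightedInMemoryAggregator.combine_search_results` performs the fusion. For `reranker_type="rrf"` the underlying idea is reciprocal-rank fusion: a chunk's combined score is the sum of `1 / (impact_factor + rank)` over the result lists it appears in. A generic sketch of the technique, not the llama-stack implementation (60 is the conventional default impact factor):

# Generic reciprocal-rank fusion, illustrating the "rrf" reranker referenced above.
def rrf_combine(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    impact_factor: float = 60.0,
) -> dict[str, float]:
    combined: dict[str, float] = {}
    for scores in (vector_scores, keyword_scores):
        # Rank chunks within each result list by descending score (rank is 1-based).
        ranked = sorted(scores, key=scores.get, reverse=True)
        for rank, chunk_id in enumerate(ranked, start=1):
            # Chunks ranked highly in either list accumulate the most weight.
            combined[chunk_id] = combined.get(chunk_id, 0.0) + 1.0 / (impact_factor + rank)
    return combined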
@@ -121,10 +225,9 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
         inference_api: Inference,
         files_api: Files | None,
     ) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
         self.config = config
-        self.inference_api = inference_api
         self.client = None
         self.cache = {}
         self.vector_store_table = None
@@ -168,20 +271,22 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
         await self.cache[vector_store_id].index.delete()
         del self.cache[vector_store_id]
 
-    async def insert_chunks(
-
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if index is None:
-            raise ValueError(f"Vector DB {
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
 
         await index.insert_chunks(chunks)
 
     async def query_chunks(
-        self,
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
 
         if index is None:
-            raise ValueError(f"Vector DB {
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
 
         return await index.query_chunks(query, params)
 
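A caller-side sketch of the widened adapter surface; the store id, query, and `params` keys here are illustrative assumptions (a plain string is one valid `InterleavedContent`):

# Hypothetical usage of the 0.4.0 adapter API; chunk construction is elided.
await adapter.insert_chunks(vector_store_id="vs_123", chunks=embedded_chunks)

resp = await adapter.query_chunks(
    vector_store_id="vs_123",
    query="what changed in 0.4.0?",
    params={"mode": "hybrid", "max_chunks": 5},  # assumed param keys
)
for chunk, score in zip(resp.chunks, resp.scores, strict=False):
    print(score, chunk.chunk_id)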
llama_stack/providers/remote/vector_io/chroma/config.py

@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
llama_stack/providers/remote/vector_io/milvus/__init__.py

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from
+from llama_stack_api import Api, ProviderSpec
 
 from .config import MilvusVectorIOConfig
 
llama_stack/providers/remote/vector_io/milvus/config.py

@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, ConfigDict, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type