llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
```diff
--- a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
+++ b/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
@@ -9,16 +9,16 @@ from typing import Any
 
 import httpx
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import BingSearchToolConfig
 
@@ -49,7 +49,10 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
         return provider_data.bing_search_api_key
 
     async def list_runtime_tools(
-        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -70,7 +73,9 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
             ]
         )
 
-    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         headers = {
             "Ocp-Apim-Subscription-Key": api_key,
```
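The same change repeats in the brave_search, tavily_search, and wolfram_alpha hunks below: `list_runtime_tools` and `invoke_tool` each gain an optional `authorization` parameter. A minimal sketch of the new call shape — `ToyToolRuntime`, the tool name, and the token value are illustrative stand-ins, not the shipped implementation; only the method signatures mirror the diff:

```python
# Illustrative sketch of the 0.4.x tool-runtime call shape. ToyToolRuntime is
# a stand-in; only the two method signatures mirror the hunks above.
import asyncio
from typing import Any


class ToyToolRuntime:
    async def list_runtime_tools(
        self,
        tool_group_id: str | None = None,
        mcp_endpoint: Any | None = None,
        authorization: str | None = None,
    ) -> list[str]:
        # A real runtime returns a ListToolDefsResponse; a list of names keeps the toy small.
        return ["web_search"]

    async def invoke_tool(
        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
    ) -> dict[str, Any]:
        # A real runtime forwards `authorization` to the upstream search API.
        return {"tool": tool_name, "args": kwargs, "authorized": authorization is not None}


async def main() -> None:
    runtime = ToyToolRuntime()
    print(await runtime.list_runtime_tools(authorization="token-placeholder"))
    print(await runtime.invoke_tool("web_search", {"query": "llama stack"}, authorization="token-placeholder"))


asyncio.run(main())
```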
```diff
--- a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
+++ b/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
@@ -8,17 +8,17 @@ from typing import Any
 
 import httpx
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.models.llama.datatypes import BuiltinTool
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import BuiltinTool
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import BraveSearchToolConfig
 
@@ -48,7 +48,10 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
         return provider_data.brave_search_api_key
 
     async def list_runtime_tools(
-        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -70,7 +73,9 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
             ]
         )
 
-    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         url = "https://api.search.brave.com/res/v1/web/search"
         headers = {
```
```diff
--- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
+++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
@@ -10,8 +10,14 @@ from pydantic import BaseModel
 
 
 class MCPProviderDataValidator(BaseModel):
-
-
+    """
+    Validator for MCP provider-specific data passed via request headers.
+
+    Phase 1: Support old header-based authentication for backward compatibility.
+    In Phase 2, this will be deprecated in favor of the authorization parameter.
+    """
+
+    mcp_headers: dict[str, dict[str, str]] | None = None  # Map of URI -> headers dict
 
 
 class MCPProviderConfig(BaseModel):
```
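The docstring added above frames header-based MCP auth as a transitional mechanism. A small sketch of the payload shape `MCPProviderDataValidator` accepts — the model is redeclared locally so the snippet is self-contained, and the URI and header values are placeholders:

```python
# Self-contained sketch: the mcp_headers payload shape accepted by the
# validator in this diff. The endpoint URI and header values are placeholders.
from pydantic import BaseModel


class MCPProviderDataValidator(BaseModel):
    mcp_headers: dict[str, dict[str, str]] | None = None  # Map of URI -> headers dict


payload = {
    "mcp_headers": {
        "https://mcp.example.com/sse": {"X-Custom-Header": "value"},
    }
}
data = MCPProviderDataValidator.model_validate(payload)
assert data.mcp_headers is not None
# Note: per the runtime change later in this diff, an Authorization key inside
# these headers is rejected at request time; tokens go through the dedicated
# `authorization` parameter instead.
```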
```diff
--- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
+++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
@@ -7,18 +7,18 @@
 from typing import Any
 from urllib.parse import urlparse
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.tools import (
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
+from llama_stack_api import (
+    URL,
+    Api,
     ListToolDefsResponse,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
-from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
 
 from .config import MCPProviderConfig
 
@@ -39,15 +39,23 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
             return
 
     async def list_runtime_tools(
-        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
    ) -> ListToolDefsResponse:
         # this endpoint should be retrieved by getting the tool group right?
         if mcp_endpoint is None:
             raise ValueError("mcp_endpoint is required")
-        headers = await self.get_headers_from_request(mcp_endpoint.uri)
-        return await list_mcp_tools(mcp_endpoint.uri, headers)
 
-    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
+        # Get other headers from provider data (but NOT authorization)
+        provider_headers = await self.get_headers_from_request(mcp_endpoint.uri)
+
+        return await list_mcp_tools(endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=authorization)
+
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         tool = await self.tool_store.get_tool(tool_name)
         if tool.metadata is None or tool.metadata.get("endpoint") is None:
             raise ValueError(f"Tool {tool_name} does not have metadata")
@@ -55,19 +63,53 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
         if urlparse(endpoint).scheme not in ("http", "https"):
             raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL")
 
-        headers = await self.get_headers_from_request(endpoint)
-
+        # Get other headers from provider data (but NOT authorization)
+        provider_headers = await self.get_headers_from_request(endpoint)
+
+        return await invoke_mcp_tool(
+            endpoint=endpoint,
+            tool_name=tool_name,
+            kwargs=kwargs,
+            headers=provider_headers,
+            authorization=authorization,
+        )
 
     async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]:
+        """
+        Extract headers from request provider data, excluding authorization.
+
+        Authorization must be provided via the dedicated authorization parameter.
+        If Authorization is found in mcp_headers, raise an error to guide users to the correct approach.
+
+        Args:
+            mcp_endpoint_uri: The MCP endpoint URI to match against provider data
+
+        Returns:
+            dict[str, str]: Headers dictionary (without Authorization)
+
+        Raises:
+            ValueError: If Authorization header is found in mcp_headers
+        """
+
         def canonicalize_uri(uri: str) -> str:
             return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}"
 
         headers = {}
 
         provider_data = self.get_request_provider_data()
-        if provider_data and provider_data.mcp_headers:
+        if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers:
             for uri, values in provider_data.mcp_headers.items():
                 if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri):
                     continue
-                headers.update(values)
+
+                # Reject Authorization in mcp_headers - must use authorization parameter
+                for key in values.keys():
+                    if key.lower() == "authorization":
+                        raise ValueError(
+                            "Authorization cannot be provided via mcp_headers in provider_data. "
+                            "Please use the dedicated 'authorization' parameter instead. "
+                            "Example: tool_runtime.invoke_tool(..., authorization='your-token')"
+                        )
+                    headers[key] = values[key]
+
         return headers
```
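`get_headers_from_request` now matches provider-data headers by canonicalized URI and rejects any `Authorization` key outright. A standalone sketch of that rule, mirroring the logic in the hunk above with illustrative names and sample data:

```python
# Standalone sketch of the header matching-and-rejection rule added above.
# Function names and the sample endpoint/headers are illustrative.
from urllib.parse import urlparse


def canonicalize_uri(uri: str) -> str:
    # Compare endpoints by host and path, ignoring scheme, query, and fragment.
    parsed = urlparse(uri)
    return f"{parsed.netloc or ''}/{parsed.path or ''}"


def headers_for_endpoint(mcp_headers: dict[str, dict[str, str]], endpoint: str) -> dict[str, str]:
    headers: dict[str, str] = {}
    for uri, values in mcp_headers.items():
        if canonicalize_uri(uri) != canonicalize_uri(endpoint):
            continue
        for key, value in values.items():
            if key.lower() == "authorization":
                raise ValueError(
                    "Authorization cannot be provided via mcp_headers; "
                    "use the dedicated 'authorization' parameter instead."
                )
            headers[key] = value
    return headers


print(headers_for_endpoint({"https://mcp.example.com/sse": {"X-Trace-Id": "abc"}}, "https://mcp.example.com/sse"))
# -> {'X-Trace-Id': 'abc'}
```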
```diff
--- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
+++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
@@ -9,16 +9,16 @@ from typing import Any
 
 import httpx
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import TavilySearchToolConfig
 
@@ -48,7 +48,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
         return provider_data.tavily_search_api_key
 
     async def list_runtime_tools(
-        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -69,7 +72,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
             ]
         )
 
-    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         async with httpx.AsyncClient() as client:
             response = await client.post(
```
```diff
--- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
+++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
@@ -9,16 +9,16 @@ from typing import Any
 
 import httpx
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 
 from .config import WolframAlphaToolConfig
 
@@ -49,7 +49,10 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
         return provider_data.wolfram_alpha_api_key
 
     async def list_runtime_tools(
-        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         return ListToolDefsResponse(
             data=[
@@ -70,7 +73,9 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
             ]
         )
 
-    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
         api_key = self._get_api_key()
         params = {
             "input": kwargs["query"],
```
```diff
--- a/llama_stack/providers/remote/vector_io/chroma/__init__.py
+++ b/llama_stack/providers/remote/vector_io/chroma/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 
 from .config import ChromaVectorIOConfig
 
```
```diff
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import asyncio
+import heapq
 import json
 from typing import Any
 from urllib.parse import urlparse
@@ -11,17 +12,24 @@ from urllib.parse import urlparse
 import chromadb
 from numpy.typing import NDArray
 
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 
 from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
 
@@ -53,10 +61,12 @@ class ChromaIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[
-
-
-
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
+
+        # Extract embeddings directly from chunks (already list[float])
+        embeddings = [chunk.embedding for chunk in chunks]
 
         ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
         await maybe_await(
@@ -77,7 +87,7 @@ class ChromaIndex(EmbeddingIndex):
         for dist, doc in zip(distances, documents, strict=False):
             try:
                 doc = json.loads(doc)
-                chunk = Chunk(**doc)
+                chunk = load_embedded_chunk_with_backward_compat(doc)
             except Exception:
                 log.exception(f"Failed to parse document: {doc}")
                 continue
@@ -94,8 +104,55 @@ class ChromaIndex(EmbeddingIndex):
     async def delete(self):
         await maybe_await(self.client.delete_collection(self.collection.name))
 
-    async def query_keyword(
-
+    async def query_keyword(
+        self,
+        query_string: str,
+        k: int,
+        score_threshold: float,
+    ) -> QueryChunksResponse:
+        """
+        Perform keyword search using Chroma's built-in where_document feature.
+
+        Args:
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        try:
+            results = await maybe_await(
+                self.collection.query(
+                    query_texts=[query_string],
+                    where_document={"$contains": query_string},
+                    n_results=k,
+                    include=["documents", "distances"],
+                )
+            )
+        except Exception as e:
+            log.error(f"Chroma client keyword search failed: {e}")
+            raise
+
+        distances = results["distances"][0] if results["distances"] else []
+        documents = results["documents"][0] if results["documents"] else []
+
+        chunks = []
+        scores = []
+
+        for dist, doc in zip(distances, documents, strict=False):
+            doc_data = json.loads(doc)
+            chunk = load_embedded_chunk_with_backward_compat(doc_data)
+
+            score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
+
+            if score < score_threshold:
+                continue
+
+            chunks.append(chunk)
+            scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)
 
     async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
         """Delete a single chunk from the Chroma collection by its ID."""
@@ -111,7 +168,57 @@ class ChromaIndex(EmbeddingIndex):
         reranker_type: str,
         reranker_params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-
+        """
+        Hybrid search combining vector similarity and keyword search using configurable reranking.
+        Args:
+            embedding: The query embedding vector
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+            reranker_type: Type of reranker to use ("rrf" or "weighted")
+            reranker_params: Parameters for the reranker
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        if reranker_params is None:
+            reranker_params = {}
+
+        # Get results from both search methods
+        vector_response = await self.query_vector(embedding, k, score_threshold)
+        keyword_response = await self.query_keyword(query_string, k, score_threshold)
+
+        # Convert responses to score dictionaries using chunk_id
+        vector_scores = {
+            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+        }
+        keyword_scores = {
+            chunk.chunk_id: score
+            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+        }
+
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )
+
+        # Efficient top-k selection because it only tracks the k best candidates it's seen so far
+        top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
+
+        # Filter by score threshold
+        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
+
+        # Create a map of chunk_id to chunk for both responses
+        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+
+        # Use the map to look up chunks by their IDs
+        chunks = []
+        scores = []
+        for doc_id, score in filtered_items:
+            if doc_id in chunk_map:
+                chunks.append(chunk_map[doc_id])
+                scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)
 
 
 class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
@@ -121,10 +228,9 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
         inference_api: Inference,
         files_api: Files | None,
     ) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
         self.config = config
-        self.inference_api = inference_api
         self.client = None
         self.cache = {}
         self.vector_store_table = None
@@ -168,20 +274,22 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
         await self.cache[vector_store_id].index.delete()
         del self.cache[vector_store_id]
 
-    async def insert_chunks(
-
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if index is None:
-            raise ValueError(f"Vector DB {
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
 
         await index.insert_chunks(chunks)
 
     async def query_chunks(
-        self,
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
 
         if index is None:
-            raise ValueError(f"Vector DB {
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
 
         return await index.query_chunks(query, params)
```
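The new `query_hybrid` path merges per-chunk scores from vector and keyword search, then takes the top k with `heapq.nlargest`. The sketch below walks the same flow with a textbook reciprocal-rank-fusion combiner standing in for `WeightedInMemoryAggregator.combine_search_results`, whose actual weighting logic lives in `llama_stack/providers/utils/vector_io/vector_utils.py`:

```python
# Illustrative sketch of the hybrid-search flow added above. rrf_combine is a
# textbook Reciprocal Rank Fusion stand-in, not the shipped aggregator; the
# chunk IDs and scores are made-up sample data.
import heapq


def rrf_combine(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    impact_factor: float = 60.0,
) -> dict[str, float]:
    # Each result contributes 1 / (impact_factor + rank) per list it appears in.
    combined: dict[str, float] = {}
    for scores in (vector_scores, keyword_scores):
        ranked = sorted(scores, key=scores.get, reverse=True)
        for rank, chunk_id in enumerate(ranked, start=1):
            combined[chunk_id] = combined.get(chunk_id, 0.0) + 1.0 / (impact_factor + rank)
    return combined


vector_scores = {"c1": 0.92, "c2": 0.85, "c3": 0.40}
keyword_scores = {"c2": 0.70, "c4": 0.65}
combined = rrf_combine(vector_scores, keyword_scores)

# Top-k selection, as in the diff: heapq only tracks the k best candidates.
top_k = heapq.nlargest(2, combined.items(), key=lambda x: x[1])
print(top_k)  # "c2" appears in both result lists, so it ranks first
```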
```diff
--- a/llama_stack/providers/remote/vector_io/chroma/config.py
+++ b/llama_stack/providers/remote/vector_io/chroma/config.py
@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
```
```diff
--- a/llama_stack/providers/remote/vector_io/milvus/__init__.py
+++ b/llama_stack/providers/remote/vector_io/milvus/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 
 from .config import MilvusVectorIOConfig
 
```
```diff
--- a/llama_stack/providers/remote/vector_io/milvus/config.py
+++ b/llama_stack/providers/remote/vector_io/milvus/config.py
@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, ConfigDict, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
```