llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -11,16 +11,9 @@ from typing import Any
|
|
|
11
11
|
from numpy.typing import NDArray
|
|
12
12
|
from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker
|
|
13
13
|
|
|
14
|
-
from llama_stack.
|
|
15
|
-
from llama_stack.apis.files import Files
|
|
16
|
-
from llama_stack.apis.inference import Inference, InterleavedContent
|
|
17
|
-
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
18
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
14
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
19
15
|
from llama_stack.log import get_logger
|
|
20
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
21
16
|
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
|
|
22
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
23
|
-
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
24
17
|
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
25
18
|
from llama_stack.providers.utils.memory.vector_store import (
|
|
26
19
|
RERANKER_TYPE_WEIGHTED,
|
|
@@ -28,12 +21,28 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|
|
28
21
|
EmbeddingIndex,
|
|
29
22
|
VectorStoreWithIndex,
|
|
30
23
|
)
|
|
31
|
-
from llama_stack.providers.utils.vector_io.vector_utils import
|
|
24
|
+
from llama_stack.providers.utils.vector_io.vector_utils import (
|
|
25
|
+
load_embedded_chunk_with_backward_compat,
|
|
26
|
+
sanitize_collection_name,
|
|
27
|
+
)
|
|
28
|
+
from llama_stack_api import (
|
|
29
|
+
EmbeddedChunk,
|
|
30
|
+
Files,
|
|
31
|
+
Inference,
|
|
32
|
+
InterleavedContent,
|
|
33
|
+
QueryChunksResponse,
|
|
34
|
+
VectorIO,
|
|
35
|
+
VectorStore,
|
|
36
|
+
VectorStoreNotFoundError,
|
|
37
|
+
VectorStoresProtocolPrivate,
|
|
38
|
+
)
|
|
39
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
32
40
|
|
|
33
41
|
from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
|
|
34
42
|
|
|
35
43
|
logger = get_logger(name=__name__, category="vector_io::milvus")
|
|
36
44
|
|
|
45
|
+
|
|
37
46
|
VERSION = "v3"
|
|
38
47
|
VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
|
|
39
48
|
VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
|
|
@@ -60,10 +69,9 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
60
69
|
if await asyncio.to_thread(self.client.has_collection, self.collection_name):
|
|
61
70
|
await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
|
|
62
71
|
|
|
63
|
-
async def add_chunks(self, chunks: list[
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
)
|
|
72
|
+
async def add_chunks(self, chunks: list[EmbeddedChunk]):
|
|
73
|
+
if not chunks:
|
|
74
|
+
return
|
|
67
75
|
|
|
68
76
|
if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
|
|
69
77
|
logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")
|
|
@@ -76,7 +84,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
76
84
|
max_length=65535,
|
|
77
85
|
enable_analyzer=True, # Enable text analysis for BM25
|
|
78
86
|
)
|
|
79
|
-
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(
|
|
87
|
+
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(chunks[0].embedding))
|
|
80
88
|
schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
|
|
81
89
|
# Add sparse vector field for BM25 (required by the function)
|
|
82
90
|
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
|
|
@@ -105,12 +113,12 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
105
113
|
)
|
|
106
114
|
|
|
107
115
|
data = []
|
|
108
|
-
for chunk
|
|
116
|
+
for chunk in chunks:
|
|
109
117
|
data.append(
|
|
110
118
|
{
|
|
111
119
|
"chunk_id": chunk.chunk_id,
|
|
112
120
|
"content": chunk.content,
|
|
113
|
-
"vector": embedding,
|
|
121
|
+
"vector": chunk.embedding, # Already a list[float]
|
|
114
122
|
"chunk_content": chunk.model_dump(),
|
|
115
123
|
# sparse field will be handled by BM25 function automatically
|
|
116
124
|
}
|
|
@@ -131,7 +139,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
131
139
|
output_fields=["*"],
|
|
132
140
|
search_params={"params": {"radius": score_threshold}},
|
|
133
141
|
)
|
|
134
|
-
chunks = [
|
|
142
|
+
chunks = [load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"]) for res in search_res[0]]
|
|
135
143
|
scores = [res["distance"] for res in search_res[0]]
|
|
136
144
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
137
145
|
|
|
@@ -158,7 +166,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
158
166
|
chunks = []
|
|
159
167
|
scores = []
|
|
160
168
|
for res in search_res[0]:
|
|
161
|
-
chunk =
|
|
169
|
+
chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
|
|
162
170
|
chunks.append(chunk)
|
|
163
171
|
scores.append(res["distance"]) # BM25 score from Milvus
|
|
164
172
|
|
|
@@ -186,7 +194,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
186
194
|
output_fields=["*"],
|
|
187
195
|
limit=k,
|
|
188
196
|
)
|
|
189
|
-
chunks = [
|
|
197
|
+
chunks = [load_embedded_chunk_with_backward_compat(res["chunk_content"]) for res in search_res]
|
|
190
198
|
scores = [1.0] * len(chunks) # Simple binary score for text search
|
|
191
199
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
192
200
|
|
|
@@ -238,7 +246,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
238
246
|
chunks = []
|
|
239
247
|
scores = []
|
|
240
248
|
for res in search_res[0]:
|
|
241
|
-
chunk =
|
|
249
|
+
chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
|
|
242
250
|
chunks.append(chunk)
|
|
243
251
|
scores.append(res["distance"])
|
|
244
252
|
|
|
@@ -268,11 +276,10 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
268
276
|
inference_api: Inference,
|
|
269
277
|
files_api: Files | None,
|
|
270
278
|
) -> None:
|
|
271
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
279
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
272
280
|
self.config = config
|
|
273
281
|
self.cache = {}
|
|
274
282
|
self.client = None
|
|
275
|
-
self.inference_api = inference_api
|
|
276
283
|
self.vector_store_table = None
|
|
277
284
|
self.metadata_collection_name = "openai_vector_stores_metadata"
|
|
278
285
|
|
|
@@ -351,19 +358,21 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
351
358
|
await self.cache[vector_store_id].index.delete()
|
|
352
359
|
del self.cache[vector_store_id]
|
|
353
360
|
|
|
354
|
-
async def insert_chunks(
|
|
355
|
-
|
|
361
|
+
async def insert_chunks(
|
|
362
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
363
|
+
) -> None:
|
|
364
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
356
365
|
if not index:
|
|
357
|
-
raise VectorStoreNotFoundError(
|
|
366
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
358
367
|
|
|
359
368
|
await index.insert_chunks(chunks)
|
|
360
369
|
|
|
361
370
|
async def query_chunks(
|
|
362
|
-
self,
|
|
371
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
363
372
|
) -> QueryChunksResponse:
|
|
364
|
-
index = await self._get_and_cache_vector_store_index(
|
|
373
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
365
374
|
if not index:
|
|
366
|
-
raise VectorStoreNotFoundError(
|
|
375
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
367
376
|
return await index.query_chunks(query, params)
|
|
368
377
|
|
|
369
378
|
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from llama_stack_api import Api, ProviderSpec
|
|
8
8
|
|
|
9
9
|
from .config import PGVectorVectorIOConfig
|
|
10
10
|
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from llama_stack.core.storage.datatypes import KVStoreReference
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@json_schema_type
|
|
@@ -13,19 +13,28 @@ from psycopg2 import sql
|
|
|
13
13
|
from psycopg2.extras import Json, execute_values
|
|
14
14
|
from pydantic import BaseModel, TypeAdapter
|
|
15
15
|
|
|
16
|
-
from llama_stack.
|
|
17
|
-
from llama_stack.apis.files import Files
|
|
18
|
-
from llama_stack.apis.inference import Inference, InterleavedContent
|
|
19
|
-
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
20
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
16
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
21
17
|
from llama_stack.log import get_logger
|
|
22
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
23
18
|
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
|
24
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
25
|
-
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
26
19
|
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
27
20
|
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
|
28
|
-
from llama_stack.providers.utils.vector_io.vector_utils import
|
|
21
|
+
from llama_stack.providers.utils.vector_io.vector_utils import (
|
|
22
|
+
WeightedInMemoryAggregator,
|
|
23
|
+
load_embedded_chunk_with_backward_compat,
|
|
24
|
+
sanitize_collection_name,
|
|
25
|
+
)
|
|
26
|
+
from llama_stack_api import (
|
|
27
|
+
EmbeddedChunk,
|
|
28
|
+
Files,
|
|
29
|
+
Inference,
|
|
30
|
+
InterleavedContent,
|
|
31
|
+
QueryChunksResponse,
|
|
32
|
+
VectorIO,
|
|
33
|
+
VectorStore,
|
|
34
|
+
VectorStoreNotFoundError,
|
|
35
|
+
VectorStoresProtocolPrivate,
|
|
36
|
+
)
|
|
37
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
29
38
|
|
|
30
39
|
from .config import PGVectorVectorIOConfig
|
|
31
40
|
|
|
@@ -125,19 +134,18 @@ class PGVectorIndex(EmbeddingIndex):
|
|
|
125
134
|
log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
|
|
126
135
|
raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
|
|
127
136
|
|
|
128
|
-
async def add_chunks(self, chunks: list[
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
)
|
|
137
|
+
async def add_chunks(self, chunks: list[EmbeddedChunk]):
|
|
138
|
+
if not chunks:
|
|
139
|
+
return
|
|
132
140
|
|
|
133
141
|
values = []
|
|
134
|
-
for
|
|
142
|
+
for chunk in chunks:
|
|
135
143
|
content_text = interleaved_content_as_str(chunk.content)
|
|
136
144
|
values.append(
|
|
137
145
|
(
|
|
138
146
|
f"{chunk.chunk_id}",
|
|
139
147
|
Json(chunk.model_dump()),
|
|
140
|
-
|
|
148
|
+
chunk.embedding, # Already a list[float]
|
|
141
149
|
content_text,
|
|
142
150
|
content_text, # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
|
|
143
151
|
)
|
|
@@ -189,7 +197,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|
|
189
197
|
score = 1.0 / float(dist) if dist != 0 else float("inf")
|
|
190
198
|
if score < score_threshold:
|
|
191
199
|
continue
|
|
192
|
-
chunks.append(
|
|
200
|
+
chunks.append(load_embedded_chunk_with_backward_compat(doc))
|
|
193
201
|
scores.append(score)
|
|
194
202
|
|
|
195
203
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
@@ -225,7 +233,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|
|
225
233
|
for doc, score in results:
|
|
226
234
|
if score < score_threshold:
|
|
227
235
|
continue
|
|
228
|
-
chunks.append(
|
|
236
|
+
chunks.append(load_embedded_chunk_with_backward_compat(doc))
|
|
229
237
|
scores.append(float(score))
|
|
230
238
|
|
|
231
239
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
@@ -301,7 +309,8 @@ class PGVectorIndex(EmbeddingIndex):
|
|
|
301
309
|
"""Remove a chunk from the PostgreSQL table."""
|
|
302
310
|
chunk_ids = [c.chunk_id for c in chunks_for_deletion]
|
|
303
311
|
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
|
304
|
-
|
|
312
|
+
# Fix: Use proper tuple parameter binding with explicit array cast
|
|
313
|
+
cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s::text[])", (chunk_ids,))
|
|
305
314
|
|
|
306
315
|
def get_pgvector_search_function(self) -> str:
|
|
307
316
|
return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
|
|
@@ -327,16 +336,17 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
|
|
327
336
|
def __init__(
|
|
328
337
|
self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
|
|
329
338
|
) -> None:
|
|
330
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
339
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
331
340
|
self.config = config
|
|
332
|
-
self.inference_api = inference_api
|
|
333
341
|
self.conn = None
|
|
334
342
|
self.cache = {}
|
|
335
343
|
self.vector_store_table = None
|
|
336
344
|
self.metadata_collection_name = "openai_vector_stores_metadata"
|
|
337
345
|
|
|
338
346
|
async def initialize(self) -> None:
|
|
339
|
-
|
|
347
|
+
# Create a safe config representation with masked password for logging
|
|
348
|
+
safe_config = {**self.config.model_dump(exclude={"password"}), "password": "******"}
|
|
349
|
+
log.info(f"Initializing PGVector memory adapter with config: {safe_config}")
|
|
340
350
|
self.kvstore = await kvstore_impl(self.config.persistence)
|
|
341
351
|
await self.initialize_openai_vector_stores()
|
|
342
352
|
|
|
@@ -422,14 +432,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
|
|
422
432
|
raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
|
|
423
433
|
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
|
424
434
|
|
|
425
|
-
async def insert_chunks(
|
|
426
|
-
|
|
435
|
+
async def insert_chunks(
|
|
436
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
437
|
+
) -> None:
|
|
438
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
427
439
|
await index.insert_chunks(chunks)
|
|
428
440
|
|
|
429
441
|
async def query_chunks(
|
|
430
|
-
self,
|
|
442
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
431
443
|
) -> QueryChunksResponse:
|
|
432
|
-
index = await self._get_and_cache_vector_store_index(
|
|
444
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
433
445
|
return await index.query_chunks(query, params)
|
|
434
446
|
|
|
435
447
|
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from llama_stack_api import Api, ProviderSpec
|
|
8
8
|
|
|
9
9
|
from .config import QdrantVectorIOConfig
|
|
10
10
|
|
|
@@ -13,23 +13,25 @@ from numpy.typing import NDArray
|
|
|
13
13
|
from qdrant_client import AsyncQdrantClient, models
|
|
14
14
|
from qdrant_client.models import PointStruct
|
|
15
15
|
|
|
16
|
-
from llama_stack.
|
|
17
|
-
from llama_stack.
|
|
18
|
-
from llama_stack.
|
|
19
|
-
from llama_stack.
|
|
20
|
-
|
|
16
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
17
|
+
from llama_stack.log import get_logger
|
|
18
|
+
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
|
|
19
|
+
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
20
|
+
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
|
21
|
+
from llama_stack.providers.utils.vector_io.vector_utils import load_embedded_chunk_with_backward_compat
|
|
22
|
+
from llama_stack_api import (
|
|
23
|
+
EmbeddedChunk,
|
|
24
|
+
Files,
|
|
25
|
+
Inference,
|
|
26
|
+
InterleavedContent,
|
|
21
27
|
QueryChunksResponse,
|
|
22
28
|
VectorIO,
|
|
29
|
+
VectorStore,
|
|
23
30
|
VectorStoreChunkingStrategy,
|
|
24
31
|
VectorStoreFileObject,
|
|
32
|
+
VectorStoreNotFoundError,
|
|
33
|
+
VectorStoresProtocolPrivate,
|
|
25
34
|
)
|
|
26
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
27
|
-
from llama_stack.log import get_logger
|
|
28
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
29
|
-
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
|
|
30
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
31
|
-
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
32
|
-
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
|
33
35
|
|
|
34
36
|
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
|
|
35
37
|
|
|
@@ -65,24 +67,23 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
65
67
|
# If the collection does not exist, it will be created in add_chunks.
|
|
66
68
|
pass
|
|
67
69
|
|
|
68
|
-
async def add_chunks(self, chunks: list[
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
)
|
|
70
|
+
async def add_chunks(self, chunks: list[EmbeddedChunk]):
|
|
71
|
+
if not chunks:
|
|
72
|
+
return
|
|
72
73
|
|
|
73
74
|
if not await self.client.collection_exists(self.collection_name):
|
|
74
75
|
await self.client.create_collection(
|
|
75
76
|
self.collection_name,
|
|
76
|
-
vectors_config=models.VectorParams(size=len(
|
|
77
|
+
vectors_config=models.VectorParams(size=len(chunks[0].embedding), distance=models.Distance.COSINE),
|
|
77
78
|
)
|
|
78
79
|
|
|
79
80
|
points = []
|
|
80
|
-
for
|
|
81
|
+
for chunk in chunks:
|
|
81
82
|
chunk_id = chunk.chunk_id
|
|
82
83
|
points.append(
|
|
83
84
|
PointStruct(
|
|
84
85
|
id=convert_id(chunk_id),
|
|
85
|
-
vector=embedding,
|
|
86
|
+
vector=chunk.embedding, # Already a list[float]
|
|
86
87
|
payload={"chunk_content": chunk.model_dump()} | {CHUNK_ID_KEY: chunk_id},
|
|
87
88
|
)
|
|
88
89
|
)
|
|
@@ -117,7 +118,7 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
117
118
|
assert point.payload is not None
|
|
118
119
|
|
|
119
120
|
try:
|
|
120
|
-
chunk =
|
|
121
|
+
chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
|
|
121
122
|
except Exception:
|
|
122
123
|
log.exception("Failed to parse chunk")
|
|
123
124
|
continue
|
|
@@ -128,7 +129,63 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
128
129
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
129
130
|
|
|
130
131
|
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
|
|
131
|
-
|
|
132
|
+
"""
|
|
133
|
+
Performs keyword-based search using Qdrant's MatchText filter.
|
|
134
|
+
|
|
135
|
+
Uses Qdrant's query_filter with MatchText to search for chunks containing
|
|
136
|
+
the specified text query string in the chunk content.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
query_string: The text query for keyword search
|
|
140
|
+
k: Number of results to return
|
|
141
|
+
score_threshold: Minimum similarity score threshold
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
QueryChunksResponse with chunks and scores matching the keyword query
|
|
145
|
+
"""
|
|
146
|
+
try:
|
|
147
|
+
results = (
|
|
148
|
+
await self.client.query_points(
|
|
149
|
+
collection_name=self.collection_name,
|
|
150
|
+
query_filter=models.Filter(
|
|
151
|
+
must=[
|
|
152
|
+
models.FieldCondition(
|
|
153
|
+
key="chunk_content.content", match=models.MatchText(text=query_string)
|
|
154
|
+
)
|
|
155
|
+
]
|
|
156
|
+
),
|
|
157
|
+
limit=k,
|
|
158
|
+
with_payload=True,
|
|
159
|
+
with_vectors=False,
|
|
160
|
+
score_threshold=score_threshold,
|
|
161
|
+
)
|
|
162
|
+
).points
|
|
163
|
+
except Exception as e:
|
|
164
|
+
log.error(f"Error querying keyword search in Qdrant collection {self.collection_name}: {e}")
|
|
165
|
+
raise
|
|
166
|
+
|
|
167
|
+
chunks, scores = [], []
|
|
168
|
+
for point in results:
|
|
169
|
+
if not isinstance(point, models.ScoredPoint):
|
|
170
|
+
raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
|
|
171
|
+
if point.payload is None:
|
|
172
|
+
raise RuntimeError("Qdrant query returned point with no payload")
|
|
173
|
+
|
|
174
|
+
try:
|
|
175
|
+
chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
|
|
176
|
+
except Exception:
|
|
177
|
+
chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
|
|
178
|
+
point_id = getattr(point, "id", "unknown")
|
|
179
|
+
log.exception(
|
|
180
|
+
f"Failed to parse chunk in collection {self.collection_name}: "
|
|
181
|
+
f"chunk_id={chunk_id}, point_id={point_id}"
|
|
182
|
+
)
|
|
183
|
+
continue
|
|
184
|
+
|
|
185
|
+
chunks.append(chunk)
|
|
186
|
+
scores.append(point.score)
|
|
187
|
+
|
|
188
|
+
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
132
189
|
|
|
133
190
|
async def query_hybrid(
|
|
134
191
|
self,
|
|
@@ -139,7 +196,66 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
139
196
|
reranker_type: str,
|
|
140
197
|
reranker_params: dict[str, Any] | None = None,
|
|
141
198
|
) -> QueryChunksResponse:
|
|
142
|
-
|
|
199
|
+
"""
|
|
200
|
+
Hybrid search combining vector similarity and keyword filtering in a single query.
|
|
201
|
+
|
|
202
|
+
Uses Qdrant's native capability to combine a vector query with a query_filter,
|
|
203
|
+
allowing vector similarity search to be filtered by keyword matches in one call.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
embedding: The query embedding vector
|
|
207
|
+
query_string: The text query for keyword filtering
|
|
208
|
+
k: Number of results to return
|
|
209
|
+
score_threshold: Minimum similarity score threshold
|
|
210
|
+
reranker_type: Not used with this approach, but kept for API compatibility
|
|
211
|
+
reranker_params: Not used with this approach, but kept for API compatibility
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
QueryChunksResponse with filtered vector search results
|
|
215
|
+
"""
|
|
216
|
+
try:
|
|
217
|
+
results = (
|
|
218
|
+
await self.client.query_points(
|
|
219
|
+
collection_name=self.collection_name,
|
|
220
|
+
query=embedding.tolist(),
|
|
221
|
+
query_filter=models.Filter(
|
|
222
|
+
must=[
|
|
223
|
+
models.FieldCondition(
|
|
224
|
+
key="chunk_content.content", match=models.MatchText(text=query_string)
|
|
225
|
+
)
|
|
226
|
+
]
|
|
227
|
+
),
|
|
228
|
+
limit=k,
|
|
229
|
+
with_payload=True,
|
|
230
|
+
score_threshold=score_threshold,
|
|
231
|
+
)
|
|
232
|
+
).points
|
|
233
|
+
except Exception as e:
|
|
234
|
+
log.error(f"Error querying hybrid search in Qdrant collection {self.collection_name}: {e}")
|
|
235
|
+
raise
|
|
236
|
+
|
|
237
|
+
chunks, scores = [], []
|
|
238
|
+
for point in results:
|
|
239
|
+
if not isinstance(point, models.ScoredPoint):
|
|
240
|
+
raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
|
|
241
|
+
if point.payload is None:
|
|
242
|
+
raise RuntimeError("Qdrant query returned point with no payload")
|
|
243
|
+
|
|
244
|
+
try:
|
|
245
|
+
chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
|
|
246
|
+
except Exception:
|
|
247
|
+
chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
|
|
248
|
+
point_id = getattr(point, "id", "unknown")
|
|
249
|
+
log.exception(
|
|
250
|
+
f"Failed to parse chunk in collection {self.collection_name}: "
|
|
251
|
+
f"chunk_id={chunk_id}, point_id={point_id}"
|
|
252
|
+
)
|
|
253
|
+
continue
|
|
254
|
+
|
|
255
|
+
chunks.append(chunk)
|
|
256
|
+
scores.append(point.score)
|
|
257
|
+
|
|
258
|
+
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
143
259
|
|
|
144
260
|
async def delete(self):
|
|
145
261
|
await self.client.delete_collection(collection_name=self.collection_name)
|
|
@@ -152,11 +268,10 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
152
268
|
inference_api: Inference,
|
|
153
269
|
files_api: Files | None = None,
|
|
154
270
|
) -> None:
|
|
155
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
271
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
156
272
|
self.config = config
|
|
157
273
|
self.client: AsyncQdrantClient = None
|
|
158
274
|
self.cache = {}
|
|
159
|
-
self.inference_api = inference_api
|
|
160
275
|
self.vector_store_table = None
|
|
161
276
|
self._qdrant_lock = asyncio.Lock()
|
|
162
277
|
|
|
@@ -227,19 +342,21 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
227
342
|
self.cache[vector_store_id] = index
|
|
228
343
|
return index
|
|
229
344
|
|
|
230
|
-
async def insert_chunks(
|
|
231
|
-
|
|
345
|
+
async def insert_chunks(
|
|
346
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
347
|
+
) -> None:
|
|
348
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
232
349
|
if not index:
|
|
233
|
-
raise VectorStoreNotFoundError(
|
|
350
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
234
351
|
|
|
235
352
|
await index.insert_chunks(chunks)
|
|
236
353
|
|
|
237
354
|
async def query_chunks(
|
|
238
|
-
self,
|
|
355
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
239
356
|
) -> QueryChunksResponse:
|
|
240
|
-
index = await self._get_and_cache_vector_store_index(
|
|
357
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
241
358
|
if not index:
|
|
242
|
-
raise VectorStoreNotFoundError(
|
|
359
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
243
360
|
|
|
244
361
|
return await index.query_chunks(query, params)
|
|
245
362
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from llama_stack_api import Api, ProviderSpec
|
|
8
8
|
|
|
9
9
|
from .config import WeaviateVectorIOConfig
|
|
10
10
|
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from llama_stack.core.storage.datatypes import KVStoreReference
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@json_schema_type
|