llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (307) hide show
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  71. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  72. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  73. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  74. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  75. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  76. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  77. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  78. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  79. llama_stack/providers/registry/agents.py +1 -0
  80. llama_stack/providers/registry/inference.py +1 -9
  81. llama_stack/providers/registry/vector_io.py +136 -16
  82. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  83. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  84. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  85. llama_stack/providers/remote/files/s3/README.md +266 -0
  86. llama_stack/providers/remote/files/s3/config.py +5 -3
  87. llama_stack/providers/remote/files/s3/files.py +2 -2
  88. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  89. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  90. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  91. llama_stack/providers/remote/inference/together/together.py +4 -0
  92. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  93. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  94. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  95. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  96. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  97. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  98. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  99. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  100. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  101. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  102. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  103. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  104. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  105. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  106. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  107. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  108. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  109. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  110. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  111. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  112. llama_stack/providers/utils/bedrock/client.py +3 -3
  113. llama_stack/providers/utils/bedrock/config.py +7 -7
  114. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  115. llama_stack/providers/utils/inference/http_client.py +239 -0
  116. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  117. llama_stack/providers/utils/inference/model_registry.py +148 -2
  118. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  119. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  120. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  121. llama_stack/providers/utils/memory/vector_store.py +46 -19
  122. llama_stack/providers/utils/responses/responses_store.py +40 -6
  123. llama_stack/providers/utils/safety.py +114 -0
  124. llama_stack/providers/utils/tools/mcp.py +44 -3
  125. llama_stack/testing/api_recorder.py +9 -3
  126. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  127. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
  128. llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
  129. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  130. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  131. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  132. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  133. llama_stack/models/llama/hadamard_utils.py +0 -88
  134. llama_stack/models/llama/llama3/args.py +0 -74
  135. llama_stack/models/llama/llama3/generation.py +0 -378
  136. llama_stack/models/llama/llama3/model.py +0 -304
  137. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  138. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  139. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  140. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  141. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  142. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  143. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  144. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  145. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  146. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  147. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  148. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  149. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  150. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  151. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  152. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  153. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  154. llama_stack/models/llama/llama4/args.py +0 -107
  155. llama_stack/models/llama/llama4/ffn.py +0 -58
  156. llama_stack/models/llama/llama4/moe.py +0 -214
  157. llama_stack/models/llama/llama4/preprocess.py +0 -435
  158. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  159. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  160. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  161. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  162. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  163. llama_stack/models/llama/quantize_impls.py +0 -316
  164. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  165. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  166. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  167. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  168. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  169. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  170. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  171. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  172. llama_stack_api/__init__.py +0 -945
  173. llama_stack_api/admin/__init__.py +0 -45
  174. llama_stack_api/admin/api.py +0 -72
  175. llama_stack_api/admin/fastapi_routes.py +0 -117
  176. llama_stack_api/admin/models.py +0 -113
  177. llama_stack_api/agents.py +0 -173
  178. llama_stack_api/batches/__init__.py +0 -40
  179. llama_stack_api/batches/api.py +0 -53
  180. llama_stack_api/batches/fastapi_routes.py +0 -113
  181. llama_stack_api/batches/models.py +0 -78
  182. llama_stack_api/benchmarks/__init__.py +0 -43
  183. llama_stack_api/benchmarks/api.py +0 -39
  184. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  185. llama_stack_api/benchmarks/models.py +0 -109
  186. llama_stack_api/common/__init__.py +0 -5
  187. llama_stack_api/common/content_types.py +0 -101
  188. llama_stack_api/common/errors.py +0 -95
  189. llama_stack_api/common/job_types.py +0 -38
  190. llama_stack_api/common/responses.py +0 -77
  191. llama_stack_api/common/training_types.py +0 -47
  192. llama_stack_api/common/type_system.py +0 -146
  193. llama_stack_api/connectors.py +0 -146
  194. llama_stack_api/conversations.py +0 -270
  195. llama_stack_api/datasetio.py +0 -55
  196. llama_stack_api/datasets/__init__.py +0 -61
  197. llama_stack_api/datasets/api.py +0 -35
  198. llama_stack_api/datasets/fastapi_routes.py +0 -104
  199. llama_stack_api/datasets/models.py +0 -152
  200. llama_stack_api/datatypes.py +0 -373
  201. llama_stack_api/eval.py +0 -137
  202. llama_stack_api/file_processors/__init__.py +0 -27
  203. llama_stack_api/file_processors/api.py +0 -64
  204. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  205. llama_stack_api/file_processors/models.py +0 -42
  206. llama_stack_api/files/__init__.py +0 -35
  207. llama_stack_api/files/api.py +0 -51
  208. llama_stack_api/files/fastapi_routes.py +0 -124
  209. llama_stack_api/files/models.py +0 -107
  210. llama_stack_api/inference.py +0 -1169
  211. llama_stack_api/inspect_api/__init__.py +0 -37
  212. llama_stack_api/inspect_api/api.py +0 -25
  213. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  214. llama_stack_api/inspect_api/models.py +0 -28
  215. llama_stack_api/internal/kvstore.py +0 -28
  216. llama_stack_api/internal/sqlstore.py +0 -81
  217. llama_stack_api/llama_stack_api/__init__.py +0 -945
  218. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  219. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  220. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  221. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  222. llama_stack_api/llama_stack_api/agents.py +0 -173
  223. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  224. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  225. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  226. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  227. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  228. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  229. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  230. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  231. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  232. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  233. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  234. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  235. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  236. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  237. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  238. llama_stack_api/llama_stack_api/connectors.py +0 -146
  239. llama_stack_api/llama_stack_api/conversations.py +0 -270
  240. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  241. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  242. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  243. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  244. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  245. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  246. llama_stack_api/llama_stack_api/eval.py +0 -137
  247. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  248. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  249. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  250. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  251. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  252. llama_stack_api/llama_stack_api/files/api.py +0 -51
  253. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  254. llama_stack_api/llama_stack_api/files/models.py +0 -107
  255. llama_stack_api/llama_stack_api/inference.py +0 -1169
  256. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  257. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  258. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  259. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  260. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  261. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  262. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  263. llama_stack_api/llama_stack_api/models.py +0 -171
  264. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  265. llama_stack_api/llama_stack_api/post_training.py +0 -370
  266. llama_stack_api/llama_stack_api/prompts.py +0 -203
  267. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  268. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  269. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  270. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  271. llama_stack_api/llama_stack_api/py.typed +0 -0
  272. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  273. llama_stack_api/llama_stack_api/resource.py +0 -37
  274. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  275. llama_stack_api/llama_stack_api/safety.py +0 -132
  276. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  277. llama_stack_api/llama_stack_api/scoring.py +0 -93
  278. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  279. llama_stack_api/llama_stack_api/shields.py +0 -93
  280. llama_stack_api/llama_stack_api/tools.py +0 -226
  281. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  282. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  283. llama_stack_api/llama_stack_api/version.py +0 -9
  284. llama_stack_api/models.py +0 -171
  285. llama_stack_api/openai_responses.py +0 -1468
  286. llama_stack_api/post_training.py +0 -370
  287. llama_stack_api/prompts.py +0 -203
  288. llama_stack_api/providers/__init__.py +0 -33
  289. llama_stack_api/providers/api.py +0 -16
  290. llama_stack_api/providers/fastapi_routes.py +0 -57
  291. llama_stack_api/providers/models.py +0 -24
  292. llama_stack_api/py.typed +0 -0
  293. llama_stack_api/rag_tool.py +0 -168
  294. llama_stack_api/resource.py +0 -37
  295. llama_stack_api/router_utils.py +0 -160
  296. llama_stack_api/safety.py +0 -132
  297. llama_stack_api/schema_utils.py +0 -208
  298. llama_stack_api/scoring.py +0 -93
  299. llama_stack_api/scoring_functions.py +0 -211
  300. llama_stack_api/shields.py +0 -93
  301. llama_stack_api/tools.py +0 -226
  302. llama_stack_api/vector_io.py +0 -941
  303. llama_stack_api/vector_stores.py +0 -53
  304. llama_stack_api/version.py +0 -9
  305. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  306. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  307. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,463 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Any
8
+
9
+ from elasticsearch import ApiError, AsyncElasticsearch
10
+ from elasticsearch.helpers import async_bulk
11
+ from numpy.typing import NDArray
12
+
13
+ from llama_stack.core.storage.kvstore import kvstore_impl
14
+ from llama_stack.log import get_logger
15
+ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
16
+ from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
17
+ from llama_stack_api import (
18
+ EmbeddedChunk,
19
+ Files,
20
+ Inference,
21
+ InterleavedContent,
22
+ QueryChunksResponse,
23
+ VectorIO,
24
+ VectorStore,
25
+ VectorStoreNotFoundError,
26
+ VectorStoresProtocolPrivate,
27
+ )
28
+
29
+ from .config import ElasticsearchVectorIOConfig
30
+
31
+ log = get_logger(name=__name__, category="vector_io::elasticsearch")
32
+
33
# Prefixes that namespace this provider's records in the shared KV store.
# All keys are versioned so a future schema change can bump VERSION without
# colliding with records written by older releases.
VERSION = "v3"
_PREFIX_TEMPLATE = "%s:elasticsearch:" + VERSION + "::"
VECTOR_DBS_PREFIX = _PREFIX_TEMPLATE % "vector_stores"
VECTOR_INDEX_PREFIX = _PREFIX_TEMPLATE % "vector_index"
OPENAI_VECTOR_STORES_PREFIX = _PREFIX_TEMPLATE % "openai_vector_stores"
OPENAI_VECTOR_STORES_FILES_PREFIX = _PREFIX_TEMPLATE % "openai_vector_stores_files"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = _PREFIX_TEMPLATE % "openai_vector_stores_files_contents"
40
+
41
+
42
class ElasticsearchIndex(EmbeddingIndex):
    """EmbeddingIndex backed by a single Elasticsearch index.

    One logical vector-store collection maps to one Elasticsearch index named
    ``collection_name``. Chunks are stored as documents whose ``_id`` is the
    chunk id. Queries are served via kNN (vector), ``match`` (keyword), or the
    ``rrf``/``linear`` retrievers (hybrid).
    """

    def __init__(self, client: AsyncElasticsearch, collection_name: str):
        self.client = client
        self.collection_name = collection_name

    async def _is_rerank_linear_param_valid(self, value: dict) -> bool:
        """Return True when *value* already matches the linear-retriever shape.

        Expected structure::

            {
                "retrievers": {
                    "standard": {"weight": <float>},
                    "knn": {"weight": <float>},
                }
            }
        """
        try:
            retrievers = value.get("retrievers", {})
            return (
                isinstance(retrievers.get("standard"), dict)
                and isinstance(retrievers.get("knn"), dict)
                and "weight" in retrievers["standard"]
                and "weight" in retrievers["knn"]
            )
        except (AttributeError, TypeError):
            # value (or a nested member) was not dict-like.
            return False

    def _convert_to_linear_params(self, reranker_params: dict[str, Any]) -> dict[str, Any] | None:
        """Translate generic hybrid-search weights into linear-retriever parameters.

        Accepts either ``{"weights": {"vector": v, "keyword": k}}`` or
        ``{"alpha": a}`` (vector weight ``a``, keyword weight ``1 - a``).
        Weights that do not sum to 1 are renormalized. Returns the
        ``{"retrievers": ...}`` structure Elasticsearch expects, or ``None``
        when neither form is present or usable.
        """
        weights = reranker_params.get("weights")
        alpha = reranker_params.get("alpha")
        if weights is not None:
            vector_weight = weights.get("vector")
            keyword_weight = weights.get("keyword")
            if vector_weight is None or keyword_weight is None:
                log.warning("Elasticsearch linear retriever requires 'vector' and 'keyword' weights; ignoring weights.")
                return None
            total = vector_weight + keyword_weight
            if total == 0:
                log.warning(
                    "Elasticsearch linear retriever weights for 'vector' and 'keyword' sum to 0; ignoring weights."
                )
                return None
            if abs(total - 1.0) > 0.001:
                # Elasticsearch expects normalized weights; renormalize rather than reject.
                log.warning(
                    "Elasticsearch linear retriever uses normalized vector/keyword weights; "
                    "renormalizing provided weights."
                )
                vector_weight /= total
                keyword_weight /= total
        elif alpha is not None:
            vector_weight = alpha
            keyword_weight = 1 - alpha
        else:
            return None

        return {
            "retrievers": {
                "standard": {"weight": keyword_weight},
                "knn": {"weight": vector_weight},
            }
        }

    async def initialize(self) -> None:
        """No-op: the Elasticsearch index is created on demand in add_chunks."""
        pass

    async def add_chunks(self, chunks: list[EmbeddedChunk]):
        """Add (or overwrite) *chunks* in the Elasticsearch index.

        Creates the index with an explicit mapping on first use; the dense
        vector dimension is taken from the first chunk's embedding.

        Raises on index-creation errors (other than "already exists") and on
        bulk-transport failures; per-document indexing failures are only
        logged as a warning because raise_on_error=False.
        """
        if not chunks:
            return

        try:
            await self.client.indices.create(
                index=self.collection_name,
                body={
                    "mappings": {
                        "properties": {
                            "content": {"type": "text"},
                            "chunk_id": {"type": "keyword"},
                            "metadata": {"type": "object"},
                            "chunk_metadata": {"type": "object"},
                            "embedding": {"type": "dense_vector", "dims": len(chunks[0].embedding)},
                            "embedding_dimension": {"type": "integer"},
                            "embedding_model": {"type": "keyword"},
                        }
                    }
                },
            )
        except ApiError as e:
            # A concurrent (or earlier) creation of the same index is fine;
            # anything else is a genuine failure.
            if e.status_code != 400 or "resource_already_exists_exception" not in e.message:
                log.error(f"Error creating Elasticsearch index {self.collection_name}: {e}")
                raise

        actions = []
        for chunk in chunks:
            actions.append(
                {
                    "_op_type": "index",
                    "_index": self.collection_name,
                    "_id": chunk.chunk_id,
                    "_source": chunk.model_dump(
                        exclude_none=True,
                        include={
                            "content",
                            "chunk_id",
                            "metadata",
                            "chunk_metadata",
                            "embedding",
                            "embedding_dimension",
                            "embedding_model",
                        },
                    ),
                }
            )

        try:
            successful_count, error_count = await async_bulk(
                client=self.client, actions=actions, timeout="300s", refresh=True, raise_on_error=False, stats_only=True
            )
            if error_count > 0:
                log.warning(
                    f"{error_count} out of {len(chunks)} documents failed to upload in Elasticsearch index {self.collection_name}"
                )

            log.info(f"Successfully added {successful_count} chunks to Elasticsearch index {self.collection_name}")
        except Exception as e:
            log.error(f"Error adding chunks to Elasticsearch index {self.collection_name}: {e}")
            raise

    async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Remove the given chunks from the Elasticsearch index.

        NOTE(review): raise_on_error=True here (unlike add_chunks), so a
        failed delete raises from async_bulk and the error_count branch is
        effectively a belt-and-suspenders check — confirm this asymmetry is
        intentional.
        """

        actions = []
        for chunk in chunks_for_deletion:
            actions.append({"_op_type": "delete", "_index": self.collection_name, "_id": chunk.chunk_id})

        try:
            successful_count, error_count = await async_bulk(
                client=self.client, actions=actions, timeout="300s", refresh=True, raise_on_error=True, stats_only=True
            )
            if error_count > 0:
                log.warning(
                    f"{error_count} out of {len(chunks_for_deletion)} documents failed to be deleted in Elasticsearch index {self.collection_name}"
                )

            log.info(f"Successfully deleted {successful_count} chunks from Elasticsearch index {self.collection_name}")
        except Exception as e:
            log.error(f"Error deleting chunks from Elasticsearch index {self.collection_name}: {e}")
            raise

    async def _results_to_chunks(self, results: dict) -> QueryChunksResponse:
        """Convert an Elasticsearch search response into a QueryChunksResponse.

        Hits that cannot be parsed into an EmbeddedChunk are logged and
        skipped rather than failing the whole query.
        """

        chunks, scores = [], []
        for result in results.get("hits", {}).get("hits", []):
            try:
                source = result.get("_source", {})
                chunk = EmbeddedChunk(
                    content=source.get("content"),
                    chunk_id=result.get("_id"),
                    embedding=source.get("embedding", []),
                    embedding_dimension=source.get("embedding_dimension", len(source.get("embedding", []))),
                    embedding_model=source.get("embedding_model", "unknown"),
                    chunk_metadata=source.get("chunk_metadata", {}),
                    metadata=source.get("metadata", {}),
                )
            except Exception:
                log.exception("Failed to parse chunk")
                continue

            chunks.append(chunk)
            scores.append(result.get("_score"))

        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
        """Vector search using kNN.

        :param embedding: query embedding (converted to a plain list for the API).
        :param k: maximum number of hits to return.
        :param score_threshold: minimum Elasticsearch score for a hit to be kept.
        """

        try:
            results = await self.client.search(
                index=self.collection_name,
                query={"knn": {"field": "embedding", "query_vector": embedding.tolist(), "k": k}},
                min_score=score_threshold,
                size=k,
                source={"exclude_vectors": False},  # Retrieve the embedding
                ignore_unavailable=True,  # In case the index does not exist
            )
        except Exception as e:
            log.error(f"Error performing vector query on Elasticsearch index {self.collection_name}: {e}")
            raise

        return await self._results_to_chunks(results)

    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
        """Keyword (full-text) search using a match query on the content field."""

        try:
            results = await self.client.search(
                index=self.collection_name,
                query={"match": {"content": {"query": query_string}}},
                min_score=score_threshold,
                size=k,
                source={"exclude_vectors": False},  # Retrieve the embedding
                ignore_unavailable=True,  # In case the index does not exist
            )
        except Exception as e:
            log.error(f"Error performing keyword query on Elasticsearch index {self.collection_name}: {e}")
            raise

        return await self._results_to_chunks(results)

    async def query_hybrid(
        self,
        embedding: NDArray,
        query_string: str,
        k: int,
        score_threshold: float,
        reranker_type: str,
        reranker_params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """Hybrid (keyword + vector) search via an Elasticsearch retriever.

        :param reranker_type: "rrf" or "linear". "weighted" is mapped to
            "linear"; any other value falls back to "rrf" with a warning.
        :param reranker_params: optional retriever parameters — e.g.
            ``rank_constant``/``rank_window_size``/``filter`` for rrf, or
            per-retriever weights for linear (both the native Elasticsearch
            shape and the generic ``weights``/``alpha`` forms are accepted).
        """
        supported_retrievers = ["rrf", "linear"]
        if reranker_type == "weighted":
            log.warning("Elasticsearch does not support 'weighted' reranker; using 'linear' retriever instead.")
            reranker_type = "linear"
        if reranker_type not in supported_retrievers:
            log.warning(
                f"Unsupported reranker type: {reranker_type}. Supported types are: {supported_retrievers}. "
                "Falling back to 'rrf'."
            )
            reranker_type = "rrf"

        retriever = {
            reranker_type: {
                "retrievers": [
                    {"retriever": {"standard": {"query": {"match": {"content": query_string}}}}},
                    {
                        "retriever": {
                            "knn": {
                                "field": "embedding",
                                "query_vector": embedding.tolist(),
                                "k": k,
                                "num_candidates": k,
                            }
                        }
                    },
                ]
            }
        }
        # Elasticsearch requires rank_window_size >= size for rrf/linear retrievers.
        retriever[reranker_type]["rank_window_size"] = k

        # Add reranker parameters if provided for RRF (e.g. rank_constant, rank_window_size, filter)
        # see https://www.elastic.co/docs/reference/elasticsearch/rest-apis/retrievers/rrf-retriever
        if reranker_type == "rrf" and reranker_params is not None:
            # FIX: the allowed set must use the real API name 'rank_window_size';
            # previously it contained the misspelling 'rank_windows_size', so the
            # normalized key below was always stripped as "unsupported".
            allowed_rrf_params = {"rank_constant", "rank_window_size", "filter"}
            rrf_params = dict(reranker_params)
            if "impact_factor" in rrf_params:
                if "rank_constant" not in rrf_params:
                    rrf_params["rank_constant"] = rrf_params.pop("impact_factor")
                    log.warning("Elasticsearch RRF does not support impact_factor; mapping to rank_constant.")
                else:
                    rrf_params.pop("impact_factor")
                    log.warning("Elasticsearch RRF ignores impact_factor when rank_constant is provided.")
            # Normalize the common 'rank_windows_size' misspelling to the real parameter name.
            if "rank_window_size" not in rrf_params and "rank_windows_size" in rrf_params:
                rrf_params["rank_window_size"] = rrf_params.pop("rank_windows_size")
            extra_keys = set(rrf_params.keys()) - allowed_rrf_params
            if extra_keys:
                log.warning(f"Ignoring unsupported RRF parameters for Elasticsearch: {extra_keys}")
                for key in extra_keys:
                    rrf_params.pop(key, None)
            if rrf_params:
                retriever["rrf"].update(rrf_params)
        elif reranker_type == "linear" and reranker_params is not None:
            # Add reranker parameters (i.e. weights) for linear
            # see https://www.elastic.co/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever
            if await self._is_rerank_linear_param_valid(reranker_params) is False:
                converted_params = self._convert_to_linear_params(reranker_params)
                if converted_params is None:
                    log.warning(
                        "Invalid linear reranker parameters for Elasticsearch; "
                        'expected {"retrievers": {"standard": {"weight": float}, "knn": {"weight": float}}}. '
                        "Ignoring provided parameters."
                    )
                else:
                    reranker_params = converted_params
            try:
                if await self._is_rerank_linear_param_valid(reranker_params):
                    retriever["linear"]["retrievers"][0].update(reranker_params["retrievers"]["standard"])
                    retriever["linear"]["retrievers"][1].update(reranker_params["retrievers"]["knn"])
            except Exception as e:
                log.error(f"Error updating linear retrievers parameters: {e}")
                raise
        # NOTE: a former branch handled 'weighted' with no params by calling
        # _convert_to_linear_params({}); that always returns None, so the
        # branch was dead code and has been removed.
        try:
            results = await self.client.search(
                index=self.collection_name,
                size=k,
                retriever=retriever,
                min_score=score_threshold,
                source={"exclude_vectors": False},  # Retrieve the embedding
                ignore_unavailable=True,  # In case the index does not exist
            )
        except Exception as e:
            log.error(f"Error performing hybrid query on Elasticsearch index {self.collection_name}: {e}")
            raise

        return await self._results_to_chunks(results)

    async def delete(self):
        """Delete the entire Elasticsearch index with collection_name."""

        try:
            await self.client.indices.delete(index=self.collection_name, ignore_unavailable=True)
        except Exception as e:
            log.error(f"Error deleting Elasticsearch index {self.collection_name}: {e}")
            raise
362
+
363
+
364
class ElasticsearchVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    """VectorIO provider backed by Elasticsearch.

    Maintains an in-memory cache of ``VectorStoreWithIndex`` wrappers keyed by
    vector-store identifier, and persists vector-store registrations in a KV
    store under ``VECTOR_DBS_PREFIX`` so they survive restarts.
    """

    def __init__(
        self,
        config: ElasticsearchVectorIOConfig,
        inference_api: Inference,
        files_api: Files | None = None,
    ) -> None:
        # kvstore is created lazily in initialize() from config.persistence.
        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
        self.config = config
        # Set in initialize(); None until then.
        self.client: AsyncElasticsearch | None = None
        # Cache of vector_store_id -> VectorStoreWithIndex.
        self.cache: dict[str, VectorStoreWithIndex] = {}
        # Optional lookup table for stores not yet cached; may stay None
        # (see _get_and_cache_vector_store_index).
        self.vector_store_table = None
        self.metadata_collection_name = "openai_vector_stores_metadata"

    async def initialize(self) -> None:
        """Connect to Elasticsearch and rebuild the index cache from the KV store."""
        self.client = AsyncElasticsearch(hosts=self.config.elasticsearch_url, api_key=self.config.elasticsearch_api_key)
        self.kvstore = await kvstore_impl(self.config.persistence)

        # "\xff" sorts after any printable suffix, so this range scan returns
        # every key stored under VECTOR_DBS_PREFIX.
        start_key = VECTOR_DBS_PREFIX
        end_key = f"{VECTOR_DBS_PREFIX}\xff"
        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)

        # Re-hydrate one VectorStoreWithIndex per persisted registration.
        for vector_store_data in stored_vector_stores:
            vector_store = VectorStore.model_validate_json(vector_store_data)
            index = VectorStoreWithIndex(
                vector_store, ElasticsearchIndex(self.client, vector_store.identifier), self.inference_api
            )
            self.cache[vector_store.identifier] = index
        self.openai_vector_stores = await self._load_openai_vector_stores()

    async def shutdown(self) -> None:
        """Close the Elasticsearch client and release mixin-held resources."""
        await self.client.close()
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        """Persist a vector-store registration and cache its index wrapper."""
        assert self.kvstore is not None
        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())

        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=ElasticsearchIndex(self.client, vector_store.identifier),
            inference_api=self.inference_api,
        )

        self.cache[vector_store.identifier] = index

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        """Drop the backing Elasticsearch index (if cached) and forget the registration."""
        if vector_store_id in self.cache:
            await self.cache[vector_store_id].index.delete()
            del self.cache[vector_store_id]

        # Delete the persisted registration regardless of cache state.
        assert self.kvstore is not None
        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
        """Return the cached index wrapper, falling back to vector_store_table lookup.

        Raises:
            ValueError: if no lookup table is available.
            VectorStoreNotFoundError: if the table has no entry for the id.
        """
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

        if self.vector_store_table is None:
            raise ValueError(f"Vector DB not found {vector_store_id}")

        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
        if not vector_store:
            raise VectorStoreNotFoundError(vector_store_id)

        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=ElasticsearchIndex(client=self.client, collection_name=vector_store.identifier),
            inference_api=self.inference_api,
        )
        self.cache[vector_store_id] = index
        return index

    async def insert_chunks(
        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
    ) -> None:
        """Insert embedded chunks into the given vector store.

        NOTE(review): ttl_seconds is accepted but not forwarded to the index —
        presumably unsupported by this backend; confirm against the VectorIO contract.
        """
        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
            raise VectorStoreNotFoundError(vector_store_id)

        await index.insert_chunks(chunks)

    async def query_chunks(
        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
        """Query chunks from the given vector store; params are passed through to the index."""
        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
            raise VectorStoreNotFoundError(vector_store_id)

        return await index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from an Elasticsearch vector store."""
        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise ValueError(f"Vector DB {store_id} not found")

        await index.index.delete_chunks(chunks_for_deletion)
@@ -0,0 +1,22 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from llama_stack.providers.remote.vector_io.oci.config import OCI26aiVectorIOConfig
8
+ from llama_stack_api import Api, ProviderSpec
9
+
10
+
11
async def get_adapter_impl(config: OCI26aiVectorIOConfig, deps: dict[Api, ProviderSpec]):
    """Build and initialize the OCI 26ai VectorIO adapter.

    Args:
        config: Provider configuration; must be an ``OCI26aiVectorIOConfig``.
        deps: Resolved provider dependencies keyed by ``Api``. ``Api.inference``
            is required; ``Api.files`` is optional.

    Returns:
        An initialized ``OCI26aiVectorIOAdapter`` instance.
    """
    # Imports are deferred so the adapter module (and its driver dependencies)
    # is only loaded when this provider is actually instantiated.
    from typing import cast

    from llama_stack.providers.remote.vector_io.oci.oci26ai import OCI26aiVectorIOAdapter
    from llama_stack_api import Files, Inference

    assert isinstance(config, OCI26aiVectorIOConfig), f"Unexpected config type: {type(config)}"
    inference_api = cast(Inference, deps[Api.inference])
    # Files API is optional; the adapter tolerates None.
    files_api = cast(Files | None, deps.get(Api.files))
    impl = OCI26aiVectorIOAdapter(config, inference_api, files_api)
    await impl.initialize()
    return impl
@@ -0,0 +1,41 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Any
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+ from llama_stack.core.storage.datatypes import KVStoreReference
12
+ from llama_stack_api import json_schema_type
13
+
14
+
15
@json_schema_type
class OCI26aiVectorIOConfig(BaseModel):
    """Configuration for the OCI 26ai VectorIO provider.

    Connection credentials and wallet locations for the 26ai service, plus the
    KV-store reference used for provider persistence.
    """

    conn_str: str = Field(description="Connection string for the given 26ai Service")
    # Typo fixes below: "Username name" -> "Username", "tsnanames.ora" -> "tnsnames.ora"
    # (these descriptions surface in the generated JSON schema / docs).
    user: str = Field(description="Username to connect to the service")
    password: str = Field(description="Password to connect to the service")
    tnsnames_loc: str = Field(description="Directory location of the tnsnames.ora file")
    ewallet_pem_loc: str = Field(description="Directory location of the ewallet.pem file")
    ewallet_password: str = Field(description="Password for the ewallet.pem file")
    persistence: KVStoreReference = Field(description="Config for KV store backend")
    consistency_level: str = Field(description="The consistency level of the OCI26ai server", default="Strong")
    vector_datatype: str = Field(description="Vector datatype for embeddings", default="FLOAT32")

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        """Return a sample run configuration with values sourced from environment variables.

        ``consistency_level`` is intentionally omitted so its model default
        ("Strong") applies.
        """
        return {
            "conn_str": "${env.OCI26AI_CONNECTION_STRING}",
            "user": "${env.OCI26AI_USER}",
            "password": "${env.OCI26AI_PASSWORD}",
            "tnsnames_loc": "${env.OCI26AI_TNSNAMES_LOC}",
            "ewallet_pem_loc": "${env.OCI26AI_EWALLET_PEM_LOC}",
            "ewallet_password": "${env.OCI26AI_EWALLET_PWD}",
            "vector_datatype": "${env.OCI26AI_VECTOR_DATATYPE:=FLOAT32}",
            "persistence": KVStoreReference(
                backend="kv_default",
                namespace="vector_io::oci26ai",
            ).model_dump(exclude_none=True),
        }