llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  71. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  72. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  73. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  74. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  75. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  76. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  77. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  78. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  79. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  80. llama_stack/providers/registry/agents.py +1 -0
  81. llama_stack/providers/registry/inference.py +1 -9
  82. llama_stack/providers/registry/vector_io.py +136 -16
  83. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  84. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  85. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  86. llama_stack/providers/remote/files/s3/README.md +266 -0
  87. llama_stack/providers/remote/files/s3/config.py +5 -3
  88. llama_stack/providers/remote/files/s3/files.py +2 -2
  89. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  90. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  91. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  92. llama_stack/providers/remote/inference/together/together.py +4 -0
  93. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  94. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  95. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  96. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  97. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  98. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  99. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  100. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  101. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  102. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  103. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  104. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  105. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  106. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  107. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  108. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  109. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  110. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  111. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  112. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  113. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  114. llama_stack/providers/utils/bedrock/client.py +3 -3
  115. llama_stack/providers/utils/bedrock/config.py +7 -7
  116. llama_stack/providers/utils/inference/__init__.py +0 -25
  117. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  118. llama_stack/providers/utils/inference/http_client.py +239 -0
  119. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  120. llama_stack/providers/utils/inference/model_registry.py +148 -2
  121. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  122. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  123. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  124. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  125. llama_stack/providers/utils/memory/vector_store.py +46 -19
  126. llama_stack/providers/utils/responses/responses_store.py +40 -6
  127. llama_stack/providers/utils/safety.py +114 -0
  128. llama_stack/providers/utils/tools/mcp.py +44 -3
  129. llama_stack/testing/api_recorder.py +9 -3
  130. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  131. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
  132. llama_stack-0.5.0.dist-info/top_level.txt +1 -0
  133. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  134. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  135. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  136. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  137. llama_stack/models/llama/hadamard_utils.py +0 -88
  138. llama_stack/models/llama/llama3/args.py +0 -74
  139. llama_stack/models/llama/llama3/generation.py +0 -378
  140. llama_stack/models/llama/llama3/model.py +0 -304
  141. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  142. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  143. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  144. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  145. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  146. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  147. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  148. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  149. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  150. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  151. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  152. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  153. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  154. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  155. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  156. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  157. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  158. llama_stack/models/llama/llama4/args.py +0 -107
  159. llama_stack/models/llama/llama4/ffn.py +0 -58
  160. llama_stack/models/llama/llama4/moe.py +0 -214
  161. llama_stack/models/llama/llama4/preprocess.py +0 -435
  162. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  163. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  164. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  165. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  166. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  167. llama_stack/models/llama/quantize_impls.py +0 -316
  168. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  169. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  170. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  171. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  172. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  173. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  174. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  175. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  176. llama_stack_api/__init__.py +0 -945
  177. llama_stack_api/admin/__init__.py +0 -45
  178. llama_stack_api/admin/api.py +0 -72
  179. llama_stack_api/admin/fastapi_routes.py +0 -117
  180. llama_stack_api/admin/models.py +0 -113
  181. llama_stack_api/agents.py +0 -173
  182. llama_stack_api/batches/__init__.py +0 -40
  183. llama_stack_api/batches/api.py +0 -53
  184. llama_stack_api/batches/fastapi_routes.py +0 -113
  185. llama_stack_api/batches/models.py +0 -78
  186. llama_stack_api/benchmarks/__init__.py +0 -43
  187. llama_stack_api/benchmarks/api.py +0 -39
  188. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  189. llama_stack_api/benchmarks/models.py +0 -109
  190. llama_stack_api/common/__init__.py +0 -5
  191. llama_stack_api/common/content_types.py +0 -101
  192. llama_stack_api/common/errors.py +0 -95
  193. llama_stack_api/common/job_types.py +0 -38
  194. llama_stack_api/common/responses.py +0 -77
  195. llama_stack_api/common/training_types.py +0 -47
  196. llama_stack_api/common/type_system.py +0 -146
  197. llama_stack_api/connectors.py +0 -146
  198. llama_stack_api/conversations.py +0 -270
  199. llama_stack_api/datasetio.py +0 -55
  200. llama_stack_api/datasets/__init__.py +0 -61
  201. llama_stack_api/datasets/api.py +0 -35
  202. llama_stack_api/datasets/fastapi_routes.py +0 -104
  203. llama_stack_api/datasets/models.py +0 -152
  204. llama_stack_api/datatypes.py +0 -373
  205. llama_stack_api/eval.py +0 -137
  206. llama_stack_api/file_processors/__init__.py +0 -27
  207. llama_stack_api/file_processors/api.py +0 -64
  208. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  209. llama_stack_api/file_processors/models.py +0 -42
  210. llama_stack_api/files/__init__.py +0 -35
  211. llama_stack_api/files/api.py +0 -51
  212. llama_stack_api/files/fastapi_routes.py +0 -124
  213. llama_stack_api/files/models.py +0 -107
  214. llama_stack_api/inference.py +0 -1169
  215. llama_stack_api/inspect_api/__init__.py +0 -37
  216. llama_stack_api/inspect_api/api.py +0 -25
  217. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  218. llama_stack_api/inspect_api/models.py +0 -28
  219. llama_stack_api/internal/kvstore.py +0 -28
  220. llama_stack_api/internal/sqlstore.py +0 -81
  221. llama_stack_api/llama_stack_api/__init__.py +0 -945
  222. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  223. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  224. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  225. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  226. llama_stack_api/llama_stack_api/agents.py +0 -173
  227. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  228. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  229. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  230. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  231. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  232. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  233. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  234. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  235. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  236. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  237. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  238. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  239. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  240. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  241. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  242. llama_stack_api/llama_stack_api/connectors.py +0 -146
  243. llama_stack_api/llama_stack_api/conversations.py +0 -270
  244. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  245. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  246. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  247. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  248. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  249. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  250. llama_stack_api/llama_stack_api/eval.py +0 -137
  251. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  252. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  253. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  254. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  255. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  256. llama_stack_api/llama_stack_api/files/api.py +0 -51
  257. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  258. llama_stack_api/llama_stack_api/files/models.py +0 -107
  259. llama_stack_api/llama_stack_api/inference.py +0 -1169
  260. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  261. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  262. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  263. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  264. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  265. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  266. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  267. llama_stack_api/llama_stack_api/models.py +0 -171
  268. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  269. llama_stack_api/llama_stack_api/post_training.py +0 -370
  270. llama_stack_api/llama_stack_api/prompts.py +0 -203
  271. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  272. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  273. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  274. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  275. llama_stack_api/llama_stack_api/py.typed +0 -0
  276. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  277. llama_stack_api/llama_stack_api/resource.py +0 -37
  278. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  279. llama_stack_api/llama_stack_api/safety.py +0 -132
  280. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  281. llama_stack_api/llama_stack_api/scoring.py +0 -93
  282. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  283. llama_stack_api/llama_stack_api/shields.py +0 -93
  284. llama_stack_api/llama_stack_api/tools.py +0 -226
  285. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  286. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  287. llama_stack_api/llama_stack_api/version.py +0 -9
  288. llama_stack_api/models.py +0 -171
  289. llama_stack_api/openai_responses.py +0 -1468
  290. llama_stack_api/post_training.py +0 -370
  291. llama_stack_api/prompts.py +0 -203
  292. llama_stack_api/providers/__init__.py +0 -33
  293. llama_stack_api/providers/api.py +0 -16
  294. llama_stack_api/providers/fastapi_routes.py +0 -57
  295. llama_stack_api/providers/models.py +0 -24
  296. llama_stack_api/py.typed +0 -0
  297. llama_stack_api/rag_tool.py +0 -168
  298. llama_stack_api/resource.py +0 -37
  299. llama_stack_api/router_utils.py +0 -160
  300. llama_stack_api/safety.py +0 -132
  301. llama_stack_api/schema_utils.py +0 -208
  302. llama_stack_api/scoring.py +0 -93
  303. llama_stack_api/scoring_functions.py +0 -211
  304. llama_stack_api/shields.py +0 -93
  305. llama_stack_api/tools.py +0 -226
  306. llama_stack_api/vector_io.py +0 -941
  307. llama_stack_api/vector_stores.py +0 -53
  308. llama_stack_api/version.py +0 -9
  309. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  310. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  311. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,595 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ import heapq
8
+ import json
9
+ from array import array
10
+ from typing import Any
11
+
12
+ import numpy as np
13
+ import oracledb
14
+ from numpy.typing import NDArray
15
+
16
+ from llama_stack.core.storage.kvstore import kvstore_impl
17
+ from llama_stack.log import get_logger
18
+ from llama_stack.providers.remote.vector_io.oci.config import OCI26aiVectorIOConfig
19
+ from llama_stack.providers.utils.memory.openai_vector_store_mixin import VERSION as OPENAIMIXINVERSION
20
+ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
21
+ from llama_stack.providers.utils.memory.vector_store import (
22
+ ChunkForDeletion,
23
+ EmbeddingIndex,
24
+ VectorStoreWithIndex,
25
+ )
26
+ from llama_stack.providers.utils.vector_io.vector_utils import (
27
+ WeightedInMemoryAggregator,
28
+ sanitize_collection_name,
29
+ )
30
+ from llama_stack_api import (
31
+ EmbeddedChunk,
32
+ Files,
33
+ Inference,
34
+ InterleavedContent,
35
+ QueryChunksResponse,
36
+ VectorIO,
37
+ VectorStore,
38
+ VectorStoreNotFoundError,
39
+ VectorStoresProtocolPrivate,
40
+ )
41
+ from llama_stack_api.internal.kvstore import KVStore
42
+
43
# Scoped logger for this provider module.
logger = get_logger(name=__name__, category="vector_io::oci26ai")

# Version tag for the KV namespaces owned by this provider.
VERSION = "v1"
# KV prefixes under which vector-store and vector-index metadata are persisted.
VECTOR_DBS_PREFIX = f"vector_stores:oci26ai:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:oci26ai:{VERSION}::"
# OpenAI-compatible store records are versioned by the mixin's own VERSION.
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:oci26ai:{OPENAIMIXINVERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:oci26ai:{OPENAIMIXINVERSION}::"
# NOTE(review): file *contents* use the local VERSION rather than
# OPENAIMIXINVERSION — looks intentional, but verify against the mixin.
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:oci26ai:{VERSION}::"
51
+
52
+
53
def normalize_embedding(embedding: NDArray) -> NDArray:
    """Normalize an embedding vector to unit length (L2 norm).

    Unit-length vectors are required for cosine similarity to behave
    correctly.

    Args:
        embedding: The raw embedding (any numeric array-like).

    Returns:
        A float64 numpy array whose L2 norm equals 1.

    Raises:
        ValueError: If ``embedding`` is None or has zero norm.
    """
    if embedding is None:
        raise ValueError("Embedding cannot be None")

    emb = np.asarray(embedding, dtype=np.float64)

    norm = np.linalg.norm(emb)
    if norm == 0.0:
        raise ValueError("Cannot normalize zero-length vector")

    return emb / norm
69
+
70
+
71
+ class OCI26aiIndex(EmbeddingIndex):
72
+ def __init__(
73
+ self,
74
+ connection,
75
+ vector_store: VectorStore,
76
+ consistency_level="Strong",
77
+ kvstore: KVStore | None = None,
78
+ vector_datatype: str = "FLOAT32",
79
+ ):
80
+ self.connection = connection
81
+ self.vector_store = vector_store
82
+ self.table_name = sanitize_collection_name(vector_store.vector_store_id)
83
+ self.dimensions = vector_store.embedding_dimension
84
+ self.consistency_level = consistency_level
85
+ self.kvstore = kvstore
86
+ self.vector_datatype = vector_datatype
87
+
88
+ async def initialize(self) -> None:
89
+ logger.info(f"Attempting to create table: {self.table_name}")
90
+ cursor = self.connection.cursor()
91
+ try:
92
+ # Create table
93
+ create_table_sql = f"""
94
+ CREATE TABLE IF NOT EXISTS {self.table_name} (
95
+ chunk_id VARCHAR2(100) PRIMARY KEY,
96
+ content CLOB,
97
+ vector VECTOR({self.dimensions}, {self.vector_datatype}),
98
+ metadata JSON,
99
+ chunk_metadata JSON
100
+ );
101
+ """
102
+ logger.debug(f"Executing SQL: {create_table_sql}")
103
+ cursor.execute(create_table_sql)
104
+ logger.info(f"Table {self.table_name} created successfully")
105
+
106
+ await self.create_indexes()
107
+ finally:
108
+ cursor.close()
109
+
110
+ async def index_exists(self, index_name: str) -> bool:
111
+ cursor = self.connection.cursor()
112
+ try:
113
+ cursor.execute(
114
+ """
115
+ SELECT COUNT(*)
116
+ FROM USER_INDEXES
117
+ WHERE INDEX_NAME = :index_name
118
+ """,
119
+ index_name=index_name.upper(),
120
+ )
121
+ (count,) = cursor.fetchone()
122
+ return bool(count > 0)
123
+ finally:
124
+ cursor.close()
125
+
126
+ async def create_indexes(self):
127
+ indexes = [
128
+ {
129
+ "name": f"{self.table_name}_content_idx",
130
+ "sql": f"""
131
+ CREATE INDEX {self.table_name}_CONTENT_IDX
132
+ ON {self.table_name}(content)
133
+ INDEXTYPE IS CTXSYS.CONTEXT
134
+ PARAMETERS ('SYNC (EVERY "FREQ=SECONDLY;INTERVAL=5")');
135
+ """,
136
+ },
137
+ {
138
+ "name": f"{self.table_name}_vector_ivf_idx",
139
+ "sql": f"""
140
+ CREATE VECTOR INDEX {self.table_name}_vector_ivf_idx
141
+ ON {self.table_name}(vector)
142
+ ORGANIZATION NEIGHBOR PARTITIONS
143
+ DISTANCE COSINE
144
+ WITH TARGET ACCURACY 95
145
+ """,
146
+ },
147
+ ]
148
+
149
+ for idx in indexes:
150
+ if not await self.index_exists(idx["name"]):
151
+ logger.info(f"Creating index: {idx['name']}")
152
+ cursor = self.connection.cursor()
153
+ try:
154
+ cursor.execute(idx["sql"])
155
+ logger.info(f"Index {idx['name']} created successfully")
156
+ finally:
157
+ cursor.close()
158
+ else:
159
+ logger.info(f"Index {idx['name']} already exists, skipping")
160
+
161
    async def add_chunks(self, embedded_chunks: list[EmbeddedChunk]):
        """Upsert embedded chunks into the Oracle table via a single MERGE.

        Each chunk's embedding is L2-normalized and packed into an
        ``array.array`` matching the table's vector datatype; metadata dicts
        are serialized to JSON strings. Existing rows (matched on chunk_id)
        are updated, new rows inserted, in one ``executemany`` round trip.

        Args:
            embedded_chunks: Chunks with embeddings to persist.

        Raises:
            Exception: Re-raised after logging if the MERGE fails.
        """
        # 'd' = float64 C doubles, 'f' = float32 C floats, to match the column.
        array_type = "d" if self.vector_datatype == "FLOAT64" else "f"
        data = []
        for chunk in embedded_chunks:
            chunk_step = chunk.model_dump()
            data.append(
                {
                    "chunk_id": chunk.chunk_id,
                    "content": chunk.content,
                    # Normalize so stored vectors are unit length (cosine-ready).
                    "vector": array(array_type, normalize_embedding(np.array(chunk.embedding)).astype(float).tolist()),
                    "metadata": json.dumps(chunk_step.get("metadata")),
                    "chunk_metadata": json.dumps(chunk_step.get("chunk_metadata")),
                }
            )
        cursor = self.connection.cursor()
        try:
            # MERGE = upsert keyed on chunk_id; TO_VECTOR converts the bound
            # array into the column's VECTOR(dim, datatype) representation.
            query = f"""
            MERGE INTO {self.table_name} t
            USING (
                SELECT
                    :chunk_id AS chunk_id,
                    :content AS content,
                    :vector AS vector,
                    :metadata AS metadata,
                    :chunk_metadata AS chunk_metadata
                FROM dual
            ) s
            ON (t.chunk_id = s.chunk_id)

            WHEN MATCHED THEN
                UPDATE SET
                    t.content = s.content,
                    t.vector = TO_VECTOR(s.vector, {self.dimensions}, {self.vector_datatype}),
                    t.metadata = s.metadata,
                    t.chunk_metadata = s.chunk_metadata

            WHEN NOT MATCHED THEN
                INSERT (chunk_id, content, vector, metadata, chunk_metadata)
                VALUES (s.chunk_id, s.content, TO_VECTOR(s.vector, {self.dimensions}, {self.vector_datatype}), s.metadata, s.chunk_metadata)
            """
            logger.debug(f"query: {query}")
            cursor.executemany(
                query,
                data,
            )
            logger.info("Merge completed successfully")
        except Exception as e:
            logger.error(f"Error inserting chunks into Oracle 26AI table {self.table_name}: {e}")
            raise
        finally:
            cursor.close()
212
+
213
+ async def query_vector(
214
+ self,
215
+ embedding: NDArray,
216
+ k: int,
217
+ score_threshold: float | None,
218
+ ) -> QueryChunksResponse:
219
+ """
220
+ Oracle vector search using COSINE similarity.
221
+ Returns top-k chunks and normalized similarity scores in [0, 1].
222
+ """
223
+ cursor = self.connection.cursor()
224
+
225
+ # Ensure query vector is L2-normalized
226
+ array_type = "d" if self.vector_datatype == "FLOAT64" else "f"
227
+ query_vector = array(array_type, normalize_embedding(np.array(embedding)))
228
+
229
+ query = f"""
230
+ SELECT
231
+ *
232
+ FROM (
233
+ SELECT
234
+ content,
235
+ chunk_id,
236
+ metadata,
237
+ chunk_metadata,
238
+ vector,
239
+ VECTOR_DISTANCE(vector, :query_vector, COSINE) AS score
240
+ FROM {self.table_name}
241
+ )
242
+ """
243
+
244
+ params: dict = {
245
+ "query_vector": query_vector,
246
+ }
247
+
248
+ if score_threshold is not None:
249
+ query += " WHERE score >= :score_threshold"
250
+ params["score_threshold"] = score_threshold
251
+
252
+ query += " ORDER BY score DESC FETCH FIRST :k ROWS ONLY"
253
+ params["k"] = k
254
+
255
+ logger.debug(query)
256
+ logger.debug(query_vector)
257
+ try:
258
+ cursor.execute(query, params)
259
+ results = cursor.fetchall()
260
+
261
+ chunks: list[EmbeddedChunk] = []
262
+ scores: list[float] = []
263
+
264
+ for row in results:
265
+ content, chunk_id, metadata, chunk_metadata, vector, score = row
266
+
267
+ chunk = EmbeddedChunk(
268
+ content=content.read(),
269
+ chunk_id=chunk_id,
270
+ metadata=metadata,
271
+ embedding=vector,
272
+ chunk_metadata=chunk_metadata,
273
+ embedding_model=self.vector_store.embedding_model,
274
+ embedding_dimension=self.vector_store.embedding_dimension,
275
+ )
276
+
277
+ chunks.append(chunk)
278
+ scores.append(float(score))
279
+ logger.debug(f"result count: {len(chunks)}")
280
+ return QueryChunksResponse(chunks=chunks, scores=scores)
281
+
282
+ except Exception as e:
283
+ logger.error("Error querying vector: %s", e)
284
+ raise
285
+
286
+ finally:
287
+ cursor.close()
288
+
289
+ async def query_keyword(self, query_string: str, k: int, score_threshold: float | None) -> QueryChunksResponse:
290
+ cursor = self.connection.cursor()
291
+
292
+ # Build base query
293
+ base_query = f"""
294
+ SELECT
295
+ content,
296
+ chunk_id,
297
+ metadata,
298
+ chunk_metadata,
299
+ vector,
300
+ score / max_score AS score
301
+ FROM (
302
+ SELECT
303
+ content,
304
+ chunk_id,
305
+ metadata,
306
+ chunk_metadata,
307
+ vector,
308
+ SCORE(1) AS score,
309
+ MAX(SCORE(1)) OVER () AS max_score
310
+ FROM {self.table_name}
311
+ WHERE CONTAINS(content, :query_string, 1) > 0
312
+ )
313
+ """
314
+
315
+ params = {"query_string": query_string, "k": k}
316
+
317
+ if score_threshold is not None:
318
+ base_query += " WHERE score >= :score_threshold"
319
+ params["score_threshold"] = score_threshold
320
+
321
+ query = base_query + " ORDER BY score DESC FETCH FIRST :k ROWS ONLY;"
322
+
323
+ logger.debug(query)
324
+
325
+ try:
326
+ cursor.execute(query, params)
327
+ results = cursor.fetchall()
328
+
329
+ chunks = []
330
+ scores = []
331
+ for row in results:
332
+ content, chunk_id, metadata, chunk_metadata, vector, score = row
333
+ chunk = EmbeddedChunk(
334
+ content=content.read(),
335
+ chunk_id=chunk_id,
336
+ metadata=metadata,
337
+ embedding=vector,
338
+ chunk_metadata=chunk_metadata,
339
+ embedding_model=self.vector_store.embedding_model,
340
+ embedding_dimension=self.vector_store.embedding_dimension,
341
+ )
342
+ chunks.append(chunk)
343
+ scores.append(float(score))
344
+ logger.debug(f"result count: {len(chunks)}")
345
+ return QueryChunksResponse(chunks=chunks, scores=scores)
346
+ except Exception as e:
347
+ logger.error(f"Error performing keyword search: {e}")
348
+ raise
349
+ finally:
350
+ cursor.close()
351
+
352
+ async def query_hybrid(
353
+ self,
354
+ embedding: NDArray,
355
+ query_string: str,
356
+ k: int,
357
+ score_threshold: float | None,
358
+ reranker_type: str,
359
+ reranker_params: dict[str, Any] | None = None,
360
+ ) -> QueryChunksResponse:
361
+ """
362
+ Hybrid search combining vector similarity and keyword search using configurable reranking.
363
+
364
+ Args:
365
+ embedding: The query embedding vector
366
+ query_string: The text query for keyword search
367
+ k: Number of results to return
368
+ score_threshold: Minimum similarity score threshold
369
+ reranker_type: Type of reranker to use ("rrf" or "weighted")
370
+ reranker_params: Parameters for the reranker
371
+
372
+ Returns:
373
+ QueryChunksResponse with combined results
374
+ """
375
+ if reranker_params is None:
376
+ reranker_params = {}
377
+
378
+ # Get results from both search methods
379
+ vector_response = await self.query_vector(embedding, k, score_threshold)
380
+ keyword_response = await self.query_keyword(query_string, k, score_threshold)
381
+
382
+ # Convert responses to score dictionaries using chunk_id
383
+ vector_scores = {
384
+ chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
385
+ }
386
+ keyword_scores = {
387
+ chunk.chunk_id: score
388
+ for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
389
+ }
390
+
391
+ # Combine scores using the reranking utility
392
+ combined_scores = WeightedInMemoryAggregator.combine_search_results(
393
+ vector_scores, keyword_scores, reranker_type, reranker_params
394
+ )
395
+
396
+ # Efficient top-k selection because it only tracks the k best candidates it's seen so far
397
+ top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
398
+
399
+ # Filter by score threshold
400
+ filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= (score_threshold or 0)]
401
+
402
+ # Create a map of chunk_id to chunk for both responses
403
+ chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
404
+
405
+ # Use the map to look up chunks by their IDs
406
+ chunks = []
407
+ scores = []
408
+ for doc_id, score in filtered_items:
409
+ if doc_id in chunk_map:
410
+ chunks.append(chunk_map[doc_id])
411
+ scores.append(score)
412
+
413
+ return QueryChunksResponse(chunks=chunks, scores=scores)
414
+
415
+ async def delete(self):
416
+ try:
417
+ with self.connection.cursor() as cursor:
418
+ cursor.execute(f"DROP TABLE IF EXISTS {self.table_name}")
419
+ logger.info("Dropped table: {self.table_name}")
420
+ except oracledb.DatabaseError as e:
421
+ logger.error(f"Error dropping table {self.table_name}: {e}")
422
+ raise
423
+
424
+ async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
425
+ chunk_ids = [c.chunk_id for c in chunks_for_deletion]
426
+ cursor = self.connection.cursor()
427
+ try:
428
+ cursor.execute(
429
+ f"""
430
+ DELETE FROM {self.table_name}
431
+ WHERE chunk_id IN ({", ".join([f"'{chunk_id}'" for chunk_id in chunk_ids])})
432
+ """
433
+ )
434
+ except Exception as e:
435
+ logger.error(f"Error deleting chunks from Oracle 26AI table {self.table_name}: {e}")
436
+ raise
437
+ finally:
438
+ cursor.close()
439
+
440
+
441
class OCI26aiVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    """Vector IO adapter backed by an Oracle 26ai database.

    Keeps one ``OCI26aiIndex`` per registered vector store in ``self.cache``
    and persists vector-store metadata in the configured KV store so indexes
    can be re-hydrated across restarts.
    """

    def __init__(
        self,
        config: OCI26aiVectorIOConfig,
        inference_api: Inference,
        files_api: Files | None,
    ) -> None:
        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
        self.config = config
        # Maps vector_store identifier -> VectorStoreWithIndex.
        self.cache: dict[str, VectorStoreWithIndex] = {}
        self.pool = None
        self.inference_api = inference_api
        self.vector_store_table = None
        # Fix: established in initialize(); defined here so shutdown() does not
        # raise AttributeError when initialize() was never called.
        self.connection = None

    async def initialize(self) -> None:
        """Open the Oracle connection and re-load all persisted vector stores."""
        logger.info("Initializing OCI26aiVectorIOAdapter")
        self.kvstore = await kvstore_impl(self.config.persistence)
        await self.initialize_openai_vector_stores()

        try:
            self.connection = oracledb.connect(
                user=self.config.user,
                password=self.config.password,
                dsn=self.config.conn_str,
                config_dir=self.config.tnsnames_loc,
                wallet_location=self.config.ewallet_pem_loc,
                wallet_password=self.config.ewallet_password,
                expire_time=1,  # minutes
            )
            # Commit each statement implicitly; the adapter does not batch transactions.
            self.connection.autocommit = True
            logger.info("Oracle connection created successfully")
        except Exception as e:
            logger.error(f"Error creating Oracle connection: {e}")
            raise

        # Re-hydrate an index for every vector store persisted in the KV store.
        start_key = OPENAI_VECTOR_STORES_PREFIX
        end_key = f"{OPENAI_VECTOR_STORES_PREFIX}\xff"
        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
        for vector_store_data in stored_vector_stores:
            vector_store = VectorStore.model_validate_json(vector_store_data)
            logger.info(f"Loading index {vector_store.vector_store_name}: {vector_store.vector_store_id}")
            oci_index = OCI26aiIndex(
                connection=self.connection,
                vector_store=vector_store,
                kvstore=self.kvstore,
                vector_datatype=self.config.vector_datatype,
            )
            await oci_index.initialize()
            index = VectorStoreWithIndex(vector_store, index=oci_index, inference_api=self.inference_api)
            self.cache[vector_store.identifier] = index

        logger.info(f"Completed loading {len(stored_vector_stores)} indexes")

    async def shutdown(self) -> None:
        """Close the Oracle connection and release mixin resources."""
        logger.info("Shutting down Oracle connection")
        if self.connection is not None:
            self.connection.close()
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        """Persist *vector_store* metadata and create/cache its backing index.

        Raises:
            RuntimeError: if initialize() has not been called yet.
        """
        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")

        # Save to kvstore for persistence
        key = f"{OPENAI_VECTOR_STORES_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())

        # NOTE(review): consistency_level looks carried over from a Milvus
        # adapter; confirm OCI26aiIndex actually honors it. Also note that,
        # unlike initialize(), no kvstore is passed here — verify intentional.
        if isinstance(self.config, OCI26aiVectorIOConfig):
            consistency_level = self.config.consistency_level
        else:
            consistency_level = "Strong"
        oci_index = OCI26aiIndex(
            connection=self.connection,
            vector_store=vector_store,
            consistency_level=consistency_level,
            vector_datatype=self.config.vector_datatype,
        )
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=oci_index,
            inference_api=self.inference_api,
        )
        await oci_index.initialize()
        self.cache[vector_store.identifier] = index

    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
        """Return the index for *vector_store_id*, loading from the KV store on a cache miss.

        Raises:
            RuntimeError: if initialize() has not been called yet.
            VectorStoreNotFoundError: if no metadata exists for the id.
        """
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

        # Try to load from kvstore
        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")

        key = f"{OPENAI_VECTOR_STORES_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)

        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=OCI26aiIndex(
                connection=self.connection,
                vector_store=vector_store,
                kvstore=self.kvstore,
                vector_datatype=self.config.vector_datatype,
            ),
            inference_api=self.inference_api,
        )
        self.cache[vector_store_id] = index
        return index

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        """Drop the backing index (if cached) and delete persisted metadata."""
        # Remove provider index and cache
        if vector_store_id in self.cache:
            await self.cache[vector_store_id].index.delete()
            del self.cache[vector_store_id]

        # Delete vector DB metadata from KV store
        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
        await self.kvstore.delete(key=f"{OPENAI_VECTOR_STORES_PREFIX}{vector_store_id}")

    async def insert_chunks(
        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
    ) -> None:
        """Insert pre-embedded chunks into the store's index.

        ``ttl_seconds`` is accepted for interface compatibility but unused here.
        """
        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
            raise VectorStoreNotFoundError(vector_store_id)

        await index.insert_chunks(chunks)

    async def query_chunks(
        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
        """Query the store's index, defaulting embedding_dimensions from the store metadata."""
        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
            raise VectorStoreNotFoundError(vector_store_id)

        if params is None:
            params = {}
        if "embedding_dimensions" not in params:
            params["embedding_dimensions"] = index.vector_store.embedding_dimension

        return await index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from an Oracle 26ai vector store."""
        # Fix: the original docstring said "milvus" — copy-paste leftover.
        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise VectorStoreNotFoundError(store_id)

        await index.index.delete_chunks(chunks_for_deletion)
@@ -4,14 +4,70 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from typing import Any
7
+ from enum import StrEnum
8
+ from typing import Annotated, Any, Literal, Self
8
9
 
9
- from pydantic import BaseModel, Field
10
+ from pydantic import BaseModel, Field, model_validator
10
11
 
11
12
  from llama_stack.core.storage.datatypes import KVStoreReference
12
13
  from llama_stack_api import json_schema_type
13
14
 
14
15
 
16
+ class PGVectorIndexType(StrEnum):
17
+ """Supported pgvector vector index types in Llama Stack."""
18
+
19
+ HNSW = "HNSW"
20
+ IVFFlat = "IVFFlat"
21
+
22
+
23
class PGVectorHNSWVectorIndex(BaseModel):
    """Configuration for PGVector HNSW (Hierarchical Navigable Small Worlds) vector index.
    https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw
    """

    # Discriminator value for the PGVectorIndexConfig tagged union.
    type: Literal[PGVectorIndexType.HNSW] = PGVectorIndexType.HNSW
    # Maximum connections per graph node; gt=0 is skipped by pydantic when None.
    m: int | None = Field(
        gt=0,
        default=16,
        description="PGVector's HNSW index parameter - maximum number of edges each vertex has to its neighboring vertices in the graph",
    )
    # Candidate-list size at index build time; larger -> better recall, slower build.
    ef_construction: int | None = Field(
        gt=0,
        default=64,
        description="PGVector's HNSW index parameter - size of the dynamic candidate list used for graph construction",
    )
40
+
41
class PGVectorIVFFlatVectorIndex(BaseModel):
    """Configuration for PGVector IVFFlat (Inverted File with Flat Compression) vector index.
    https://github.com/pgvector/pgvector?tab=readme-ov-file#ivfflat
    """

    # Discriminator value for the PGVectorIndexConfig tagged union.
    type: Literal[PGVectorIndexType.IVFFlat] = PGVectorIndexType.IVFFlat
    lists: int | None = Field(
        gt=0, default=100, description="PGVector's IVFFlat index parameter - number of lists index divides vectors into"
    )
    probes: int | None = Field(
        gt=0,
        default=10,
        description="PGVector's IVFFlat index parameter - number of lists index searches through during ANN search",
    )

    @model_validator(mode="after")
    def validate_probes(self) -> Self:
        """Reject configurations where probes >= lists (search would degenerate to exact scan).

        Fix: both fields are ``int | None``; the original comparison raised
        TypeError when either was explicitly set to None. The check now only
        runs when both values are present.
        """
        if self.probes is not None and self.lists is not None and self.probes >= self.lists:
            raise ValueError(
                "probes parameter for PGVector IVFFlat index can't be greater than or equal to the number of lists in the index to allow ANN search."
            )
        return self
63
+
64
+
65
# Discriminated union of supported pgvector index configurations; pydantic
# selects HNSW vs IVFFlat from the `type` field at validation time.
PGVectorIndexConfig = Annotated[
    PGVectorHNSWVectorIndex | PGVectorIVFFlatVectorIndex,
    Field(discriminator="type"),
]
68
+ ]
69
+
70
+
15
71
  @json_schema_type
16
72
  class PGVectorVectorIOConfig(BaseModel):
17
73
  host: str | None = Field(default="localhost")
@@ -19,6 +75,13 @@ class PGVectorVectorIOConfig(BaseModel):
19
75
  db: str | None = Field(default="postgres")
20
76
  user: str | None = Field(default="postgres")
21
77
  password: str | None = Field(default="mysecretpassword")
78
+ distance_metric: Literal["COSINE", "L2", "L1", "INNER_PRODUCT"] | None = Field(
79
+ default="COSINE", description="PGVector distance metric used for vector search in PGVectorIndex"
80
+ )
81
+ vector_index: PGVectorIndexConfig | None = Field(
82
+ default_factory=PGVectorHNSWVectorIndex,
83
+ description="PGVector vector index used for Approximate Nearest Neighbor (ANN) search",
84
+ )
22
85
  persistence: KVStoreReference | None = Field(
23
86
  description="Config for KV store backend (SQLite only for now)", default=None
24
87
  )
@@ -40,6 +103,10 @@ class PGVectorVectorIOConfig(BaseModel):
40
103
  "db": db,
41
104
  "user": user,
42
105
  "password": password,
106
+ "distance_metric": "COSINE",
107
+ "vector_index": PGVectorHNSWVectorIndex(m=16, ef_construction=64).model_dump(
108
+ mode="json", exclude_none=True
109
+ ),
43
110
  "persistence": KVStoreReference(
44
111
  backend="kv_default",
45
112
  namespace="vector_io::pgvector",