gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
gllm_datastore/data_store/opensearch/fulltext.pyi
@@ -0,0 +1,240 @@
+ from _typeshed import Incomplete
+ from enum import StrEnum
+ from gllm_core.schema import Chunk
+ from gllm_datastore.constants import METADATA_KEYS as METADATA_KEYS
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from gllm_datastore.data_store._elastic_core.elastic_like_core import ElasticLikeCore as ElasticLikeCore
+ from gllm_datastore.data_store._elastic_core.query_translator import convert_filter_clause as convert_filter_clause
+ from gllm_datastore.data_store.opensearch.query import apply_filter_query_to_search as apply_filter_query_to_search, create_search_with_filters as create_search_with_filters, delete_by_id as delete_by_id, delete_by_query as delete_by_query, safe_execute as safe_execute, update_by_query as update_by_query, validate_query_length as validate_query_length
+ from gllm_datastore.data_store.opensearch.query_translator import OpenSearchQueryTranslator as OpenSearchQueryTranslator
+ from opensearchpy import AsyncOpenSearch
+ from typing import Any, Literal, overload
+
+ class SupportedQueryMethods(StrEnum):
+     """Supported query methods for the OpenSearch fulltext capability."""
+     AUTOCOMPLETE: str
+     AUTOSUGGEST: str
+     BM25: str
+     BY_FIELD: str
+     SHINGLES: str
+
+ QUERY_REQUIRED_STRATEGIES: Incomplete
+
+ class OpenSearchFulltextCapability:
+     """OpenSearch implementation of the FulltextCapability protocol.
+
+     This class provides document CRUD operations and flexible querying using OpenSearch.
+
+     Attributes:
+         index_name (str): The name of the OpenSearch index.
+         client (AsyncOpenSearch): AsyncOpenSearch client.
+         query_field (str): The field name to use for text content.
+     """
+     index_name: Incomplete
+     client: Incomplete
+     query_field: Incomplete
+     def __init__(self, index_name: str, client: AsyncOpenSearch, query_field: str = 'text') -> None:
+         '''Initialize the OpenSearch fulltext capability.
+
+         Args:
+             index_name (str): The name of the OpenSearch index.
+             client (AsyncOpenSearch): The OpenSearch client.
+             query_field (str, optional): The field name to use for text content. Defaults to "text".
+         '''
+     async def get_size(self) -> int:
+         """Returns the total number of documents in the index.
+
+         Returns:
+             int: The total number of documents.
+         """
+     async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
+         """Create new records in the datastore.
+
+         Args:
+             data (Chunk | list[Chunk]): Data to create (single item or collection).
+             **kwargs: Backend-specific parameters forwarded to the OpenSearch bulk API.
+
+         Raises:
+             ValueError: If the data structure is invalid.
+         """
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.BY_FIELD] | None = ..., query: str | None = None, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.BM25], query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, k1: float | None = None, b: float | None = None, **kwargs: Any) -> list[Chunk]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.AUTOCOMPLETE], query: str, field: str, size: int = 20, fuzzy_tolerance: int = 1, min_prefix_length: int = 3, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.AUTOSUGGEST], query: str, search_fields: list[str], autocomplete_field: str, size: int = 20, min_length: int = 3, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.SHINGLES], query: str, field: str, size: int = 20, min_length: int = 3, max_length: int = 30, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
+     async def retrieve_by_field(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
+         """Retrieve records from the datastore based on metadata field filtering.
+
+         This method filters and returns stored chunks based on metadata values
+         rather than text content. It is particularly useful for structured lookups,
+         such as retrieving all chunks from a certain source, tagged with a specific label,
+         or authored by a particular user.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options (sorting, pagination, etc.).
+                 Defaults to None.
+
+         Returns:
+             list[Chunk]: The filtered results as Chunk objects.
+         """
+     async def retrieve_bm25(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, k1: float | None = None, b: float | None = None) -> list[Chunk]:
+         '''Queries the OpenSearch data store using the BM25 algorithm for keyword-based search.
+
+         Args:
+             query (str): The query string.
+             filters (FilterClause | QueryFilter | None, optional): Optional metadata filter to apply to the search.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Use filter builder functions like `F.eq()`, `F.and_()`, etc. Defaults to None.
+             options (QueryOptions | None, optional): Query options including fields, limit, order_by, etc.
+                 For example, `QueryOptions(include_fields=["title", "content"], limit=10,
+                 order_by="score", order_desc=True)`.
+                 If include_fields is None, defaults to [query_field]. For multiple fields,
+                 uses a multi_match query. Defaults to None.
+             k1 (float | None, optional): BM25 parameter controlling term frequency saturation.
+                 Higher values mean term frequency has more impact before diminishing returns.
+                 Typical values: 1.2-2.0. If None, uses the OpenSearch default (~1.2). Defaults to None.
+             b (float | None, optional): BM25 parameter controlling document length normalization.
+                 0.0 = no length normalization, 1.0 = full normalization.
+                 Typical value: 0.75. If None, uses the OpenSearch default (~0.75). Defaults to None.
+
+         Examples:
+             ```python
+             from gllm_datastore.core.filters import filter as F
+
+             # Basic BM25 query on the \'text\' field
+             results = await data_store.retrieve_bm25("machine learning")
+
+             # BM25 query on specific fields with query options
+             results = await data_store.retrieve_bm25(
+                 "natural language",
+                 options=QueryOptions(include_fields=["title", "abstract"], limit=5)
+             )
+
+             # BM25 query with a direct FilterClause
+             results = await data_store.retrieve_bm25(
+                 "deep learning",
+                 filters=F.eq("metadata.category", "AI")
+             )
+
+             # BM25 query with multiple filters
+             results = await data_store.retrieve_bm25(
+                 "deep learning",
+                 filters=F.and_(F.eq("metadata.category", "AI"), F.eq("metadata.status", "published"))
+             )
+
+             # BM25 query with custom BM25 parameters for more aggressive term frequency weighting
+             results = await data_store.retrieve_bm25(
+                 "artificial intelligence",
+                 k1=2.0,
+                 b=0.5
+             )
+
+             # BM25 query with fields, filters, and options
+             results = await data_store.retrieve_bm25(
+                 "data science applications",
+                 filters=F.and_(
+                     F.eq("metadata.author_id", "user123"),
+                     F.in_("metadata.publication_year", [2022, 2023])
+                 ),
+                 options=QueryOptions(include_fields=["content", "tags"], limit=10, order_by="score", order_desc=True),
+                 k1=1.5,
+                 b=0.9
+             )
+             ```
+
+         Returns:
+             list[Chunk]: A list of Chunk objects representing the retrieved documents.
+         '''
+     async def retrieve_autocomplete(self, query: str, field: str, size: int = 20, fuzzy_tolerance: int = 1, min_prefix_length: int = 3, filter_query: dict[str, Any] | None = None) -> list[str]:
+         """Provides suggestions based on a prefix query for a specific field.
+
+         Args:
+             query (str): The query string.
+             field (str): The field name for autocomplete.
+             size (int, optional): The number of suggestions to retrieve. Defaults to 20.
+             fuzzy_tolerance (int, optional): The level of fuzziness for suggestions. Defaults to 1.
+             min_prefix_length (int, optional): The minimum prefix length to trigger fuzzy matching. Defaults to 3.
+             filter_query (dict[str, Any] | None, optional): The filter query. Defaults to None.
+
+         Returns:
+             list[str]: A list of suggestions.
+         """
+     async def retrieve_autosuggest(self, query: str, search_fields: list[str], autocomplete_field: str, size: int = 20, min_length: int = 3, filters: QueryFilter | None = None) -> list[str]:
+         """Generates suggestions across multiple fields using a multi_match query to broaden the search criteria.
+
+         Args:
+             query (str): The query string.
+             search_fields (list[str]): The fields to search over.
+             autocomplete_field (str): The field name for autocomplete.
+             size (int, optional): The number of suggestions to retrieve. Defaults to 20.
+             min_length (int, optional): The minimum length of the query. Defaults to 3.
+             filters (QueryFilter | None, optional): The filter query. Defaults to None.
+
+         Returns:
+             list[str]: A list of suggestions.
+         """
+     async def retrieve_shingles(self, query: str, field: str, size: int = 20, min_length: int = 3, max_length: int = 30, filters: QueryFilter | None = None) -> list[str]:
+         """Searches using shingles for prefix and fuzzy matching.
+
+         Args:
+             query (str): The query string.
+             field (str): The field name for autocomplete.
+             size (int, optional): The number of suggestions to retrieve. Defaults to 20.
+             min_length (int, optional): The minimum length of the query.
+                 Queries shorter than this limit will return an empty list. Defaults to 3.
+             max_length (int, optional): The maximum length of the query.
+                 Queries exceeding this limit will return an empty list. Defaults to 30.
+             filters (QueryFilter | None, optional): The filter query. Defaults to None.
+
+         Returns:
+             list[str]: A list of suggestions.
+         """
+     async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
+         """Find records that fuzzy match the query within a distance threshold.
+
+         Args:
+             query (str): Text to fuzzy match against.
+             max_distance (int): Maximum edit distance for matches. Defaults to 2.
+             filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options (limit, sorting, etc.). Defaults to None.
+
+         Returns:
+             list[Chunk]: Matched chunks.
+         """
+     async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
+         """Update existing records in the datastore.
+
+         Args:
+             update_values (dict[str, Any]): Values to update.
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+         """
+     async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
+         """Delete records from the data store using filters and optional query options.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records for deletion.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options supporting limit and sorting
+                 for eviction-like operations. Defaults to None.
+         """
+     async def delete_by_id(self, id_: str | list[str]) -> None:
+         """Deletes records from the data store based on IDs.
+
+         Args:
+             id_ (str | list[str]): ID or list of IDs to delete.
+         """
+     async def clear(self) -> None:
+         """Clear all records from the datastore."""
gllm_datastore/data_store/opensearch/query.pyi
@@ -0,0 +1,89 @@
+ import logging
+ from _typeshed import Incomplete
+ from gllm_datastore.core.filters.schema import QueryFilter as QueryFilter
+ from gllm_datastore.data_store.opensearch.query_translator import OpenSearchQueryTranslator as OpenSearchQueryTranslator
+ from gllm_datastore.utils import flatten_dict as flatten_dict
+ from opensearchpy import AsyncOpenSearch
+ from opensearchpy._async.helpers.search import AsyncSearch
+ from opensearchpy.helpers.query import Query
+ from typing import Any
+
+ VALID_FIELD_PATH: Incomplete
+
+ async def update_by_query(client: AsyncOpenSearch, index_name: str, update_values: dict[str, Any], filters: QueryFilter | None = None, logger: logging.Logger | None = None) -> None:
+     '''Update records in OpenSearch using UpdateByQuery with retry logic for version conflicts.
+
+     This function builds a Painless script that safely assigns each updated field.
+     When a field path contains dots (e.g. "metadata.cache_value"), the corresponding
+     param must be accessed using bracket syntax, params[\'metadata.cache_value\'],
+     to avoid Painless treating it as nested object access (which would be None).
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         update_values (dict[str, Any]): Values to update.
+         filters (QueryFilter | None, optional): QueryFilter to select records to update.
+             Defaults to None.
+         logger (logging.Logger | None, optional): Logger instance. Defaults to None.
+     '''
+ async def delete_by_query(client: AsyncOpenSearch, index_name: str, filters: QueryFilter | None = None) -> None:
+     """Delete records from OpenSearch using delete_by_query.
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         filters (QueryFilter | None, optional): QueryFilter to select records for deletion.
+             Defaults to None, in which case no operation will be performed.
+     """
+ async def delete_by_id(client: AsyncOpenSearch, index_name: str, ids: str | list[str]) -> None:
+     """Delete records from OpenSearch by IDs using Search.delete().
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         ids (str | list[str]): ID or list of IDs to delete.
+     """
+ def validate_query_length(query: str, min_length: int = 0, max_length: int | None = None) -> bool:
+     """Validate query length against minimum and maximum constraints.
+
+     Args:
+         query (str): The query string to validate.
+         min_length (int, optional): Minimum required length. Defaults to 0.
+         max_length (int | None, optional): Maximum allowed length. Defaults to None.
+
+     Returns:
+         bool: True if the query is valid, False otherwise.
+     """
+ def create_search_with_filters(client: AsyncOpenSearch, index_name: str, filters: QueryFilter | None = None, exclude_fields: list[str] | None = None) -> AsyncSearch:
+     """Create an AsyncSearch object with optional filters and field exclusions.
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         filters (QueryFilter | None, optional): QueryFilter to apply. Defaults to None.
+         exclude_fields (list[str] | None, optional): Fields to exclude from the source. Defaults to None.
+
+     Returns:
+         AsyncSearch: Configured AsyncSearch object.
+     """
+ def apply_filter_query_to_search(search: AsyncSearch, main_query: Query, filters: QueryFilter | None = None) -> AsyncSearch:
+     """Apply a filter query to a search with a main query.
+
+     Args:
+         search (AsyncSearch): OpenSearch search object.
+         main_query (Query): The main query to apply.
+         filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
+
+     Returns:
+         AsyncSearch: Search object with the applied queries.
+     """
+ async def safe_execute(search: AsyncSearch, logger: logging.Logger | None = None) -> Any | None:
+     """Execute an OpenSearch DSL search with unified error handling.
+
+     Args:
+         search (AsyncSearch): OpenSearch DSL AsyncSearch object.
+         logger (logging.Logger | None, optional): Logger instance for error messages. Defaults to None.
+
+     Returns:
+         Response | None: The OpenSearch response on success, otherwise None.
+     """
gllm_datastore/data_store/opensearch/query_translator.pyi
@@ -0,0 +1,18 @@
+ from gllm_datastore.data_store._elastic_core.query_translator import ElasticLikeQueryTranslator as ElasticLikeQueryTranslator
+
+ class OpenSearchQueryTranslator(ElasticLikeQueryTranslator):
+     """Translates QueryFilter and FilterClause objects to OpenSearch Query DSL.
+
+     This class extends ElasticLikeQueryTranslator and implements abstract methods
+     using OpenSearch DSL API (Q function). It also provides QueryOptions handling
+     methods specific to OpenSearch.
+
+     Attributes:
+         _logger (Logger): Logger instance for error messages and debugging.
+     """
+     def __init__(self) -> None:
+         """Initialize the OpenSearch query translator.
+
+         Raises:
+             ImportError: If the opensearchpy package is not installed.
+         """
gllm_datastore/data_store/opensearch/vector.pyi
@@ -0,0 +1,211 @@
+ from _typeshed import Incomplete
+ from gllm_core.schema import Chunk
+ from gllm_datastore.constants import DEFAULT_FETCH_K as DEFAULT_FETCH_K, DEFAULT_TOP_K as DEFAULT_TOP_K
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from gllm_datastore.data_store._elastic_core.elastic_like_core import ElasticLikeCore as ElasticLikeCore
+ from gllm_datastore.data_store._elastic_core.query_translator import convert_filter_clause as convert_filter_clause
+ from gllm_datastore.data_store.opensearch.query import delete_by_id as delete_by_id, delete_by_query as delete_by_query, update_by_query as update_by_query
+ from gllm_datastore.data_store.opensearch.query_translator import OpenSearchQueryTranslator as OpenSearchQueryTranslator
+ from gllm_datastore.utils.converter import from_langchain as from_langchain, to_langchain as to_langchain
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
+ from gllm_inference.schema import Vector
+ from opensearchpy import AsyncOpenSearch
+ from typing import Any
+
+ class OpenSearchVectorCapability:
+     """OpenSearch implementation of the VectorCapability protocol.
+
+     This class provides document CRUD operations and vector search using OpenSearch.
+     It uses LangChain's OpenSearchVectorSearch for create and retrieve operations,
+     and the direct OpenSearch client for update and delete operations.
+
+     Attributes:
+         index_name (str): The name of the OpenSearch index.
+         vector_store (OpenSearchVectorSearch): The vector store instance.
+         client (AsyncOpenSearch): AsyncOpenSearch client for direct operations.
+         em_invoker (BaseEMInvoker): The embedding model used for vectorization.
+     """
+     index_name: Incomplete
+     client: Incomplete
+     query_field: Incomplete
+     vector_query_field: Incomplete
+     vector_store: Incomplete
+     def __init__(self, index_name: str, em_invoker: BaseEMInvoker, client: AsyncOpenSearch, opensearch_url: str | None = None, query_field: str = 'text', vector_query_field: str = 'vector', retrieval_strategy: Any = None, distance_strategy: str | None = None, connection_params: dict[str, Any] | None = None) -> None:
+         '''Initialize the OpenSearch vector capability.
+
+         OpenSearchVectorSearch creates its own sync and async clients internally
+         based on the provided connection parameters. The async client is used
+         for operations like update, delete, and clear.
+
+         Args:
+             index_name (str): The name of the OpenSearch index.
+             em_invoker (BaseEMInvoker): The embedding model used for vectorization.
+             client (AsyncOpenSearch): The OpenSearch client for direct operations.
+             opensearch_url (str | None, optional): The URL of the OpenSearch server.
+                 Used for LangChain\'s OpenSearchVectorSearch initialization.
+                 If None, it will be extracted from the client connection info. Defaults to None.
+             query_field (str, optional): The field name for text queries. Defaults to "text".
+             vector_query_field (str, optional): The field name for vector queries. Defaults to "vector".
+             retrieval_strategy (Any, optional): Not used with OpenSearchVectorSearch (kept for API compatibility).
+             distance_strategy (str | None, optional): The distance strategy for retrieval.
+                 For example, "l2" for Euclidean distance, "l2squared" for squared Euclidean distance,
+                 "cosine" for cosine similarity, etc. Defaults to None.
+             connection_params (dict[str, Any] | None, optional): Additional connection parameters
+                 to override defaults. These will be merged with automatically detected parameters
+                 (authentication, SSL settings). User-provided params take precedence. Defaults to None.
+                 Available parameters include:
+                 1. http_auth (tuple[str, str] | None): HTTP authentication tuple (username, password).
+                 2. use_ssl (bool): Whether to use SSL/TLS. Defaults to True for HTTPS URLs.
+                 3. verify_certs (bool): Whether to verify SSL certificates. Defaults to True for HTTPS URLs.
+                 4. ssl_show_warn (bool): Whether to show SSL warnings. Defaults to True for HTTPS URLs.
+                 5. ssl_assert_hostname (str | None): SSL hostname assertion. Defaults to None.
+                 6. max_retries (int): Maximum number of retries for requests. Defaults to 3.
+                 7. retry_on_timeout (bool): Whether to retry on timeouts. Defaults to True.
+                 8. client_cert (str | None): Path to the client certificate file. Defaults to None.
+                 9. client_key (str | None): Path to the client private key file. Defaults to None.
+                 10. root_cert (str | None): Path to the root certificate file. Defaults to None.
+                 11. Additional kwargs: Any other parameters accepted by the OpenSearch client constructor.
+         '''
+     @property
+     def em_invoker(self) -> BaseEMInvoker:
+         """Returns the EM Invoker instance.
+
+         Returns:
+             BaseEMInvoker: The EM Invoker instance.
+         """
+     async def ensure_index(self, mapping: dict[str, Any] | None = None, index_settings: dict[str, Any] | None = None, dimension: int | None = None, distance_strategy: str | None = None) -> None:
+         '''Ensure the OpenSearch index exists, creating it if necessary.
+
+         This method is idempotent: if the index already exists, it will skip creation
+         and return early.
+
+         Args:
+             mapping (dict[str, Any] | None, optional): Custom mapping dictionary to use
+                 for index creation. If provided, this mapping will be used directly.
+                 The mapping should follow the OpenSearch mapping format. Defaults to None,
+                 in which case the default mapping will be used.
+             index_settings (dict[str, Any] | None, optional): Custom index settings.
+                 These settings will be merged with any default settings. Defaults to None.
+             dimension (int | None, optional): Vector dimension. If neither this nor mapping
+                 is provided, it will be inferred from em_invoker by generating a test embedding.
+             distance_strategy (str | None, optional): Distance strategy for vector similarity.
+                 Supported values: "l2", "l2squared", "cosine", "innerproduct", etc.
+                 Only used when building the default mapping. Defaults to "l2" if not specified.
+
+         Raises:
+             ValueError: If the mapping is invalid or required parameters are missing.
+             RuntimeError: If index creation fails.
+         '''
+     async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
+         """Create new records in the datastore.
+
+         Args:
+             data (Chunk | list[Chunk]): Data to create (single item or collection).
+             **kwargs: Datastore-specific parameters.
+
+         Raises:
+             ValueError: If the data structure is invalid.
+         """
+     async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]], **kwargs: Any) -> list[str]:
+         """Add pre-computed embeddings directly.
+
+         Args:
+             chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
+                 corresponding vectors.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[str]: List of IDs of the added documents.
+         """
+     async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
+         '''Semantic search using a text query converted to a vector.
+
+         Examples:
+             ```python
+             from gllm_datastore.core.filters import filter as F
+
+             # Direct FilterClause usage
+             await vector_capability.retrieve(
+                 query="What is the capital of France?",
+                 filters=F.eq("metadata.category", "tech"),
+                 options=QueryOptions(limit=10),
+             )
+
+             # Multiple filters
+             filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
+             await vector_capability.retrieve(query="What is the capital of France?", filters=filters)
+             ```
+
+         Args:
+             query (str): Text query to embed and search for.
+             filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[Chunk]: List of chunks ordered by relevance score.
+         '''
+     async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
+         '''Direct vector similarity search.
+
+         Examples:
+             ```python
+             from gllm_datastore.core.filters import filter as F
+
+             # Direct FilterClause usage
+             await vector_capability.retrieve_by_vector(
+                 vector=[0.1, 0.2, 0.3],
+                 filters=F.eq("metadata.category", "tech"),
+                 options=QueryOptions(limit=10),
+             )
+
+             # Multiple filters
+             filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
+             await vector_capability.retrieve_by_vector(vector=[0.1, 0.2, 0.3], filters=filters)
+             ```
+
+         Args:
+             vector (Vector): Query embedding vector.
+             filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[Chunk]: List of chunks ordered by similarity score.
+         '''
+     async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
+         """Update existing records in the datastore.
+
+         Args:
+             update_values (dict[str, Any]): Values to update.
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def delete(self, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
+         """Delete records from the data store based on filters.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records for deletion.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def delete_by_id(self, id: str | list[str], **kwargs: Any) -> None:
+         """Delete records from the data store based on IDs.
+
+         Args:
+             id (str | list[str]): ID or list of IDs to delete.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def clear(self, **kwargs: Any) -> None:
+         """Clear all records from the datastore.
+
+         Args:
+             **kwargs: Datastore-specific parameters.
+         """
gllm_datastore/data_store/redis/__init__.pyi
@@ -0,0 +1,5 @@
+ from gllm_datastore.data_store.redis.data_store import RedisDataStore as RedisDataStore
+ from gllm_datastore.data_store.redis.fulltext import RedisFulltextCapability as RedisFulltextCapability
+ from gllm_datastore.data_store.redis.vector import RedisVectorCapability as RedisVectorCapability
+
+ __all__ = ['RedisDataStore', 'RedisFulltextCapability', 'RedisVectorCapability']
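
These re-exports fix the subpackage's public surface, so callers import the Redis capabilities from one place rather than the submodules (a sketch; constructor signatures live in the corresponding stubs listed in the file table above):

```python
# Import via the subpackage's public surface rather than the private modules.
from gllm_datastore.data_store.redis import (
    RedisDataStore,
    RedisFulltextCapability,
    RedisVectorCapability,
)
```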