gllm-datastore-binary 0.5.45 (cp311-cp311-macosx_13_0_arm64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gllm-datastore-binary might be problematic.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +5 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +7 -0
- gllm_datastore/data_store/base.pyi +138 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +202 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +154 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-311-darwin.so +0 -0
- gllm_datastore.pyi +156 -0
- gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
- gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
- gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
gllm_datastore/data_store/elasticsearch/fulltext.pyi
@@ -0,0 +1,237 @@
from _typeshed import Incomplete
from elasticsearch import AsyncElasticsearch
from elasticsearch.dsl import AttrDict as AttrDict
from enum import StrEnum
from gllm_core.schema import Chunk
from gllm_datastore.constants import METADATA_KEYS as METADATA_KEYS
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
from gllm_datastore.data_store.elasticsearch.query import apply_filter_query_to_search as apply_filter_query_to_search, apply_filters_and_options as apply_filters_and_options, create_search_with_filters as create_search_with_filters, delete_by_id as delete_by_id, delete_by_query as delete_by_query, safe_execute as safe_execute, translate_filter as translate_filter, update_by_query as update_by_query, validate_query_length as validate_query_length
from typing import Any, Literal, overload

class SupportedQueryMethods(StrEnum):
    """Supported query methods for Elasticsearch fulltext capability."""
    AUTOCOMPLETE: str
    AUTOSUGGEST: str
    BM25: str
    BY_FIELD: str
    SHINGLES: str

QUERY_REQUIRED_STRATEGIES: Incomplete

class ElasticsearchFulltextCapability:
    """Elasticsearch implementation of the FulltextCapability protocol.

    This class provides document CRUD operations and flexible querying using Elasticsearch.

    Attributes:
        index_name (str): The name of the Elasticsearch index.
        client (AsyncElasticsearch): The AsyncElasticsearch client.
        query_field (str): The field name to use for text content.
    """
    index_name: Incomplete
    client: Incomplete
    query_field: Incomplete
    def __init__(self, index_name: str, client: AsyncElasticsearch, query_field: str = 'text') -> None:
        '''Initialize the Elasticsearch fulltext capability.

        Args:
            index_name (str): The name of the Elasticsearch index.
            client (AsyncElasticsearch): The Elasticsearch client.
            query_field (str, optional): The field name to use for text content. Defaults to "text".
        '''
    async def get_size(self) -> int:
        """Returns the total number of documents in the index.

        Returns:
            int: The total number of documents.
        """
    async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
        """Create new records in the datastore.

        Args:
            data (Chunk | list[Chunk]): Data to create (single item or collection).
            **kwargs: Backend-specific parameters forwarded to the Elasticsearch bulk API.

        Raises:
            ValueError: If the data structure is invalid.
        """
    @overload
    async def retrieve(self, strategy: Literal[SupportedQueryMethods.BY_FIELD] | None = ..., query: str | None = None, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]: ...
    @overload
    async def retrieve(self, strategy: Literal[SupportedQueryMethods.BM25], query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, k1: float | None = None, b: float | None = None, **kwargs: Any) -> list[Chunk]: ...
    @overload
    async def retrieve(self, strategy: Literal[SupportedQueryMethods.AUTOCOMPLETE], query: str, field: str, size: int = 20, fuzzy_tolerance: int = 1, min_prefix_length: int = 3, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
    @overload
    async def retrieve(self, strategy: Literal[SupportedQueryMethods.AUTOSUGGEST], query: str, search_fields: list[str], autocomplete_field: str, size: int = 20, min_length: int = 3, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
    @overload
    async def retrieve(self, strategy: Literal[SupportedQueryMethods.SHINGLES], query: str, field: str, size: int = 20, min_length: int = 3, max_length: int = 30, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
    async def retrieve_by_field(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
        """Retrieve records from the datastore based on metadata field filtering.

        This method filters and returns stored chunks based on metadata values
        rather than text content. It is particularly useful for structured lookups,
        such as retrieving all chunks from a certain source, tagged with a specific label,
        or authored by a particular user.

        Args:
            filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options (sorting, pagination, etc.).
                Defaults to None.

        Returns:
            list[Chunk]: The filtered results as Chunk objects.
        """
    async def retrieve_bm25(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, k1: float | None = None, b: float | None = None) -> list[Chunk]:
        '''Queries the Elasticsearch data store using the BM25 algorithm for keyword-based search.

        Args:
            query (str): The query string.
            filters (FilterClause | QueryFilter | None, optional): Optional metadata filter to apply to the search.
                FilterClause objects are automatically converted to QueryFilter internally.
                Use filter builder functions like `F.eq()`, `F.and_()`, etc. Defaults to None.
            options (QueryOptions | None, optional): Query options including fields, limit, order_by, etc.
                For example, `QueryOptions(fields=["title", "content"], limit=10, order_by="score", order_desc=True)`.
                If fields is None, defaults to ["text"]. For multiple fields, uses a multi_match query. Defaults to None.
            k1 (float | None, optional): BM25 parameter controlling term frequency saturation.
                Higher values mean term frequency has more impact before diminishing returns.
                Typical values: 1.2-2.0. If None, uses the Elasticsearch default (~1.2). Defaults to None.
            b (float | None, optional): BM25 parameter controlling document length normalization.
                0.0 = no length normalization, 1.0 = full normalization.
                Typical value: 0.75. If None, uses the Elasticsearch default (~0.75). Defaults to None.

        Examples:
            ```python
            from gllm_datastore.core.filters import filter as F

            # Basic BM25 query on the \'text\' field
            results = await data_store.retrieve_bm25("machine learning")

            # BM25 query on specific fields with query options
            results = await data_store.retrieve_bm25(
                "natural language",
                options=QueryOptions(fields=["title", "abstract"], limit=5)
            )

            # BM25 query with direct FilterClause
            results = await data_store.retrieve_bm25(
                "deep learning",
                filters=F.eq("metadata.category", "AI")
            )

            # BM25 query with multiple filters
            results = await data_store.retrieve_bm25(
                "deep learning",
                filters=F.and_(F.eq("metadata.category", "AI"), F.eq("metadata.status", "published"))
            )

            # BM25 query with custom BM25 parameters for more aggressive term frequency weighting
            results = await data_store.retrieve_bm25(
                "artificial intelligence",
                k1=2.0,
                b=0.5
            )

            # BM25 query with fields, filters, and options
            results = await data_store.retrieve_bm25(
                "data science applications",
                filters=F.and_(
                    F.eq("metadata.author_id", "user123"),
                    F.in_("metadata.publication_year", [2022, 2023])
                ),
                options=QueryOptions(fields=["content", "tags"], limit=10, order_by="score", order_desc=True),
                k1=1.5,
                b=0.9
            )
            ```

        Returns:
            list[Chunk]: A list of Chunk objects representing the retrieved documents.
        '''
    async def retrieve_autocomplete(self, query: str, field: str, size: int = 20, fuzzy_tolerance: int = 1, min_prefix_length: int = 3, filter_query: dict[str, Any] | None = None) -> list[str]:
        """Provides suggestions based on a prefix query for a specific field.

        Args:
            query (str): The query string.
            field (str): The field name for autocomplete.
            size (int, optional): The number of suggestions to retrieve. Defaults to 20.
            fuzzy_tolerance (int, optional): The level of fuzziness for suggestions. Defaults to 1.
            min_prefix_length (int, optional): The minimum prefix length to trigger fuzzy matching. Defaults to 3.
            filter_query (dict[str, Any] | None, optional): The filter query. Defaults to None.

        Returns:
            list[str]: A list of suggestions.
        """
    async def retrieve_autosuggest(self, query: str, search_fields: list[str], autocomplete_field: str, size: int = 20, min_length: int = 3, filters: QueryFilter | None = None) -> list[str]:
        """Generates suggestions across multiple fields using a multi_match query to broaden the search criteria.

        Args:
            query (str): The query string.
            search_fields (list[str]): The fields to search across.
            autocomplete_field (str): The field name for autocomplete.
            size (int, optional): The number of suggestions to retrieve. Defaults to 20.
            min_length (int, optional): The minimum length of the query. Defaults to 3.
            filters (QueryFilter | None, optional): The filter query. Defaults to None.

        Returns:
            list[str]: A list of suggestions.
        """
    async def retrieve_shingles(self, query: str, field: str, size: int = 20, min_length: int = 3, max_length: int = 30, filters: QueryFilter | None = None) -> list[str]:
        """Searches using shingles for prefix and fuzzy matching.

        Args:
            query (str): The query string.
            field (str): The field name for autocomplete.
            size (int, optional): The number of suggestions to retrieve. Defaults to 20.
            min_length (int, optional): The minimum length of the query.
                Queries shorter than this limit will return an empty list. Defaults to 3.
            max_length (int, optional): The maximum length of the query.
                Queries exceeding this limit will return an empty list. Defaults to 30.
            filters (QueryFilter | None, optional): The filter query. Defaults to None.

        Returns:
            list[str]: A list of suggestions.
        """
    async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
        """Find records that fuzzy match the query within a distance threshold.

        Args:
            query (str): Text to fuzzy match against.
            max_distance (int): Maximum edit distance for matches. Defaults to 2.
            filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options (limit, sorting, etc.). Defaults to None.

        Returns:
            list[Chunk]: Matched chunks.
        """
    async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
        """Update existing records in the datastore.

        Args:
            update_values (dict[str, Any]): Values to update.
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
        """
    async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
        """Delete records from the data store using filters and optional options.

        Args:
            filters (FilterClause | QueryFilter | None, optional): Filters to select records for deletion.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options supporting limit and sorting
                for eviction-like operations. Defaults to None.
        """
    async def delete_by_id(self, id_: str | list[str]) -> None:
        """Deletes records from the data store based on IDs.

        Args:
            id_ (str | list[str]): ID or list of IDs to delete.
        """
    async def clear(self) -> None:
        """Clear all records from the datastore."""
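For orientation, a minimal usage sketch of the fulltext capability against the signatures above. The cluster URL, index name, and the `Chunk(content=...)` constructor are illustrative assumptions, not part of this diff:

```python
import asyncio

from elasticsearch import AsyncElasticsearch
from gllm_core.schema import Chunk
from gllm_datastore.data_store.elasticsearch.fulltext import (
    ElasticsearchFulltextCapability,
    SupportedQueryMethods,
)

async def main() -> None:
    # Placeholder connection details; point this at a real cluster.
    client = AsyncElasticsearch("http://localhost:9200")
    fulltext = ElasticsearchFulltextCapability(index_name="docs", client=client)

    # Index a chunk, then run a BM25 keyword search via the overloaded retrieve().
    await fulltext.create(Chunk(content="Transformers changed NLP."))  # assumed Chunk field
    hits = await fulltext.retrieve(
        strategy=SupportedQueryMethods.BM25,
        query="transformers",
        k1=1.5,  # term-frequency saturation; Elasticsearch default is ~1.2
    )
    print(len(hits))

    # Prefix suggestions against a specific field.
    suggestions = await fulltext.retrieve(
        strategy=SupportedQueryMethods.AUTOCOMPLETE,
        query="tra",
        field="text",
        size=5,
    )
    print(suggestions)
    await client.close()

asyncio.run(main())
```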
gllm_datastore/data_store/elasticsearch/query.pyi
@@ -0,0 +1,114 @@
import logging
from elasticsearch import AsyncElasticsearch
from elasticsearch.dsl import AsyncSearch
from elasticsearch.dsl.query import Query
from elasticsearch.dsl.response import Response
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
from gllm_datastore.utils import flatten_dict as flatten_dict
from typing import Any

def apply_filters_and_options(search: AsyncSearch, filters: QueryFilter | None = None, options: QueryOptions | None = None) -> AsyncSearch:
    """Apply filters and options to an Elasticsearch search object.

    Args:
        search (AsyncSearch): Elasticsearch search object.
        filters (QueryFilter | None, optional): QueryFilter holding filters and a condition.
        options (QueryOptions | None, optional): Query options (limit, sort, fields).

    Returns:
        AsyncSearch: Elasticsearch search object.
    """

def translate_filter(filters: QueryFilter | None) -> Query | None:
    """Translate a structured QueryFilter into an Elasticsearch DSL Query.

    The translation supports comparison operators (EQ, NE, GT, LT, GTE, LTE),
    array operators (IN, NIN, ARRAY_CONTAINS, ANY, ALL), text operators (TEXT_CONTAINS),
    and logical conditions (AND, OR, NOT), including nested filters.

    Args:
        filters (QueryFilter | None): Structured QueryFilter. If None, returns None.

    Returns:
        Query | None: An Elasticsearch Query object, or None if no filters are provided.

    Raises:
        ValueError: When the filter structure is invalid.
        TypeError: When an operator-value type combination is invalid.
    """

async def update_by_query(client: AsyncElasticsearch, index_name: str, update_values: dict[str, Any], filters: QueryFilter | None = None, logger: logging.Logger | None = None) -> None:
    '''Update records in Elasticsearch using UpdateByQuery with retry logic for version conflicts.

    This function builds a Painless script that safely assigns each updated field.
    When a field path contains dots (e.g. "metadata.cache_value"), the corresponding
    param must be accessed using bracket syntax (params[\'metadata.cache_value\'])
    to avoid Painless treating it as nested object access (which would be None).

    Args:
        client (AsyncElasticsearch): Elasticsearch client instance.
        index_name (str): The name of the Elasticsearch index.
        update_values (dict[str, Any]): Values to update.
        filters (QueryFilter | None, optional): QueryFilter to select records to update.
            Defaults to None.
        logger (logging.Logger | None, optional): Logger instance. Defaults to None.
    '''

async def delete_by_query(client: AsyncElasticsearch, index_name: str, filters: QueryFilter | None = None) -> None:
    """Delete records from Elasticsearch using delete_by_query.

    Args:
        client (AsyncElasticsearch): Elasticsearch client instance.
        index_name (str): The name of the Elasticsearch index.
        filters (QueryFilter | None, optional): QueryFilter to select records for deletion.
            Defaults to None, in which case no operation will be performed.
    """

async def delete_by_id(client: AsyncElasticsearch, index_name: str, ids: str | list[str]) -> None:
    """Delete records from Elasticsearch by IDs using Search.delete().

    Args:
        client (AsyncElasticsearch): Elasticsearch client instance.
        index_name (str): The name of the Elasticsearch index.
        ids (str | list[str]): ID or list of IDs to delete.
    """

def validate_query_length(query: str, min_length: int = 0, max_length: int | None = None) -> bool:
    """Validate query length against minimum and maximum constraints.

    Args:
        query (str): The query string to validate.
        min_length (int, optional): Minimum required length. Defaults to 0.
        max_length (int | None, optional): Maximum allowed length. Defaults to None.

    Returns:
        bool: True if the query is valid, False otherwise.
    """

def create_search_with_filters(client: AsyncElasticsearch, index_name: str, filters: QueryFilter | None = None, exclude_fields: list[str] | None = None) -> AsyncSearch:
    """Create an AsyncSearch object with optional filters and field exclusions.

    Args:
        client (AsyncElasticsearch): Elasticsearch client instance.
        index_name (str): The name of the Elasticsearch index.
        filters (QueryFilter | None, optional): QueryFilter to apply. Defaults to None.
        exclude_fields (list[str] | None, optional): Fields to exclude from the source. Defaults to None.

    Returns:
        AsyncSearch: Configured AsyncSearch object.
    """

def apply_filter_query_to_search(search: AsyncSearch, main_query: Query, filters: QueryFilter | None = None) -> AsyncSearch:
    """Apply a filter query to a search with a main query.

    Args:
        search (AsyncSearch): Elasticsearch search object.
        main_query (Query): The main query to apply.
        filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.

    Returns:
        AsyncSearch: Search object with applied queries.
    """

async def safe_execute(search: AsyncSearch, logger: logging.Logger | None = None) -> Response | None:
    """Execute an Elasticsearch DSL search with unified error handling.

    Args:
        search (AsyncSearch): Elasticsearch DSL AsyncSearch object.
        logger (logging.Logger | None, optional): Logger instance for error messages. Defaults to None.

    Returns:
        Response | None: The Elasticsearch response on success, otherwise None.
    """
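A short sketch of how these query helpers compose, under the same placeholder assumptions (no QueryFilter is constructed here, since its builder API is defined in core/filters rather than in this file):

```python
import asyncio
import logging

from elasticsearch import AsyncElasticsearch
from gllm_datastore.data_store.elasticsearch.query import (
    create_search_with_filters,
    safe_execute,
    validate_query_length,
)

async def main() -> None:
    logger = logging.getLogger("es-query")
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder URL

    query = "machine learning"
    # Guard against degenerate queries before hitting the cluster.
    if validate_query_length(query, min_length=3, max_length=256):
        # filters=None here; pass a QueryFilter to restrict the search.
        search = create_search_with_filters(client, index_name="docs", exclude_fields=["vector"])
        response = await safe_execute(search, logger=logger)  # returns None on failure
        if response is not None:
            print(response.hits.total)
    await client.close()

asyncio.run(main())
```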
gllm_datastore/data_store/elasticsearch/vector.pyi
@@ -0,0 +1,179 @@
from _typeshed import Incomplete
from elasticsearch import AsyncElasticsearch
from gllm_core.schema import Chunk
from gllm_datastore.constants import DEFAULT_FETCH_K as DEFAULT_FETCH_K, DEFAULT_TOP_K as DEFAULT_TOP_K
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
from gllm_datastore.data_store.elasticsearch.query import delete_by_id as delete_by_id, delete_by_query as delete_by_query, translate_filter as translate_filter, update_by_query as update_by_query
from gllm_datastore.utils.converter import from_langchain as from_langchain, to_langchain as to_langchain
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
from gllm_inference.schema import Vector
from langchain_elasticsearch.vectorstores import AsyncRetrievalStrategy
from typing import Any

class ElasticsearchVectorCapability:
    """Elasticsearch implementation of the VectorCapability protocol.

    This class provides document CRUD operations and vector search using Elasticsearch.

    Attributes:
        index_name (str): The name of the Elasticsearch index.
        vector_store (AsyncElasticsearchStore): The vector store instance.
        em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
    """
    index_name: Incomplete
    vector_store: Incomplete
    def __init__(self, index_name: str, client: AsyncElasticsearch, em_invoker: BaseEMInvoker, query_field: str = 'text', vector_query_field: str = 'vector', retrieval_strategy: AsyncRetrievalStrategy | None = None, distance_strategy: str | None = None) -> None:
        '''Initialize the Elasticsearch vector capability.

        Args:
            index_name (str): The name of the Elasticsearch index.
            client (AsyncElasticsearch): The Elasticsearch client.
            em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
            query_field (str, optional): The field name for text queries. Defaults to "text".
            vector_query_field (str, optional): The field name for vector queries. Defaults to "vector".
            retrieval_strategy (AsyncRetrievalStrategy | None, optional): The strategy used for retrieval.
                Defaults to None, in which case DenseVectorStrategy() is used.
            distance_strategy (str | None, optional): The distance strategy for retrieval. Defaults to None.
        '''
    @property
    def em_invoker(self) -> BaseEMInvoker:
        """Returns the EM Invoker instance.

        Returns:
            BaseEMInvoker: The EM Invoker instance.
        """
    async def ensure_index(self, mapping: dict[str, Any] | None = None, index_settings: dict[str, Any] | None = None, dimension: int | None = None, distance_strategy: str | None = None) -> None:
        '''Ensure the Elasticsearch index exists, creating it if necessary.

        This method is idempotent: if the index already exists, it will skip creation
        and return early.

        Args:
            mapping (dict[str, Any] | None, optional): Custom mapping dictionary to use
                for index creation. If provided, this mapping will be used directly.
                The mapping should follow the Elasticsearch mapping format. Defaults to None,
                in which case the default mapping will be used.
            index_settings (dict[str, Any] | None, optional): Custom index settings.
                These settings will be merged with any default settings. Defaults to None.
            dimension (int | None, optional): Vector dimension. If neither dimension nor mapping
                is provided, it will be inferred from em_invoker by generating a test embedding.
            distance_strategy (str | None, optional): Distance strategy for vector similarity.
                Supported values: "cosine", "l2_norm", "dot_product", etc.
                Only used when building the default mapping. Defaults to "cosine" if not specified.

        Raises:
            ValueError: If the mapping is invalid or required parameters are missing.
            RuntimeError: If index creation fails.
        '''
    async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
        """Create new records in the datastore.

        Args:
            data (Chunk | list[Chunk]): Data to create (single item or collection).
            **kwargs: Datastore-specific parameters.

        Raises:
            ValueError: If the data structure is invalid.
        """
    async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]], **kwargs) -> list[str]:
        """Add pre-computed embeddings directly.

        Args:
            chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
                corresponding vectors.
            **kwargs: Datastore-specific parameters.

        Returns:
            list[str]: List of IDs assigned to added embeddings.
        """
    async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
        '''Semantic search using a text query converted to a vector.

        Usage Example:
            ```python
            from gllm_datastore.core.filters import filter as F

            # Direct FilterClause usage
            await vector_capability.retrieve(
                query="What is the capital of France?",
                filters=F.eq("metadata.category", "tech"),
                options=QueryOptions(limit=10),
            )

            # Multiple filters
            filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
            await vector_capability.retrieve(query="What is the capital of France?", filters=filters)
            ```

        Args:
            query (str): Text query to embed and search for.
            filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.
            **kwargs: Datastore-specific parameters.

        Returns:
            list[Chunk]: List of chunks ordered by relevance score.
        '''
    async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
        '''Direct vector similarity search.

        Usage Example:
            ```python
            from gllm_datastore.core.filters import filter as F

            # Direct FilterClause usage
            await vector_capability.retrieve_by_vector(
                vector=[0.1, 0.2, 0.3],
                filters=F.eq("metadata.category", "tech"),
                options=QueryOptions(limit=10),
            )

            # Multiple filters
            filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
            await vector_capability.retrieve_by_vector(vector=[0.1, 0.2, 0.3], filters=filters)
            ```

        Args:
            vector (Vector): Query embedding vector.
            filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.

        Returns:
            list[Chunk]: List of chunks ordered by similarity score.
        '''
    async def update(self, update_values: dict, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
        """Update existing records in the datastore.

        Args:
            update_values (dict): Values to update.
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            **kwargs: Datastore-specific parameters.
        """
    async def delete(self, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
        """Delete records from the data store based on filters.

        Args:
            filters (FilterClause | QueryFilter | None, optional): Filters to select records for deletion.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            **kwargs: Datastore-specific parameters.
        """
    async def delete_by_id(self, id: str | list[str], **kwargs: Any) -> None:
        """Delete records from the data store based on IDs.

        Args:
            id (str | list[str]): ID or list of IDs to delete.
            **kwargs: Datastore-specific parameters.
        """
    async def clear(self, **kwargs: Any) -> None:
        """Clear all records from the datastore.

        Args:
            **kwargs: Datastore-specific parameters.
        """
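A minimal sketch of the vector capability, assuming a concrete BaseEMInvoker instance is available (its construction lives in gllm_inference and is outside this diff); the URL, index name, and Chunk fields are placeholders:

```python
import asyncio

from elasticsearch import AsyncElasticsearch
from gllm_core.schema import Chunk
from gllm_datastore.data_store.elasticsearch.vector import ElasticsearchVectorCapability

async def main(em_invoker) -> None:
    # em_invoker: any concrete BaseEMInvoker implementation.
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder URL
    vectors = ElasticsearchVectorCapability(index_name="docs", client=client, em_invoker=em_invoker)

    # Idempotent bootstrap; the dimension is inferred from the embedder if omitted.
    await vectors.ensure_index(distance_strategy="cosine")

    # Embed-and-store, then semantic search over the same index.
    await vectors.create(Chunk(content="Paris is the capital of France."))  # assumed Chunk field
    chunks = await vectors.retrieve("capital of France")
    print(len(chunks))
    await client.close()

# asyncio.run(main(my_em_invoker))
```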
gllm_datastore/data_store/exceptions.pyi
@@ -0,0 +1,35 @@
from _typeshed import Incomplete

class NotSupportedException(Exception):
    """Raised when attempting to access an unsupported capability.

    This exception is raised when code attempts to access a capability
    that isn't configured for a datastore.
    """
    capability: Incomplete
    class_name: Incomplete
    class_obj: Incomplete
    def __init__(self, capability: str, class_obj: type) -> None:
        """Initialize the exception.

        Args:
            capability (str): The name of the unsupported capability.
            class_obj (type): The class object for context.
        """

class NotRegisteredException(Exception):
    """Raised when attempting to access a capability that is not registered.

    This exception is raised when code attempts to access a capability
    that is not registered for a datastore but is supported by the datastore.
    """
    capability: Incomplete
    class_name: Incomplete
    class_obj: Incomplete
    def __init__(self, capability: str, class_obj: type) -> None:
        """Initialize the exception.

        Args:
            capability (str): The name of the unregistered capability.
            class_obj (type): The class object for context.
        """
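A hypothetical handling sketch showing where these exceptions surface; the capability properties elsewhere in this diff document that they raise NotRegisteredException when a capability was never registered:

```python
from gllm_datastore.data_store.exceptions import (
    NotRegisteredException,
    NotSupportedException,
)

def describe_capability(store, capability: str) -> str:
    # Accessing e.g. store.vector on a store lacking that capability is
    # documented to raise one of the two exceptions above.
    try:
        getattr(store, capability)
    except NotSupportedException:
        return f"{capability} is not supported by {type(store).__name__}"
    except NotRegisteredException:
        return f"{capability} is supported but was never registered"
    return f"{capability} is available"
```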
gllm_datastore/data_store/in_memory/__init__.pyi
@@ -0,0 +1,5 @@
from gllm_datastore.data_store.in_memory.data_store import InMemoryDataStore as InMemoryDataStore
from gllm_datastore.data_store.in_memory.fulltext import InMemoryFulltextCapability as InMemoryFulltextCapability
from gllm_datastore.data_store.in_memory.vector import InMemoryVectorCapability as InMemoryVectorCapability

__all__ = ['InMemoryDataStore', 'InMemoryFulltextCapability', 'InMemoryVectorCapability']
gllm_datastore/data_store/in_memory/data_store.pyi
@@ -0,0 +1,71 @@
from gllm_core.schema import Chunk as Chunk
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
from gllm_datastore.data_store.in_memory.fulltext import InMemoryFulltextCapability as InMemoryFulltextCapability
from gllm_datastore.data_store.in_memory.vector import InMemoryVectorCapability as InMemoryVectorCapability

class InMemoryDataStore(BaseDataStore):
    """In-memory data store with multiple capability support.

    This class provides a unified interface for accessing vector, fulltext,
    and cache capabilities using in-memory storage optimized for development
    and testing scenarios.

    Attributes:
        store (dict[str, Chunk]): Dictionary storing data with their IDs as keys.
    """
    store: dict[str, Chunk]
    def __init__(self) -> None:
        """Initialize the in-memory data store."""
    @property
    def supported_capabilities(self) -> list[CapabilityType]:
        """Return the list of currently supported capabilities.

        Returns:
            list[CapabilityType]: List of capability types that are supported.
        """
    @property
    def fulltext(self) -> InMemoryFulltextCapability:
        """Access the fulltext capability if registered.

        This property reuses the logic of its parent class and overrides it only
        to narrow the return type to InMemoryFulltextCapability for better type hinting.

        Returns:
            InMemoryFulltextCapability: Fulltext capability handler.

        Raises:
            NotRegisteredException: If the fulltext capability is not registered.
        """
    @property
    def vector(self) -> InMemoryVectorCapability:
        """Access the vector capability if registered.

        This property reuses the logic of its parent class and overrides it only
        to narrow the return type to InMemoryVectorCapability for better type hinting.

        Returns:
            InMemoryVectorCapability: Vector capability handler.

        Raises:
            NotRegisteredException: If the vector capability is not registered.
        """
    @classmethod
    def translate_query_filter(cls, query_filter: FilterClause | QueryFilter | None) -> FilterClause | QueryFilter | None:
        """Translate a QueryFilter or FilterClause to the in-memory datastore filter syntax.

        For the in-memory datastore, this method acts as an identity function since
        the datastore works directly with the QueryFilter DSL without requiring
        translation to a native format.

        Args:
            query_filter (FilterClause | QueryFilter | None): The filter to translate.
                Can be a single FilterClause, a QueryFilter with multiple clauses,
                or None for empty filters.

        Returns:
            FilterClause | QueryFilter | None: The same filter object that was passed in.
                Returns None for empty filters.
        """
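Finally, a small sketch of the in-memory store, assuming only what the stubs above declare (capability registration is defined on BaseDataStore and is not part of this diff):

```python
from gllm_datastore.data_store.exceptions import NotRegisteredException
from gllm_datastore.data_store.in_memory import InMemoryDataStore

store = InMemoryDataStore()
print(store.supported_capabilities)  # capability types this store can host

try:
    fulltext = store.fulltext  # raises NotRegisteredException if never registered
    print(type(fulltext).__name__)
except NotRegisteredException:
    # Only the unregistered path is handled here; how to register a capability
    # is not shown in this diff.
    print("fulltext capability not registered on this store")
```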