gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +8 -0
- gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
- gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
- gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
- gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
- gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
- gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
- gllm_datastore/data_store/base.pyi +176 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +201 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
- gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
- gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
- gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
- gllm_datastore/data_store/opensearch/query.pyi +89 -0
- gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
- gllm_datastore/data_store/opensearch/vector.pyi +211 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +153 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/data_store/sql/__init__.pyi +4 -0
- gllm_datastore/data_store/sql/constants.pyi +5 -0
- gllm_datastore/data_store/sql/data_store.pyi +201 -0
- gllm_datastore/data_store/sql/fulltext.pyi +164 -0
- gllm_datastore/data_store/sql/query.pyi +81 -0
- gllm_datastore/data_store/sql/query_translator.pyi +51 -0
- gllm_datastore/data_store/sql/schema.pyi +16 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/capability/__init__.pyi +3 -0
- gllm_datastore/encryptor/capability/mixin.pyi +32 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
- gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/schema.pyi +27 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/signature/__init__.pyi +0 -0
- gllm_datastore/signature/webhook_signature.pyi +31 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-312-darwin.so +0 -0
- gllm_datastore.pyi +178 -0
- gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
- gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
- gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from _typeshed import Incomplete
|
|
3
|
+
from elasticsearch import AsyncElasticsearch
|
|
4
|
+
from elasticsearch.dsl import AsyncSearch
|
|
5
|
+
from elasticsearch.dsl.query import Query
|
|
6
|
+
from elasticsearch.dsl.response import Response
|
|
7
|
+
from gllm_datastore.core.filters.schema import QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
8
|
+
from gllm_datastore.data_store.elasticsearch.query_translator import ElasticsearchQueryTranslator as ElasticsearchQueryTranslator
|
|
9
|
+
from gllm_datastore.utils import flatten_dict as flatten_dict
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
VALID_FIELD_PATH: Incomplete
|
|
13
|
+
|
|
14
|
+
def apply_filters_and_options(search: AsyncSearch, filters: QueryFilter | None = None, options: QueryOptions | None = None) -> AsyncSearch:
|
|
15
|
+
"""Apply filters and options to an Elasticsearch search object.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
search (AsyncSearch): Elasticsearch search object.
|
|
19
|
+
filters (QueryFilter | None, optional): New QueryFilter with filters and condition.
|
|
20
|
+
options (QueryOptions | None, optional): Query options (limit, sort, fields).
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
AsyncSearch: Elasticsearch search object.
|
|
24
|
+
"""
|
|
25
|
+
def translate_filter(filters: QueryFilter | None) -> Query | None:
|
|
26
|
+
"""Translate a structured QueryFilter into an Elasticsearch DSL Query.
|
|
27
|
+
|
|
28
|
+
The translation supports comparison operators (EQ, NE, GT, LT, GTE, LTE),
|
|
29
|
+
array operators (IN, NIN, ARRAY_CONTAINS, ANY, ALL), text operators (TEXT_CONTAINS),
|
|
30
|
+
and logical conditions (AND, OR, NOT), including nested filters.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
filters (QueryFilter | None): Structured QueryFilter. If None, returns None.
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
Query | None: An Elasticsearch Query object or None if no filters are provided.
|
|
37
|
+
|
|
38
|
+
Raises:
|
|
39
|
+
ValueError: When the filter structure is invalid.
|
|
40
|
+
TypeError: When an operator-value type combination is invalid.
|
|
41
|
+
"""
|
|
42
|
+
async def update_by_query(client: AsyncElasticsearch, index_name: str, update_values: dict[str, Any], filters: QueryFilter | None = None, logger: logging.Logger | None = None) -> None:
|
|
43
|
+
'''Update records in Elasticsearch using UpdateByQuery with retry logic for version conflicts.
|
|
44
|
+
|
|
45
|
+
This function builds a painless script that safely assigns each updated field.
|
|
46
|
+
When a field path contains dots (e.g. "metadata.cache_value"), we must
|
|
47
|
+
access the corresponding param using bracket syntax: params[\'metadata.cache_value\']
|
|
48
|
+
to avoid Painless treating it as nested object access (which would be None).
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
client (AsyncElasticsearch): Elasticsearch client instance.
|
|
52
|
+
index_name (str): The name of the Elasticsearch index.
|
|
53
|
+
update_values (dict[str, Any]): Values to update.
|
|
54
|
+
filters (QueryFilter | None, optional): New QueryFilter to select records to update.
|
|
55
|
+
Defaults to None.
|
|
56
|
+
logger (Any | None, optional): Logger instance. Defaults to None.
|
|
57
|
+
'''
|
|
58
|
+
async def delete_by_query(client: AsyncElasticsearch, index_name: str, filters: QueryFilter | None = None) -> None:
|
|
59
|
+
"""Delete records from Elasticsearch using delete_by_query.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
client (AsyncElasticsearch): Elasticsearch client instance.
|
|
63
|
+
index_name (str): The name of the Elasticsearch index.
|
|
64
|
+
filters (QueryFilter | None, optional): New QueryFilter to select records for deletion.
|
|
65
|
+
Defaults to None, in which case no operation will be performed.
|
|
66
|
+
"""
|
|
67
|
+
async def delete_by_id(client: AsyncElasticsearch, index_name: str, ids: str | list[str]) -> None:
|
|
68
|
+
"""Delete records from Elasticsearch by IDs using Search.delete().
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
client (AsyncElasticsearch): Elasticsearch client instance.
|
|
72
|
+
index_name (str): The name of the Elasticsearch index.
|
|
73
|
+
ids (str | list[str]): ID or list of IDs to delete.
|
|
74
|
+
"""
|
|
75
|
+
def validate_query_length(query: str, min_length: int = 0, max_length: int | None = None) -> bool:
|
|
76
|
+
"""Validate query length against minimum and maximum constraints.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
query (str): The query string to validate.
|
|
80
|
+
min_length (int, optional): Minimum required length. Defaults to 0.
|
|
81
|
+
max_length (int | None, optional): Maximum allowed length. Defaults to None.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
bool: True if query is valid, False otherwise.
|
|
85
|
+
"""
|
|
86
|
+
def create_search_with_filters(client: AsyncElasticsearch, index_name: str, filters: QueryFilter | None = None, exclude_fields: list[str] | None = None) -> AsyncSearch:
|
|
87
|
+
"""Create an AsyncSearch object with optional filters and field exclusions.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
client (AsyncElasticsearch): Elasticsearch client instance.
|
|
91
|
+
index_name (str): The name of the Elasticsearch index.
|
|
92
|
+
filters (QueryFilter | None, optional): New QueryFilter to apply. Defaults to None.
|
|
93
|
+
exclude_fields (list[str] | None, optional): Fields to exclude from source. Defaults to None.
|
|
94
|
+
|
|
95
|
+
Returns:
|
|
96
|
+
AsyncSearch: Configured AsyncSearch object.
|
|
97
|
+
"""
|
|
98
|
+
def apply_filter_query_to_search(search: AsyncSearch, main_query: Query, filters: QueryFilter | None = None) -> AsyncSearch:
|
|
99
|
+
"""Apply filter query to a search with a main query.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
search (AsyncSearch): Elasticsearch search object.
|
|
103
|
+
main_query (Query): The main query to apply.
|
|
104
|
+
filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
AsyncSearch: Search object with applied queries.
|
|
108
|
+
"""
|
|
109
|
+
async def safe_execute(search: AsyncSearch, logger: logging.Logger | None = None) -> Response | None:
|
|
110
|
+
"""Execute an Elasticsearch DSL search with unified error handling.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
search (AsyncSearch): Elasticsearch DSL AsyncSearch object.
|
|
114
|
+
logger (logging.Logger | None, optional): Logger instance for error messages. Defaults to None.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
Response | None: The Elasticsearch response on success, otherwise None.
|
|
118
|
+
"""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from gllm_datastore.data_store._elastic_core.query_translator import ElasticLikeQueryTranslator as ElasticLikeQueryTranslator
|
|
2
|
+
|
|
3
|
+
class ElasticsearchQueryTranslator(ElasticLikeQueryTranslator):
|
|
4
|
+
"""Translates QueryFilter and FilterClause objects to Elasticsearch Query DSL.
|
|
5
|
+
|
|
6
|
+
This class extends ElasticLikeQueryTranslator and implements abstract methods
|
|
7
|
+
using Elasticsearch DSL API. It also provides QueryOptions handling
|
|
8
|
+
methods specific to Elasticsearch.
|
|
9
|
+
|
|
10
|
+
Attributes:
|
|
11
|
+
_logger (Logger): Logger instance for error messages and debugging.
|
|
12
|
+
"""
|
|
13
|
+
def __init__(self) -> None:
|
|
14
|
+
"""Initialize the Elasticsearch query translator.
|
|
15
|
+
|
|
16
|
+
Raises:
|
|
17
|
+
ImportError: If elasticsearch package is not installed.
|
|
18
|
+
"""
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from elasticsearch import AsyncElasticsearch
|
|
3
|
+
from gllm_core.schema import Chunk
|
|
4
|
+
from gllm_datastore.constants import DEFAULT_FETCH_K as DEFAULT_FETCH_K, DEFAULT_TOP_K as DEFAULT_TOP_K
|
|
5
|
+
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
6
|
+
from gllm_datastore.data_store._elastic_core.query_translator import convert_filter_clause as convert_filter_clause
|
|
7
|
+
from gllm_datastore.data_store.elasticsearch.query import delete_by_id as delete_by_id, delete_by_query as delete_by_query, translate_filter as translate_filter, update_by_query as update_by_query
|
|
8
|
+
from gllm_datastore.utils.converter import from_langchain as from_langchain, to_langchain as to_langchain
|
|
9
|
+
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
|
|
10
|
+
from gllm_inference.schema import Vector
|
|
11
|
+
from langchain_elasticsearch.vectorstores import AsyncRetrievalStrategy
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
class ElasticsearchVectorCapability:
|
|
15
|
+
"""Elasticsearch implementation of VectorCapability protocol.
|
|
16
|
+
|
|
17
|
+
This class provides document CRUD operations and vector search using Elasticsearch.
|
|
18
|
+
|
|
19
|
+
Attributes:
|
|
20
|
+
index_name (str): The name of the Elasticsearch index.
|
|
21
|
+
vector_store (AsyncElasticsearchStore): The vector store instance.
|
|
22
|
+
em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
|
|
23
|
+
"""
|
|
24
|
+
index_name: Incomplete
|
|
25
|
+
vector_store: Incomplete
|
|
26
|
+
def __init__(self, index_name: str, client: AsyncElasticsearch, em_invoker: BaseEMInvoker, query_field: str = 'text', vector_query_field: str = 'vector', retrieval_strategy: AsyncRetrievalStrategy | None = None, distance_strategy: str | None = None) -> None:
|
|
27
|
+
'''Initialize the Elasticsearch vector capability.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
index_name (str): The name of the Elasticsearch index.
|
|
31
|
+
client (AsyncElasticsearch): The Elasticsearch client.
|
|
32
|
+
em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
|
|
33
|
+
query_field (str, optional): The field name for text queries. Defaults to "text".
|
|
34
|
+
vector_query_field (str, optional): The field name for vector queries. Defaults to "vector".
|
|
35
|
+
retrieval_strategy (AsyncRetrievalStrategy | None, optional): The retrieval strategy for retrieval.
|
|
36
|
+
Defaults to None, in which case DenseVectorStrategy() is used.
|
|
37
|
+
distance_strategy (str | None, optional): The distance strategy for retrieval. Defaults to None.
|
|
38
|
+
'''
|
|
39
|
+
@property
|
|
40
|
+
def em_invoker(self) -> BaseEMInvoker:
|
|
41
|
+
"""Returns the EM Invoker instance.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
BaseEMInvoker: The EM Invoker instance.
|
|
45
|
+
"""
|
|
46
|
+
async def ensure_index(self, mapping: dict[str, Any] | None = None, index_settings: dict[str, Any] | None = None, dimension: int | None = None, distance_strategy: str | None = None) -> None:
|
|
47
|
+
'''Ensure Elasticsearch index exists, creating it if necessary.
|
|
48
|
+
|
|
49
|
+
This method is idempotent - if the index already exists, it will skip creation
|
|
50
|
+
and return early.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
mapping (dict[str, Any] | None, optional): Custom mapping dictionary to use
|
|
54
|
+
for index creation. If provided, this mapping will be used directly.
|
|
55
|
+
The mapping should follow Elasticsearch mapping format. Defaults to None,
|
|
56
|
+
in which default mapping will be used.
|
|
57
|
+
index_settings (dict[str, Any] | None, optional): Custom index settings.
|
|
58
|
+
These settings will be merged with any default settings. Defaults to None.
|
|
59
|
+
dimension (int | None, optional): Vector dimension. If not provided and mapping
|
|
60
|
+
is not provided, will be inferred from em_invoker by generating a test embedding.
|
|
61
|
+
distance_strategy (str | None, optional): Distance strategy for vector similarity.
|
|
62
|
+
Supported values: "cosine", "l2_norm", "dot_product", etc.
|
|
63
|
+
Only used when building default mapping. Defaults to "cosine" if not specified.
|
|
64
|
+
|
|
65
|
+
Raises:
|
|
66
|
+
ValueError: If mapping is invalid or required parameters are missing.
|
|
67
|
+
RuntimeError: If index creation fails.
|
|
68
|
+
'''
|
|
69
|
+
async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
|
|
70
|
+
"""Create new records in the datastore.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
data (Chunk | list[Chunk]): Data to create (single item or collection).
|
|
74
|
+
**kwargs: Datastore-specific parameters.
|
|
75
|
+
|
|
76
|
+
Raises:
|
|
77
|
+
ValueError: If data structure is invalid.
|
|
78
|
+
"""
|
|
79
|
+
async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]], **kwargs) -> list[str]:
|
|
80
|
+
"""Add pre-computed embeddings directly.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
|
|
84
|
+
corresponding vectors.
|
|
85
|
+
**kwargs: Datastore-specific parameters.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
list[str]: List of IDs assigned to added embeddings.
|
|
89
|
+
"""
|
|
90
|
+
async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
|
|
91
|
+
'''Semantic search using text query converted to vector.
|
|
92
|
+
|
|
93
|
+
Usage Example:
|
|
94
|
+
```python
|
|
95
|
+
from gllm_datastore.core.filters import filter as F
|
|
96
|
+
|
|
97
|
+
# Direct FilterClause usage
|
|
98
|
+
await vector_capability.retrieve(
|
|
99
|
+
query="What is the capital of France?",
|
|
100
|
+
filters=F.eq("metadata.category", "tech"),
|
|
101
|
+
options=QueryOptions(limit=10),
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Multiple filters
|
|
105
|
+
filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
|
|
106
|
+
await vector_capability.retrieve(query="What is the capital of France?", filters=filters)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
query (str): Text query to embed and search for.
|
|
111
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
|
|
112
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
113
|
+
Defaults to None.
|
|
114
|
+
options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.
|
|
115
|
+
**kwargs: Datastore-specific parameters.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
list[Chunk]: List of chunks ordered by relevance score.
|
|
119
|
+
'''
|
|
120
|
+
async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
121
|
+
'''Direct vector similarity search.
|
|
122
|
+
|
|
123
|
+
Usage Example:
|
|
124
|
+
```python
|
|
125
|
+
from gllm_datastore.core.filters import filter as F
|
|
126
|
+
|
|
127
|
+
# Direct FilterClause usage
|
|
128
|
+
await vector_capability.retrieve_by_vector(
|
|
129
|
+
vector=[0.1, 0.2, 0.3],
|
|
130
|
+
filters=F.eq("metadata.category", "tech"),
|
|
131
|
+
options=QueryOptions(limit=10),
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
# Multiple filters
|
|
135
|
+
filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
|
|
136
|
+
await vector_capability.retrieve_by_vector(vector=[0.1, 0.2, 0.3], filters=filters)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
vector (Vector): Query embedding vector.
|
|
141
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
|
|
142
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
143
|
+
Defaults to None.
|
|
144
|
+
options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
list[Chunk]: List of chunks ordered by similarity score.
|
|
148
|
+
'''
|
|
149
|
+
async def update(self, update_values: dict, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
|
|
150
|
+
"""Update existing records in the datastore.
|
|
151
|
+
|
|
152
|
+
Args:
|
|
153
|
+
update_values (dict): Values to update.
|
|
154
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
|
|
155
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
156
|
+
Defaults to None.
|
|
157
|
+
**kwargs: Datastore-specific parameters.
|
|
158
|
+
"""
|
|
159
|
+
async def delete(self, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
|
|
160
|
+
"""Delete records from the data store based on filters.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records for deletion.
|
|
164
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
165
|
+
Defaults to None.
|
|
166
|
+
**kwargs: Datastore-specific parameters.
|
|
167
|
+
"""
|
|
168
|
+
async def delete_by_id(self, id: str | list[str], **kwargs: Any) -> None:
|
|
169
|
+
"""Delete records from the data store based on IDs.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
id (str | list[str]): ID or list of IDs to delete.
|
|
173
|
+
**kwargs: Datastore-specific parameters.
|
|
174
|
+
"""
|
|
175
|
+
async def clear(self, **kwargs: Any) -> None:
|
|
176
|
+
"""Clear all records from the datastore.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
**kwargs: Datastore-specific parameters.
|
|
180
|
+
"""
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
|
|
3
|
+
class NotSupportedException(Exception):
|
|
4
|
+
"""Raised when attempting to access an unsupported capability.
|
|
5
|
+
|
|
6
|
+
This exception is raised when code attempts to access a capability
|
|
7
|
+
that isn't configured for a datastore.
|
|
8
|
+
"""
|
|
9
|
+
capability: Incomplete
|
|
10
|
+
class_name: Incomplete
|
|
11
|
+
class_obj: Incomplete
|
|
12
|
+
def __init__(self, capability: str, class_obj: type) -> None:
|
|
13
|
+
"""Initialize the exception.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
capability (str): The name of the unsupported capability.
|
|
17
|
+
class_obj (Type): The class object for context.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
class NotRegisteredException(Exception):
|
|
21
|
+
"""Raised when attempting to access a capability that is not registered.
|
|
22
|
+
|
|
23
|
+
This exception is raised when code attempts to access a capability
|
|
24
|
+
that is not registered for a datastore but is supported by the datastore.
|
|
25
|
+
"""
|
|
26
|
+
capability: Incomplete
|
|
27
|
+
class_name: Incomplete
|
|
28
|
+
class_obj: Incomplete
|
|
29
|
+
def __init__(self, capability: str, class_obj: type) -> None:
|
|
30
|
+
"""Initialize the exception.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
capability (str): The name of the unregistered capability.
|
|
34
|
+
class_obj (Type): The class object for context.
|
|
35
|
+
"""
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
from gllm_datastore.data_store.in_memory.data_store import InMemoryDataStore as InMemoryDataStore
|
|
2
|
+
from gllm_datastore.data_store.in_memory.fulltext import InMemoryFulltextCapability as InMemoryFulltextCapability
|
|
3
|
+
from gllm_datastore.data_store.in_memory.vector import InMemoryVectorCapability as InMemoryVectorCapability
|
|
4
|
+
|
|
5
|
+
__all__ = ['InMemoryDataStore', 'InMemoryFulltextCapability', 'InMemoryVectorCapability']
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from gllm_core.schema import Chunk as Chunk
|
|
2
|
+
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
|
|
3
|
+
from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
|
|
4
|
+
from gllm_datastore.data_store.in_memory.fulltext import InMemoryFulltextCapability as InMemoryFulltextCapability
|
|
5
|
+
from gllm_datastore.data_store.in_memory.vector import InMemoryVectorCapability as InMemoryVectorCapability
|
|
6
|
+
|
|
7
|
+
class InMemoryDataStore(BaseDataStore):
|
|
8
|
+
"""In-memory data store with multiple capability support.
|
|
9
|
+
|
|
10
|
+
This class provides a unified interface for accessing vector, fulltext,
|
|
11
|
+
and cache capabilities using in-memory storage optimized for development
|
|
12
|
+
and testing scenarios.
|
|
13
|
+
|
|
14
|
+
Attributes:
|
|
15
|
+
store (dict[str, Chunk]): Dictionary storing data with their IDs as keys.
|
|
16
|
+
"""
|
|
17
|
+
store: dict[str, Chunk]
|
|
18
|
+
def __init__(self) -> None:
|
|
19
|
+
"""Initialize the in-memory data store."""
|
|
20
|
+
@property
|
|
21
|
+
def supported_capabilities(self) -> list[CapabilityType]:
|
|
22
|
+
"""Return list of currently supported capabilities.
|
|
23
|
+
|
|
24
|
+
Returns:
|
|
25
|
+
list[str]: List of capability names that are supported.
|
|
26
|
+
"""
|
|
27
|
+
@property
|
|
28
|
+
def fulltext(self) -> InMemoryFulltextCapability:
|
|
29
|
+
"""Access fulltext capability if registered.
|
|
30
|
+
|
|
31
|
+
This method solely uses the logic of its parent class to return the fulltext capability handler.
|
|
32
|
+
This method overrides the parent class to return the InMemoryFulltextCapability handler for better
|
|
33
|
+
type hinting.
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
InMemoryFulltextCapability: Fulltext capability handler.
|
|
37
|
+
|
|
38
|
+
Raises:
|
|
39
|
+
NotRegisteredException: If fulltext capability is not registered.
|
|
40
|
+
"""
|
|
41
|
+
@property
|
|
42
|
+
def vector(self) -> InMemoryVectorCapability:
|
|
43
|
+
"""Access vector capability if registered.
|
|
44
|
+
|
|
45
|
+
This method solely uses the logic of its parent class to return the vector capability handler.
|
|
46
|
+
This method overrides the parent class to return the InMemoryVectorCapability handler for better
|
|
47
|
+
type hinting.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
InMemoryVectorCapability: Vector capability handler.
|
|
51
|
+
|
|
52
|
+
Raises:
|
|
53
|
+
NotRegisteredException: If vector capability is not registered.
|
|
54
|
+
"""
|
|
55
|
+
@classmethod
|
|
56
|
+
def translate_query_filter(cls, query_filter: FilterClause | QueryFilter | None) -> FilterClause | QueryFilter | None:
|
|
57
|
+
"""Translate QueryFilter or FilterClause to in-memory datastore filter syntax.
|
|
58
|
+
|
|
59
|
+
For the in-memory datastore, this method acts as an identity function since
|
|
60
|
+
the datastore works directly with the QueryFilter DSL without requiring
|
|
61
|
+
translation to a native format.
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
query_filter (FilterClause | QueryFilter | None): The filter to translate.
|
|
65
|
+
Can be a single FilterClause, a QueryFilter with multiple clauses,
|
|
66
|
+
or None for empty filters.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
FilterClause | QueryFilter | None: The same filter object that was passed in.
|
|
70
|
+
Returns None for empty filters.
|
|
71
|
+
"""
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from gllm_core.schema.chunk import Chunk
|
|
2
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
3
|
+
from gllm_datastore.data_store.in_memory.query import create_updated_chunk as create_updated_chunk, delete_chunks_by_filters as delete_chunks_by_filters, get_chunks_from_store as get_chunks_from_store
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
class InMemoryFulltextCapability:
    """In-memory implementation of FulltextCapability protocol.

    This class provides document CRUD operations and flexible querying using pure
    Python data structures optimized for development and testing.

    Attributes:
        store (dict[str, Chunk]): Dictionary storing Chunk objects with their IDs as keys.
    """
    # Chunk objects keyed by their chunk IDs.
    store: dict[str, Chunk]
    def __init__(self, store: dict[str, Any] | None = None) -> None:
        """Initialize the in-memory fulltext capability.

        Args:
            store (dict[str, Any] | None, optional): Dictionary storing Chunk objects
                with their IDs as keys. Defaults to None, in which case an empty
                store is presumably created — confirm against the implementation.
        """
    async def create(self, data: Chunk | list[Chunk]) -> None:
        '''Create new records in the datastore.

        Examples:
            Create a new chunk.
            ```python
            await fulltext_capability.create(Chunk(content="Test chunk", metadata={"category": "test"}))
            ```

        Args:
            data (Chunk | list[Chunk]): Data to create (single item or collection).

        Raises:
            ValueError: If data structure is invalid.
        '''
    async def retrieve(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
        '''Read records from the datastore with optional filtering.

        Examples:
            ```python
            from gllm_datastore.core.filters import filter as F

            # Direct FilterClause usage
            results = await fulltext_capability.retrieve(filters=F.eq("metadata.category", "tech"))

            # Multiple filters
            results = await fulltext_capability.retrieve(
                filters=F.and_(F.eq("metadata.category", "tech"), F.eq("metadata.status", "active"))
            )
            ```

        Args:
            filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options for sorting and pagination.
                Defaults to None.

        Returns:
            list[Chunk]: List of matched chunks after applying filters and options.
        '''
    async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
        """Find records that fuzzy match the query within a distance threshold.

        Args:
            query (str): Text to fuzzy match against.
            max_distance (int, optional): Maximum edit distance for matches. Defaults to 2.
            filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options; only the limit is used here.
                Defaults to None.

        Returns:
            list[Chunk]: Matched chunks ordered by distance (ascending), limited by options.limit.
        """
    async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
        '''Update existing records in the datastore.

        Examples:
            Update certain metadata of a chunk with specific filters.
            ```python
            from gllm_datastore.core.filters import filter as F

            # Direct FilterClause usage
            await fulltext_capability.update(
                update_values={"metadata": {"status": "published"}},
                filters=F.eq("metadata.category", "tech"),
            )

            # Multiple filters
            await fulltext_capability.update(
                update_values={"metadata": {"status": "published"}},
                filters=F.and_(F.eq("metadata.status", "draft"), F.eq("metadata.category", "tech")),
            )
            ```

        Args:
            update_values (dict[str, Any]): Mapping of fields to new values to apply.
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
        '''
    async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
        '''Delete records from the datastore.

        Examples:
            ```python
            from gllm_datastore.core.filters import filter as F

            # Direct FilterClause usage
            await fulltext_capability.delete(filters=F.eq("metadata.category", "tech"))

            # Multiple filters
            await fulltext_capability.delete(
                filters=F.and_(F.eq("metadata.category", "tech"), F.eq("metadata.status", "draft"))
            )
            ```

        Args:
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options for sorting and limiting deletions
                (for eviction-like operations). Defaults to None.

        Returns:
            None: This method performs deletions in-place.
        '''
    async def clear(self) -> None:
        """Clear all records from the datastore."""
|