gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
gllm_datastore/data_store/opensearch/fulltext.pyi
@@ -0,0 +1,240 @@
+ from _typeshed import Incomplete
+ from enum import StrEnum
+ from gllm_core.schema import Chunk
+ from gllm_datastore.constants import METADATA_KEYS as METADATA_KEYS
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from gllm_datastore.data_store._elastic_core.elastic_like_core import ElasticLikeCore as ElasticLikeCore
+ from gllm_datastore.data_store._elastic_core.query_translator import convert_filter_clause as convert_filter_clause
+ from gllm_datastore.data_store.opensearch.query import apply_filter_query_to_search as apply_filter_query_to_search, create_search_with_filters as create_search_with_filters, delete_by_id as delete_by_id, delete_by_query as delete_by_query, safe_execute as safe_execute, update_by_query as update_by_query, validate_query_length as validate_query_length
+ from gllm_datastore.data_store.opensearch.query_translator import OpenSearchQueryTranslator as OpenSearchQueryTranslator
+ from opensearchpy import AsyncOpenSearch
+ from typing import Any, Literal, overload
+
+ class SupportedQueryMethods(StrEnum):
+     """Supported query methods for the OpenSearch fulltext capability."""
+     AUTOCOMPLETE: str
+     AUTOSUGGEST: str
+     BM25: str
+     BY_FIELD: str
+     SHINGLES: str
+
+ QUERY_REQUIRED_STRATEGIES: Incomplete
+
+ class OpenSearchFulltextCapability:
+     """OpenSearch implementation of the FulltextCapability protocol.
+
+     This class provides document CRUD operations and flexible querying using OpenSearch.
+
+     Attributes:
+         index_name (str): The name of the OpenSearch index.
+         client (AsyncOpenSearch): AsyncOpenSearch client.
+         query_field (str): The field name to use for text content.
+     """
+     index_name: Incomplete
+     client: Incomplete
+     query_field: Incomplete
+     def __init__(self, index_name: str, client: AsyncOpenSearch, query_field: str = 'text') -> None:
+         '''Initialize the OpenSearch fulltext capability.
+
+         Args:
+             index_name (str): The name of the OpenSearch index.
+             client (AsyncOpenSearch): The OpenSearch client.
+             query_field (str, optional): The field name to use for text content. Defaults to "text".
+         '''
+     async def get_size(self) -> int:
+         """Returns the total number of documents in the index.
+
+         Returns:
+             int: The total number of documents.
+         """
+     async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
+         """Create new records in the datastore.
+
+         Args:
+             data (Chunk | list[Chunk]): Data to create (single item or collection).
+             **kwargs: Backend-specific parameters forwarded to the OpenSearch bulk API.
+
+         Raises:
+             ValueError: If the data structure is invalid.
+         """
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.BY_FIELD] | None = ..., query: str | None = None, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.BM25], query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, k1: float | None = None, b: float | None = None, **kwargs: Any) -> list[Chunk]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.AUTOCOMPLETE], query: str, field: str, size: int = 20, fuzzy_tolerance: int = 1, min_prefix_length: int = 3, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.AUTOSUGGEST], query: str, search_fields: list[str], autocomplete_field: str, size: int = 20, min_length: int = 3, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
+     @overload
+     async def retrieve(self, strategy: Literal[SupportedQueryMethods.SHINGLES], query: str, field: str, size: int = 20, min_length: int = 3, max_length: int = 30, filter_query: dict[str, Any] | None = None, **kwargs: Any) -> list[str]: ...
+     async def retrieve_by_field(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
+         """Retrieve records from the datastore based on metadata field filtering.
+
+         This method filters and returns stored chunks based on metadata values
+         rather than text content. It is particularly useful for structured lookups,
+         such as retrieving all chunks from a certain source, tagged with a specific label,
+         or authored by a particular user.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options (sorting, pagination, etc.).
+                 Defaults to None.
+
+         Returns:
+             list[Chunk]: The filtered results as Chunk objects.
+         """
+     async def retrieve_bm25(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, k1: float | None = None, b: float | None = None) -> list[Chunk]:
+         '''Queries the OpenSearch data store using the BM25 algorithm for keyword-based search.
+
+         Args:
+             query (str): The query string.
+             filters (FilterClause | QueryFilter | None, optional): Optional metadata filter to apply to the search.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Use filter builder functions like `F.eq()`, `F.and_()`, etc. Defaults to None.
+             options (QueryOptions | None, optional): Query options including fields, limit, order_by, etc.
+                 For example, `QueryOptions(include_fields=["title", "content"], limit=10,
+                 order_by="score", order_desc=True)`.
+                 If include_fields is None, defaults to [query_field]. For multiple fields,
+                 uses a multi_match query. Defaults to None.
+             k1 (float | None, optional): BM25 parameter controlling term frequency saturation.
+                 Higher values mean term frequency has more impact before diminishing returns.
+                 Typical values: 1.2-2.0. If None, uses the OpenSearch default (~1.2). Defaults to None.
+             b (float | None, optional): BM25 parameter controlling document length normalization.
+                 0.0 = no length normalization, 1.0 = full normalization.
+                 Typical value: 0.75. If None, uses the OpenSearch default (~0.75). Defaults to None.
+
+         Examples:
+             ```python
+             from gllm_datastore.core.filters import filter as F
+
+             # Basic BM25 query on the \'text\' field
+             results = await data_store.retrieve_bm25("machine learning")
+
+             # BM25 query on specific fields with query options
+             results = await data_store.retrieve_bm25(
+                 "natural language",
+                 options=QueryOptions(include_fields=["title", "abstract"], limit=5)
+             )
+
+             # BM25 query with a direct FilterClause
+             results = await data_store.retrieve_bm25(
+                 "deep learning",
+                 filters=F.eq("metadata.category", "AI")
+             )
+
+             # BM25 query with multiple filters
+             results = await data_store.retrieve_bm25(
+                 "deep learning",
+                 filters=F.and_(F.eq("metadata.category", "AI"), F.eq("metadata.status", "published"))
+             )
+
+             # BM25 query with custom BM25 parameters for more aggressive term frequency weighting
+             results = await data_store.retrieve_bm25(
+                 "artificial intelligence",
+                 k1=2.0,
+                 b=0.5
+             )
+
+             # BM25 query with fields, filters, and options
+             results = await data_store.retrieve_bm25(
+                 "data science applications",
+                 filters=F.and_(
+                     F.eq("metadata.author_id", "user123"),
+                     F.in_("metadata.publication_year", [2022, 2023])
+                 ),
+                 options=QueryOptions(include_fields=["content", "tags"], limit=10, order_by="score", order_desc=True),
+                 k1=1.5,
+                 b=0.9
+             )
+             ```
+
+         Returns:
+             list[Chunk]: A list of Chunk objects representing the retrieved documents.
+         '''
+     async def retrieve_autocomplete(self, query: str, field: str, size: int = 20, fuzzy_tolerance: int = 1, min_prefix_length: int = 3, filter_query: dict[str, Any] | None = None) -> list[str]:
+         """Provides suggestions based on a prefix query for a specific field.
+
+         Args:
+             query (str): The query string.
+             field (str): The field name for autocomplete.
+             size (int, optional): The number of suggestions to retrieve. Defaults to 20.
+             fuzzy_tolerance (int, optional): The level of fuzziness for suggestions. Defaults to 1.
+             min_prefix_length (int, optional): The minimum prefix length to trigger fuzzy matching. Defaults to 3.
+             filter_query (dict[str, Any] | None, optional): The filter query. Defaults to None.
+
+         Returns:
+             list[str]: A list of suggestions.
+         """
+     async def retrieve_autosuggest(self, query: str, search_fields: list[str], autocomplete_field: str, size: int = 20, min_length: int = 3, filters: QueryFilter | None = None) -> list[str]:
+         """Generates suggestions across multiple fields using a multi_match query to broaden the search criteria.
+
+         Args:
+             query (str): The query string.
+             search_fields (list[str]): The fields to search over.
+             autocomplete_field (str): The field name for autocomplete.
+             size (int, optional): The number of suggestions to retrieve. Defaults to 20.
+             min_length (int, optional): The minimum length of the query. Defaults to 3.
+             filters (QueryFilter | None, optional): The filter query. Defaults to None.
+
+         Returns:
+             list[str]: A list of suggestions.
+         """
+     async def retrieve_shingles(self, query: str, field: str, size: int = 20, min_length: int = 3, max_length: int = 30, filters: QueryFilter | None = None) -> list[str]:
+         """Searches using shingles for prefix and fuzzy matching.
+
+         Args:
+             query (str): The query string.
+             field (str): The field name for autocomplete.
+             size (int, optional): The number of suggestions to retrieve. Defaults to 20.
+             min_length (int, optional): The minimum length of the query.
+                 Queries shorter than this limit will return an empty list. Defaults to 3.
+             max_length (int, optional): The maximum length of the query.
+                 Queries exceeding this limit will return an empty list. Defaults to 30.
+             filters (QueryFilter | None, optional): The filter query. Defaults to None.
+
+         Returns:
+             list[str]: A list of suggestions.
+         """
+     async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
+         """Find records that fuzzy match the query within a distance threshold.
+
+         Args:
+             query (str): Text to fuzzy match against.
+             max_distance (int): Maximum edit distance for matches. Defaults to 2.
+             filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options (limit, sorting, etc.). Defaults to None.
+
+         Returns:
+             list[Chunk]: Matched chunks.
+         """
+     async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
+         """Update existing records in the datastore.
+
+         Args:
+             update_values (dict[str, Any]): Values to update.
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+         """
+     async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
+         """Delete records from the data store using filters and optional query options.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records for deletion.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options supporting limit and sorting
+                 for eviction-like operations. Defaults to None.
+         """
+     async def delete_by_id(self, id_: str | list[str]) -> None:
+         """Deletes records from the data store based on IDs.
+
+         Args:
+             id_ (str | list[str]): ID or list of IDs to delete.
+         """
+     async def clear(self) -> None:
+         """Clear all records from the datastore."""
gllm_datastore/data_store/opensearch/query.pyi
@@ -0,0 +1,89 @@
+ import logging
+ from _typeshed import Incomplete
+ from gllm_datastore.core.filters.schema import QueryFilter as QueryFilter
+ from gllm_datastore.data_store.opensearch.query_translator import OpenSearchQueryTranslator as OpenSearchQueryTranslator
+ from gllm_datastore.utils import flatten_dict as flatten_dict
+ from opensearchpy import AsyncOpenSearch
+ from opensearchpy._async.helpers.search import AsyncSearch
+ from opensearchpy.helpers.query import Query
+ from typing import Any
+
+ VALID_FIELD_PATH: Incomplete
+
+ async def update_by_query(client: AsyncOpenSearch, index_name: str, update_values: dict[str, Any], filters: QueryFilter | None = None, logger: logging.Logger | None = None) -> None:
+     '''Update records in OpenSearch using UpdateByQuery with retry logic for version conflicts.
+
+     This function builds a Painless script that safely assigns each updated field.
+     When a field path contains dots (e.g. "metadata.cache_value"), the corresponding
+     param must be accessed using bracket syntax, params[\'metadata.cache_value\'],
+     to avoid Painless treating it as nested object access (which would be None).
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         update_values (dict[str, Any]): Values to update.
+         filters (QueryFilter | None, optional): QueryFilter to select records to update.
+             Defaults to None.
+         logger (logging.Logger | None, optional): Logger instance. Defaults to None.
+     '''
+ async def delete_by_query(client: AsyncOpenSearch, index_name: str, filters: QueryFilter | None = None) -> None:
+     """Delete records from OpenSearch using delete_by_query.
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         filters (QueryFilter | None, optional): QueryFilter to select records for deletion.
+             Defaults to None, in which case no operation will be performed.
+     """
+ async def delete_by_id(client: AsyncOpenSearch, index_name: str, ids: str | list[str]) -> None:
+     """Delete records from OpenSearch by IDs using Search.delete().
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         ids (str | list[str]): ID or list of IDs to delete.
+     """
+ def validate_query_length(query: str, min_length: int = 0, max_length: int | None = None) -> bool:
+     """Validate query length against minimum and maximum constraints.
+
+     Args:
+         query (str): The query string to validate.
+         min_length (int, optional): Minimum required length. Defaults to 0.
+         max_length (int | None, optional): Maximum allowed length. Defaults to None.
+
+     Returns:
+         bool: True if the query is valid, False otherwise.
+     """
+ def create_search_with_filters(client: AsyncOpenSearch, index_name: str, filters: QueryFilter | None = None, exclude_fields: list[str] | None = None) -> AsyncSearch:
+     """Create an AsyncSearch object with optional filters and field exclusions.
+
+     Args:
+         client (AsyncOpenSearch): OpenSearch client instance.
+         index_name (str): The name of the OpenSearch index.
+         filters (QueryFilter | None, optional): QueryFilter to apply. Defaults to None.
+         exclude_fields (list[str] | None, optional): Fields to exclude from the source. Defaults to None.
+
+     Returns:
+         AsyncSearch: Configured AsyncSearch object.
+     """
+ def apply_filter_query_to_search(search: AsyncSearch, main_query: Query, filters: QueryFilter | None = None) -> AsyncSearch:
+     """Apply a filter query to a search with a main query.
+
+     Args:
+         search (AsyncSearch): OpenSearch search object.
+         main_query (Query): The main query to apply.
+         filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
+
+     Returns:
+         AsyncSearch: Search object with the applied queries.
+     """
+ async def safe_execute(search: AsyncSearch, logger: logging.Logger | None = None) -> Any | None:
+     """Execute an OpenSearch DSL search with unified error handling.
+
+     Args:
+         search (AsyncSearch): OpenSearch DSL AsyncSearch object.
+         logger (logging.Logger | None, optional): Logger instance for error messages. Defaults to None.
+
+     Returns:
+         Response | None: The OpenSearch response on success, otherwise None.
+     """
gllm_datastore/data_store/opensearch/query_translator.pyi
@@ -0,0 +1,18 @@
+ from gllm_datastore.data_store._elastic_core.query_translator import ElasticLikeQueryTranslator as ElasticLikeQueryTranslator
+
+ class OpenSearchQueryTranslator(ElasticLikeQueryTranslator):
+     """Translates QueryFilter and FilterClause objects to OpenSearch Query DSL.
+
+     This class extends ElasticLikeQueryTranslator and implements abstract methods
+     using OpenSearch DSL API (Q function). It also provides QueryOptions handling
+     methods specific to OpenSearch.
+
+     Attributes:
+         _logger (Logger): Logger instance for error messages and debugging.
+     """
+     def __init__(self) -> None:
+         """Initialize the OpenSearch query translator.
+
+         Raises:
+             ImportError: If the opensearchpy package is not installed.
+         """
gllm_datastore/data_store/opensearch/vector.pyi
@@ -0,0 +1,211 @@
+ from _typeshed import Incomplete
+ from gllm_core.schema import Chunk
+ from gllm_datastore.constants import DEFAULT_FETCH_K as DEFAULT_FETCH_K, DEFAULT_TOP_K as DEFAULT_TOP_K
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from gllm_datastore.data_store._elastic_core.elastic_like_core import ElasticLikeCore as ElasticLikeCore
+ from gllm_datastore.data_store._elastic_core.query_translator import convert_filter_clause as convert_filter_clause
+ from gllm_datastore.data_store.opensearch.query import delete_by_id as delete_by_id, delete_by_query as delete_by_query, update_by_query as update_by_query
+ from gllm_datastore.data_store.opensearch.query_translator import OpenSearchQueryTranslator as OpenSearchQueryTranslator
+ from gllm_datastore.utils.converter import from_langchain as from_langchain, to_langchain as to_langchain
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
+ from gllm_inference.schema import Vector
+ from opensearchpy import AsyncOpenSearch
+ from typing import Any
+
+ class OpenSearchVectorCapability:
+     """OpenSearch implementation of the VectorCapability protocol.
+
+     This class provides document CRUD operations and vector search using OpenSearch.
+     It uses LangChain's OpenSearchVectorSearch for create and retrieve operations,
+     and the direct OpenSearch client for update and delete operations.
+
+     Attributes:
+         index_name (str): The name of the OpenSearch index.
+         vector_store (OpenSearchVectorSearch): The vector store instance.
+         client (AsyncOpenSearch): AsyncOpenSearch client for direct operations.
+         em_invoker (BaseEMInvoker): The embedding model used for vectorization.
+     """
+     index_name: Incomplete
+     client: Incomplete
+     query_field: Incomplete
+     vector_query_field: Incomplete
+     vector_store: Incomplete
+     def __init__(self, index_name: str, em_invoker: BaseEMInvoker, client: AsyncOpenSearch, opensearch_url: str | None = None, query_field: str = 'text', vector_query_field: str = 'vector', retrieval_strategy: Any = None, distance_strategy: str | None = None, connection_params: dict[str, Any] | None = None) -> None:
+         '''Initialize the OpenSearch vector capability.
+
+         OpenSearchVectorSearch creates its own sync and async clients internally
+         based on the provided connection parameters. The async client is used
+         for operations like update, delete, and clear.
+
+         Args:
+             index_name (str): The name of the OpenSearch index.
+             em_invoker (BaseEMInvoker): The embedding model used for vectorization.
+             client (AsyncOpenSearch): The OpenSearch client for direct operations.
+             opensearch_url (str | None, optional): The URL of the OpenSearch server.
+                 Used for LangChain\'s OpenSearchVectorSearch initialization.
+                 If None, it will be extracted from the client connection info. Defaults to None.
+             query_field (str, optional): The field name for text queries. Defaults to "text".
+             vector_query_field (str, optional): The field name for vector queries. Defaults to "vector".
+             retrieval_strategy (Any, optional): Not used with OpenSearchVectorSearch (kept for API compatibility).
+             distance_strategy (str | None, optional): The distance strategy for retrieval.
+                 For example, "l2" for Euclidean distance, "l2squared" for squared Euclidean distance,
+                 "cosine" for cosine similarity, etc. Defaults to None.
+             connection_params (dict[str, Any] | None, optional): Additional connection parameters
+                 to override defaults. These will be merged with automatically detected parameters
+                 (authentication, SSL settings). User-provided params take precedence. Defaults to None.
+                 Available parameters include:
+                 1. http_auth (tuple[str, str] | None): HTTP authentication tuple (username, password).
+                 2. use_ssl (bool): Whether to use SSL/TLS. Defaults to True for HTTPS URLs.
+                 3. verify_certs (bool): Whether to verify SSL certificates. Defaults to True for HTTPS URLs.
+                 4. ssl_show_warn (bool): Whether to show SSL warnings. Defaults to True for HTTPS URLs.
+                 5. ssl_assert_hostname (str | None): SSL hostname assertion. Defaults to None.
+                 6. max_retries (int): Maximum number of retries for requests. Defaults to 3.
+                 7. retry_on_timeout (bool): Whether to retry on timeouts. Defaults to True.
+                 8. client_cert (str | None): Path to the client certificate file. Defaults to None.
+                 9. client_key (str | None): Path to the client private key file. Defaults to None.
+                 10. root_cert (str | None): Path to the root certificate file. Defaults to None.
+                 11. Additional kwargs: Any other parameters accepted by the OpenSearch client constructor.
+         '''
+     @property
+     def em_invoker(self) -> BaseEMInvoker:
+         """Returns the EM Invoker instance.
+
+         Returns:
+             BaseEMInvoker: The EM Invoker instance.
+         """
+     async def ensure_index(self, mapping: dict[str, Any] | None = None, index_settings: dict[str, Any] | None = None, dimension: int | None = None, distance_strategy: str | None = None) -> None:
+         '''Ensure the OpenSearch index exists, creating it if necessary.
+
+         This method is idempotent: if the index already exists, it will skip creation
+         and return early.
+
+         Args:
+             mapping (dict[str, Any] | None, optional): Custom mapping dictionary to use
+                 for index creation. If provided, this mapping will be used directly.
+                 The mapping should follow the OpenSearch mapping format. Defaults to None,
+                 in which case the default mapping will be used.
+             index_settings (dict[str, Any] | None, optional): Custom index settings.
+                 These settings will be merged with any default settings. Defaults to None.
+             dimension (int | None, optional): Vector dimension. If neither this nor mapping
+                 is provided, it will be inferred from em_invoker by generating a test embedding.
+             distance_strategy (str | None, optional): Distance strategy for vector similarity.
+                 Supported values: "l2", "l2squared", "cosine", "innerproduct", etc.
+                 Only used when building the default mapping. Defaults to "l2" if not specified.
+
+         Raises:
+             ValueError: If the mapping is invalid or required parameters are missing.
+             RuntimeError: If index creation fails.
+         '''
+     async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
+         """Create new records in the datastore.
+
+         Args:
+             data (Chunk | list[Chunk]): Data to create (single item or collection).
+             **kwargs: Datastore-specific parameters.
+
+         Raises:
+             ValueError: If the data structure is invalid.
+         """
+     async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]], **kwargs: Any) -> list[str]:
+         """Add pre-computed embeddings directly.
+
+         Args:
+             chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
+                 corresponding vectors.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[str]: List of IDs of the added documents.
+         """
+     async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
+         '''Semantic search using a text query converted to a vector.
+
+         Examples:
+             ```python
+             from gllm_datastore.core.filters import filter as F
+
+             # Direct FilterClause usage
+             await vector_capability.retrieve(
+                 query="What is the capital of France?",
+                 filters=F.eq("metadata.category", "tech"),
+                 options=QueryOptions(limit=10),
+             )
+
+             # Multiple filters
+             filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
+             await vector_capability.retrieve(query="What is the capital of France?", filters=filters)
+             ```
+
+         Args:
+             query (str): Text query to embed and search for.
+             filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[Chunk]: List of chunks ordered by relevance score.
+         '''
+     async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
+         '''Direct vector similarity search.
+
+         Examples:
+             ```python
+             from gllm_datastore.core.filters import filter as F
+
+             # Direct FilterClause usage
+             await vector_capability.retrieve_by_vector(
+                 vector=[0.1, 0.2, 0.3],
+                 filters=F.eq("metadata.category", "tech"),
+                 options=QueryOptions(limit=10),
+             )
+
+             # Multiple filters
+             filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
+             await vector_capability.retrieve_by_vector(vector=[0.1, 0.2, 0.3], filters=filters)
+             ```
+
+         Args:
+             vector (Vector): Query embedding vector.
+             filters (FilterClause | QueryFilter | None, optional): Filters to apply to the search.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Options to apply to the search. Defaults to None.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[Chunk]: List of chunks ordered by similarity score.
+         '''
+     async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
+         """Update existing records in the datastore.
+
+         Args:
+             update_values (dict[str, Any]): Values to update.
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def delete(self, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
+         """Delete records from the data store based on filters.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records for deletion.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def delete_by_id(self, id: str | list[str], **kwargs: Any) -> None:
+         """Delete records from the data store based on IDs.
+
+         Args:
+             id (str | list[str]): ID or list of IDs to delete.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def clear(self, **kwargs: Any) -> None:
+         """Clear all records from the datastore.
+
+         Args:
+             **kwargs: Datastore-specific parameters.
+         """
gllm_datastore/data_store/redis/__init__.pyi
@@ -0,0 +1,5 @@
+ from gllm_datastore.data_store.redis.data_store import RedisDataStore as RedisDataStore
+ from gllm_datastore.data_store.redis.fulltext import RedisFulltextCapability as RedisFulltextCapability
+ from gllm_datastore.data_store.redis.vector import RedisVectorCapability as RedisVectorCapability
+
+ __all__ = ['RedisDataStore', 'RedisFulltextCapability', 'RedisVectorCapability']
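
These re-exports fix the subpackage's public surface, so callers import the Redis capabilities from one place rather than the submodules (a sketch; constructor signatures live in the corresponding stubs listed in the file table above):

```python
# Import via the subpackage's public surface rather than the private modules.
from gllm_datastore.data_store.redis import (
    RedisDataStore,
    RedisFulltextCapability,
    RedisVectorCapability,
)
```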