gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
@@ -0,0 +1,428 @@
1
+ from _typeshed import Incomplete
2
+ from collections.abc import AsyncIterator
3
+ from gllm_core.schema.chunk import Chunk
4
+ from gllm_datastore.constants import BOOL_FALSE_STR as BOOL_FALSE_STR, BOOL_TRUE_STR as BOOL_TRUE_STR, CHUNK_KEYS as CHUNK_KEYS, DEFAULT_REDIS_QUERY_BATCH_SIZE as DEFAULT_REDIS_QUERY_BATCH_SIZE, FIELD_CONFIG_NAME as FIELD_CONFIG_NAME, FIELD_CONFIG_TYPE as FIELD_CONFIG_TYPE, FieldType as FieldType, LIST_SEPARATOR as LIST_SEPARATOR, METADATA_PREFIX as METADATA_PREFIX, METADATA_SEPARATOR as METADATA_SEPARATOR, REDIS_DEFAULT_DB as REDIS_DEFAULT_DB, REDIS_DEFAULT_HOST as REDIS_DEFAULT_HOST, REDIS_DEFAULT_PORT as REDIS_DEFAULT_PORT
5
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
6
+ from gllm_datastore.data_store.redis.query_translator import RedisQueryTranslator as RedisQueryTranslator
7
+ from gllm_inference.schema import Vector
8
+ from redis.asyncio.client import Redis
9
+ from redis.commands.search.document import Document as Document
10
+ from redis.commands.search.query import Query
11
+ from typing import Any
12
+
13
+ REDIS_SPECIAL_CHARS: str
14
+ REDIS_SPECIAL_CHARS_PATTERN: Incomplete
15
+ logger: Incomplete
16
+
17
+ def sanitize_key(key: str) -> str:
18
+ """Sanitize a key for use in Redis queries.
19
+
20
+ Args:
21
+ key (str): The key to sanitize.
22
+
23
+ Returns:
24
+ str: The sanitized key.
25
+ """
26
+ def sanitize_value(value: Any) -> str:
27
+ """Sanitize a value for use in Redis queries.
28
+
29
+ Args:
30
+ value (Any): The value to sanitize.
31
+
32
+ Returns:
33
+ str: The sanitized value.
34
+ """
35
+ def build_redis_query(query_translator: RedisQueryTranslator, filters: QueryFilter | None = None) -> str:
36
+ '''Build a Redis query string from filters.
37
+
38
+ Translates QueryFilter to Redis Search query syntax.
39
+
40
+ Examples:
41
+ - F.eq("name", "John") → "@name:{John}"
42
+ - F.gt("age", 18) → "@age:[18 +inf]"
43
+ - F.and_(F.eq("status", "active"), F.gt("score", 50)) → "@status:{active} @score:[50 +inf]"
44
+ - F.in_("category", ["tech", "science"]) → "@category:(tech|science)"
45
+
46
+ Args:
47
+ query_translator (RedisQueryTranslator): Query translator instance.
48
+ filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
49
+
50
+ Returns:
51
+ str: Redis query string.
52
+
53
+ Raises:
54
+ ValueError: If filter structure is invalid or operator is incompatible with field type.
55
+ TypeError: If filter contains type mismatches.
56
+ '''
57
+ def apply_options_to_query(query: Query, options: QueryOptions | None = None) -> Query:
58
+ """Apply query options to Redis Search Query object.
59
+
60
+ Uses Redis Search's native SORTBY and LIMIT capabilities for better performance.
61
+
62
+ Args:
63
+ query (Query): Redis Search Query object.
64
+ options (QueryOptions | None, optional): Query options to apply. Defaults to None.
65
+
66
+ Returns:
67
+ Query: Modified Query object with options applied.
68
+ """
69
+ async def execute_search_query(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> Any:
70
+ """Execute a Redis search query with filters and options.
71
+
72
+ When options is None or options.limit is None, automatically paginates to fetch all matching results
73
+ using DEFAULT_REDIS_QUERY_BATCH_SIZE.
74
+
75
+ Args:
76
+ client (Redis): Redis client instance.
77
+ index_name (str): Name of the Redis index to search.
78
+ query_translator (RedisQueryTranslator): Query translator instance.
79
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply. Defaults to None.
80
+ options (QueryOptions | None, optional): Query options for sorting and pagination. Defaults to None,
81
+ in which case the query is executed in batches of DEFAULT_REDIS_QUERY_BATCH_SIZE.
82
+
83
+ Returns:
84
+ Any: Redis search result containing documents and metadata.
85
+ """
86
+ async def retrieve_document_ids_batched(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: FilterClause | QueryFilter | None = None, batch_size: int = 100) -> AsyncIterator[list[str]]:
87
+ """Retrieve matching document IDs in batches without loading full document content.
88
+
89
+ This function is optimized for delete operations where only document IDs are needed.
90
+ It extracts IDs directly from search results without parsing full document content,
91
+ significantly reducing memory usage.
92
+
93
+ Args:
94
+ client (Redis): Redis client instance.
95
+ index_name (str): Name of the Redis index to search.
96
+ query_translator (RedisQueryTranslator): Query translator instance.
97
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply. Defaults to None.
98
+ batch_size (int, optional): Number of document IDs per batch. Defaults to 100.
99
+
100
+ Yields:
101
+ list[str]: Batches of document IDs for processing.
102
+ """
103
+ def parse_redis_documents(docs: list[Document], logger: Any | None = None) -> list[Chunk]:
104
+ """Parse Redis search result documents into Chunk objects.
105
+
106
+ Args:
107
+ docs (list[Document]): List of Redis search result documents.
108
+ logger (Any | None, optional): Logger instance for error logging. Defaults to None.
109
+
110
+ Returns:
111
+ list[Chunk]: List of parsed Chunk objects.
112
+ """
113
+ def get_str_value(data: dict, key: str, default: str = '') -> str:
114
+ """Extract string value from Redis hash data, handling bytes keys and values.
115
+
116
+ Args:
117
+ data (dict): Redis hash data (may have bytes keys/values).
118
+ key (str): Key to look up.
119
+ default (str, optional): Default value if key not found. Defaults to an empty string.
120
+
121
+ Returns:
122
+ str: Decoded string value.
123
+ """
124
+ def normalize_field_name_for_schema(field_name: str) -> str:
125
+ '''Normalize field name for Redis schema (dot to underscore).
126
+
127
+ This matches the query builder's normalization so schema fields match query fields.
128
+
129
+ Args:
130
+ field_name (str): Field name in dot notation (e.g., "metadata.score").
131
+
132
+ Returns:
133
+ str: Normalized field name with underscores (e.g., "metadata_score").
134
+ '''
135
+ def get_filterable_field_type(field_name: str, filterable_fields: list[dict[str, Any]]) -> str | None:
136
+ '''Get the field type for a metadata key from filterable_fields.
137
+
138
+ Args:
139
+ field_name (str): Metadata key (e.g., "score") or full field name (e.g., "metadata.score").
140
+ filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
141
+
142
+ Returns:
143
+ str | None: Field type (FieldType enum value) or None if not found.
144
+ '''
145
+ def metadata_field_mapping(metadata: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> dict[str, Any]:
146
+ '''Convert metadata dictionary into Redis hash field mappings with type-aware storage.
147
+
148
+ Values are stored in appropriate formats based on their types and filterable_fields configuration:
149
+ 1. Numeric values: stored as numbers (for NUMERIC fields)
150
+ 2. String values: stored as strings (for TAG fields)
151
+ 3. Boolean values: stored as "1"/"0" (for TAG fields)
152
+ 4. List values: stored as comma-separated strings (for TAG fields)
153
+
154
+ Args:
155
+ metadata (dict[str, Any]): Metadata dictionary to convert.
156
+ filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
157
+
158
+ Returns:
159
+ dict[str, Any]: Mapping of normalized field names to values in appropriate formats.
160
+ '''
161
+ def infer_filterable_fields_from_chunks(items: list[Any]) -> list[dict[str, Any]]:
162
+ '''Infer filterable fields schema from chunks.
163
+
164
+ Analyzes metadata in chunks to determine field types:
165
+ 1. Boolean types (bool) -> FieldType.TAG (stored as "1"/"0" strings)
166
+ 2. Numeric types (int, float) -> FieldType.NUMERIC
167
+ 3. All other types -> FieldType.TEXT (default)
168
+
169
+ Note: bool must be checked before int/float since bool is a subclass of int in Python.
170
+
171
+ Args:
172
+ items (list[Any]): Chunks to analyze.
173
+
174
+ Returns:
175
+ list[dict[str, Any]]: List of inferred filterable field configurations.
176
+ '''
177
+ def validate_metadata_fields(items: list[Any], filterable_fields: list[dict[str, Any]]) -> None:
178
+ '''Validate that metadata fields in chunks are compatible with the index schema.
179
+
180
+ For example, if filterable_fields is [{"name": "metadata.score", "type": "numeric"}],
181
+ and the chunk has metadata {"score": "not-a-number"}, this function will raise a ValueError.
182
+
183
+ Args:
184
+ items (list[Any]): Chunks to validate.
185
+ filterable_fields (list[dict[str, Any]]): Filterable fields configuration.
186
+
187
+ Raises:
188
+ ValueError: If values are incompatible with the field type (e.g., non-numeric
189
+ value for numeric field).
190
+ '''
191
+ async def get_doc_ids_for_deletion(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[str]:
192
+ """Get document IDs for deletion based on filters or options.
193
+
194
+ When using filters (not options), uses batching with incrementing offset to collect
195
+ all matching document IDs before deletion.
196
+
197
+ Args:
198
+ client (Redis): Redis client instance.
199
+ index_name (str): Name of the Redis index.
200
+ query_translator (RedisQueryTranslator): Query translator instance.
201
+ filters (QueryFilter | None, optional): Query filters. Defaults to None.
202
+ options (QueryOptions | None, optional): Query options. Defaults to None.
203
+
204
+ Returns:
205
+ list[str]: List of document IDs to delete.
206
+ """
207
+ async def collect_document_ids(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: QueryFilter | None = None, batch_size: int = 100) -> list[str]:
208
+ """Collect all matching document IDs from the datastore.
209
+
210
+ Args:
211
+ client (Redis): Redis client instance.
212
+ index_name (str): Name of the Redis index to search.
213
+ query_translator (RedisQueryTranslator): Query translator instance.
214
+ filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
215
+ batch_size (int, optional): Number of document IDs per batch. Defaults to 100.
216
+
217
+ Returns:
218
+ list[str]: List of all matching document IDs.
219
+ """
220
+ async def delete_keys_batched(client: Redis, index_name: str, doc_ids: list[str]) -> int:
221
+ """Delete Redis keys for the specified document IDs in a batch.
222
+
223
+ Args:
224
+ client (Redis): Redis client instance.
225
+ index_name (str): Name of the Redis index.
226
+ doc_ids (list[str]): List of document IDs to delete.
227
+
228
+ Returns:
229
+ int: Number of keys deleted.
230
+ """
231
+ async def process_doc_ids_in_batches(client: Redis, index_name: str, doc_ids: list[str], batch_func: Any, *args, batch_size: int = 100) -> int:
232
+ """Process document IDs in batches using the provided batch function.
233
+
234
+ Args:
235
+ client (Redis): Redis client instance.
236
+ index_name (str): Name of the Redis index.
237
+ doc_ids (list[str]): List of document IDs to process.
238
+ batch_func (Any): Async function to call for each batch.
239
+ *args: Additional arguments to pass to batch_func.
240
+ batch_size (int, optional): Number of documents per batch. Defaults to 100.
241
+
242
+ Returns:
243
+ int: Total number of documents processed.
244
+ """
245
+ async def fetch_hash_data_batch(client: Redis, index_name: str, doc_ids: list[str]) -> list[dict[str, Any]]:
246
+ """Fetch hash data for a batch of document IDs.
247
+
248
+ Args:
249
+ client (Redis): Redis client instance.
250
+ index_name (str): Name of the Redis index.
251
+ doc_ids (list[str]): List of document IDs to fetch.
252
+
253
+ Returns:
254
+ list[dict[str, Any]]: List of hash data for each document.
255
+ """
256
+ def prepare_update_values(doc: dict[str, Any], update_values: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> tuple[str, dict[str, Any], set[str]]:
257
+ """Prepare update values for a document.
258
+
259
+ Args:
260
+ doc (dict[str, Any]): Current document data from Redis.
261
+ update_values (dict[str, Any]): Values to update.
262
+ filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
263
+
264
+ Returns:
265
+ tuple[str, dict[str, Any], set[str]]: Tuple of (content, merged_metadata, old_metadata_fields).
266
+ """
267
+ def build_update_commands(keys: list[str], doc_data: list[dict[str, Any]], update_values: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> tuple[dict[str, dict[str, Any]], dict[str, set[str]]]:
268
+ """Build update commands for a batch of documents.
269
+
270
+ Args:
271
+ keys (list[str]): List of Redis keys for the documents.
272
+ doc_data (list[dict[str, Any]]): List of current document data.
273
+ update_values (dict[str, Any]): Values to update.
274
+ filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
275
+
276
+ Returns:
277
+ tuple[dict[str, dict[str, Any]], dict[str, set[str]]]: Tuple of (update_mappings, removal_fields).
278
+ """
279
+ async def process_update_batch(client: Redis, index_name: str, doc_ids: list[str], update_values: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> int:
280
+ """Process a batch of document updates.
281
+
282
+ Args:
283
+ client (Redis): Redis client instance.
284
+ index_name (str): Name of the Redis index.
285
+ doc_ids (list[str]): List of document IDs to update.
286
+ update_values (dict[str, Any]): Values to update.
287
+ filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
288
+
289
+ Returns:
290
+ int: Number of documents updated.
291
+ """
292
+ def build_filter_expression(query_translator: RedisQueryTranslator, filters: QueryFilter | None = None) -> str | None:
293
+ '''Build RedisVL filter expression from QueryFilter.
294
+
295
+ This function converts QueryFilter objects into RedisVL filter expression format.
296
+ Both RedisVL and Redis Search use the same query syntax, so this delegates to
297
+ RedisQueryTranslator which produces Redis Search/RedisVL compatible queries.
298
+
299
+ Examples:
300
+ 1. F.eq("name", "John") → "@name:{John}"
301
+ 2. F.gt("age", 18) → "@age:[18 +inf]"
302
+ 3. F.and_(F.eq("status", "active"), F.gt("score", 50)) → "@status:{active} @score:[50 +inf]"
303
+
304
+ Args:
305
+ query_translator (RedisQueryTranslator): Query translator instance.
306
+ filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
307
+
308
+ Returns:
309
+ str | None: RedisVL filter expression string or None if no filters.
310
+
311
+ Raises:
312
+ ValueError: If filter structure is invalid or operator is incompatible with field type.
313
+ TypeError: If filter contains type mismatches.
314
+ '''
315
+ async def check_index_exists(client: Redis, index_name: str) -> bool:
316
+ """Check if a Redis index exists.
317
+
318
+ Args:
319
+ client (Redis): Redis client instance.
320
+ index_name (str): Name of the Redis index.
321
+
322
+ Returns:
323
+ bool: True if index exists, False otherwise.
324
+ """
325
+ def validate_chunk_list(data: Any) -> list[Any]:
326
+ """Validate and normalize chunk input to a list.
327
+
328
+ Args:
329
+ data (Any): Input data to validate (single Chunk or list of Chunks).
330
+
331
+ Returns:
332
+ list[Any]: List of chunks.
333
+
334
+ Raises:
335
+ ValueError: If data structure is invalid.
336
+ """
337
+ def validate_chunk_content(chunks: list[Any]) -> None:
338
+ """Validate chunk content is non-empty string.
339
+
340
+ Args:
341
+ chunks (list[Any]): List of chunks to validate.
342
+
343
+ Raises:
344
+ ValueError: If chunk content is invalid.
345
+ """
346
+ def prepare_chunk_document(chunk: Any, filterable_fields: list[dict[str, Any]], include_vector: bool = False, vector: Vector | None = None, index_name: str | None = None) -> dict[str, Any]:
347
+ """Prepare a chunk document for Redis storage.
348
+
349
+ Args:
350
+ chunk (Any): Chunk object to prepare.
351
+ filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
352
+ include_vector (bool, optional): Whether to include vector field. Defaults to False.
353
+ vector (Vector | None, optional): Vector data to include if include_vector is True. Defaults to None.
354
+ index_name (str | None, optional): Index name to strip from chunk ID if present. Defaults to None.
355
+
356
+ Returns:
357
+ dict[str, Any]: Document dictionary ready for Redis storage.
358
+ """
359
+ def strip_index_prefix(doc_id: str, index_name: str) -> str:
360
+ '''Remove index prefix from document ID.
361
+
362
+ RedisVL returns document IDs with the index prefix (e.g., "index_name:doc_id" or "index_name::doc_id").
363
+ This function strips the prefix to return just the document ID.
364
+ Also handles cases where the ID starts with just ":" or "::" separators.
365
+
366
+ Args:
367
+ doc_id (str): Full document ID with index prefix.
368
+ index_name (str): Index name to remove from the prefix.
369
+
370
+ Returns:
371
+ str: Document ID without the index prefix.
372
+ '''
373
+ def get_redis_url_from_client(client: Any) -> str:
374
+ """Extract Redis URL from a Redis client connection.
375
+
376
+ Works with both sync and async Redis clients.
377
+
378
+ Warning:
379
+ The returned URL embeds connection details, including the password (if one is set) along with the host, port, and database.
380
+ Do not log the returned URL.
381
+
382
+ Args:
383
+ client (Any): Redis client instance (sync or async).
384
+
385
+ Returns:
386
+ str: Redis URL in the format redis://[password@]host:port[/db].
387
+ """
388
+ def get_filterable_fields_from_index(index_name: str, client: Any, cached_fields: list[dict[str, Any]] | None = None, excluded_fields: set[str] | None = None) -> list[dict[str, Any]]:
389
+ """Get filterable fields from an existing Redis index schema.
390
+
391
+ This function extracts filterable field definitions from a Redis index
392
+ and converts them into the filterable_fields format used by this library.
393
+
394
+ Args:
395
+ index_name (str): Name of the Redis index.
396
+ client (Any): Redis client instance (sync or async).
397
+ cached_fields (list[dict[str, Any]] | None, optional): Cached filterable fields
398
+ to return if available. Defaults to None.
399
+ excluded_fields (set[str] | None, optional): Set of field names to exclude
400
+ from the result. Defaults to None.
401
+
402
+ Returns:
403
+ list[dict[str, Any]]: List of filterable field configurations.
404
+ """
405
+ async def execute_update(client: Redis, index_name: str, chunks: list[Chunk], chunk_ids: list[str], vectors: list[Vector], filterable_fields: list[dict[str, Any]]) -> None:
406
+ """Execute update transaction using Redis pipeline.
407
+
408
+ Args:
409
+ client (Redis): Redis client instance.
410
+ index_name (str): Name of the Redis index.
411
+ chunks (list[Chunk]): List of updated chunks.
412
+ chunk_ids (list[str]): List of chunk IDs to update.
413
+ vectors (list[Vector]): List of vectors corresponding to chunks.
414
+ filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
415
+ """
416
+ def parse_redisvl_result_to_chunks(results: list[dict[str, Any]], index_name: str) -> list[Chunk]:
417
+ """Parse RedisVL search results into Chunk objects.
418
+
419
+ RedisVL returns search results as a list of dictionaries. This function
420
+ converts them into Chunk objects with proper metadata parsing and ID stripping.
421
+
422
+ Args:
423
+ results (list[dict[str, Any]]): List of RedisVL search result dictionaries.
424
+ index_name (str): Index name for stripping prefixes from document IDs.
425
+
426
+ Returns:
427
+ list[Chunk]: List of parsed Chunk objects.
428
+ """
@@ -0,0 +1,37 @@
1
+ from _typeshed import Incomplete
2
+ from collections.abc import Callable
3
+ from gllm_datastore.constants import BOOL_FALSE_STR as BOOL_FALSE_STR, BOOL_TRUE_STR as BOOL_TRUE_STR, CHUNK_KEYS as CHUNK_KEYS, FIELD_CONFIG_NAME as FIELD_CONFIG_NAME, FIELD_CONFIG_TYPE as FIELD_CONFIG_TYPE, FieldType as FieldType, METADATA_SEPARATOR as METADATA_SEPARATOR
4
+ from gllm_datastore.core.filters import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter
5
+ from typing import Any
6
+
7
+ REDIS_SPECIAL_CHARS: str
8
+ REDIS_SPECIAL_CHARS_PATTERN: Incomplete
9
+
10
+ class RedisQueryTranslator:
11
+ """Translates QueryFilter and FilterClause objects to Redis Search query syntax.
12
+
13
+ This class encapsulates all query translation logic.
14
+ """
15
+ def __init__(self, get_filterable_fields: Callable[[], list[dict[str, Any]]]) -> None:
16
+ """Initialize the Redis query translator.
17
+
18
+ Args:
19
+ get_filterable_fields (Callable[[], list[dict[str, Any]]]): Callable that returns
20
+ the current filterable_fields configuration. This allows lazy loading of
21
+ filterable_fields when they become available.
22
+ """
23
+ def translate(self, filters: QueryFilter | None) -> str | None:
24
+ """Translate a structured QueryFilter into a Redis Search query string.
25
+
26
+ This is the main entry point for query translation. It handles None filters
27
+ and delegates to internal translation methods.
28
+
29
+ Args:
30
+ filters (QueryFilter | None): Structured QueryFilter to translate, or None when there are no filters to apply.
31
+
32
+ Returns:
33
+ str | None: A Redis Search query string or None if no filters are provided.
34
+
35
+ Raises:
36
+ ValueError: When the filter structure is invalid or operator is incompatible with field type.
37
+ """
@@ -0,0 +1,131 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_core.schema.chunk import Chunk
3
+ from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS, FIELD_CONFIG_NAME as FIELD_CONFIG_NAME, FIELD_CONFIG_TYPE as FIELD_CONFIG_TYPE
4
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
5
+ from gllm_datastore.data_store.redis.query import build_filter_expression as build_filter_expression, check_index_exists as check_index_exists, execute_update as execute_update, fetch_hash_data_batch as fetch_hash_data_batch, get_filterable_fields_from_index as get_filterable_fields_from_index, infer_filterable_fields_from_chunks as infer_filterable_fields_from_chunks, normalize_field_name_for_schema as normalize_field_name_for_schema, parse_redisvl_result_to_chunks as parse_redisvl_result_to_chunks, prepare_chunk_document as prepare_chunk_document, strip_index_prefix as strip_index_prefix, validate_chunk_content as validate_chunk_content, validate_chunk_list as validate_chunk_list, validate_metadata_fields as validate_metadata_fields
6
+ from gllm_datastore.data_store.redis.query_translator import RedisQueryTranslator as RedisQueryTranslator
7
+ from gllm_datastore.utils.converter import cosine_distance_to_similarity_score as cosine_distance_to_similarity_score
8
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
9
+ from gllm_inference.schema import Vector
10
+ from redis.asyncio.client import Redis
11
+ from typing import Any
12
+
13
+ class RedisVectorCapability:
14
+ """Redis implementation of VectorCapability protocol.
15
+
16
+ This class provides vector similarity search operations using RedisVL
17
+ AsyncSearchIndex for vector storage and retrieval.
18
+
19
+ Attributes:
20
+ index_name (str): Name of the Redis index.
21
+ client (Redis): Redis async client instance.
22
+ em_invoker (BaseEMInvoker): Embedding model for vectorization (exposed as a property).
23
+ index (Any): RedisVL AsyncSearchIndex instance.
24
+ """
25
+ index_name: Incomplete  # NOTE(review): documented as str in the class docstring and __init__; could be narrowed from Incomplete
26
+ client: Incomplete  # NOTE(review): documented as Redis in the class docstring and __init__; could be narrowed from Incomplete
27
+ index: Any
28
+ def __init__(self, index_name: str, client: Redis, em_invoker: BaseEMInvoker) -> None:
29
+ """Initialize the Redis vector capability.
30
+
31
+ Schema will be automatically inferred from chunks when creating a new index,
32
+ or auto-detected from an existing index when performing operations.
33
+
34
+ Args:
35
+ index_name (str): Name of the Redis index.
36
+ client (Redis): Redis async client instance.
37
+ em_invoker (BaseEMInvoker): Embedding model for vectorization.
38
+ """
39
+ @property
40
+ def em_invoker(self) -> BaseEMInvoker:
41
+ """Return the embedding model (EM) invoker instance.
42
+
43
+ Returns:
44
+ BaseEMInvoker: The EM Invoker instance.
45
+ """
46
+ async def ensure_index(self, filterable_fields: list[dict[str, Any]] | None = None) -> None:
47
+ '''Ensure Redis vector index exists, creating it if necessary.
48
+
49
+ This method is idempotent - if the index already exists, it will skip creation
50
+ and return early.
51
+
52
+ Args:
53
+ filterable_fields (list[dict[str, Any]] | None, optional): List of filterable field
54
+ configurations to use when creating a new index. Each field should be a dictionary
55
+ with "name" and "type" keys. For example:
56
+ [{"name": "metadata.category", "type": "tag"}, {"name": "metadata.score", "type": "numeric"}]
57
+ If not provided and the index does not exist, a default schema will be created with
58
+ only basic fields (id, content, metadata, vector). Defaults to None.
59
+
60
+ Raises:
61
+ RuntimeError: If index creation fails.
62
+ '''
63
+ async def create(self, data: Chunk | list[Chunk]) -> None:
64
+ """Add chunks to the vector store with automatic embedding generation.
65
+
66
+ If the index does not exist, the schema will be inferred from the chunks being created.
67
+
68
+ Args:
69
+ data (Chunk | list[Chunk]): Single chunk or list of chunks to add.
70
+
71
+ Raises:
72
+ ValueError: If data structure is invalid or chunk content is invalid.
73
+ """
74
+ async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]]) -> None:
75
+ """Add chunks together with their pre-computed vectors, skipping embedding generation.
76
+
77
+ If the index does not exist, the schema will be inferred from the chunks being created.
78
+
79
+ Args:
80
+ chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks
81
+ and their corresponding vectors.
82
+
83
+ Raises:
84
+ ValueError: If chunk content is invalid.
85
+ """
86
+ async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
87
+ """Read records from the datastore using text-based similarity search with optional filtering.
88
+
89
+ Args:
90
+ query (str): Input text to embed and search with.
91
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
92
+ Defaults to None.
93
+ options (QueryOptions | None, optional): Query options like limit and sorting. Defaults to None.
94
+
95
+ Returns:
96
+ list[Chunk]: Query results ordered by similarity score.
97
+ """
98
+ async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
99
+ """Run a similarity search directly from a query embedding vector.
100
+
101
+ Args:
102
+ vector (Vector): Query embedding vector.
103
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
104
+ FilterClause objects are automatically converted to QueryFilter internally.
105
+ Defaults to None.
106
+ options (QueryOptions | None, optional): Query options like limit and sorting. Defaults to None.
107
+
108
+ Returns:
109
+ list[Chunk]: List of chunks ordered by similarity score.
110
+ """
111
+ async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
112
+ """Update existing records in the datastore.
113
+
114
+ Args:
115
+ update_values (dict[str, Any]): Values to update.
116
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
117
+ FilterClause objects are automatically converted to QueryFilter internally.
118
+ Defaults to None. NOTE(review): the effect of filters=None is not stated here (delete documents it as a no-op) — confirm before relying on it.
119
+ """
120
+ async def delete(self, filters: FilterClause | QueryFilter | None = None) -> None:
121
+ """Delete records from the datastore.
122
+
123
+ Processes deletions in batches to avoid loading all matching documents into memory.
124
+ If filters is None, no operation is performed (no-op).
125
+
126
+ Args:
127
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
128
+ Defaults to None.
129
+ """
130
+ async def clear(self) -> None:
131
+ """Clear all records from the datastore."""
@@ -0,0 +1,4 @@
1
+ from gllm_datastore.data_store.sql.data_store import SQLDataStore as SQLDataStore
2
+ from gllm_datastore.data_store.sql.fulltext import SQLFulltextCapability as SQLFulltextCapability
3
+
4
+ __all__ = ['SQLDataStore', 'SQLFulltextCapability']  # Public names re-exported by the imports above.
@@ -0,0 +1,5 @@
1
+ class SQL_COLUMNS:
2
+ """SQL database column names for chunk storage (the stub declares only their str type, not the actual column-name values)."""
3
+ ID: str
4
+ CONTENT: str
5
+ CHUNK_METADATA: str