gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-datastore-binary might be problematic.

Files changed (108)
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +5 -0
  28. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  29. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  30. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  31. gllm_datastore/core/filters/__init__.pyi +4 -0
  32. gllm_datastore/core/filters/filter.pyi +340 -0
  33. gllm_datastore/core/filters/schema.pyi +149 -0
  34. gllm_datastore/data_store/__init__.pyi +7 -0
  35. gllm_datastore/data_store/base.pyi +138 -0
  36. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  37. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  38. gllm_datastore/data_store/chroma/data_store.pyi +202 -0
  39. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  40. gllm_datastore/data_store/chroma/query.pyi +266 -0
  41. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  42. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  43. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  44. gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
  45. gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
  46. gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
  47. gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
  48. gllm_datastore/data_store/exceptions.pyi +35 -0
  49. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  50. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  51. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  52. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  53. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  54. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  55. gllm_datastore/data_store/redis/data_store.pyi +154 -0
  56. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  57. gllm_datastore/data_store/redis/query.pyi +428 -0
  58. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  59. gllm_datastore/data_store/redis/vector.pyi +131 -0
  60. gllm_datastore/encryptor/__init__.pyi +4 -0
  61. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  62. gllm_datastore/encryptor/encryptor.pyi +52 -0
  63. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  64. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  65. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  66. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  67. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  68. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  69. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  70. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  71. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  72. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  73. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  74. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  75. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  76. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  77. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  78. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  79. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  80. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  81. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  82. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  83. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  84. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  85. gllm_datastore/sql_data_store/constants.pyi +6 -0
  86. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  87. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  88. gllm_datastore/sql_data_store/types.pyi +31 -0
  89. gllm_datastore/utils/__init__.pyi +6 -0
  90. gllm_datastore/utils/converter.pyi +51 -0
  91. gllm_datastore/utils/dict.pyi +21 -0
  92. gllm_datastore/utils/ttl.pyi +25 -0
  93. gllm_datastore/utils/types.pyi +32 -0
  94. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  95. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  96. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  97. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  98. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  99. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  100. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  101. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  102. gllm_datastore.build/.gitignore +1 -0
  103. gllm_datastore.cpython-311-darwin.so +0 -0
  104. gllm_datastore.pyi +156 -0
  105. gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
  106. gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
  107. gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
  108. gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
gllm_datastore/data_store/redis/fulltext.pyi
@@ -0,0 +1,128 @@
+ from _typeshed import Incomplete
+ from gllm_core.schema.chunk import Chunk
+ from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS, FIELD_CONFIG_NAME as FIELD_CONFIG_NAME, FIELD_CONFIG_TYPE as FIELD_CONFIG_TYPE, FieldType as FieldType
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from gllm_datastore.data_store.redis.query import apply_options_to_query as apply_options_to_query, check_index_exists as check_index_exists, collect_document_ids as collect_document_ids, delete_keys_batched as delete_keys_batched, execute_search_query as execute_search_query, get_doc_ids_for_deletion as get_doc_ids_for_deletion, get_filterable_fields_from_index as get_filterable_fields_from_index, infer_filterable_fields_from_chunks as infer_filterable_fields_from_chunks, normalize_field_name_for_schema as normalize_field_name_for_schema, parse_redis_documents as parse_redis_documents, prepare_chunk_document as prepare_chunk_document, process_doc_ids_in_batches as process_doc_ids_in_batches, process_update_batch as process_update_batch, sanitize_key as sanitize_key, strip_index_prefix as strip_index_prefix, validate_chunk_content as validate_chunk_content, validate_chunk_list as validate_chunk_list, validate_metadata_fields as validate_metadata_fields
+ from gllm_datastore.data_store.redis.query_translator import RedisQueryTranslator as RedisQueryTranslator
+ from redis.asyncio.client import Redis
+ from typing import Any
+
+ FUZZY_MATCH_MAX_DISTANCE: int
+
+ class DefaultBatchSize:
+     """Default batch sizes for Redis operations."""
+     DELETE: int
+     UPDATE: int
+
+ class RedisFulltextCapability:
+     """Redis implementation of the FulltextCapability protocol.
+
+     Attributes:
+         index_name (str): Name of the Redis index.
+         client (Redis): Redis client instance.
+     """
+     index_name: Incomplete
+     client: Incomplete
+     def __init__(self, index_name: str, client: Redis) -> None:
+         """Initialize the Redis fulltext capability.
+
+         The schema will be automatically inferred from chunks when creating a new index,
+         or auto-detected from an existing index when performing operations.
+
+         Args:
+             index_name (str): Name of the Redis index.
+             client (Redis): Redis client instance.
+         """
+     async def create(self, data: Chunk | list[Chunk]) -> None:
+         '''Create new records in the datastore.
+
+         If the index does not exist and no filterable_fields were provided,
+         the schema will be inferred from the chunks being created.
+
+         Examples:
+             Create a new chunk.
+             ```python
+             await fulltext_capability.create(Chunk(content="Test chunk", metadata={"category": "test"}))
+             ```
+
+         Args:
+             data (Chunk | list[Chunk]): Data to create (single item or collection).
+
+         Raises:
+             ValueError: If the data structure or chunk content is invalid.
+         '''
+     async def retrieve(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
+         """Read records from the datastore with optional filtering.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Query filters to apply. Defaults to None.
+             options (QueryOptions | None, optional): Query options for sorting and pagination. Defaults to None,
+                 in which case the default limit of 10 is used.
+
+         Returns:
+             list[Chunk]: List of matched chunks after applying filters and options.
+         """
+     async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
+         """Find records that fuzzy match the query within the distance threshold.
+
+         Args:
+             query (str): Text to fuzzy match against.
+             max_distance (int): Maximum edit distance for matches. Defaults to 2.
+                 Maximum value is 3 (a limitation of the Redis Search module).
+             filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options, only limit is used here. Defaults to None.
+
+         Returns:
+             list[Chunk]: Matched chunks ordered by relevance/distance.
+
+         Raises:
+             ValueError: If max_distance is greater than 3.
+
+         Note:
+             Maximum fuzzy distance is 3. This is a limitation of the Redis Search module.
+         """
+     async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
+         '''Update existing records in the datastore.
+
+         Processes updates in batches to avoid loading all matching documents into memory:
+         1. Get document IDs matching the filters.
+         2. In batch, get document data via document IDs.
+         3. In batch, update the document data.
+
+         Examples:
+             Update certain metadata of a chunk with specific filters.
+             ```python
+             from gllm_datastore.core.filters import filter as F
+
+             await fulltext_capability.update(
+                 update_values={"metadata": {"status": "published"}},
+                 filters=F.eq("metadata.status", "draft"),
+             )
+             ```
+
+         Args:
+             update_values (dict[str, Any]): Mapping of fields to new values to apply.
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
+                 Defaults to None.
+
+         Raises:
+             Exception: If Redis operations fail.
+         '''
+     async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
+         """Delete records from the datastore.
+
+         Processes deletions in batches to avoid loading all matching documents into memory.
+         For delete operations, only document IDs are retrieved (not full content) to minimize memory usage.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options for sorting and limiting deletions
+                 (for eviction-like operations). Defaults to None.
+
+         Raises:
+             Exception: If Redis operations fail.
+         """
+     async def clear(self) -> None:
+         """Clear all records from the datastore."""
gllm_datastore/data_store/redis/query.pyi
@@ -0,0 +1,428 @@
+ from _typeshed import Incomplete
+ from collections.abc import AsyncIterator
+ from gllm_core.schema.chunk import Chunk
+ from gllm_datastore.constants import BOOL_FALSE_STR as BOOL_FALSE_STR, BOOL_TRUE_STR as BOOL_TRUE_STR, CHUNK_KEYS as CHUNK_KEYS, DEFAULT_REDIS_QUERY_BATCH_SIZE as DEFAULT_REDIS_QUERY_BATCH_SIZE, FIELD_CONFIG_NAME as FIELD_CONFIG_NAME, FIELD_CONFIG_TYPE as FIELD_CONFIG_TYPE, FieldType as FieldType, LIST_SEPARATOR as LIST_SEPARATOR, METADATA_PREFIX as METADATA_PREFIX, METADATA_SEPARATOR as METADATA_SEPARATOR, REDIS_DEFAULT_DB as REDIS_DEFAULT_DB, REDIS_DEFAULT_HOST as REDIS_DEFAULT_HOST, REDIS_DEFAULT_PORT as REDIS_DEFAULT_PORT
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from gllm_datastore.data_store.redis.query_translator import RedisQueryTranslator as RedisQueryTranslator
+ from gllm_inference.schema import Vector
+ from redis.asyncio.client import Redis
+ from redis.commands.search.document import Document as Document
+ from redis.commands.search.query import Query
+ from typing import Any
+
+ REDIS_SPECIAL_CHARS: str
+ REDIS_SPECIAL_CHARS_PATTERN: Incomplete
+ logger: Incomplete
+
+ def sanitize_key(key: str) -> str:
+     """Sanitize a key for use in Redis queries.
+
+     Args:
+         key (str): The key to sanitize.
+
+     Returns:
+         str: The sanitized key.
+     """
+ def sanitize_value(value: Any) -> str:
+     """Sanitize a value for use in Redis queries.
+
+     Args:
+         value (Any): The value to sanitize.
+
+     Returns:
+         str: The sanitized value.
+     """
+ def build_redis_query(query_translator: RedisQueryTranslator, filters: QueryFilter | None = None) -> str:
+     '''Build a Redis query string from filters.
+
+     Translates QueryFilter to Redis Search query syntax.
+
+     Examples:
+         - F.eq("name", "John") → "@name:{John}"
+         - F.gt("age", 18) → "@age:[18 +inf]"
+         - F.and_(F.eq("status", "active"), F.gt("score", 50)) → "@status:{active} @score:[50 +inf]"
+         - F.in_("category", ["tech", "science"]) → "@category:(tech|science)"
+
+     Args:
+         query_translator (RedisQueryTranslator): Query translator instance.
+         filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
+
+     Returns:
+         str: Redis query string.
+
+     Raises:
+         ValueError: If the filter structure is invalid or an operator is incompatible with a field type.
+         TypeError: If the filter contains type mismatches.
+     '''
+ def apply_options_to_query(query: Query, options: QueryOptions | None = None) -> Query:
+     """Apply query options to a Redis Search Query object.
+
+     Uses Redis Search's native SORTBY and LIMIT capabilities for better performance.
+
+     Args:
+         query (Query): Redis Search Query object.
+         options (QueryOptions | None, optional): Query options to apply. Defaults to None.
+
+     Returns:
+         Query: Modified Query object with options applied.
+     """
+ async def execute_search_query(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> Any:
+     """Execute a Redis search query with filters and options.
+
+     When options is None or options.limit is None, automatically paginates to fetch all matching results
+     using DEFAULT_REDIS_QUERY_BATCH_SIZE.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index to search.
+         query_translator (RedisQueryTranslator): Query translator instance.
+         filters (FilterClause | QueryFilter | None, optional): Query filters to apply. Defaults to None.
+         options (QueryOptions | None, optional): Query options for sorting and pagination. Defaults to None,
+             in which case the query is executed in batches of DEFAULT_REDIS_QUERY_BATCH_SIZE.
+
+     Returns:
+         Any: Redis search result containing documents and metadata.
+     """
+ async def retrieve_document_ids_batched(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: FilterClause | QueryFilter | None = None, batch_size: int = 100) -> AsyncIterator[list[str]]:
+     """Retrieve matching document IDs in batches without loading full document content.
+
+     This function is optimized for delete operations where only document IDs are needed.
+     It extracts IDs directly from search results without parsing full document content,
+     significantly reducing memory usage.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index to search.
+         query_translator (RedisQueryTranslator): Query translator instance.
+         filters (FilterClause | QueryFilter | None, optional): Query filters to apply. Defaults to None.
+         batch_size (int, optional): Number of document IDs per batch. Defaults to 100.
+
+     Yields:
+         list[str]: Batches of document IDs for processing.
+     """
+ def parse_redis_documents(docs: list[Document], logger: Any | None = None) -> list[Chunk]:
+     """Parse Redis search result documents into Chunk objects.
+
+     Args:
+         docs (list[Document]): List of Redis search result documents.
+         logger (Any | None, optional): Logger instance for error logging. Defaults to None.
+
+     Returns:
+         list[Chunk]: List of parsed Chunk objects.
+     """
+ def get_str_value(data: dict, key: str, default: str = '') -> str:
+     """Extract a string value from Redis hash data, handling bytes keys and values.
+
+     Args:
+         data (dict): Redis hash data (may have bytes keys/values).
+         key (str): Key to look up.
+         default (str): Default value if the key is not found.
+
+     Returns:
+         str: Decoded string value.
+     """
+ def normalize_field_name_for_schema(field_name: str) -> str:
+     '''Normalize a field name for the Redis schema (dot to underscore).
+
+     This matches the query builder's normalization so schema fields match query fields.
+
+     Args:
+         field_name (str): Field name in dot notation (e.g., "metadata.score").
+
+     Returns:
+         str: Normalized field name with underscores (e.g., "metadata_score").
+     '''
+ def get_filterable_field_type(field_name: str, filterable_fields: list[dict[str, Any]]) -> str | None:
+     '''Get the field type for a metadata key from filterable_fields.
+
+     Args:
+         field_name (str): Metadata key (e.g., "score") or full field name (e.g., "metadata.score").
+         filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
+
+     Returns:
+         str | None: Field type (FieldType enum value) or None if not found.
+     '''
+ def metadata_field_mapping(metadata: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> dict[str, Any]:
+     '''Convert a metadata dictionary into Redis hash field mappings with type-aware storage.
+
+     Values are stored in appropriate formats based on their types and the filterable_fields configuration:
+     1. Numeric values: stored as numbers (for NUMERIC fields)
+     2. String values: stored as strings (for TAG fields)
+     3. Boolean values: stored as "1"/"0" (for TAG fields)
+     4. List values: stored as comma-separated strings (for TAG fields)
+
+     Args:
+         metadata (dict[str, Any]): Metadata dictionary to convert.
+         filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
+
+     Returns:
+         dict[str, Any]: Mapping of normalized field names to values in appropriate formats.
+     '''
+ def infer_filterable_fields_from_chunks(items: list[Any]) -> list[dict[str, Any]]:
+     '''Infer the filterable fields schema from chunks.
+
+     Analyzes metadata in chunks to determine field types:
+     1. Boolean types (bool) -> FieldType.TAG (stored as "1"/"0" strings)
+     2. Numeric types (int, float) -> FieldType.NUMERIC
+     3. All other types -> FieldType.TEXT (default)
+
+     Note: bool must be checked before int/float since bool is a subclass of int in Python.
+
+     Args:
+         items (list[Any]): Chunks to analyze.
+
+     Returns:
+         list[dict[str, Any]]: List of inferred filterable field configurations.
+     '''
+ def validate_metadata_fields(items: list[Any], filterable_fields: list[dict[str, Any]]) -> None:
+     '''Validate that metadata fields in chunks are compatible with the index schema.
+
+     For example, if filterable_fields is [{"name": "metadata.score", "type": "numeric"}]
+     and a chunk has metadata {"score": "not-a-number"}, this function will raise a ValueError.
+
+     Args:
+         items (list[Any]): Chunks to validate.
+         filterable_fields (list[dict[str, Any]]): Filterable fields configuration.
+
+     Raises:
+         ValueError: If values are incompatible with the field type (e.g., a non-numeric
+             value for a numeric field).
+     '''
+ async def get_doc_ids_for_deletion(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[str]:
+     """Get document IDs for deletion based on filters or options.
+
+     When using filters (not options), uses batching with an incrementing offset to collect
+     all matching document IDs before deletion.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index.
+         query_translator (RedisQueryTranslator): Query translator instance.
+         filters (QueryFilter | None, optional): Query filters. Defaults to None.
+         options (QueryOptions | None, optional): Query options. Defaults to None.
+
+     Returns:
+         list[str]: List of document IDs to delete.
+     """
+ async def collect_document_ids(client: Redis, index_name: str, query_translator: RedisQueryTranslator, filters: QueryFilter | None = None, batch_size: int = 100) -> list[str]:
+     """Collect all matching document IDs from the datastore.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index to search.
+         query_translator (RedisQueryTranslator): Query translator instance.
+         filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
+         batch_size (int, optional): Number of document IDs per batch. Defaults to 100.
+
+     Returns:
+         list[str]: List of all matching document IDs.
+     """
+ async def delete_keys_batched(client: Redis, index_name: str, doc_ids: list[str]) -> int:
+     """Delete Redis keys for the specified document IDs in a batch.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index.
+         doc_ids (list[str]): List of document IDs to delete.
+
+     Returns:
+         int: Number of keys deleted.
+     """
+ async def process_doc_ids_in_batches(client: Redis, index_name: str, doc_ids: list[str], batch_func: Any, *args, batch_size: int = 100) -> int:
+     """Process document IDs in batches using the provided batch function.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index.
+         doc_ids (list[str]): List of document IDs to process.
+         batch_func (Any): Async function to call for each batch.
+         *args: Additional arguments to pass to batch_func.
+         batch_size (int, optional): Number of documents per batch. Defaults to 100.
+
+     Returns:
+         int: Total number of documents processed.
+     """
+ async def fetch_hash_data_batch(client: Redis, index_name: str, doc_ids: list[str]) -> list[dict[str, Any]]:
+     """Fetch hash data for a batch of document IDs.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index.
+         doc_ids (list[str]): List of document IDs to fetch.
+
+     Returns:
+         list[dict[str, Any]]: List of hash data for each document.
+     """
+ def prepare_update_values(doc: dict[str, Any], update_values: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> tuple[str, dict[str, Any], set[str]]:
+     """Prepare update values for a document.
+
+     Args:
+         doc (dict[str, Any]): Current document data from Redis.
+         update_values (dict[str, Any]): Values to update.
+         filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
+
+     Returns:
+         tuple[str, dict[str, Any], set[str]]: Tuple of (content, merged_metadata, old_metadata_fields).
+     """
+ def build_update_commands(keys: list[str], doc_data: list[dict[str, Any]], update_values: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> tuple[dict[str, dict[str, Any]], dict[str, set[str]]]:
+     """Build update commands for a batch of documents.
+
+     Args:
+         keys (list[str]): List of Redis keys for the documents.
+         doc_data (list[dict[str, Any]]): List of current document data.
+         update_values (dict[str, Any]): Values to update.
+         filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
+
+     Returns:
+         tuple[dict[str, dict[str, Any]], dict[str, set[str]]]: Tuple of (update_mappings, removal_fields).
+     """
+ async def process_update_batch(client: Redis, index_name: str, doc_ids: list[str], update_values: dict[str, Any], filterable_fields: list[dict[str, Any]]) -> int:
+     """Process a batch of document updates.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index.
+         doc_ids (list[str]): List of document IDs to update.
+         update_values (dict[str, Any]): Values to update.
+         filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
+
+     Returns:
+         int: Number of documents updated.
+     """
+ def build_filter_expression(query_translator: RedisQueryTranslator, filters: QueryFilter | None = None) -> str | None:
+     '''Build a RedisVL filter expression from a QueryFilter.
+
+     This function converts QueryFilter objects into RedisVL filter expression format.
+     Both RedisVL and Redis Search use the same query syntax, so this delegates to
+     RedisQueryTranslator, which produces Redis Search/RedisVL compatible queries.
+
+     Examples:
+         1. F.eq("name", "John") → "@name:{John}"
+         2. F.gt("age", 18) → "@age:[18 +inf]"
+         3. F.and_(F.eq("status", "active"), F.gt("score", 50)) → "@status:{active} @score:[50 +inf]"
+
+     Args:
+         query_translator (RedisQueryTranslator): Query translator instance.
+         filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
+
+     Returns:
+         str | None: RedisVL filter expression string or None if no filters.
+
+     Raises:
+         ValueError: If the filter structure is invalid or an operator is incompatible with a field type.
+         TypeError: If the filter contains type mismatches.
+     '''
+ async def check_index_exists(client: Redis, index_name: str) -> bool:
+     """Check whether a Redis index exists.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index.
+
+     Returns:
+         bool: True if the index exists, False otherwise.
+     """
+ def validate_chunk_list(data: Any) -> list[Any]:
+     """Validate and normalize chunk input to a list.
+
+     Args:
+         data (Any): Input data to validate (single Chunk or list of Chunks).
+
+     Returns:
+         list[Any]: List of chunks.
+
+     Raises:
+         ValueError: If the data structure is invalid.
+     """
+ def validate_chunk_content(chunks: list[Any]) -> None:
+     """Validate that each chunk's content is a non-empty string.
+
+     Args:
+         chunks (list[Any]): List of chunks to validate.
+
+     Raises:
+         ValueError: If chunk content is invalid.
+     """
+ def prepare_chunk_document(chunk: Any, filterable_fields: list[dict[str, Any]], include_vector: bool = False, vector: Vector | None = None, index_name: str | None = None) -> dict[str, Any]:
+     """Prepare a chunk document for Redis storage.
+
+     Args:
+         chunk (Any): Chunk object to prepare.
+         filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
+         include_vector (bool, optional): Whether to include the vector field. Defaults to False.
+         vector (Vector | None, optional): Vector data to include if include_vector is True. Defaults to None.
+         index_name (str | None, optional): Index name to strip from the chunk ID if present. Defaults to None.
+
+     Returns:
+         dict[str, Any]: Document dictionary ready for Redis storage.
+     """
+ def strip_index_prefix(doc_id: str, index_name: str) -> str:
+     '''Remove the index prefix from a document ID.
+
+     RedisVL returns document IDs with the index prefix (e.g., "index_name:doc_id" or "index_name::doc_id").
+     This function strips the prefix to return just the document ID.
+     It also handles cases where the ID starts with just ":" or "::" separators.
+
+     Args:
+         doc_id (str): Full document ID with index prefix.
+         index_name (str): Index name to remove from the prefix.
+
+     Returns:
+         str: Document ID without the index prefix.
+     '''
+ def get_redis_url_from_client(client: Any) -> str:
+     """Extract the Redis URL from a Redis client connection.
+
+     Works with both sync and async Redis clients.
+
+     Warning:
+         The returned URL contains sensitive connection details such as the password, host, port, and database.
+         Do not log the returned URL.
+
+     Args:
+         client (Any): Redis client instance (sync or async).
+
+     Returns:
+         str: Redis URL in the format redis://[password@]host:port[/db].
+     """
+ def get_filterable_fields_from_index(index_name: str, client: Any, cached_fields: list[dict[str, Any]] | None = None, excluded_fields: set[str] | None = None) -> list[dict[str, Any]]:
+     """Get filterable fields from an existing Redis index schema.
+
+     This function extracts filterable field definitions from a Redis index
+     and converts them into the filterable_fields format used by this library.
+
+     Args:
+         index_name (str): Name of the Redis index.
+         client (Any): Redis client instance (sync or async).
+         cached_fields (list[dict[str, Any]] | None, optional): Cached filterable fields
+             to return if available. Defaults to None.
+         excluded_fields (set[str] | None, optional): Set of field names to exclude
+             from the result. Defaults to None.
+
+     Returns:
+         list[dict[str, Any]]: List of filterable field configurations.
+     """
+ async def execute_update(client: Redis, index_name: str, chunks: list[Chunk], chunk_ids: list[str], vectors: list[Vector], filterable_fields: list[dict[str, Any]]) -> None:
+     """Execute an update transaction using a Redis pipeline.
+
+     Args:
+         client (Redis): Redis client instance.
+         index_name (str): Name of the Redis index.
+         chunks (list[Chunk]): List of updated chunks.
+         chunk_ids (list[str]): List of chunk IDs to update.
+         vectors (list[Vector]): List of vectors corresponding to chunks.
+         filterable_fields (list[dict[str, Any]]): List of filterable field configurations.
+     """
+ def parse_redisvl_result_to_chunks(results: list[dict[str, Any]], index_name: str) -> list[Chunk]:
+     """Parse RedisVL search results into Chunk objects.
+
+     RedisVL returns search results as a list of dictionaries. This function
+     converts them into Chunk objects with proper metadata parsing and ID stripping.
+
+     Args:
+         results (list[dict[str, Any]]): List of RedisVL search result dictionaries.
+         index_name (str): Index name for stripping prefixes from document IDs.
+
+     Returns:
+         list[Chunk]: List of parsed Chunk objects.
+     """
gllm_datastore/data_store/redis/query_translator.pyi
@@ -0,0 +1,37 @@
+ from _typeshed import Incomplete
+ from collections.abc import Callable
+ from gllm_datastore.constants import BOOL_FALSE_STR as BOOL_FALSE_STR, BOOL_TRUE_STR as BOOL_TRUE_STR, CHUNK_KEYS as CHUNK_KEYS, FIELD_CONFIG_NAME as FIELD_CONFIG_NAME, FIELD_CONFIG_TYPE as FIELD_CONFIG_TYPE, FieldType as FieldType, METADATA_SEPARATOR as METADATA_SEPARATOR
+ from gllm_datastore.core.filters import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter
+ from typing import Any
+
+ REDIS_SPECIAL_CHARS: str
+ REDIS_SPECIAL_CHARS_PATTERN: Incomplete
+
+ class RedisQueryTranslator:
+     """Translates QueryFilter and FilterClause objects to Redis Search query syntax.
+
+     This class encapsulates all query translation logic.
+     """
+     def __init__(self, get_filterable_fields: Callable[[], list[dict[str, Any]]]) -> None:
+         """Initialize the Redis query translator.
+
+         Args:
+             get_filterable_fields (Callable[[], list[dict[str, Any]]]): Callable that returns
+                 the current filterable_fields configuration. This allows lazy loading of
+                 filterable_fields when they become available.
+         """
+     def translate(self, filters: QueryFilter | None) -> str | None:
+         """Translate a structured QueryFilter into a Redis Search query string.
+
+         This is the main entry point for query translation. It handles None filters
+         and delegates to internal translation methods.
+
+         Args:
+             filters (QueryFilter | None): Structured QueryFilter to translate.
+
+         Returns:
+             str | None: A Redis Search query string, or None if no filters are provided.
+
+         Raises:
+             ValueError: When the filter structure is invalid or an operator is incompatible with the field type.
+         """