gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-datastore-binary might be problematic. Click here for more details.

Files changed (108) hide show
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +5 -0
  28. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  29. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  30. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  31. gllm_datastore/core/filters/__init__.pyi +4 -0
  32. gllm_datastore/core/filters/filter.pyi +340 -0
  33. gllm_datastore/core/filters/schema.pyi +149 -0
  34. gllm_datastore/data_store/__init__.pyi +7 -0
  35. gllm_datastore/data_store/base.pyi +138 -0
  36. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  37. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  38. gllm_datastore/data_store/chroma/data_store.pyi +202 -0
  39. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  40. gllm_datastore/data_store/chroma/query.pyi +266 -0
  41. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  42. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  43. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  44. gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
  45. gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
  46. gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
  47. gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
  48. gllm_datastore/data_store/exceptions.pyi +35 -0
  49. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  50. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  51. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  52. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  53. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  54. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  55. gllm_datastore/data_store/redis/data_store.pyi +154 -0
  56. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  57. gllm_datastore/data_store/redis/query.pyi +428 -0
  58. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  59. gllm_datastore/data_store/redis/vector.pyi +131 -0
  60. gllm_datastore/encryptor/__init__.pyi +4 -0
  61. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  62. gllm_datastore/encryptor/encryptor.pyi +52 -0
  63. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  64. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  65. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  66. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  67. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  68. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  69. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  70. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  71. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  72. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  73. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  74. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  75. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  76. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  77. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  78. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  79. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  80. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  81. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  82. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  83. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  84. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  85. gllm_datastore/sql_data_store/constants.pyi +6 -0
  86. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  87. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  88. gllm_datastore/sql_data_store/types.pyi +31 -0
  89. gllm_datastore/utils/__init__.pyi +6 -0
  90. gllm_datastore/utils/converter.pyi +51 -0
  91. gllm_datastore/utils/dict.pyi +21 -0
  92. gllm_datastore/utils/ttl.pyi +25 -0
  93. gllm_datastore/utils/types.pyi +32 -0
  94. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  95. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  96. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  97. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  98. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  99. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  100. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  101. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  102. gllm_datastore.build/.gitignore +1 -0
  103. gllm_datastore.cpython-311-darwin.so +0 -0
  104. gllm_datastore.pyi +156 -0
  105. gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
  106. gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
  107. gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
  108. gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
@@ -0,0 +1,70 @@
1
+ from typing import Any, Protocol
2
+
3
+ class GraphCapability(Protocol):
4
+ """Protocol for graph database operations.
5
+
6
+ This protocol defines the interface for datastores that support graph-based
7
+ data operations. This includes node and relationship management as well as graph queries.
8
+ """
9
+ async def upsert_node(self, label: str, identifier_key: str, identifier_value: str, properties: dict[str, Any] | None = None) -> Any:
10
+ """Create or update a node in the graph.
11
+
12
+ Args:
13
+ label (str): Node label/type.
14
+ identifier_key (str): Key field for node identification.
15
+ identifier_value (str): Value for node identification.
16
+ properties (dict[str, Any] | None, optional): Additional node properties.
17
+ Defaults to None.
18
+
19
+ Returns:
20
+ Any: Created/updated node information.
21
+ """
22
+ async def upsert_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str, properties: dict[str, Any] | None = None) -> Any:
23
+ """Create or update a relationship between nodes.
24
+
25
+ Args:
26
+ node_source_key (str): Source node identifier key.
27
+ node_source_value (str): Source node identifier value.
28
+ relation (str): Relationship type.
29
+ node_target_key (str): Target node identifier key.
30
+ node_target_value (str): Target node identifier value.
31
+ properties (dict[str, Any] | None, optional): Relationship properties.
32
+ Defaults to None.
33
+
34
+ Returns:
35
+ Any: Created/updated relationship information.
36
+ """
37
+ async def retrieve(self, query: str, parameters: dict[str, Any] | None = None) -> list[dict[str, Any]]:
38
+ """Retrieve data from the graph with a specific query.
39
+
40
+ Args:
41
+ query (str): Query to retrieve data from the graph.
42
+ parameters (dict[str, Any] | None, optional): Query parameters. Defaults to None.
43
+
44
+ Returns:
45
+ list[dict[str, Any]]: Query results as list of dictionaries.
46
+ """
47
+ async def delete_node(self, label: str, identifier_key: str, identifier_value: str) -> Any:
48
+ """Delete a node and its relationships.
49
+
50
+ Args:
51
+ label (str): Node label/type.
52
+ identifier_key (str): Node identifier key.
53
+ identifier_value (str): Node identifier value.
54
+
55
+ Returns:
56
+ Any: Deletion result information.
57
+ """
58
+ async def delete_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str) -> Any:
59
+ """Delete a relationship between nodes.
60
+
61
+ Args:
62
+ node_source_key (str): Source node identifier key.
63
+ node_source_value (str): Source node identifier value.
64
+ relation (str): Relationship type.
65
+ node_target_key (str): Target node identifier key.
66
+ node_target_value (str): Target node identifier value.
67
+
68
+ Returns:
69
+ Any: Deletion result information.
70
+ """
@@ -0,0 +1,90 @@
1
+ from gllm_core.schema.chunk import Chunk
2
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
3
+ from gllm_inference.schema import Vector
4
+ from typing import Any, Protocol
5
+
6
+ class VectorCapability(Protocol):
7
+ """Protocol for vector similarity search operations.
8
+
9
+ This protocol defines the interface for datastores that support vector-based
10
+ retrieval operations. This includes similarity search, ID-based lookup as well as
11
+ vector storage.
12
+ """
13
+ async def create(self, data: Chunk | list[Chunk]) -> None:
14
+ """Add chunks to the vector store with automatic embedding generation.
15
+
16
+ Args:
17
+ data (Chunk | list[Chunk]): Single chunk or list of chunks to add.
18
+ """
19
+ async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]], **kwargs: Any) -> None:
20
+ """Add pre-computed vectors directly.
21
+
22
+ Args:
23
+ chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
24
+ corresponding vectors.
25
+ **kwargs: Datastore-specific parameters.
26
+ """
27
+ async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
28
+ """Read records from the datastore using text-based similarity search with optional filtering.
29
+
30
+ Args:
31
+ query (str): Input text to embed and search with.
32
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
33
+ FilterClause objects are automatically converted to QueryFilter internally.
34
+ Defaults to None.
35
+ options (QueryOptions | None, optional): Query options like limit and sorting.
36
+ Defaults to None.
37
+ **kwargs: Datastore-specific parameters.
38
+
39
+ Returns:
40
+ list[Chunk]: Query results.
41
+ """
42
+ async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
43
+ """Direct vector similarity search.
44
+
45
+ Args:
46
+ vector (Vector): Query embedding vector.
47
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
48
+ FilterClause objects are automatically converted to QueryFilter internally.
49
+ Defaults to None.
50
+ options (QueryOptions | None, optional): Query options like limit and sorting.
51
+ Defaults to None.
52
+ **kwargs: Datastore-specific parameters.
53
+
54
+ Returns:
55
+ list[Chunk]: List of chunks ordered by similarity score.
56
+ """
57
+ async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
58
+ """Update existing records in the datastore.
59
+
60
+ Args:
61
+ update_values (dict[str, Any]): Values to update.
62
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
63
+ FilterClause objects are automatically converted to QueryFilter internally.
64
+ Defaults to None.
65
+ **kwargs: Datastore-specific parameters.
66
+ """
67
+ async def delete(self, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
68
+ """Delete records from the datastore.
69
+
70
+ Args:
71
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
72
+ FilterClause objects are automatically converted to QueryFilter internally.
73
+ Defaults to None.
74
+ **kwargs: Datastore-specific parameters.
75
+
76
+ Note:
77
+ If filters is None, no operation is performed (no-op).
78
+ """
79
+ async def clear(self) -> None:
80
+ """Clear all records from the datastore."""
81
+ async def ensure_index(self, **kwargs: Any) -> None:
82
+ """Ensure vector index exists, creating it if necessary.
83
+
84
+ This method ensures that the vector index required for similarity search
85
+ operations is created. If the index already exists, this method performs
86
+ no operation (idempotent).
87
+
88
+ Args:
89
+ **kwargs: Datastore-specific parameters for index configuration.
90
+ """
@@ -0,0 +1,4 @@
1
+ from gllm_datastore.core.filters.filter import all_ as all_, and_ as and_, any_ as any_, array_contains as array_contains, eq as eq, gt as gt, gte as gte, in_ as in_, lt as lt, lte as lte, ne as ne, nin as nin, not_ as not_, or_ as or_, text_contains as text_contains
2
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
3
+
4
+ __all__ = ['FilterCondition', 'FilterOperator', 'FilterClause', 'QueryFilter', 'QueryOptions', 'all_', 'and_', 'any_', 'array_contains', 'eq', 'gt', 'gte', 'in_', 'lt', 'lte', 'ne', 'nin', 'not_', 'or_', 'text_contains']
@@ -0,0 +1,340 @@
1
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter
2
+ from typing import Any
3
+
4
+ def eq(key: str, value: Any) -> FilterClause:
5
+ '''Create an equality filter.
6
+
7
+ This operator checks if the field value is exactly equal to the specified value.
8
+ Works with strings, numbers, booleans, and other scalar types.
9
+
10
+ Example:
11
+ Filter for documents where `metadata.status == "active"`.
12
+ ```python
13
+ from gllm_datastore.core.filters import eq
14
+
15
+ filter = eq("metadata.status", "active")
16
+ ```
17
+
18
+ Args:
19
+ key (str): Field path to filter on.
20
+ value (Any): Value to compare. Matches field values exactly equal to this value.
21
+
22
+ Returns:
23
+ FilterClause: Equality filter.
24
+ '''
25
+ def ne(key: str, value: Any) -> FilterClause:
26
+ '''Create a not-equal filter.
27
+
28
+ This operator checks if the field value is not equal to the specified value.
29
+ Works with strings, numbers, booleans, and other scalar types.
30
+
31
+ Example:
32
+ Filter for documents where `metadata.status != "active"`.
33
+ ```python
34
+ from gllm_datastore.core.filters import ne
35
+
36
+ filter = ne("metadata.status", "active")
37
+ ```
38
+
39
+ Args:
40
+ key (str): Field path to filter on.
41
+ value (Any): Value to exclude. Matches all values except this one.
42
+
43
+ Returns:
44
+ FilterClause: Not-equal filter.
45
+ '''
46
+ def gt(key: str, value: int | float) -> FilterClause:
47
+ '''Create a greater-than filter.
48
+
49
+ This operator checks if the field value is strictly greater than the specified value.
50
+ Only works with numeric fields (int or float).
51
+
52
+ Example:
53
+ Filter for documents where `metadata.price > 100`.
54
+ ```python
55
+ from gllm_datastore.core.filters import gt
56
+
57
+ filter = gt("metadata.price", 100)
58
+ ```
59
+
60
+ Args:
61
+ key (str): Field path to filter on (must be numeric).
62
+ value (int | float): Threshold value. Matches field values greater than this.
63
+
64
+ Returns:
65
+ FilterClause: Greater-than filter.
66
+ '''
67
+ def lt(key: str, value: int | float) -> FilterClause:
68
+ '''Create a less-than filter.
69
+
70
+ This operator checks if the field value is strictly less than the specified value.
71
+ Only works with numeric fields (int or float).
72
+
73
+ Example:
74
+ Filter for documents where `metadata.price < 100`.
75
+ ```python
76
+ from gllm_datastore.core.filters import lt
77
+
78
+ filter = lt("metadata.price", 100)
79
+ ```
80
+
81
+ Args:
82
+ key (str): Field path to filter on (must be numeric).
83
+ value (int | float): Threshold value. Matches field values less than this.
84
+
85
+ Returns:
86
+ FilterClause: Less-than filter.
87
+ '''
88
+ def gte(key: str, value: int | float) -> FilterClause:
89
+ '''Create a greater-than-or-equal filter.
90
+
91
+ This operator checks if the field value is greater than or equal to the specified value.
92
+ Only works with numeric fields (int or float).
93
+
94
+ Example:
95
+ Filter for documents where `metadata.price >= 100`.
96
+ ```python
97
+ from gllm_datastore.core.filters import gte
98
+
99
+ filter = gte("metadata.price", 100)
100
+ ```
101
+
102
+ Args:
103
+ key (str): Field path to filter on (must be numeric).
104
+ value (int | float): Threshold value. Matches field values greater than or equal to this.
105
+
106
+ Returns:
107
+ FilterClause: Greater-than-or-equal filter.
108
+ '''
109
+ def lte(key: str, value: int | float) -> FilterClause:
110
+ '''Create a less-than-or-equal filter.
111
+
112
+ This operator checks if the field value is less than or equal to the specified value.
113
+ Only works with numeric fields (int or float).
114
+
115
+ Example:
116
+ Filter for documents where `metadata.price <= 100`.
117
+ ```python
118
+ from gllm_datastore.core.filters import lte
119
+
120
+ filter = lte("metadata.price", 100)
121
+ ```
122
+
123
+ Args:
124
+ key (str): Field path to filter on (must be numeric).
125
+ value (int | float): Threshold value. Matches field values less than or equal to this.
126
+
127
+ Returns:
128
+ FilterClause: Less-than-or-equal filter.
129
+ '''
130
+ def in_(key: str, values: list) -> FilterClause:
131
+ '''Create an IN filter.
132
+
133
+ This operator checks if the field value is one of the values in the provided list.
134
+ Works with scalar fields (string, number, boolean). The field value must exactly
135
+ match one of the values in the list.
136
+
137
+ Example:
138
+ Filter for documents where `metadata.status in ["active", "pending"]`.
139
+ ```python
140
+ from gllm_datastore.core.filters import in_
141
+
142
+ filter = in_("metadata.status", ["active", "pending"])
143
+ ```
144
+
145
+ Args:
146
+ key (str): Field path to filter on (must be a scalar field).
147
+ values (list): List of possible values. Matches field values that match one of these exactly.
148
+
149
+ Returns:
150
+ FilterClause: IN filter.
151
+ '''
152
+ def nin(key: str, values: list) -> FilterClause:
153
+ '''Create a NOT IN filter.
154
+
155
+ This operator checks if the field value is not in the provided list.
156
+ Works with scalar fields (string, number, boolean). The field value must not
157
+ match any of the values in the list.
158
+
159
+ Example:
160
+ Filter for documents where `metadata.status not in ["deleted", "archived"]`.
161
+ ```python
162
+ from gllm_datastore.core.filters import nin
163
+
164
+ filter = nin("metadata.status", ["deleted", "archived"])
165
+ ```
166
+
167
+ Args:
168
+ key (str): Field path to filter on (must be a scalar field).
169
+ values (list): List of excluded values. Matches field values that do not match any of these.
170
+
171
+ Returns:
172
+ FilterClause: NOT IN filter.
173
+ '''
174
+ def array_contains(key: str, value: Any) -> FilterClause:
175
+ '''Create an ARRAY_CONTAINS filter (array field contains value).
176
+
177
+ This operator checks if an array field contains the specified value as an element.
178
+ The field must be an array/list, and the value must be present in that array.
179
+ Use this for checking array membership.
180
+
181
+ Example:
182
+ Filter for documents where the tags array contains "python".
183
+ This will match documents where "python" is an element in metadata.tags.
184
+ For example, if metadata.tags = ["python", "javascript"], this will match.
185
+ ```python
186
+ from gllm_datastore.core.filters import array_contains
187
+
188
+ filter = array_contains("metadata.tags", "python")
189
+ ```
190
+
191
+ Args:
192
+ key (str): Field path to filter on (must be an array field).
193
+ value (Any): Value to check if it exists as an element in the array.
194
+
195
+ Returns:
196
+ FilterClause: ARRAY_CONTAINS filter.
197
+ '''
198
+ def text_contains(key: str, value: str) -> FilterClause:
199
+ '''Create a TEXT_CONTAINS filter (text field contains substring).
200
+
201
+ This operator checks if a text/string field contains the specified substring.
202
+ The field must be a string, and the value must appear as a substring within that string.
203
+ Use this for substring matching in text content.
204
+
205
+ Example:
206
+ Filter for documents where the content field contains "machine learning".
207
+ This will match documents where "machine learning" appears anywhere in the content.
208
+ For example, if content = "This is about machine learning algorithms", this will match.
209
+ ```python
210
+ from gllm_datastore.core.filters import text_contains
211
+
212
+ filter = text_contains("content", "machine learning")
213
+ ```
214
+
215
+ Args:
216
+ key (str): Field path to filter on (must be a string/text field).
217
+ value (str): Substring to search for in the text.
218
+
219
+ Returns:
220
+ FilterClause: TEXT_CONTAINS filter.
221
+ '''
222
+ def any_(key: str, values: list) -> FilterClause:
223
+ '''Create an ANY filter (array field contains any of the values).
224
+
225
+ This operator checks if an array field contains at least one of the values in the provided list.
226
+ The field must be an array/list, and at least one element from the values list must be
227
+ present in the array. This is similar to checking if the arrays have any intersection.
228
+
229
+ Example:
230
+ Filter for documents where the tags array contains at least one of "python" or "javascript".
231
+ This will match if metadata.tags contains "python", "javascript", or both.
232
+ For example, if metadata.tags = ["python", "rust"], this will match (because of "python").
233
+ ```python
234
+ from gllm_datastore.core.filters import any_
235
+
236
+ filter = any_("metadata.tags", ["python", "javascript"])
237
+ ```
238
+
239
+ Args:
240
+ key (str): Field path to filter on (must be an array field).
241
+ values (list): List of values. At least one must be present in the array.
242
+
243
+ Returns:
244
+ FilterClause: ANY filter.
245
+ '''
246
+ def all_(key: str, values: list) -> FilterClause:
247
+ '''Create an ALL filter (array field contains all of the values).
248
+
249
+ This operator checks if an array field contains all of the values in the provided list.
250
+ The field must be an array/list, and every value in the values list must be present
251
+ as an element in the array. The array may contain additional elements.
252
+
253
+ Example:
254
+ Filter for documents where the tags array contains both "python" and "javascript".
255
+ This will match only if metadata.tags contains both values.
256
+ For example, if metadata.tags = ["python", "javascript", "rust"], this will match.
257
+ If metadata.tags = ["python", "rust"], this will not match (missing "javascript").
258
+ ```python
259
+ from gllm_datastore.core.filters import all_
260
+
261
+ filter = all_("metadata.tags", ["python", "javascript"])
262
+ ```
263
+
264
+ Args:
265
+ key (str): Field path to filter on (must be an array field).
266
+ values (list): List of values. All must be present in the array.
267
+
268
+ Returns:
269
+ FilterClause: ALL filter.
270
+ '''
271
+ def and_(*filters: FilterClause | QueryFilter) -> QueryFilter:
272
+ '''Combine filters with AND condition.
273
+
274
+ This logical operator combines multiple filters such that all conditions must be satisfied.
275
+ A document matches only if it satisfies every filter in the list.
276
+
277
+ Example:
278
+ Filter for documents where status is "active" AND age is at least 18.
279
+ This will match documents that satisfy both conditions simultaneously.
280
+ ```python
281
+ from gllm_datastore.core.filters import and_, eq, gte
282
+
283
+ filter = and_(eq("metadata.status", "active"), gte("metadata.age", 18))
284
+ ```
285
+
286
+ Args:
287
+ *filters (FilterClause | QueryFilter): Variable number of filters to combine.
288
+ All filters must match for a document to be included.
289
+
290
+ Returns:
291
+ QueryFilter: Combined filter with AND condition.
292
+ '''
293
+ def or_(*filters: FilterClause | QueryFilter) -> QueryFilter:
294
+ '''Combine filters with OR condition.
295
+
296
+ This logical operator combines multiple filters such that at least one condition must be satisfied.
297
+ A document matches if it satisfies any of the filters in the list.
298
+
299
+ Example:
300
+ Filter for documents where status is "active" OR status is "pending".
301
+ This will match documents that satisfy either condition (or both).
302
+ ```python
303
+ from gllm_datastore.core.filters import or_, eq
304
+
305
+ filter = or_(eq("metadata.status", "active"), eq("metadata.status", "pending"))
306
+ ```
307
+
308
+ Args:
309
+ *filters (FilterClause | QueryFilter): Variable number of filters to combine.
310
+ At least one filter must match for a document to be included.
311
+
312
+ Returns:
313
+ QueryFilter: Combined filter with OR condition.
314
+ '''
315
+ def not_(filter: FilterClause | QueryFilter) -> QueryFilter:
316
+ '''Negate a filter.
317
+
318
+ This logical operator inverts the result of a filter. A document matches if it does
319
+ not satisfy the specified filter condition. Useful for exclusion criteria.
320
+
321
+ This operator negates exactly one filter. Passing multiple filters to a NOT condition is not supported.
322
+
323
+ Example:
324
+ Filter for documents where status is NOT "deleted".
325
+ This will match all documents except those with status == "deleted".
326
+ Can also be used with other operators, e.g., not_(text_contains("content", "spam"))
327
+ to exclude documents containing a specific substring.
328
+ ```python
329
+ from gllm_datastore.core.filters import not_, eq
330
+
331
+ filter = not_(eq("metadata.status", "deleted"))
332
+ ```
333
+
334
+ Args:
335
+ filter (FilterClause | QueryFilter): Filter to negate. Documents matching this
336
+ filter will be excluded from results.
337
+
338
+ Returns:
339
+ QueryFilter: Negated filter.
340
+ '''
@@ -0,0 +1,149 @@
1
+ from enum import StrEnum
2
+ from pydantic import BaseModel
3
+ from typing import Any, Sequence
4
+
5
+ class FilterOperator(StrEnum):
6
+ """Operators for comparing field values."""
7
+ EQ: str
8
+ NE: str
9
+ GT: str
10
+ LT: str
11
+ GTE: str
12
+ LTE: str
13
+ IN: str
14
+ NIN: str
15
+ ANY: str
16
+ ALL: str
17
+ ARRAY_CONTAINS: str
18
+ TEXT_CONTAINS: str
19
+
20
+ class FilterCondition(StrEnum):
21
+ """Logical conditions for combining filters."""
22
+ AND: str
23
+ OR: str
24
+ NOT: str
25
+
26
+ class FilterClause(BaseModel):
27
+ '''Single filter criterion with operator support.
28
+
29
+ Examples:
30
+ ```python
31
+ FilterClause(key="metadata.age", value=25, operator=FilterOperator.GT)
32
+ FilterClause(key="metadata.status", value=["active", "pending"], operator=FilterOperator.IN)
33
+ ```
34
+
35
+ Attributes:
36
+ key (str): The field path to filter on (supports dot notation for nested fields).
37
+ value (bool | int | float | str | list[str] | list[float] | list[int] | list[bool] | None):
38
+ The value to compare against.
39
+ operator (FilterOperator): The comparison operator.
40
+ '''
41
+ key: str
42
+ value: bool | int | float | str | list[str] | list[float] | list[int] | list[bool] | None
43
+ operator: FilterOperator
44
+ def to_query_filter(self) -> QueryFilter:
45
+ '''Convert FilterClause to QueryFilter.
46
+
47
+ This method enables automatic conversion of FilterClause to QueryFilter.
48
+
49
+ Example:
50
+ ```python
51
+ clause = FilterClause(key="metadata.status", value="active", operator=FilterOperator.EQ)
52
+ query_filter = clause.to_query_filter()
53
+ # Results in: QueryFilter(filters=[clause], condition=FilterCondition.AND)
54
+ ```
55
+
56
+ Returns:
57
+ QueryFilter: A QueryFilter wrapping this FilterClause with AND condition.
58
+ '''
59
+
60
+ class QueryFilter(BaseModel):
61
+ '''Composite filter supporting multiple conditions and logical operators.
62
+
63
+ Attributes:
64
+ filters (list[FilterClause | QueryFilter]): List of filters to combine.
65
+ Can include nested QueryFilter for complex logic.
66
+ condition (FilterCondition): Logical operator to combine filters. Defaults to AND.
67
+
68
+ Examples:
69
+ 1. Simple AND: age > 25 AND status == "active"
70
+ ```python
71
+ QueryFilter(
72
+ filters=[
73
+ FilterClause(key="metadata.age", value=25, operator=FilterOperator.GT),
74
+ FilterClause(key="metadata.status", value="active", operator=FilterOperator.EQ)
75
+ ],
76
+ condition=FilterCondition.AND
77
+ )
78
+ ```
79
+
80
+ 2. Complex OR: (status == "active" OR status == "pending") AND age >= 18
81
+ ```python
82
+ QueryFilter(
83
+ filters=[
84
+ QueryFilter(
85
+ filters=[
86
+ FilterClause(key="metadata.status", value="active", operator=FilterOperator.EQ),
86
+ FilterClause(key="metadata.status", value="pending", operator=FilterOperator.EQ)
88
+ ],
89
+ condition=FilterCondition.OR
90
+ ),
91
+ FilterClause(key="metadata.age", value=18, operator=FilterOperator.GTE)
92
+ ],
93
+ condition=FilterCondition.AND
94
+ )
95
+ ```
96
+
97
+ 3. NOT: NOT (status == "deleted")
98
+ ```python
99
+ QueryFilter(
100
+ filters=[
101
+ FilterClause(key="metadata.status", value="deleted", operator=FilterOperator.EQ)
102
+ ],
103
+ condition=FilterCondition.NOT
104
+ )
105
+ ```
106
+ '''
107
+ filters: list[FilterClause | QueryFilter]
108
+ condition: FilterCondition
109
+ @classmethod
110
+ def from_dicts(cls, filter_dicts: list[dict[str, Any]], condition: FilterCondition = ...) -> QueryFilter:
111
+ '''Create QueryFilter from list of filter dictionaries.
112
+
113
+ Example:
114
+ ```python
115
+ QueryFilter.from_dicts(
116
+ [
117
+ {"key": "metadata.age", "value": 25, "operator": ">"},
118
+ {"key": "metadata.status", "value": "active"}
119
+ ],
120
+ condition=FilterCondition.AND
121
+ )
122
+ ```
123
+
124
+ Args:
125
+ filter_dicts (list[dict[str, Any]]): List of filter dictionaries. Contains the key, value, and operator.
126
+ condition (FilterCondition, optional): Logical operator to combine filters. Defaults to AND.
127
+
128
+ Returns:
129
+ QueryFilter: Composite filter instance.
130
+ '''
131
+
132
+ class QueryOptions(BaseModel):
133
+ '''Model for query options.
134
+
135
+ Attributes:
136
+ include_fields (Sequence[str] | None): The fields to include in the query result. Defaults to None.
137
+ order_by (str | None): The column to order the query result by. Defaults to None.
138
+ order_desc (bool): Whether to order the query result in descending order. Defaults to False.
139
+ limit (int | None): The maximum number of rows to return. Must be >= 0. Defaults to None.
140
+
141
+ Example:
142
+ ```python
143
+ QueryOptions(include_fields=["field1", "field2"], order_by="column1", order_desc=True, limit=10)
144
+ ```
145
+ '''
146
+ include_fields: Sequence[str] | None
147
+ order_by: str | None
148
+ order_desc: bool
149
+ limit: int | None
@@ -0,0 +1,7 @@
1
+ from gllm_datastore.data_store.chroma import ChromaDataStore as ChromaDataStore
2
+ from gllm_datastore.data_store.elasticsearch import ElasticsearchDataStore as ElasticsearchDataStore
3
+ from gllm_datastore.data_store.exceptions import NotRegisteredException as NotRegisteredException, NotSupportedException as NotSupportedException
4
+ from gllm_datastore.data_store.in_memory import InMemoryDataStore as InMemoryDataStore
5
+ from gllm_datastore.data_store.redis import RedisDataStore as RedisDataStore
6
+
7
+ __all__ = ['ChromaDataStore', 'ElasticsearchDataStore', 'InMemoryDataStore', 'NotRegisteredException', 'NotSupportedException', 'RedisDataStore']