gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gllm-datastore-binary might be problematic. Click here for more details.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +5 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +7 -0
- gllm_datastore/data_store/base.pyi +138 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +202 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +154 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-311-darwin.so +0 -0
- gllm_datastore.pyi +156 -0
- gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
- gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
- gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from typing import Any, Protocol
|
|
2
|
+
|
|
3
|
+
class GraphCapability(Protocol):
|
|
4
|
+
"""Protocol for graph database operations.
|
|
5
|
+
|
|
6
|
+
This protocol defines the interface for datastores that support graph-based
|
|
7
|
+
data operations. This includes node and relationship management as well as graph queries.
|
|
8
|
+
"""
|
|
9
|
+
async def upsert_node(self, label: str, identifier_key: str, identifier_value: str, properties: dict[str, Any] | None = None) -> Any:
|
|
10
|
+
"""Create or update a node in the graph.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
label (str): Node label/type.
|
|
14
|
+
identifier_key (str): Key field for node identification.
|
|
15
|
+
identifier_value (str): Value for node identification.
|
|
16
|
+
properties (dict[str, Any] | None, optional): Additional node properties.
|
|
17
|
+
Defaults to None.
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
Any: Created/updated node information.
|
|
21
|
+
"""
|
|
22
|
+
async def upsert_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str, properties: dict[str, Any] | None = None) -> Any:
|
|
23
|
+
"""Create or update a relationship between nodes.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
node_source_key (str): Source node identifier key.
|
|
27
|
+
node_source_value (str): Source node identifier value.
|
|
28
|
+
relation (str): Relationship type.
|
|
29
|
+
node_target_key (str): Target node identifier key.
|
|
30
|
+
node_target_value (str): Target node identifier value.
|
|
31
|
+
properties (dict[str, Any] | None, optional): Relationship properties.
|
|
32
|
+
Defaults to None.
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
Any: Created/updated relationship information.
|
|
36
|
+
"""
|
|
37
|
+
async def retrieve(self, query: str, parameters: dict[str, Any] | None = None) -> list[dict[str, Any]]:
|
|
38
|
+
"""Retrieve data from the graph with specific query.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
query (str): Query to retrieve data from the graph.
|
|
42
|
+
parameters (dict[str, Any] | None, optional): Query parameters. Defaults to None.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
list[dict[str, Any]]: Query results as list of dictionaries.
|
|
46
|
+
"""
|
|
47
|
+
async def delete_node(self, label: str, identifier_key: str, identifier_value: str) -> Any:
|
|
48
|
+
"""Delete a node and its relationships.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
label (str): Node label/type.
|
|
52
|
+
identifier_key (str): Node identifier key.
|
|
53
|
+
identifier_value (str): Node identifier value.
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
Any: Deletion result information.
|
|
57
|
+
"""
|
|
58
|
+
async def delete_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str) -> Any:
|
|
59
|
+
"""Delete a relationship between nodes.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
node_source_key (str): Source node identifier key.
|
|
63
|
+
node_source_value (str): Source node identifier value.
|
|
64
|
+
relation (str): Relationship type.
|
|
65
|
+
node_target_key (str): Target node identifier key.
|
|
66
|
+
node_target_value (str): Target node identifier value.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Any: Deletion result information.
|
|
70
|
+
"""
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from gllm_core.schema.chunk import Chunk
|
|
2
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
3
|
+
from gllm_inference.schema import Vector
|
|
4
|
+
from typing import Any, Protocol
|
|
5
|
+
|
|
6
|
+
class VectorCapability(Protocol):
    """Protocol for vector similarity search operations.

    This protocol defines the interface for datastores that support vector-based
    retrieval operations. This includes similarity search, ID-based lookup as well as
    vector storage.
    """

    async def create(self, data: Chunk | list[Chunk]) -> None:
        """Add chunks to the vector store with automatic embedding generation.

        Args:
            data (Chunk | list[Chunk]): Single chunk or list of chunks to add.
        """

    async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]], **kwargs: Any) -> None:
        """Add pre-computed vectors directly.

        Args:
            chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
                corresponding vectors.
            **kwargs: Datastore-specific parameters.
        """

    async def retrieve(
        self,
        query: str,
        filters: FilterClause | QueryFilter | None = None,
        options: QueryOptions | None = None,
        **kwargs: Any,
    ) -> list[Chunk]:
        """Read records from the datastore using text-based similarity search with optional filtering.

        Args:
            query (str): Input text to embed and search with.
            filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options like limit and sorting.
                Defaults to None.
            **kwargs: Datastore-specific parameters.

        Returns:
            list[Chunk]: Query results.
        """

    async def retrieve_by_vector(
        self,
        vector: Vector,
        filters: FilterClause | QueryFilter | None = None,
        options: QueryOptions | None = None,
        **kwargs: Any,
    ) -> list[Chunk]:
        """Direct vector similarity search.

        Args:
            vector (Vector): Query embedding vector.
            filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options like limit and sorting.
                Defaults to None.
            **kwargs: Datastore-specific parameters.

        Returns:
            list[Chunk]: List of chunks ordered by similarity score.
        """

    async def update(
        self,
        update_values: dict[str, Any],
        filters: FilterClause | QueryFilter | None = None,
        **kwargs: Any,
    ) -> None:
        """Update existing records in the datastore.

        Args:
            update_values (dict[str, Any]): Values to update.
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            **kwargs: Datastore-specific parameters.
        """

    async def delete(self, filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
        """Delete records from the datastore.

        Args:
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            **kwargs: Datastore-specific parameters.

        Note:
            If filters is None, no operation is performed (no-op).
        """

    async def clear(self) -> None:
        """Clear all records from the datastore."""

    async def ensure_index(self, **kwargs: Any) -> None:
        """Ensure vector index exists, creating it if necessary.

        This method ensures that the vector index required for similarity search
        operations is created. If the index already exists, this method performs
        no operation (idempotent).

        Args:
            **kwargs: Datastore-specific parameters for index configuration.
        """
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
from gllm_datastore.core.filters.filter import all_ as all_, and_ as and_, any_ as any_, array_contains as array_contains, eq as eq, gt as gt, gte as gte, in_ as in_, lt as lt, lte as lte, ne as ne, nin as nin, not_ as not_, or_ as or_, text_contains as text_contains
|
|
2
|
+
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
3
|
+
|
|
4
|
+
# Public API of the filters package: schema types first, then the filter-builder helpers.
__all__ = [
    'FilterCondition',
    'FilterOperator',
    'FilterClause',
    'QueryFilter',
    'QueryOptions',
    'all_',
    'and_',
    'any_',
    'array_contains',
    'eq',
    'gt',
    'gte',
    'in_',
    'lt',
    'lte',
    'ne',
    'nin',
    'not_',
    'or_',
    'text_contains',
]
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
def eq(key: str, value: Any) -> FilterClause:
    """Create an equality filter.

    This operator checks if the field value is exactly equal to the specified value.
    Works with strings, numbers, booleans, and other scalar types.

    Example:
        Filter for documents where `metadata.status == "active"`.
        ```python
        from gllm_datastore.core.filters import eq

        filter = eq("metadata.status", "active")
        ```

    Args:
        key (str): Field path to filter on.
        value (Any): Value to compare. Matches field values exactly equal to this value.

    Returns:
        FilterClause: Equality filter.
    """
|
|
25
|
+
def ne(key: str, value: Any) -> FilterClause:
    """Create a not-equal filter.

    This operator checks if the field value is not equal to the specified value.
    Works with strings, numbers, booleans, and other scalar types.

    Example:
        Filter for documents where `metadata.status != "active"`.
        ```python
        from gllm_datastore.core.filters import ne

        filter = ne("metadata.status", "active")
        ```

    Args:
        key (str): Field path to filter on.
        value (Any): Value to exclude. Matches all values except this one.

    Returns:
        FilterClause: Not-equal filter.
    """
|
|
46
|
+
def gt(key: str, value: int | float) -> FilterClause:
    """Create a greater-than filter.

    This operator checks if the field value is strictly greater than the specified value.
    Only works with numeric fields (int or float).

    Example:
        Filter for documents where `metadata.price > 100`.
        ```python
        from gllm_datastore.core.filters import gt

        filter = gt("metadata.price", 100)
        ```

    Args:
        key (str): Field path to filter on (must be numeric).
        value (int | float): Threshold value. Matches field values greater than this.

    Returns:
        FilterClause: Greater-than filter.
    """
|
|
67
|
+
def lt(key: str, value: int | float) -> FilterClause:
    """Create a less-than filter.

    This operator checks if the field value is strictly less than the specified value.
    Only works with numeric fields (int or float).

    Example:
        Filter for documents where `metadata.price < 100`.
        ```python
        from gllm_datastore.core.filters import lt

        filter = lt("metadata.price", 100)
        ```

    Args:
        key (str): Field path to filter on (must be numeric).
        value (int | float): Threshold value. Matches field values less than this.

    Returns:
        FilterClause: Less-than filter.
    """
|
|
88
|
+
def gte(key: str, value: int | float) -> FilterClause:
    """Create a greater-than-or-equal filter.

    This operator checks if the field value is greater than or equal to the specified value.
    Only works with numeric fields (int or float).

    Example:
        Filter for documents where `metadata.price >= 100`.
        ```python
        from gllm_datastore.core.filters import gte

        filter = gte("metadata.price", 100)
        ```

    Args:
        key (str): Field path to filter on (must be numeric).
        value (int | float): Threshold value. Matches field values greater than or equal to this.

    Returns:
        FilterClause: Greater-than-or-equal filter.
    """
|
|
109
|
+
def lte(key: str, value: int | float) -> FilterClause:
    """Create a less-than-or-equal filter.

    This operator checks if the field value is less than or equal to the specified value.
    Only works with numeric fields (int or float).

    Example:
        Filter for documents where `metadata.price <= 100`.
        ```python
        from gllm_datastore.core.filters import lte

        filter = lte("metadata.price", 100)
        ```

    Args:
        key (str): Field path to filter on (must be numeric).
        value (int | float): Threshold value. Matches field values less than or equal to this.

    Returns:
        FilterClause: Less-than-or-equal filter.
    """
|
|
130
|
+
def in_(key: str, values: list) -> FilterClause:
    """Create an IN filter.

    This operator checks if the field value is one of the values in the provided list.
    Works with scalar fields (string, number, boolean). The field value must exactly
    match one of the values in the list.

    Example:
        Filter for documents where `metadata.status in ["active", "pending"]`.
        ```python
        from gllm_datastore.core.filters import in_

        filter = in_("metadata.status", ["active", "pending"])
        ```

    Args:
        key (str): Field path to filter on (must be a scalar field).
        values (list): List of possible values. Matches field values that match one of these exactly.

    Returns:
        FilterClause: IN filter.
    """
|
|
152
|
+
def nin(key: str, values: list) -> FilterClause:
    """Create a NOT IN filter.

    This operator checks if the field value is not in the provided list.
    Works with scalar fields (string, number, boolean). The field value must not
    match any of the values in the list.

    Example:
        Filter for documents where `metadata.status not in ["deleted", "archived"]`.
        ```python
        from gllm_datastore.core.filters import nin

        filter = nin("metadata.status", ["deleted", "archived"])
        ```

    Args:
        key (str): Field path to filter on (must be a scalar field).
        values (list): List of excluded values. Matches field values that do not match any of these.

    Returns:
        FilterClause: NOT IN filter.
    """
|
|
174
|
+
def array_contains(key: str, value: Any) -> FilterClause:
    """Create an ARRAY_CONTAINS filter (array field contains value).

    This operator checks if an array field contains the specified value as an element.
    The field must be an array/list, and the value must be present in that array.
    Use this for checking array membership.

    Example:
        Filter for documents where the tags array contains "python".
        This will match documents where "python" is an element in metadata.tags.
        For example, if metadata.tags = ["python", "javascript"], this will match.
        ```python
        from gllm_datastore.core.filters import array_contains

        filter = array_contains("metadata.tags", "python")
        ```

    Args:
        key (str): Field path to filter on (must be an array field).
        value (Any): Value to check if it exists as an element in the array.

    Returns:
        FilterClause: ARRAY_CONTAINS filter.
    """
|
|
198
|
+
def text_contains(key: str, value: str) -> FilterClause:
    """Create a TEXT_CONTAINS filter (text field contains substring).

    This operator checks if a text/string field contains the specified substring.
    The field must be a string, and the value must appear as a substring within that string.
    Use this for substring matching in text content.

    Example:
        Filter for documents where the content field contains "machine learning".
        This will match documents where "machine learning" appears anywhere in the content.
        For example, if content = "This is about machine learning algorithms", this will match.
        ```python
        from gllm_datastore.core.filters import text_contains

        filter = text_contains("content", "machine learning")
        ```

    Args:
        key (str): Field path to filter on (must be a string/text field).
        value (str): Substring to search for in the text.

    Returns:
        FilterClause: TEXT_CONTAINS filter.
    """
|
|
222
|
+
def any_(key: str, values: list) -> FilterClause:
    """Create an ANY filter (array field contains any of the values).

    This operator checks if an array field contains at least one of the values in the provided list.
    The field must be an array/list, and at least one element from the values list must be
    present in the array. This is similar to checking if the arrays have any intersection.

    Example:
        Filter for documents where the tags array contains at least one of "python" or "javascript".
        This will match if metadata.tags contains "python", "javascript", or both.
        For example, if metadata.tags = ["python", "rust"], this will match (because of "python").
        ```python
        from gllm_datastore.core.filters import any_

        filter = any_("metadata.tags", ["python", "javascript"])
        ```

    Args:
        key (str): Field path to filter on (must be an array field).
        values (list): List of values. At least one must be present in the array.

    Returns:
        FilterClause: ANY filter.
    """
|
|
246
|
+
def all_(key: str, values: list) -> FilterClause:
    """Create an ALL filter (array field contains all of the values).

    This operator checks if an array field contains all of the values in the provided list.
    The field must be an array/list, and every value in the values list must be present
    as an element in the array. The array may contain additional elements.

    Example:
        Filter for documents where the tags array contains both "python" and "javascript".
        This will match only if metadata.tags contains both values.
        For example, if metadata.tags = ["python", "javascript", "rust"], this will match.
        If metadata.tags = ["python", "rust"], this will not match (missing "javascript").
        ```python
        from gllm_datastore.core.filters import all_

        filter = all_("metadata.tags", ["python", "javascript"])
        ```

    Args:
        key (str): Field path to filter on (must be an array field).
        values (list): List of values. All must be present in the array.

    Returns:
        FilterClause: ALL filter.
    """
|
|
271
|
+
def and_(*filters: FilterClause | QueryFilter) -> QueryFilter:
    """Combine filters with AND condition.

    This logical operator combines multiple filters such that all conditions must be satisfied.
    A document matches only if it satisfies every filter in the list.

    Example:
        Filter for documents where status is "active" AND age is at least 18.
        This will match documents that satisfy both conditions simultaneously.
        ```python
        from gllm_datastore.core.filters import and_, eq, gte

        filter = and_(eq("metadata.status", "active"), gte("metadata.age", 18))
        ```

    Args:
        *filters (FilterClause | QueryFilter): Variable number of filters to combine.
            All filters must match for a document to be included.

    Returns:
        QueryFilter: Combined filter with AND condition.
    """
|
|
293
|
+
def or_(*filters: FilterClause | QueryFilter) -> QueryFilter:
    """Combine filters with OR condition.

    This logical operator combines multiple filters such that at least one condition must be satisfied.
    A document matches if it satisfies any of the filters in the list.

    Example:
        Filter for documents where status is "active" OR status is "pending".
        This will match documents that satisfy either condition (or both).
        ```python
        from gllm_datastore.core.filters import or_, eq

        filter = or_(eq("metadata.status", "active"), eq("metadata.status", "pending"))
        ```

    Args:
        *filters (FilterClause | QueryFilter): Variable number of filters to combine.
            At least one filter must match for a document to be included.

    Returns:
        QueryFilter: Combined filter with OR condition.
    """
|
|
315
|
+
def not_(filter: FilterClause | QueryFilter) -> QueryFilter:
    """Negate a filter.

    This logical operator inverts the result of a filter. A document matches if it does
    not satisfy the specified filter condition. Useful for exclusion criteria.

    This operator only supports NOT with a single filter. Multiple filters in a NOT
    condition are not supported.

    Example:
        Filter for documents where status is NOT "deleted".
        This will match all documents except those with status == "deleted".
        Can also be used with other operators, e.g., not_(text_contains("content", "spam"))
        to exclude documents containing a specific substring.
        ```python
        from gllm_datastore.core.filters import not_, eq

        filter = not_(eq("metadata.status", "deleted"))
        ```

    Args:
        filter (FilterClause | QueryFilter): Filter to negate. Documents matching this
            filter will be excluded from results.

    Returns:
        QueryFilter: Negated filter.
    """
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
from typing import Any, Sequence
|
|
4
|
+
|
|
5
|
+
class FilterOperator(StrEnum):
|
|
6
|
+
"""Operators for comparing field values."""
|
|
7
|
+
EQ: str
|
|
8
|
+
NE: str
|
|
9
|
+
GT: str
|
|
10
|
+
LT: str
|
|
11
|
+
GTE: str
|
|
12
|
+
LTE: str
|
|
13
|
+
IN: str
|
|
14
|
+
NIN: str
|
|
15
|
+
ANY: str
|
|
16
|
+
ALL: str
|
|
17
|
+
ARRAY_CONTAINS: str
|
|
18
|
+
TEXT_CONTAINS: str
|
|
19
|
+
|
|
20
|
+
class FilterCondition(StrEnum):
|
|
21
|
+
"""Logical conditions for combining filters."""
|
|
22
|
+
AND: str
|
|
23
|
+
OR: str
|
|
24
|
+
NOT: str
|
|
25
|
+
|
|
26
|
+
class FilterClause(BaseModel):
    """Single filter criterion with operator support.

    Examples:
        ```python
        FilterClause(key="metadata.age", value=25, operator=FilterOperator.GT)
        FilterClause(key="metadata.status", value=["active", "pending"], operator=FilterOperator.IN)
        ```

    Attributes:
        key (str): The field path to filter on (supports dot notation for nested fields).
        value (bool | int | float | str | list[str] | list[float] | list[int] | list[bool] | None):
            The value to compare against.
        operator (FilterOperator): The comparison operator.
    """

    key: str
    value: bool | int | float | str | list[str] | list[float] | list[int] | list[bool] | None
    operator: FilterOperator

    def to_query_filter(self) -> QueryFilter:
        """Convert FilterClause to QueryFilter.

        This method enables automatic conversion of FilterClause to QueryFilter.

        Example:
            ```python
            clause = FilterClause(key="metadata.status", value="active", operator=FilterOperator.EQ)
            query_filter = clause.to_query_filter()
            # Results in: QueryFilter(filters=[clause], condition=FilterCondition.AND)
            ```

        Returns:
            QueryFilter: A QueryFilter wrapping this FilterClause with AND condition.
        """
|
|
59
|
+
|
|
60
|
+
class QueryFilter(BaseModel):
|
|
61
|
+
'''Composite filter supporting multiple conditions and logical operators.
|
|
62
|
+
|
|
63
|
+
Attributes:
|
|
64
|
+
filters (list[FilterClause | QueryFilter]): List of filters to combine.
|
|
65
|
+
Can include nested QueryFilter instances for complex logic.
|
|
66
|
+
condition (FilterCondition): Logical operator to combine filters. Defaults to AND.
|
|
67
|
+
|
|
68
|
+
Examples:
|
|
69
|
+
1. Simple AND: age > 25 AND status == "active"
|
|
70
|
+
```python
|
|
71
|
+
QueryFilter(
|
|
72
|
+
filters=[
|
|
73
|
+
FilterClause(key="metadata.age", value=25, operator=FilterOperator.GT),
|
|
74
|
+
FilterClause(key="metadata.status", value="active", operator=FilterOperator.EQ)
|
|
75
|
+
],
|
|
76
|
+
condition=FilterCondition.AND
|
|
77
|
+
)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
2. Complex OR: (status == "active" OR status == "pending") AND age >= 18
|
|
81
|
+
```python
|
|
82
|
+
QueryFilter(
|
|
83
|
+
filters=[
|
|
84
|
+
QueryFilter(
|
|
85
|
+
filters=[
|
|
86
|
+
FilterClause(key="metadata.status", value="active"),
|
|
87
|
+
FilterClause(key="metadata.status", value="pending")
|
|
88
|
+
],
|
|
89
|
+
condition=FilterCondition.OR
|
|
90
|
+
),
|
|
91
|
+
FilterClause(key="metadata.age", value=18, operator=FilterOperator.GTE)
|
|
92
|
+
],
|
|
93
|
+
condition=FilterCondition.AND
|
|
94
|
+
)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
3. NOT: NOT (status == "deleted")
|
|
98
|
+
```python
|
|
99
|
+
QueryFilter(
|
|
100
|
+
filters=[
|
|
101
|
+
FilterClause(key="metadata.status", value="deleted")
|
|
102
|
+
],
|
|
103
|
+
condition=FilterCondition.NOT
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
'''
|
|
107
|
+
filters: list[FilterClause | QueryFilter]
|
|
108
|
+
condition: FilterCondition
|
|
109
|
+
@classmethod
|
|
110
|
+
def from_dicts(cls, filter_dicts: list[dict[str, Any]], condition: FilterCondition = ...) -> QueryFilter:
|
|
111
|
+
'''Create QueryFilter from list of filter dictionaries.
|
|
112
|
+
|
|
113
|
+
Example:
|
|
114
|
+
```python
|
|
115
|
+
QueryFilter.from_dicts(
|
|
116
|
+
[
|
|
117
|
+
{"key": "metadata.age", "value": 25, "operator": ">"},
|
|
118
|
+
{"key": "metadata.status", "value": "active"}
|
|
119
|
+
],
|
|
120
|
+
condition=FilterCondition.AND
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Args:
|
|
125
|
+
filter_dicts (list[dict[str, Any]]): List of filter dictionaries. Each dictionary contains the key, the value, and an operator (which may be omitted, as in the example above).
|
|
126
|
+
condition (FilterCondition, optional): Logical operator to combine filters. Defaults to AND.
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
QueryFilter: Composite filter instance.
|
|
130
|
+
'''
|
|
131
|
+
|
|
132
|
+
class QueryOptions(BaseModel):
|
|
133
|
+
'''Model for query options.
|
|
134
|
+
|
|
135
|
+
Attributes:
|
|
136
|
+
include_fields (Sequence[str] | None): The fields to include in the query result. Defaults to None.
|
|
137
|
+
order_by (str | None): The column to order the query result by. Defaults to None.
|
|
138
|
+
order_desc (bool): Whether to order the query result in descending order. Defaults to False.
|
|
139
|
+
limit (int | None): The maximum number of rows to return. Must be >= 0. Defaults to None.
|
|
140
|
+
|
|
141
|
+
Example:
|
|
142
|
+
```python
|
|
143
|
+
QueryOptions(include_fields=["field1", "field2"], order_by="column1", order_desc=True, limit=10)
|
|
144
|
+
```
|
|
145
|
+
'''
|
|
146
|
+
include_fields: Sequence[str] | None
|
|
147
|
+
order_by: str | None
|
|
148
|
+
order_desc: bool
|
|
149
|
+
limit: int | None
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
from gllm_datastore.data_store.chroma import ChromaDataStore as ChromaDataStore
|
|
2
|
+
from gllm_datastore.data_store.elasticsearch import ElasticsearchDataStore as ElasticsearchDataStore
|
|
3
|
+
from gllm_datastore.data_store.exceptions import NotRegisteredException as NotRegisteredException, NotSupportedException as NotSupportedException
|
|
4
|
+
from gllm_datastore.data_store.in_memory import InMemoryDataStore as InMemoryDataStore
|
|
5
|
+
from gllm_datastore.data_store.redis import RedisDataStore as RedisDataStore
|
|
6
|
+
|
|
7
|
+
__all__ = ['ChromaDataStore', 'ElasticsearchDataStore', 'InMemoryDataStore', 'NotRegisteredException', 'NotSupportedException', 'RedisDataStore']
|