gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gllm-datastore-binary might be problematic. Click here for more details.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +5 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +7 -0
- gllm_datastore/data_store/base.pyi +138 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +202 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +154 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-311-darwin.so +0 -0
- gllm_datastore.pyi +156 -0
- gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
- gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
- gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from gllm_core.schema.chunk import Chunk
|
|
2
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
3
|
+
from gllm_datastore.data_store.in_memory.query import create_updated_chunk as create_updated_chunk, delete_chunks_by_filters as delete_chunks_by_filters, get_chunks_from_store as get_chunks_from_store
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
class InMemoryFulltextCapability:
|
|
7
|
+
"""In-memory implementation of FulltextCapability protocol.
|
|
8
|
+
|
|
9
|
+
This class provides document CRUD operations and flexible querying using pure
|
|
10
|
+
Python data structures optimized for development and testing.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
store (dict[str, Chunk]): Dictionary storing Chunk objects with their IDs as keys.
|
|
14
|
+
"""
|
|
15
|
+
store: dict[str, Chunk]
|
|
16
|
+
def __init__(self, store: dict[str, Any] | None = None) -> None:
|
|
17
|
+
"""Initialize the in-memory fulltext capability.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
store (dict[str, Any] | None, optional): Dictionary storing Chunk objects with their IDs as keys.
|
|
21
|
+
Defaults to None.
|
|
22
|
+
"""
|
|
23
|
+
async def create(self, data: Chunk | list[Chunk]) -> None:
|
|
24
|
+
'''Create new records in the datastore.
|
|
25
|
+
|
|
26
|
+
Examples:
|
|
27
|
+
Create a new chunk.
|
|
28
|
+
```python
|
|
29
|
+
await fulltext_capability.create(Chunk(content="Test chunk", metadata={"category": "test"}))
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
data (Chunk | list[Chunk]): Data to create (single item or collection).
|
|
34
|
+
|
|
35
|
+
Raises:
|
|
36
|
+
ValueError: If data structure is invalid.
|
|
37
|
+
'''
|
|
38
|
+
async def retrieve(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
39
|
+
'''Read records from the datastore with optional filtering.
|
|
40
|
+
|
|
41
|
+
Usage Example:
|
|
42
|
+
```python
|
|
43
|
+
from gllm_datastore.core.filters import filter as F
|
|
44
|
+
|
|
45
|
+
# Direct FilterClause usage
|
|
46
|
+
results = await fulltext_capability.retrieve(filters=F.eq("metadata.category", "tech"))
|
|
47
|
+
|
|
48
|
+
# Multiple filters
|
|
49
|
+
results = await fulltext_capability.retrieve(
|
|
50
|
+
filters=F.and_(F.eq("metadata.category", "tech"), F.eq("metadata.status", "active"))
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
|
|
56
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
57
|
+
Defaults to None.
|
|
58
|
+
options (QueryOptions | None, optional): Query options for sorting and pagination. Defaults to None.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
list[Chunk]: List of matched chunks after applying filters and options.
|
|
62
|
+
'''
|
|
63
|
+
async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
64
|
+
"""Find records that fuzzy match the query within distance threshold.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
query (str): Text to fuzzy match against.
|
|
68
|
+
max_distance (int, optional): Maximum edit distance for matches. Defaults to 2.
|
|
69
|
+
filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
|
|
70
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
71
|
+
Defaults to None.
|
|
72
|
+
options (QueryOptions | None, optional): Query options, only limit is used here. Defaults to None.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
list[Chunk]: Matched chunks ordered by distance (ascending), limited by options.limit.
|
|
76
|
+
"""
|
|
77
|
+
async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
|
|
78
|
+
'''Update existing records in the datastore.
|
|
79
|
+
|
|
80
|
+
Examples:
|
|
81
|
+
Update certain metadata of a chunk with specific filters.
|
|
82
|
+
```python
|
|
83
|
+
from gllm_datastore.core.filters import filter as F
|
|
84
|
+
|
|
85
|
+
# Direct FilterClause usage
|
|
86
|
+
await fulltext_capability.update(
|
|
87
|
+
update_values={"metadata": {"status": "published"}},
|
|
88
|
+
filters=F.eq("metadata.category", "tech"),
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Multiple filters
|
|
92
|
+
await fulltext_capability.update(
|
|
93
|
+
update_values={"metadata": {"status": "published"}},
|
|
94
|
+
filters=F.and_(F.eq("metadata.status", "draft"), F.eq("metadata.category", "tech")),
|
|
95
|
+
)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
update_values (dict[str, Any]): Mapping of fields to new values to apply.
|
|
100
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
|
|
101
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
102
|
+
Defaults to None.
|
|
103
|
+
'''
|
|
104
|
+
async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
|
|
105
|
+
'''Delete records from the datastore.
|
|
106
|
+
|
|
107
|
+
Usage Example:
|
|
108
|
+
```python
|
|
109
|
+
from gllm_datastore.core.filters import filter as F
|
|
110
|
+
|
|
111
|
+
# Direct FilterClause usage
|
|
112
|
+
await fulltext_capability.delete(filters=F.eq("metadata.category", "tech"))
|
|
113
|
+
|
|
114
|
+
# Multiple filters
|
|
115
|
+
await fulltext_capability.delete(
|
|
116
|
+
filters=F.and_(F.eq("metadata.category", "tech"), F.eq("metadata.status", "draft"))
|
|
117
|
+
)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
|
|
122
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
123
|
+
Defaults to None.
|
|
124
|
+
options (QueryOptions | None, optional): Query options for sorting and limiting deletions
|
|
125
|
+
(for eviction-like operations). Defaults to None.
|
|
126
|
+
|
|
127
|
+
Returns:
|
|
128
|
+
None: This method performs deletions in-place.
|
|
129
|
+
'''
|
|
130
|
+
async def clear(self) -> None:
|
|
131
|
+
"""Clear all records from the datastore."""
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from gllm_core.schema.chunk import Chunk
|
|
2
|
+
from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS
|
|
3
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
4
|
+
from gllm_inference.schema import Vector
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
def apply_filters(chunks: list[Chunk], filters: FilterClause | QueryFilter) -> list[Chunk]:
|
|
8
|
+
'''Apply filters to chunks.
|
|
9
|
+
|
|
10
|
+
Usage Example:
|
|
11
|
+
```python
|
|
12
|
+
from gllm_datastore.core.filters import filter as F
|
|
13
|
+
|
|
14
|
+
chunks = [
|
|
15
|
+
Chunk(id="1", content="Chunk 1", metadata={"category": "test"}),
|
|
16
|
+
Chunk(id="2", content="Chunk 2", metadata={"category": "test"}),
|
|
17
|
+
Chunk(id="3", content="Chunk 3", metadata={"category": "test"}),
|
|
18
|
+
]
|
|
19
|
+
# Direct FilterClause usage
|
|
20
|
+
filters = F.eq("metadata.category", "test")
|
|
21
|
+
filtered_chunks = apply_filters(chunks, filters)
|
|
22
|
+
|
|
23
|
+
# Multiple filters
|
|
24
|
+
filters = F.and_(F.eq("metadata.category", "test"), F.eq("metadata.status", "active"))
|
|
25
|
+
filtered_chunks = apply_filters(chunks, filters)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
chunks (list[Chunk]): List of chunks to filter.
|
|
30
|
+
filters (FilterClause | QueryFilter): Filter criteria to apply.
|
|
31
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
list[Chunk]: Filtered list of chunks.
|
|
35
|
+
'''
|
|
36
|
+
def apply_options(chunks: list[Chunk], options: QueryOptions) -> list[Chunk]:
|
|
37
|
+
"""Apply query options (sorting, pagination).
|
|
38
|
+
|
|
39
|
+
Note: columns filtering is not applicable to Chunk objects since they have a fixed structure
|
|
40
|
+
and we can only filter on id, content, score, and metadata.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
chunks (list[Chunk]): List of chunks to process.
|
|
44
|
+
options (QueryOptions): Query options to apply.
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
list[Chunk]: Processed list of chunks.
|
|
48
|
+
"""
|
|
49
|
+
def get_nested_value(obj: dict[str, Any], key_path: str) -> Any:
|
|
50
|
+
'''Get a nested value from a dictionary using dot notation.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
obj (dict[str, Any]): Dictionary to traverse.
|
|
54
|
+
key_path (str): Dot-separated path to the value (e.g., "user.profile.name").
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
Any: The value at the specified path, or None if not found.
|
|
58
|
+
'''
|
|
59
|
+
def get_sort_value(chunk: Chunk, order_by: str) -> Any:
|
|
60
|
+
"""Get the value to sort by.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
chunk (Chunk): Chunk to get the value from.
|
|
64
|
+
order_by (str): The field to sort by.
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
Any: The value to sort by.
|
|
68
|
+
"""
|
|
69
|
+
def validate_cache_key(key: str) -> None:
|
|
70
|
+
"""Validate cache key format and content.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
key (str): Cache key to validate.
|
|
74
|
+
|
|
75
|
+
Raises:
|
|
76
|
+
TypeError: If key is not a string.
|
|
77
|
+
ValueError: If key is empty or whitespace-only.
|
|
78
|
+
"""
|
|
79
|
+
def get_chunks_from_store(store: dict[str, Chunk], filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
80
|
+
"""Get chunks from a store as a list with optional filters and options.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
store (dict[str, Chunk]): Store containing chunks.
|
|
84
|
+
filters (QueryFilter | None, optional): Filter criteria to apply. Defaults to None.
|
|
85
|
+
options (QueryOptions | None, optional): Query options to apply. Defaults to None.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
list[Chunk]: List of all chunks in the store.
|
|
89
|
+
"""
|
|
90
|
+
def apply_filters_and_options(chunks: list[Chunk], filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
91
|
+
"""Apply filters and options to a list of chunks.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
chunks (list[Chunk]): List of chunks to process.
|
|
95
|
+
filters (QueryFilter | None, optional): Filter criteria to apply. Defaults to None.
|
|
96
|
+
options (QueryOptions | None, optional): Query options to apply. Defaults to None.
|
|
97
|
+
|
|
98
|
+
Returns:
|
|
99
|
+
list[Chunk]: Processed list of chunks.
|
|
100
|
+
"""
|
|
101
|
+
def create_updated_chunk(existing_chunk: Chunk, update_values: dict[str, Any]) -> Chunk:
|
|
102
|
+
"""Create an updated chunk with new values.
|
|
103
|
+
|
|
104
|
+
Args:
|
|
105
|
+
existing_chunk (Chunk): The existing chunk to update.
|
|
106
|
+
update_values (dict[str, Any]): Values to update.
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
Chunk: Updated chunk with new values.
|
|
110
|
+
"""
|
|
111
|
+
def delete_chunks_by_filters(store: dict[str, Chunk], filters: QueryFilter | None = None) -> int:
|
|
112
|
+
"""Delete chunks from store based on filters.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
store (dict[str, Chunk]): Store containing chunks.
|
|
116
|
+
filters (QueryFilter | None, optional): Filters to select chunks to delete. Defaults to None.
|
|
117
|
+
|
|
118
|
+
Returns:
|
|
119
|
+
int: Number of chunks deleted.
|
|
120
|
+
"""
|
|
121
|
+
def find_matching_chunk_ids(store: dict[str, Chunk], filters: QueryFilter) -> list[str]:
|
|
122
|
+
"""Find chunk IDs that match the given filters.
|
|
123
|
+
|
|
124
|
+
Args:
|
|
125
|
+
store (dict[str, Chunk]): Store containing chunks.
|
|
126
|
+
filters (QueryFilter): The filters to apply.
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
list[str]: List of chunk IDs that match the filters.
|
|
130
|
+
"""
|
|
131
|
+
def similarity_search(query_vector: Vector, store: dict[str, Chunk], filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
132
|
+
"""Retrieve chunks by vector similarity from a store.
|
|
133
|
+
|
|
134
|
+
This method will only return chunks that have a vector in their metadata.
|
|
135
|
+
It will also apply the filters and options to the chunks.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
query_vector (Vector): Query embedding vector.
|
|
139
|
+
store (dict[str, Chunk]): Store containing chunks.
|
|
140
|
+
filters (QueryFilter | None): Query filters to apply.
|
|
141
|
+
options (QueryOptions | None, optional): Query options to apply.
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
list[Chunk]: List of chunks ordered by similarity score.
|
|
145
|
+
"""
|
|
146
|
+
def evaluate_filter(chunk: Chunk, filters: QueryFilter) -> bool:
|
|
147
|
+
'''Evaluate if a chunk matches the given filters.
|
|
148
|
+
|
|
149
|
+
Examples:
|
|
150
|
+
```python
|
|
151
|
+
from gllm_datastore.core.filters import filter as F
|
|
152
|
+
|
|
153
|
+
# Simple filter
|
|
154
|
+
filters = F.and_(F.eq("metadata.category", "tech"))
|
|
155
|
+
result = evaluate_filter(chunk, filters)
|
|
156
|
+
|
|
157
|
+
# Complex nested filter
|
|
158
|
+
filters = F.and_(
|
|
159
|
+
F.gte("metadata.price", 10),
|
|
160
|
+
F.lte("metadata.price", 100),
|
|
161
|
+
F.or_(
|
|
162
|
+
F.eq("metadata.status", "active"),
|
|
163
|
+
F.eq("metadata.status", "pending")
|
|
164
|
+
)
|
|
165
|
+
)
|
|
166
|
+
result = evaluate_filter(chunk, filters)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
chunk (Chunk): The chunk to evaluate.
|
|
171
|
+
filters (QueryFilter): The filters to apply.
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
bool: True if the chunk matches all filters, False otherwise.
|
|
175
|
+
'''
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
from gllm_core.schema.chunk import Chunk
|
|
2
|
+
from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS
|
|
3
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
4
|
+
from gllm_datastore.data_store.in_memory.query import create_updated_chunk as create_updated_chunk, delete_chunks_by_filters as delete_chunks_by_filters, get_chunks_from_store as get_chunks_from_store, similarity_search as similarity_search
|
|
5
|
+
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
|
|
6
|
+
from gllm_inference.schema import Vector
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
class InMemoryVectorCapability:
|
|
10
|
+
"""In-memory implementation of VectorCapability protocol.
|
|
11
|
+
|
|
12
|
+
This class provides vector similarity search operations using pure Python
|
|
13
|
+
data structures optimized for development and testing.
|
|
14
|
+
|
|
15
|
+
Attributes:
|
|
16
|
+
store (dict[str, Chunk]): Dictionary storing Chunk objects with their IDs as keys.
|
|
17
|
+
em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
|
|
18
|
+
"""
|
|
19
|
+
store: dict[str, Chunk]
|
|
20
|
+
def __init__(self, em_invoker: BaseEMInvoker, store: dict[str, Any] | None = None) -> None:
|
|
21
|
+
"""Initialize the in-memory vector capability.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
em_invoker (BaseEMInvoker): em_invoker model for text-to-vector conversion.
|
|
25
|
+
store (dict[str, Any] | None, optional): Dictionary storing Chunk objects with their IDs as keys.
|
|
26
|
+
Defaults to None.
|
|
27
|
+
"""
|
|
28
|
+
@property
|
|
29
|
+
def em_invoker(self) -> BaseEMInvoker:
|
|
30
|
+
"""Returns the EM Invoker instance.
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
BaseEMInvoker: The EM Invoker instance.
|
|
34
|
+
"""
|
|
35
|
+
async def ensure_index(self) -> None:
|
|
36
|
+
"""Ensure in-memory vector store exists, initializing it if necessary.
|
|
37
|
+
|
|
38
|
+
This method is idempotent - if the store already exists, it will skip
|
|
39
|
+
initialization and return early.
|
|
40
|
+
"""
|
|
41
|
+
async def create(self, data: Chunk | list[Chunk]) -> None:
|
|
42
|
+
"""Add chunks to the vector store with automatic embedding generation.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
data (Chunk | list[Chunk]): Single chunk or list of chunks to add.
|
|
46
|
+
"""
|
|
47
|
+
async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]]) -> None:
|
|
48
|
+
"""Add pre-computed vectors directly.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
|
|
52
|
+
corresponding vectors.
|
|
53
|
+
"""
|
|
54
|
+
async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
55
|
+
'''Read records from the datastore using text-based similarity search with optional filtering.
|
|
56
|
+
|
|
57
|
+
Usage Example:
|
|
58
|
+
```python
|
|
59
|
+
from gllm_datastore.core.filters import filter as F
|
|
60
|
+
|
|
61
|
+
# Direct FilterClause usage
|
|
62
|
+
await vector_capability.retrieve(
|
|
63
|
+
query="What is the capital of France?",
|
|
64
|
+
filters=F.eq("metadata.category", "tech"),
|
|
65
|
+
options=QueryOptions(limit=2),
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Multiple filters
|
|
69
|
+
filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
|
|
70
|
+
await vector_capability.retrieve(
|
|
71
|
+
query="What is the capital of France?",
|
|
72
|
+
filters=filters,
|
|
73
|
+
options=QueryOptions(limit=2),
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
This will retrieve the top 2 chunks by similarity score from the vector store
|
|
77
|
+
that match the query and the filters. The chunks will be sorted by score in descending order.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
query (str): Input text to embed and search with.
|
|
81
|
+
filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
|
|
82
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
83
|
+
Defaults to None.
|
|
84
|
+
options (QueryOptions | None, optional): Query options like limit and sorting.
|
|
85
|
+
Defaults to None, in which case, no sorting is applied and top 10 chunks are returned.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
list[Chunk]: Top ranked chunks by similarity score.
|
|
89
|
+
'''
|
|
90
|
+
async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
91
|
+
"""Direct vector similarity search.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
vector (Vector): Query embedding vector.
|
|
95
|
+
filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
|
|
96
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
97
|
+
Defaults to None.
|
|
98
|
+
options (QueryOptions | None, optional): Query options like limit and sorting.
|
|
99
|
+
Defaults to None, in which case, no sorting is applied and top 10 chunks are returned.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
list[Chunk]: List of chunks ordered by similarity score.
|
|
103
|
+
"""
|
|
104
|
+
async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
|
|
105
|
+
'''Update existing records in the datastore.
|
|
106
|
+
|
|
107
|
+
Examples:
|
|
108
|
+
1. Update certain metadata of a chunk with specific filters.
|
|
109
|
+
```python
|
|
110
|
+
from gllm_datastore.core.filters import filter as F
|
|
111
|
+
|
|
112
|
+
# Direct FilterClause usage
|
|
113
|
+
await vector_capability.update(
|
|
114
|
+
update_values={"metadata": {"status": "published"}},
|
|
115
|
+
filters=F.eq("metadata.category", "tech"),
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# Multiple filters
|
|
119
|
+
await vector_capability.update(
|
|
120
|
+
update_values={"metadata": {"status": "published"}},
|
|
121
|
+
filters=F.and_(F.eq("metadata.status", "draft"), F.eq("metadata.category", "tech")),
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
2. Update certain content of a chunk with specific id.
|
|
126
|
+
This will also regenerate the vector of the chunk.
|
|
127
|
+
```python
|
|
128
|
+
# Direct FilterClause usage
|
|
129
|
+
await vector_capability.update(
|
|
130
|
+
update_values={"content": "new_content"},
|
|
131
|
+
filters=F.eq("id", "unique_id"),
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
# Multiple filters
|
|
135
|
+
await vector_capability.update(
|
|
136
|
+
update_values={"content": "new_content"},
|
|
137
|
+
filters=F.and_(F.eq("id", "unique_id"), F.eq("metadata.category", "tech")),
|
|
138
|
+
)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
update_values (dict[str, Any]): Values to update.
|
|
143
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
|
|
144
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
145
|
+
Defaults to None, in which case no operation is performed (no-op).
|
|
146
|
+
**kwargs: Datastore-specific parameters.
|
|
147
|
+
|
|
148
|
+
Raises:
|
|
149
|
+
ValueError: If content is empty.
|
|
150
|
+
'''
|
|
151
|
+
async def delete(self, filters: FilterClause | QueryFilter | None = None) -> None:
|
|
152
|
+
'''Delete records from the datastore.
|
|
153
|
+
|
|
154
|
+
Usage Example:
|
|
155
|
+
```python
|
|
156
|
+
from gllm_datastore.core.filters import filter as F
|
|
157
|
+
|
|
158
|
+
# Direct FilterClause usage
|
|
159
|
+
await vector_capability.delete(filters=F.eq("metadata.category", "AI"))
|
|
160
|
+
|
|
161
|
+
# Multiple filters
|
|
162
|
+
await vector_capability.delete(
|
|
163
|
+
filters=F.and_(F.eq("metadata.category", "AI"), F.eq("metadata.status", "published")),
|
|
164
|
+
)
|
|
165
|
+
```
|
|
166
|
+
This will delete all chunks from the vector store that match the filters.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
|
|
170
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
171
|
+
Defaults to None, in which case no operation is performed (no-op).
|
|
172
|
+
'''
|
|
173
|
+
async def clear(self) -> None:
|
|
174
|
+
"""Clear all vectors from the store."""
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
from gllm_datastore.data_store.redis.data_store import RedisDataStore as RedisDataStore
|
|
2
|
+
from gllm_datastore.data_store.redis.fulltext import RedisFulltextCapability as RedisFulltextCapability
|
|
3
|
+
from gllm_datastore.data_store.redis.vector import RedisVectorCapability as RedisVectorCapability
|
|
4
|
+
|
|
5
|
+
__all__ = ['RedisDataStore', 'RedisFulltextCapability', 'RedisVectorCapability']
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
|
|
3
|
+
from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
|
|
4
|
+
from gllm_datastore.data_store.redis.fulltext import RedisFulltextCapability as RedisFulltextCapability
|
|
5
|
+
from gllm_datastore.data_store.redis.query import get_filterable_fields_from_index as get_filterable_fields_from_index
|
|
6
|
+
from gllm_datastore.data_store.redis.query_translator import RedisQueryTranslator as RedisQueryTranslator
|
|
7
|
+
from gllm_datastore.data_store.redis.vector import RedisVectorCapability as RedisVectorCapability
|
|
8
|
+
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
|
|
9
|
+
from redis.asyncio.client import Redis
|
|
10
|
+
|
|
11
|
+
class RedisDataStore(BaseDataStore):
    """Redis data store with fulltext and vector capability support.

    Attributes:
        index_name (str): Name for the Redis index.
        client (Redis): Redis client instance.
    """
    # Async Redis client; built from `url` when no client is injected (see __init__).
    client: Redis
    # Default index name, used by capability methods when their index_name argument is None.
    index_name: str
    def __init__(self, index_name: str, url: str | None = None, client: Redis | None = None) -> None:
        """Initialize the Redis data store.

        Args:
            index_name (str): Name of the Redis index to use.
            url (str | None, optional): URL for Redis connection. Defaults to None.
                Format: redis://[[username]:[password]]@host:port/database
            client (Redis | None, optional): Redis client instance to use. Defaults to None,
                in which case the url parameter will be used to create a new Redis client.

        Raises:
            ValueError: If neither `url` nor `client` is provided, or if URL is invalid.
            TypeError: If `client` is not a Redis instance.
            ConnectionError: If Redis connection fails.
        """
    @property
    def supported_capabilities(self) -> list[CapabilityType]:
        """Return list of currently supported capabilities.

        Returns:
            list[CapabilityType]: List of capability names that are supported.
        """
    @property
    def fulltext(self) -> RedisFulltextCapability:
        """Access fulltext capability if registered.

        This method uses the logic of its parent class to return the fulltext capability handler.
        This method overrides the parent class to return the RedisFulltextCapability handler for better
        type hinting.

        Returns:
            RedisFulltextCapability: Fulltext capability handler.

        Raises:
            NotRegisteredException: If fulltext capability is not registered.
        """
    @property
    def vector(self) -> RedisVectorCapability:
        """Access vector capability if registered.

        This method uses the logic of its parent class to return the vector capability handler.
        This method overrides the parent class to return the RedisVectorCapability handler for better
        type hinting.

        Returns:
            RedisVectorCapability: Vector capability handler.

        Raises:
            NotRegisteredException: If vector capability is not registered.
        """
    def with_fulltext(self, index_name: str | None = None) -> RedisDataStore:
        """Configure fulltext capability and return datastore instance.

        Schema will be automatically inferred from chunks when creating a new index,
        or auto-detected from an existing index when performing operations.

        Args:
            index_name (str | None, optional): The name of the Redis index. Defaults to None,
                in which case the default class attribute will be utilized.

        Returns:
            RedisDataStore: RedisDataStore instance for method chaining.
        """
    def with_vector(self, em_invoker: BaseEMInvoker, index_name: str | None = None) -> RedisDataStore:
        """Configure vector capability and return datastore instance.

        Schema will be automatically inferred from chunks when creating a new index,
        or auto-detected from an existing index when performing operations.

        Args:
            em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
            index_name (str | None, optional): The name of the Redis index. Defaults to None,
                in which case the default class attribute will be utilized.

        Returns:
            RedisDataStore: RedisDataStore instance for method chaining.

        Raises:
            ValueError: If em_invoker is not provided.
        """
    def translate_query_filter(self, query_filter: FilterClause | QueryFilter | None = None) -> str | None:
        """Translate QueryFilter or FilterClause to Redis native filter syntax.

        This method delegates to the existing RedisQueryTranslator in the
        redis.query_translator module and returns the result as a string.
        It uses the instance's index_name and client to detect field types
        for accurate Redis Search query syntax.

        Examples:
            ```python
            from gllm_datastore.core.filters import filter as F

            # Create datastore instance
            datastore = RedisDataStore(index_name="my_index", url="redis://localhost:6379")

            # Single FilterClause (field types detected from index schema)
            clause = F.eq("metadata.status", "active")
            result = datastore.translate_query_filter(clause)
            # Returns: "@metadata_status:{active}" if status is a TAG field
            # Returns: "@metadata_status:active" if status is a TEXT field

            # QueryFilter with multiple clauses (AND condition)
            filter_obj = F.and_(
                F.eq("metadata.status", "active"),
                F.gt("metadata.age", 25),
            )
            result = datastore.translate_query_filter(filter_obj)
            # Returns: "@metadata_status:{active} @metadata_age:[(25 +inf]"

            # QueryFilter with OR condition
            filter_obj = F.or_(
                F.eq("metadata.status", "active"),
                F.eq("metadata.status", "pending"),
            )
            result = datastore.translate_query_filter(filter_obj)
            # Returns: "@metadata_status:{active} | @metadata_status:{pending}"

            # IN operator (produces parentheses syntax)
            filter_obj = F.in_("metadata.category", ["tech", "science"])
            result = datastore.translate_query_filter(filter_obj)
            # Returns: "@metadata_category:(tech|science)"

            # Empty filter returns None
            result = datastore.translate_query_filter(None)
            # Returns: None
            ```

        Args:
            query_filter (FilterClause | QueryFilter | None, optional): The filter to translate.
                Can be a single FilterClause, a QueryFilter with multiple clauses. Defaults to None.

        Returns:
            str | None: The translated filter as a Redis Search query string.
                Returns None if no filter is provided.
        """
|