gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +8 -0
- gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
- gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
- gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
- gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
- gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
- gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
- gllm_datastore/data_store/base.pyi +176 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +201 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
- gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
- gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
- gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
- gllm_datastore/data_store/opensearch/query.pyi +89 -0
- gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
- gllm_datastore/data_store/opensearch/vector.pyi +211 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +153 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/data_store/sql/__init__.pyi +4 -0
- gllm_datastore/data_store/sql/constants.pyi +5 -0
- gllm_datastore/data_store/sql/data_store.pyi +201 -0
- gllm_datastore/data_store/sql/fulltext.pyi +164 -0
- gllm_datastore/data_store/sql/query.pyi +81 -0
- gllm_datastore/data_store/sql/query_translator.pyi +51 -0
- gllm_datastore/data_store/sql/schema.pyi +16 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/capability/__init__.pyi +3 -0
- gllm_datastore/encryptor/capability/mixin.pyi +32 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
- gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/schema.pyi +27 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/signature/__init__.pyi +0 -0
- gllm_datastore/signature/webhook_signature.pyi +31 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-312-darwin.so +0 -0
- gllm_datastore.pyi +178 -0
- gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
- gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
- gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter
|
|
3
|
+
from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
|
|
4
|
+
from gllm_datastore.data_store.sql.fulltext import SQLFulltextCapability as SQLFulltextCapability
|
|
5
|
+
from gllm_datastore.data_store.sql.query_translator import SQLQueryTranslator as SQLQueryTranslator
|
|
6
|
+
from gllm_datastore.data_store.sql.schema import Base as Base
|
|
7
|
+
from sqlalchemy import Table as Table, URL
|
|
8
|
+
from sqlalchemy.ext.asyncio import AsyncEngine
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
class SQLDataStore(BaseDataStore):
|
|
12
|
+
'''SQL data store with multiple capability support using async SQLAlchemy.
|
|
13
|
+
|
|
14
|
+
This data store follows the "one instance = one table" pattern. Each instance
|
|
15
|
+
operates on a single table specified at construction time. To work with multiple
|
|
16
|
+
tables, create multiple instances sharing the same engine.
|
|
17
|
+
|
|
18
|
+
Attributes:
|
|
19
|
+
engine (AsyncEngine): SQLAlchemy async engine instance. Can be shared across
|
|
20
|
+
multiple SQLDataStore instances for different tables, all sharing a single connection pool.
|
|
21
|
+
table_name (str): Name of the table this instance operates on. This is immutable
|
|
22
|
+
after construction and defines the scope of all operations.
|
|
23
|
+
'''
|
|
24
|
+
engine: Incomplete
|
|
25
|
+
table_name: Incomplete
|
|
26
|
+
def __init__(self, engine_or_url: AsyncEngine | str | URL, pool_size: int = 10, max_overflow: int = 10, table_name: str = 'chunks', **engine_kwargs: Any) -> None:
|
|
27
|
+
'''Initialize the SQL data store with async support.
|
|
28
|
+
|
|
29
|
+
This creates a data store instance scoped to a single table. Each instance
|
|
30
|
+
operates exclusively on the table specified by `table_name`. To work with
|
|
31
|
+
multiple tables, create multiple instances sharing the same engine.
|
|
32
|
+
|
|
33
|
+
Examples:
|
|
34
|
+
```python
|
|
35
|
+
# Single table usage
|
|
36
|
+
datastore = SQLDataStore(
|
|
37
|
+
engine_or_url="postgresql+asyncpg://user:pass@localhost/mydb",
|
|
38
|
+
table_name="chunks"
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Multiple tables with shared engine (recommended pattern)
|
|
42
|
+
# Both stores share the same connection pool
|
|
43
|
+
engine = create_async_engine("postgresql+asyncpg://user:pass@localhost/mydb")
|
|
44
|
+
chunks_store = SQLDataStore(engine, table_name="chunks")
|
|
45
|
+
users_store = SQLDataStore(engine, table_name="users")
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
engine_or_url (AsyncEngine | str | URL): AsyncEngine instance, database URL string, or URL object.
|
|
50
|
+
For async support, async drivers are automatically added if not specified:
|
|
51
|
+
1. PostgreSQL: "postgresql://..." -> "postgresql+asyncpg://..."
|
|
52
|
+
2. MySQL: "mysql://..." -> "mysql+aiomysql://..."
|
|
53
|
+
3. SQLite: "sqlite://..." -> "sqlite+aiosqlite://..."
|
|
54
|
+
If a driver is already specified (e.g., "postgresql+asyncpg://"), it is used as-is.
|
|
55
|
+
If an AsyncEngine is provided, it can be shared across multiple SQLDataStore instances.
|
|
56
|
+
pool_size (int): The size of the database connection pool. Defaults to 10.
|
|
57
|
+
Only used when creating a new engine from a URL. Ignored if AsyncEngine is provided.
|
|
58
|
+
max_overflow (int): The maximum overflow size of the pool. Defaults to 10.
|
|
59
|
+
Only used when creating a new engine from a URL. Ignored for SQLite or if AsyncEngine is provided.
|
|
60
|
+
table_name (str): Name of the table this instance will operate on. Defaults to "chunks".
|
|
61
|
+
This defines the scope of all operations for this instance and cannot be changed after construction.
|
|
62
|
+
**engine_kwargs (Any): Additional keyword arguments for create_async_engine.
|
|
63
|
+
Only used when creating a new engine from a URL.
|
|
64
|
+
|
|
65
|
+
Raises:
|
|
66
|
+
ValueError: If the database engine initialization fails or if engine_kwargs
|
|
67
|
+
contains pool-related parameters that conflict with pool_size/max_overflow.
|
|
68
|
+
'''
|
|
69
|
+
async def initialize(self) -> None:
|
|
70
|
+
'''Initialize the datastore by creating tables.
|
|
71
|
+
|
|
72
|
+
This method must be called after instantiation to set up the database schema.
|
|
73
|
+
|
|
74
|
+
Example:
|
|
75
|
+
```python
|
|
76
|
+
datastore = SQLDataStore(engine_or_url="sqlite+aiosqlite:///./data.db")
|
|
77
|
+
await datastore.initialize()
|
|
78
|
+
datastore.with_fulltext()
|
|
79
|
+
```
|
|
80
|
+
'''
|
|
81
|
+
async def close(self) -> None:
|
|
82
|
+
"""Close the database engine and clean up connections.
|
|
83
|
+
|
|
84
|
+
Example:
|
|
85
|
+
```python
|
|
86
|
+
await datastore.close()
|
|
87
|
+
```
|
|
88
|
+
"""
|
|
89
|
+
@property
|
|
90
|
+
def supported_capabilities(self) -> list[CapabilityType]:
|
|
91
|
+
"""Return list of currently supported capabilities.
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
list[CapabilityType]: List of capability names that are supported.
|
|
95
|
+
"""
|
|
96
|
+
@property
|
|
97
|
+
def fulltext(self) -> SQLFulltextCapability:
|
|
98
|
+
"""Access fulltext capability if registered.
|
|
99
|
+
|
|
100
|
+
This method overrides the parent class to return SQLFulltextCapability for better type hinting.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
SQLFulltextCapability: Fulltext capability handler.
|
|
104
|
+
|
|
105
|
+
Raises:
|
|
106
|
+
NotRegisteredException: If fulltext capability is not registered.
|
|
107
|
+
"""
|
|
108
|
+
def with_fulltext(self) -> SQLDataStore:
|
|
109
|
+
'''Configure fulltext capability and return datastore instance.
|
|
110
|
+
|
|
111
|
+
Examples:
|
|
112
|
+
```python
|
|
113
|
+
# Enable fulltext
|
|
114
|
+
datastore.with_fulltext()
|
|
115
|
+
|
|
116
|
+
# For multiple tables with shared engine
|
|
117
|
+
engine = create_async_engine("postgresql+asyncpg://...")
|
|
118
|
+
chunks_store = SQLDataStore(engine, table_name="chunks")
|
|
119
|
+
users_store = SQLDataStore(engine, table_name="users")
|
|
120
|
+
chunks_store.with_fulltext()
|
|
121
|
+
users_store.with_fulltext()
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
SQLDataStore: Self for method chaining.
|
|
126
|
+
'''
|
|
127
|
+
@classmethod
|
|
128
|
+
def translate_query_filter(cls, query_filter: FilterClause | QueryFilter | None = None, table_name: str = 'chunks', engine_or_url: AsyncEngine | str | URL | None = None) -> str | None:
|
|
129
|
+
'''Translate QueryFilter or FilterClause to SQL WHERE clause string.
|
|
130
|
+
|
|
131
|
+
This method delegates to the SQLQueryTranslator and returns the result as a
|
|
132
|
+
SQL WHERE clause string that can be used in SQL queries. The table structure
|
|
133
|
+
is reflected from the database using the provided engine_or_url and table_name.
|
|
134
|
+
|
|
135
|
+
Examples:
|
|
136
|
+
```python
|
|
137
|
+
from gllm_datastore.core.filters import filter as F
|
|
138
|
+
|
|
139
|
+
# With database URL string
|
|
140
|
+
clause = F.eq("id", "test")
|
|
141
|
+
result = SQLDataStore.translate_query_filter(
|
|
142
|
+
clause,
|
|
143
|
+
table_name="chunks",
|
|
144
|
+
engine_or_url="postgresql://user:pass@localhost/mydb"
|
|
145
|
+
)
|
|
146
|
+
# Returns: "chunks.id = \'test\'"
|
|
147
|
+
|
|
148
|
+
# With AsyncEngine instance
|
|
149
|
+
from sqlalchemy.ext.asyncio import create_async_engine
|
|
150
|
+
engine = create_async_engine("postgresql+asyncpg://user:pass@localhost/mydb")
|
|
151
|
+
filter_obj = F.and_(
|
|
152
|
+
F.eq("id", "test"),
|
|
153
|
+
F.gt("chunk_metadata.age", 25),
|
|
154
|
+
)
|
|
155
|
+
result = SQLDataStore.translate_query_filter(
|
|
156
|
+
filter_obj,
|
|
157
|
+
table_name="chunks",
|
|
158
|
+
engine_or_url=engine
|
|
159
|
+
)
|
|
160
|
+
# Returns: "(chunks.id = \'test\' AND json_extract(chunks.chunk_metadata, \'$.age\') > 25)"
|
|
161
|
+
|
|
162
|
+
# QueryFilter with OR condition
|
|
163
|
+
filter_obj = F.or_(
|
|
164
|
+
F.eq("id", "test1"),
|
|
165
|
+
F.eq("id", "test2"),
|
|
166
|
+
)
|
|
167
|
+
result = SQLDataStore.translate_query_filter(
|
|
168
|
+
filter_obj,
|
|
169
|
+
table_name="chunks",
|
|
170
|
+
engine_or_url="sqlite:///./data.db"
|
|
171
|
+
)
|
|
172
|
+
# Returns: "(chunks.id = \'test1\' OR chunks.id = \'test2\')"
|
|
173
|
+
|
|
174
|
+
# Empty filter returns None
|
|
175
|
+
result = SQLDataStore.translate_query_filter(
|
|
176
|
+
None,
|
|
177
|
+
table_name="chunks",
|
|
178
|
+
engine_or_url="postgresql://user:pass@localhost/mydb"
|
|
179
|
+
)
|
|
180
|
+
# Returns: None
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
query_filter (FilterClause | QueryFilter | None, optional): The filter to translate. Defaults to None.
|
|
185
|
+
Can be a single FilterClause or a QueryFilter with multiple clauses.
|
|
186
|
+
table_name (str): Name of the table to reflect from the database. Defaults to "chunks".
|
|
187
|
+
engine_or_url (AsyncEngine | str | URL | None): AsyncEngine instance, database URL string,
|
|
188
|
+
or URL object for table reflection. Required. The table structure is reflected from the database.
|
|
189
|
+
For async support, async drivers are automatically added if not specified:
|
|
190
|
+
PostgreSQL -> postgresql+asyncpg://, MySQL -> mysql+aiomysql://,
|
|
191
|
+
SQLite -> sqlite+aiosqlite://.
|
|
192
|
+
|
|
193
|
+
Raises:
|
|
194
|
+
ValueError: If engine_or_url is None.
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
str | None: The translated filter as a SQL WHERE clause string, or None if no filter is provided.
|
|
198
|
+
|
|
199
|
+
Raises:
|
|
200
|
+
RuntimeError: If table reflection fails or table is not found in database.
|
|
201
|
+
'''
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_core.schema.chunk import Chunk
|
|
3
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
4
|
+
from gllm_datastore.data_store.sql.query import execute_delete_with_filters as execute_delete_with_filters, execute_update_with_filters as execute_update_with_filters, row_to_chunk as row_to_chunk
|
|
5
|
+
from gllm_datastore.data_store.sql.query_translator import SQLQueryTranslator as SQLQueryTranslator
|
|
6
|
+
from gllm_datastore.data_store.sql.schema import ChunkModel as ChunkModel
|
|
7
|
+
from sqlalchemy.ext.asyncio import AsyncEngine
|
|
8
|
+
from sqlalchemy.orm import DeclarativeBase
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
class SQLFulltextCapability:
|
|
12
|
+
"""SQL implementation of FulltextCapability protocol using async SQLAlchemy.
|
|
13
|
+
|
|
14
|
+
This capability creates its own session factory from the engine, making it
|
|
15
|
+
self-contained and independent of the data store's session configuration.
|
|
16
|
+
|
|
17
|
+
Attributes:
|
|
18
|
+
engine (AsyncEngine): SQLAlchemy async engine instance.
|
|
19
|
+
session_factory (async_sessionmaker): Async session factory created from engine.
|
|
20
|
+
table_name (str): Name of the table this capability operates on. This is immutable
|
|
21
|
+
and defines the scope of all operations.
|
|
22
|
+
_table (Table | None): Cached SQLAlchemy Table object. Initialized lazily on first use.
|
|
23
|
+
_metadata (MetaData): SQLAlchemy metadata instance for table reflection.
|
|
24
|
+
"""
|
|
25
|
+
engine: Incomplete
|
|
26
|
+
table_name: Incomplete
|
|
27
|
+
session_factory: Incomplete
|
|
28
|
+
def __init__(self, engine: AsyncEngine, table_name: str) -> None:
|
|
29
|
+
"""Initialize the SQL fulltext capability with async support.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
engine (AsyncEngine): SQLAlchemy async engine instance.
|
|
33
|
+
table_name (str): Name of the table this capability will operate on.
|
|
34
|
+
This defines the scope of all operations and cannot be changed after initialization.
|
|
35
|
+
"""
|
|
36
|
+
async def create(self, data: Chunk | list[Chunk] | DeclarativeBase | list[DeclarativeBase]) -> None:
|
|
37
|
+
'''Create new records in the datastore.
|
|
38
|
+
|
|
39
|
+
This method accepts both Chunk and DeclarativeBase instances.
|
|
40
|
+
1. If data is a Chunk, it will be converted to a ChunkModel instance with flattened metadata.
|
|
41
|
+
2. If data is a DeclarativeBase, it will be added directly to the database.
|
|
42
|
+
|
|
43
|
+
Examples:
|
|
44
|
+
1. Create one or more records using Chunk instances.
|
|
45
|
+
```python
|
|
46
|
+
# Create a single chunk
|
|
47
|
+
chunk = Chunk(id="1", content="Test content", metadata={"source": "test"})
|
|
48
|
+
await datastore.fulltext.create(chunk)
|
|
49
|
+
|
|
50
|
+
# Bulk create
|
|
51
|
+
chunks = [
|
|
52
|
+
Chunk(id=str(i), content=f"Test content {i}", metadata={"source": "test"})
|
|
53
|
+
for i in range(10)
|
|
54
|
+
]
|
|
55
|
+
await datastore.fulltext.create(chunks)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
2. Create one or more records using declarative base model instances.
|
|
59
|
+
```python
|
|
60
|
+
# Create a single user
|
|
61
|
+
user = UserModel(id="1", name="John", email="john@example.com")
|
|
62
|
+
await datastore.fulltext.create(user)
|
|
63
|
+
|
|
64
|
+
# Bulk create
|
|
65
|
+
users = [UserModel(id=str(i), name=f"User{i}") for i in range(10)]
|
|
66
|
+
await datastore.fulltext.create(users)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
data (Chunk | list[Chunk] | DeclarativeBase | list[DeclarativeBase]):
|
|
71
|
+
Data to create (single item or collection). Chunk instances will be converted
|
|
72
|
+
to ChunkModel. DeclarativeBase instances will be added directly to the database.
|
|
73
|
+
|
|
74
|
+
Raises:
|
|
75
|
+
RuntimeError: If the creation fails.
|
|
76
|
+
'''
|
|
77
|
+
async def retrieve(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
|
|
78
|
+
'''Read records from the datastore with optional filtering.
|
|
79
|
+
|
|
80
|
+
This method operates on this capability\'s table (specified at initialization).
|
|
81
|
+
It returns Chunk objects matching the filters and options.
|
|
82
|
+
|
|
83
|
+
Examples:
|
|
84
|
+
```python
|
|
85
|
+
# Retrieve all records from the instance\'s table
|
|
86
|
+
chunks = await datastore.fulltext.retrieve()
|
|
87
|
+
|
|
88
|
+
# Retrieve with filters
|
|
89
|
+
chunks = await datastore.fulltext.retrieve(
|
|
90
|
+
filters=F.eq("status", "active")
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Retrieve with options (using CHUNK_KEYS.ID for column name)
|
|
94
|
+
chunks = await datastore.fulltext.retrieve(
|
|
95
|
+
options=QueryOptions(order_by=CHUNK_KEYS.ID, order_desc=True, limit=10)
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
|
|
101
|
+
Defaults to None.
|
|
102
|
+
options (QueryOptions | None, optional): Query options for sorting and pagination.
|
|
103
|
+
Defaults to None.
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
list[Chunk]: List of Chunk objects from this capability\'s table.
|
|
107
|
+
|
|
108
|
+
Raises:
|
|
109
|
+
RuntimeError: If the read operation fails.
|
|
110
|
+
'''
|
|
111
|
+
async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
|
|
112
|
+
'''Update existing records in the datastore.
|
|
113
|
+
|
|
114
|
+
This method operates on this capability\'s table (specified at initialization).
|
|
115
|
+
Updates records matching the filters with the provided values.
|
|
116
|
+
|
|
117
|
+
Examples:
|
|
118
|
+
```python
|
|
119
|
+
# Update all records (no filters)
|
|
120
|
+
await datastore.fulltext.update(
|
|
121
|
+
{"status": "active"}
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# Update with filters (using CHUNK_KEYS constants)
|
|
125
|
+
await datastore.fulltext.update(
|
|
126
|
+
{CHUNK_KEYS.CONTENT: "Updated content"},
|
|
127
|
+
filters=F.eq(CHUNK_KEYS.ID, "chunk_123")
|
|
128
|
+
)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
update_values (dict[str, Any]): Mapping of fields to new values to apply.
|
|
133
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
|
|
134
|
+
Defaults to None. If None, no records will be updated (safety measure).
|
|
135
|
+
|
|
136
|
+
Raises:
|
|
137
|
+
RuntimeError: If the update operation fails.
|
|
138
|
+
'''
|
|
139
|
+
async def delete(self, filters: FilterClause | QueryFilter | None = None) -> None:
|
|
140
|
+
'''Delete records from the datastore.
|
|
141
|
+
|
|
142
|
+
This method operates on this capability\'s table (specified at initialization).
|
|
143
|
+
Deletes records matching the provided filters.
|
|
144
|
+
|
|
145
|
+
Examples:
|
|
146
|
+
```python
|
|
147
|
+
# Delete with filters (using CHUNK_KEYS.ID)
|
|
148
|
+
await datastore.fulltext.delete(
|
|
149
|
+
filters=F.eq(CHUNK_KEYS.ID, "chunk_123")
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
# Delete multiple records
|
|
153
|
+
await datastore.fulltext.delete(
|
|
154
|
+
filters=F.in_("status", ["deleted", "archived"])
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
|
|
160
|
+
Defaults to None. If None, no records will be deleted (safety measure).
|
|
161
|
+
|
|
162
|
+
Raises:
|
|
163
|
+
RuntimeError: If the delete operation fails.
|
|
164
|
+
'''
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from gllm_core.schema.chunk import Chunk
|
|
2
|
+
from gllm_datastore.core.filters import QueryFilter as QueryFilter
|
|
3
|
+
from gllm_datastore.data_store.sql.constants import SQL_COLUMNS as SQL_COLUMNS
|
|
4
|
+
from gllm_datastore.data_store.sql.query_translator import SQLQueryTranslator as SQLQueryTranslator
|
|
5
|
+
from sqlalchemy import Table
|
|
6
|
+
from sqlalchemy.engine import Row
|
|
7
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
def row_to_chunk(row: Row) -> Chunk:
|
|
11
|
+
"""Convert a database row to a Chunk object.
|
|
12
|
+
|
|
13
|
+
Args:
|
|
14
|
+
row (Row): Database row with _mapping attribute.
|
|
15
|
+
|
|
16
|
+
Returns:
|
|
17
|
+
Chunk: Chunk object created from the row data.
|
|
18
|
+
"""
|
|
19
|
+
async def execute_select(session: AsyncSession, table: Table, filters: QueryFilter | None) -> list[Row]:
|
|
20
|
+
"""Execute a select query on a table with filters.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
session (AsyncSession): Database session.
|
|
24
|
+
table (Table): SQLAlchemy table.
|
|
25
|
+
filters (QueryFilter | None): Query filters.
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
list[Row]: List of result rows.
|
|
29
|
+
"""
|
|
30
|
+
async def execute_update(session: AsyncSession, table: Table, update_values: dict[str, Any], rows: list[Row]) -> None:
|
|
31
|
+
"""Execute update operations on table rows.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
session (AsyncSession): Database session.
|
|
35
|
+
table (Table): SQLAlchemy table.
|
|
36
|
+
update_values (dict[str, Any]): Values to update.
|
|
37
|
+
rows (list[Row]): Rows to update.
|
|
38
|
+
|
|
39
|
+
Raises:
|
|
40
|
+
ValueError: If table has no primary key.
|
|
41
|
+
"""
|
|
42
|
+
async def execute_delete(session: AsyncSession, table: Table, rows: list[Row]) -> None:
|
|
43
|
+
"""Execute delete operations on table rows.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
session (AsyncSession): Database session.
|
|
47
|
+
table (Table): SQLAlchemy table.
|
|
48
|
+
rows (list[Row]): Rows to delete.
|
|
49
|
+
|
|
50
|
+
Raises:
|
|
51
|
+
ValueError: If table has no primary key.
|
|
52
|
+
"""
|
|
53
|
+
async def execute_update_with_filters(session: AsyncSession, table: Table, update_values: dict[str, Any], filters: QueryFilter | None) -> None:
|
|
54
|
+
"""Execute update operations using filters directly.
|
|
55
|
+
|
|
56
|
+
This function constructs a single atomic UPDATE statement with WHERE conditions
|
|
57
|
+
derived from the filters, avoiding the need to fetch rows first.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
session (AsyncSession): Database session.
|
|
61
|
+
table (Table): SQLAlchemy table.
|
|
62
|
+
update_values (dict[str, Any]): Values to update.
|
|
63
|
+
filters (QueryFilter | None): Query filters to apply as WHERE conditions.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
None: No return value.
|
|
67
|
+
"""
|
|
68
|
+
async def execute_delete_with_filters(session: AsyncSession, table: Table, filters: QueryFilter | None) -> None:
|
|
69
|
+
"""Execute delete operations using filters directly.
|
|
70
|
+
|
|
71
|
+
This function constructs a single atomic DELETE statement with WHERE conditions
|
|
72
|
+
derived from the filters, avoiding the need to fetch rows first.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
session (AsyncSession): Database session.
|
|
76
|
+
table (Table): SQLAlchemy table.
|
|
77
|
+
filters (QueryFilter | None): Query filters to apply as WHERE conditions.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
None: No return value.
|
|
81
|
+
"""
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
3
|
+
from sqlalchemy import Select, Table
|
|
4
|
+
from sqlalchemy.sql.expression import ColumnElement
|
|
5
|
+
|
|
6
|
+
class SQLQueryTranslator:
|
|
7
|
+
"""Translates QueryFilter and FilterClause objects to SQLAlchemy ColumnElement expressions.
|
|
8
|
+
|
|
9
|
+
This class encapsulates all query translation logic for SQL data stores.
|
|
10
|
+
It works with reflected Table objects (not DeclarativeBase models) and supports
|
|
11
|
+
both direct column access and JSON field paths.
|
|
12
|
+
"""
|
|
13
|
+
table: Incomplete
|
|
14
|
+
def __init__(self, table: Table) -> None:
|
|
15
|
+
"""Initialize the SQL query translator.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
table (Table): SQLAlchemy Table object for column resolution.
|
|
19
|
+
"""
|
|
20
|
+
def translate(self, filters: QueryFilter | None) -> ColumnElement | None:
|
|
21
|
+
"""Translate a structured QueryFilter into a SQLAlchemy ColumnElement expression.
|
|
22
|
+
|
|
23
|
+
This is the main entry point for filter translation. It handles None filters
|
|
24
|
+
and delegates to internal translation methods.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
filters (QueryFilter | None): Structured QueryFilter to translate. May be None.
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
ColumnElement | None: A SQLAlchemy condition expression or None if no filters are provided.
|
|
31
|
+
"""
|
|
32
|
+
def apply_filters(self, query: Select, filters: QueryFilter | None) -> Select:
|
|
33
|
+
"""Apply filters to a SQLAlchemy Select query.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
query (Select): SQLAlchemy Select query.
|
|
37
|
+
filters (QueryFilter | None): Query filters to apply. May be None.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
Select: Query with filters applied.
|
|
41
|
+
"""
|
|
42
|
+
def apply_options(self, query: Select, options: QueryOptions | None) -> Select:
|
|
43
|
+
"""Apply query options to a SQLAlchemy Select query.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
query (Select): SQLAlchemy Select query.
|
|
47
|
+
options (QueryOptions | None): Query options to apply. May be None.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
Select: Query with options applied.
|
|
51
|
+
"""
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
|
|
3
|
+
Base: Incomplete
|
|
4
|
+
|
|
5
|
+
class ChunkModel(Base):
    """SQLAlchemy model for the chunk table.

    Attributes:
        id (Column): The ID of the chunk.
        content (Column): The content of the chunk.
        chunk_metadata (Column): The metadata of the chunk stored as JSON.
    """

    # Name of the mapped table; the concrete value is not visible in this stub.
    __tablename__: str
    # Column holding the chunk ID.
    id: Incomplete
    # Column holding the chunk content.
    content: Incomplete
    # Column holding the chunk metadata, stored as JSON.
    chunk_metadata: Incomplete
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.encryptor.encryptor import BaseEncryptor as BaseEncryptor
|
|
3
|
+
|
|
4
|
+
KEY_LENGTH_BYTES: int
|
|
5
|
+
NONCE_LENGTH_BYTES: int
|
|
6
|
+
|
|
7
|
+
class AESGCMEncryptor(BaseEncryptor):
    """AES-GCM 256 Encryptor that accepts keys directly.

    This class provides AES-GCM symmetric encryption and decryption methods
    with a 256-bit key provided directly by the client.

    Attributes:
        key (bytes): 256-bit encryption key.
        aesgcm (AESGCM): AES-GCM instance.
    """

    # Documented as bytes both here and on __init__, so annotated accordingly.
    key: bytes
    # The AESGCM type is not imported by this stub, so it stays Incomplete.
    aesgcm: Incomplete

    def __init__(self, key: bytes) -> None:
        """Initialize AESGCMEncryptor with a direct key.

        Args:
            key (bytes): 256-bit encryption key.

        Raises:
            ValueError: If key length is not 256 bits.
        """

    def encrypt(self, plaintext: str) -> str:
        """Encrypt the plaintext using AES-GCM with a random nonce.

        Args:
            plaintext (str): The plaintext data to be encrypted.

        Returns:
            str: The encrypted data, encoded in base64 format.
        """

    def decrypt(self, ciphertext: str) -> str:
        """Decrypt the AES-GCM ciphertext.

        Args:
            ciphertext (str): The ciphertext in base64 format to be decrypted.

        Returns:
            str: The decrypted plaintext data.
        """
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS
|
|
3
|
+
from gllm_datastore.encryptor.encryptor import BaseEncryptor as BaseEncryptor
|
|
4
|
+
|
|
5
|
+
class EncryptionCapabilityMixin:
    """Mixin implementation of EncryptionCapability with common encryption logic.

    This class provides the shared encryption and decryption logic that is identical
    across all backend implementations. Backend-specific encryption capabilities
    should inherit from this class and add backend-specific initialization.

    Attributes:
        encryptor (BaseEncryptor): The encryptor instance to use for encryption/decryption.
    """

    # Documented as a BaseEncryptor here and on __init__, so annotated accordingly.
    encryptor: BaseEncryptor

    def __init__(self, encryptor: BaseEncryptor, encrypted_fields: set[str]) -> None:
        '''Initialize the encryption capability mixin.

        Args:
            encryptor (BaseEncryptor): The encryptor instance to use for encryption.
            encrypted_fields (set[str]): The set of fields to encrypt. Supports:
                1. Content field: "content"
                2. Metadata fields using dot notation: "metadata.secret_key", "metadata.secret_value"
                Example: `{"content", "metadata.secret_key", "metadata.secret_value"}`
        '''

    @property
    def encryption_config(self) -> set[str] | None:
        """Get the current encryption configuration.

        Returns:
            set[str] | None: Set of encrypted field names. The annotation also
                allows None; when that is returned is not visible in this stub
                (presumably when no fields are configured; confirm against the
                implementation).
        """
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
class BaseEncryptor(ABC):
    """Abstract base class defining the interface for encryption implementations.

    This abstract base class ensures that all encryptors implement the required
    encrypt and decrypt methods with consistent signatures.

    Thread-safety requirement:
        Implementations MUST be thread-safe. The client may
        invoke `encrypt` and `decrypt` concurrently from multiple threads, so
        any internal state (e.g., buffers, nonces, cipher instances) must be
        protected or designed to avoid race conditions.
    """

    @abstractmethod
    def encrypt(self, plaintext: str) -> str:
        """Encrypt plain text into cipher text.

        This method should be implemented by subclasses to provide the encryption functionality.
        Because it is declared with ``@abstractmethod``, a subclass that does not
        override it cannot be instantiated.

        Note:
            The implementation must be thread-safe and must not mutate shared state
            without proper synchronization.

        Args:
            plaintext (str): The raw plain text to encrypt.

        Returns:
            str: The encrypted cipher text.

        Raises:
            NotImplementedError: If the method is not implemented by the subclass.
                This is documented for the base implementation, whose body is
                not visible in this stub.
        """

    @abstractmethod
    def decrypt(self, ciphertext: str) -> str:
        """Decrypt cipher text back into plain text.

        This method should be implemented by subclasses to provide the decryption functionality.
        Because it is declared with ``@abstractmethod``, a subclass that does not
        override it cannot be instantiated.

        Note:
            The implementation must be thread-safe and must not mutate shared state
            without proper synchronization.

        Args:
            ciphertext (str): The ciphertext to decrypt.

        Returns:
            str: The decrypted plain text.

        Raises:
            NotImplementedError: If the method is not implemented by the subclass.
                This is documented for the base implementation, whose body is
                not visible in this stub.
        """
|