gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gllm-datastore-binary might be problematic.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +5 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +7 -0
- gllm_datastore/data_store/base.pyi +138 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +202 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +154 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-311-darwin.so +0 -0
- gllm_datastore.pyi +156 -0
- gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
- gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
- gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
@@ -0,0 +1,131 @@
from _typeshed import Incomplete
from gllm_core.schema.chunk import Chunk
from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS, FIELD_CONFIG_NAME as FIELD_CONFIG_NAME, FIELD_CONFIG_TYPE as FIELD_CONFIG_TYPE
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
from gllm_datastore.data_store.redis.query import build_filter_expression as build_filter_expression, check_index_exists as check_index_exists, execute_update as execute_update, fetch_hash_data_batch as fetch_hash_data_batch, get_filterable_fields_from_index as get_filterable_fields_from_index, infer_filterable_fields_from_chunks as infer_filterable_fields_from_chunks, normalize_field_name_for_schema as normalize_field_name_for_schema, parse_redisvl_result_to_chunks as parse_redisvl_result_to_chunks, prepare_chunk_document as prepare_chunk_document, strip_index_prefix as strip_index_prefix, validate_chunk_content as validate_chunk_content, validate_chunk_list as validate_chunk_list, validate_metadata_fields as validate_metadata_fields
from gllm_datastore.data_store.redis.query_translator import RedisQueryTranslator as RedisQueryTranslator
from gllm_datastore.utils.converter import cosine_distance_to_similarity_score as cosine_distance_to_similarity_score
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
from gllm_inference.schema import Vector
from redis.asyncio.client import Redis
from typing import Any

class RedisVectorCapability:
    """Redis implementation of the VectorCapability protocol.

    This class provides vector similarity search operations using a RedisVL
    AsyncSearchIndex for vector storage and retrieval.

    Attributes:
        index_name (str): Name of the Redis index.
        client (Redis): Redis async client instance.
        em_invoker (BaseEMInvoker): Embedding model for vectorization.
        index (Any): RedisVL AsyncSearchIndex instance.
    """
    index_name: Incomplete
    client: Incomplete
    index: Any
    def __init__(self, index_name: str, client: Redis, em_invoker: BaseEMInvoker) -> None:
        """Initialize the Redis vector capability.

        The schema will be automatically inferred from chunks when creating a new index,
        or auto-detected from an existing index when performing operations.

        Args:
            index_name (str): Name of the Redis index.
            client (Redis): Redis async client instance.
            em_invoker (BaseEMInvoker): Embedding model for vectorization.
        """
    @property
    def em_invoker(self) -> BaseEMInvoker:
        """Returns the EM Invoker instance.

        Returns:
            BaseEMInvoker: The EM Invoker instance.
        """
    async def ensure_index(self, filterable_fields: list[dict[str, Any]] | None = None) -> None:
        '''Ensure the Redis vector index exists, creating it if necessary.

        This method is idempotent: if the index already exists, it skips creation
        and returns early.

        Args:
            filterable_fields (list[dict[str, Any]] | None, optional): List of filterable field
                configurations to use when creating a new index. Each field should be a dictionary
                with "name" and "type" keys. For example:
                [{"name": "metadata.category", "type": "tag"}, {"name": "metadata.score", "type": "numeric"}]
                If not provided and the index doesn't exist, a default schema will be created with
                only basic fields (id, content, metadata, vector). Defaults to None.

        Raises:
            RuntimeError: If index creation fails.
        '''
    async def create(self, data: Chunk | list[Chunk]) -> None:
        """Add chunks to the vector store with automatic embedding generation.

        If the index does not exist, the schema will be inferred from the chunks being created.

        Args:
            data (Chunk | list[Chunk]): Single chunk or list of chunks to add.

        Raises:
            ValueError: If the data structure is invalid or chunk content is invalid.
        """
    async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]]) -> None:
        """Add pre-computed vectors directly.

        If the index does not exist, the schema will be inferred from the chunks being created.

        Args:
            chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks
                and their corresponding vectors.

        Raises:
            ValueError: If chunk content is invalid.
        """
    async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
        """Read records from the datastore using text-based similarity search with optional filtering.

        Args:
            query (str): Input text to embed and search with.
            filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
                Defaults to None.
            options (QueryOptions | None, optional): Query options like limit and sorting. Defaults to None.

        Returns:
            list[Chunk]: Query results ordered by similarity score.
        """
    async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
        """Direct vector similarity search.

        Args:
            vector (Vector): Query embedding vector.
            filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
            options (QueryOptions | None, optional): Query options like limit and sorting. Defaults to None.

        Returns:
            list[Chunk]: List of chunks ordered by similarity score.
        """
    async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
        """Update existing records in the datastore.

        Args:
            update_values (dict[str, Any]): Values to update.
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
                FilterClause objects are automatically converted to QueryFilter internally.
                Defaults to None.
        """
    async def delete(self, filters: FilterClause | QueryFilter | None = None) -> None:
        """Delete records from the datastore.

        Processes deletions in batches to avoid loading all matching documents into memory.
        If filters is None, no operation is performed (no-op).

        Args:
            filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
                Defaults to None.
        """
    async def clear(self) -> None:
        """Clear all records from the datastore."""
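The stub above only declares the RedisVectorCapability interface. The following is a hedged usage sketch based on those declarations: the import path is inferred from the file listing, and the Redis URL, the Chunk constructor arguments, and the EM invoker wiring are assumptions for illustration only.

```python
# Hedged usage sketch for RedisVectorCapability, based only on the stub above.
import asyncio

from gllm_core.schema.chunk import Chunk  # Chunk fields below are assumed
from gllm_datastore.data_store.redis.vector import RedisVectorCapability  # path inferred from the file listing
from redis.asyncio.client import Redis


async def index_and_search(em_invoker) -> None:  # em_invoker: any configured BaseEMInvoker
    client = Redis.from_url("redis://localhost:6379")
    capability = RedisVectorCapability("documents", client, em_invoker)

    # Optional: create the index up front with a filterable metadata field.
    await capability.ensure_index(
        filterable_fields=[{"name": "metadata.category", "type": "tag"}]
    )

    # Index a chunk; the embedding is generated via the EM invoker.
    await capability.create(
        Chunk(content="Redis is an in-memory data store.", metadata={"category": "databases"})
    )

    # Text query; results come back ordered by similarity score.
    for chunk in await capability.retrieve("what is redis?"):
        print(chunk.content)

    await client.aclose()  # redis-py >= 5; older clients use close()

# asyncio.run(index_and_search(my_em_invoker))
```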
@@ -0,0 +1,45 @@
from _typeshed import Incomplete
from gllm_datastore.encryptor.encryptor import BaseEncryptor as BaseEncryptor

KEY_LENGTH_BYTES: int
NONCE_LENGTH_BYTES: int

class AESGCMEncryptor(BaseEncryptor):
    """AES-GCM 256 Encryptor that accepts keys directly.

    This class provides AES-GCM symmetric encryption and decryption methods
    with a 256-bit key provided directly by the client.

    Attributes:
        key (bytes): 256-bit encryption key.
        aesgcm (AESGCM): AES-GCM instance.
    """
    key: Incomplete
    aesgcm: Incomplete
    def __init__(self, key: bytes) -> None:
        """Initialize AESGCMEncryptor with a direct key.

        Args:
            key (bytes): 256-bit encryption key.

        Raises:
            ValueError: If key length is not 256 bits.
        """
    def encrypt(self, plaintext: str) -> str:
        """Encrypts the plaintext using AES-GCM with a random nonce.

        Args:
            plaintext (str): The plaintext data to be encrypted.

        Returns:
            str: The encrypted data, encoded in base64 format.
        """
    def decrypt(self, ciphertext: str) -> str:
        """Decrypts the AES-GCM ciphertext.

        Args:
            ciphertext (str): The ciphertext in base64 format to be decrypted.

        Returns:
            str: The decrypted plaintext data.
        """
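A minimal round-trip sketch for AESGCMEncryptor follows; the import path mirrors the file listing above, and the throwaway key generated here is purely illustrative.

```python
# Minimal round-trip sketch for AESGCMEncryptor, based on the stub above.
import os

from gllm_datastore.encryptor.aes_gcm_encryptor import AESGCMEncryptor

key = os.urandom(32)  # 256-bit key, as the constructor requires
encryptor = AESGCMEncryptor(key)

token = encryptor.encrypt("secret payload")  # base64 string; a random nonce is used per call
assert encryptor.decrypt(token) == "secret payload"
```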
@@ -0,0 +1,52 @@
from abc import ABC, abstractmethod

class BaseEncryptor(ABC):
    """Abstract base class defining the interface for encryption implementations.

    This abstract base class ensures that all encryptors implement the required
    encrypt and decrypt methods with consistent signatures.

    Thread-safety requirement:
        Implementations MUST be thread-safe. The client may
        invoke `encrypt` and `decrypt` concurrently from multiple threads, so
        any internal state (e.g., buffers, nonces, cipher instances) must be
        protected or designed to avoid race conditions.
    """
    @abstractmethod
    def encrypt(self, plaintext: str) -> str:
        """Encrypt plain text into cipher text.

        This method should be implemented by subclasses to provide the encryption functionality.

        Note:
            The implementation must be thread-safe and must not mutate shared state
            without proper synchronization.

        Args:
            plaintext (str): The raw plain text to encrypt.

        Returns:
            str: The encrypted cipher text.

        Raises:
            NotImplementedError: If the method is not implemented by the subclass.
        """
    @abstractmethod
    def decrypt(self, ciphertext: str) -> str:
        """Decrypt cipher text back into plain text.

        This method should be implemented by subclasses to provide the decryption functionality.

        Note:
            The implementation must be thread-safe and must not mutate shared state
            without proper synchronization.

        Args:
            ciphertext (str): The ciphertext to decrypt.

        Returns:
            str: The decrypted plain text.

        Raises:
            NotImplementedError: If the method is not implemented by the subclass.
        """
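Since BaseEncryptor only fixes the encrypt/decrypt signatures, a concrete subclass can wrap any symmetric scheme. The sketch below is not part of this package: it wraps Fernet from the third-party `cryptography` library, whose instances keep no mutable state between calls, which helps meet the thread-safety requirement stated above.

```python
# Illustrative BaseEncryptor subclass; a sketch only, not part of this package.
from cryptography.fernet import Fernet

from gllm_datastore.encryptor.encryptor import BaseEncryptor


class FernetEncryptor(BaseEncryptor):
    """Encryptor backed by Fernet symmetric encryption (illustrative)."""

    def __init__(self, key: bytes | None = None) -> None:
        # Fernet keys are url-safe base64-encoded 32-byte values.
        self._fernet = Fernet(key or Fernet.generate_key())

    def encrypt(self, plaintext: str) -> str:
        return self._fernet.encrypt(plaintext.encode("utf-8")).decode("utf-8")

    def decrypt(self, ciphertext: str) -> str:
        return self._fernet.decrypt(ciphertext.encode("utf-8")).decode("utf-8")
```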
@@ -0,0 +1,52 @@
from gllm_datastore.encryptor.encryptor import BaseEncryptor as BaseEncryptor
from gllm_datastore.encryptor.key_ring.key_ring import BaseKeyRing as BaseKeyRing

class InMemoryKeyRing(BaseKeyRing):
    """In-memory implementation of BaseKeyRing.

    This class provides simple in-memory storage for encryption keys and
    their associated encryptors. All keys are stored in memory and will be
    lost when the application terminates.

    Attributes:
        encryptors (dict[str, BaseEncryptor]): A dictionary to store the keys and their associated encryptors.
    """
    encryptors: dict[str, BaseEncryptor]
    def __init__(self, encryptors: dict[str, BaseEncryptor] | None = None) -> None:
        """Initialize the InMemoryKeyRing.

        Args:
            encryptors (dict[str, BaseEncryptor] | None, optional): A dictionary to store the keys and
                their associated encryptors. Defaults to None.
        """
    def get(self, key_id: str) -> BaseEncryptor:
        """Get an encryptor by key ID.

        Args:
            key_id (str): ID of the key to retrieve.

        Returns:
            BaseEncryptor: The encryptor for the specified key.

        Raises:
            KeyError: If key_id does not exist.
        """
    def add(self, key_id: str, encryptor: BaseEncryptor) -> None:
        """Add a new key to the key ring.

        Args:
            key_id (str): Unique identifier for the key.
            encryptor (BaseEncryptor): The encryptor instance for this key.

        Raises:
            KeyError: If key_id already exists.
        """
    def remove(self, key_id: str) -> None:
        """Remove a key from the key ring.

        Args:
            key_id (str): ID of the key to remove.

        Raises:
            KeyError: If key_id does not exist.
        """
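A short sketch of how InMemoryKeyRing might be combined with AESGCMEncryptor; the key IDs and ad hoc keys below are illustrative.

```python
# Sketch: InMemoryKeyRing managing several AESGCMEncryptor instances.
import os

from gllm_datastore.encryptor.aes_gcm_encryptor import AESGCMEncryptor
from gllm_datastore.encryptor.key_ring.in_memory_key_ring import InMemoryKeyRing

key_ring = InMemoryKeyRing()
key_ring.add("2024-01", AESGCMEncryptor(os.urandom(32)))
key_ring.add("2024-07", AESGCMEncryptor(os.urandom(32)))

token = key_ring.get("2024-07").encrypt("rotate me later")

key_ring.remove("2024-01")  # raises KeyError for an unknown key_id
```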
@@ -0,0 +1,45 @@
from abc import ABC, abstractmethod
from gllm_datastore.encryptor.encryptor import BaseEncryptor as BaseEncryptor

class BaseKeyRing(ABC):
    """Abstract base class defining the interface for managing multiple encryption keys."""
    @abstractmethod
    def get(self, key_id: str) -> BaseEncryptor:
        """Get an encryptor by key ID.

        This method should be implemented by subclasses to provide the getting functionality.

        Args:
            key_id (str): ID of the key to retrieve.

        Returns:
            BaseEncryptor: The encryptor for the specified key.

        Raises:
            NotImplementedError: If the method is not implemented by the subclass.
        """
    @abstractmethod
    def add(self, key_id: str, encryptor: BaseEncryptor) -> None:
        """Add a new key to the key ring.

        This method should be implemented by subclasses to provide the adding functionality.

        Args:
            key_id (str): Unique identifier for the key.
            encryptor (BaseEncryptor): The encryptor instance for this key.

        Raises:
            NotImplementedError: If the method is not implemented by the subclass.
        """
    @abstractmethod
    def remove(self, key_id: str) -> None:
        """Remove a key from the key ring.

        This method should be implemented by subclasses to provide the removing functionality.

        Args:
            key_id (str): ID of the key to remove.

        Raises:
            NotImplementedError: If the method is not implemented by the subclass.
        """
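As a hedged illustration of subclassing BaseKeyRing, the sketch below resolves keys on demand from hex-encoded values in environment variables. The `ENCRYPTION_KEY_<ID>` naming scheme and the read-only behaviour are assumptions made for this example, not package conventions.

```python
# Illustrative BaseKeyRing subclass; a sketch only, not part of this package.
import os

from gllm_datastore.encryptor.aes_gcm_encryptor import AESGCMEncryptor
from gllm_datastore.encryptor.encryptor import BaseEncryptor
from gllm_datastore.encryptor.key_ring.key_ring import BaseKeyRing


class EnvKeyRing(BaseKeyRing):
    """Read-only key ring backed by environment variables (illustrative)."""

    def get(self, key_id: str) -> BaseEncryptor:
        hex_key = os.environ.get(f"ENCRYPTION_KEY_{key_id}")
        if hex_key is None:
            raise KeyError(key_id)
        return AESGCMEncryptor(bytes.fromhex(hex_key))  # 64 hex chars -> 32-byte key

    def add(self, key_id: str, encryptor: BaseEncryptor) -> None:
        raise NotImplementedError("EnvKeyRing is read-only; manage keys via the environment.")

    def remove(self, key_id: str) -> None:
        raise NotImplementedError("EnvKeyRing is read-only; manage keys via the environment.")
```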
@@ -0,0 +1,60 @@
from _typeshed import Incomplete
from gllm_datastore.encryptor.encryptor import BaseEncryptor as BaseEncryptor
from gllm_datastore.encryptor.key_ring.key_ring import BaseKeyRing as BaseKeyRing

class KeyRotatingEncryptor(BaseEncryptor):
    """Encryptor that supports key rotation through a key ring.

    This encryptor uses a BaseKeyRing to manage multiple encryption keys.
    Users must specify which key to use for encryption and decryption operations.

    Attributes:
        key_ring (BaseKeyRing): The key ring managing encryption keys.
        active_key_id (str): The ID of the current key to use for encryption.
    """
    key_ring: Incomplete
    def __init__(self, key_ring: BaseKeyRing, active_key_id: str) -> None:
        """Initialize KeyRotatingEncryptor with a key ring.

        Args:
            key_ring (BaseKeyRing): The key ring to use for key management.
            active_key_id (str): The ID of the current key to use for encryption.
        """
    @property
    def active_key_id(self) -> str:
        """Get the ID of the current key to use for encryption."""
    @active_key_id.setter
    def active_key_id(self, value: str) -> None:
        """Set the ID of the current key to use for encryption.

        Args:
            value (str): The ID of the current key to use for encryption.

        Raises:
            KeyError: If the specified key does not exist.
        """
    def encrypt(self, plaintext: str) -> str:
        """Encrypt plaintext using the specified key.

        Args:
            plaintext (str): The plaintext to encrypt.

        Returns:
            str: The encrypted data with key metadata, encoded in base64.

        Raises:
            KeyError: If the specified key does not exist.
        """
    def decrypt(self, ciphertext: str) -> str:
        """Decrypt ciphertext using the key detected from metadata.

        Args:
            ciphertext (str): The encrypted data with key metadata.

        Returns:
            str: The decrypted plaintext.

        Raises:
            ValueError: If the data format is invalid or decryption fails.
            KeyError: If the required key is not available.
        """
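A rotation sketch for KeyRotatingEncryptor follows: because the ciphertext carries key metadata, data written under an older key stays readable after the active key changes. The key IDs and key material below are illustrative.

```python
# Key rotation sketch based on the stubs above.
import os

from gllm_datastore.encryptor.aes_gcm_encryptor import AESGCMEncryptor
from gllm_datastore.encryptor.key_ring.in_memory_key_ring import InMemoryKeyRing
from gllm_datastore.encryptor.key_rotating_encryptor import KeyRotatingEncryptor

key_ring = InMemoryKeyRing({
    "v1": AESGCMEncryptor(os.urandom(32)),
    "v2": AESGCMEncryptor(os.urandom(32)),
})
encryptor = KeyRotatingEncryptor(key_ring, active_key_id="v1")

old_token = encryptor.encrypt("written under v1")

encryptor.active_key_id = "v2"  # rotate: new writes use v2
new_token = encryptor.encrypt("written under v2")

# Both decrypt, since each token records which key produced it.
assert encryptor.decrypt(old_token) == "written under v1"
assert encryptor.decrypt(new_token) == "written under v2"
```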
@@ -0,0 +1,6 @@
from gllm_datastore.graph_data_store.light_rag_postgres_data_store import LightRAGPostgresDataStore as LightRAGPostgresDataStore
from gllm_datastore.graph_data_store.llama_index_neo4j_graph_rag_data_store import LlamaIndexNeo4jGraphRAGDataStore as LlamaIndexNeo4jGraphRAGDataStore
from gllm_datastore.graph_data_store.nebula_graph_data_store import NebulaGraphDataStore as NebulaGraphDataStore
from gllm_datastore.graph_data_store.neo4j_graph_data_store import Neo4jGraphDataStore as Neo4jGraphDataStore

__all__ = ['LightRAGPostgresDataStore', 'LlamaIndexNeo4jGraphRAGDataStore', 'NebulaGraphDataStore', 'Neo4jGraphDataStore']
@@ -0,0 +1,151 @@
from abc import ABC, abstractmethod
from typing import Any

class BaseGraphDataStore(ABC):
    """Abstract base class for an async graph data store interface.

    This class defines the asynchronous interface for all graph data store implementations.
    It provides methods for creating, updating, and querying graph data.
    """
    @abstractmethod
    async def upsert_node(self, label: str, identifier_key: str, identifier_value: str, properties: dict[str, Any] | None) -> Any:
        """Upsert a node in the graph.

        Args:
            label (str): The label of the node.
            identifier_key (str): The key of the identifier.
            identifier_value (str): The value of the identifier.
            properties (dict[str, Any] | None, optional): The properties of the node. Defaults to None.

        Returns:
            Any: The result of the operation.
        """
    @abstractmethod
    async def upsert_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str, properties: dict[str, Any] | None) -> Any:
        """Upsert a relationship between two nodes in the graph.

        Args:
            node_source_key (str): The key of the source node.
            node_source_value (str): The value of the source node.
            relation (str): The type of the relationship.
            node_target_key (str): The key of the target node.
            node_target_value (str): The value of the target node.
            properties (dict[str, Any] | None, optional): The properties of the relationship. Defaults to None.

        Returns:
            Any: The result of the operation.
        """
    @abstractmethod
    async def delete_node(self, label: str, identifier_key: str, identifier_value: str) -> Any:
        """Delete a node from the graph.

        Args:
            label (str): The label of the node.
            identifier_key (str): The key of the identifier.
            identifier_value (str): The identifier of the node.

        Returns:
            Any: The result of the operation.
        """
    @abstractmethod
    async def delete_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str) -> Any:
        """Delete a relationship between two nodes in the graph.

        Args:
            node_source_key (str): The key of the source node.
            node_source_value (str): The identifier of the source node.
            relation (str): The type of the relationship.
            node_target_key (str): The key of the target node.
            node_target_value (str): The identifier of the target node.

        Returns:
            Any: The result of the operation.
        """
    @abstractmethod
    async def query(self, query: str, parameters: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Query the graph data store.

        Args:
            query (str): The query to be executed.
            parameters (dict[str, Any] | None, optional): The parameters of the query. Defaults to None.

        Returns:
            list[dict[str, Any]]: The result of the query as a list of dictionaries.
        """
    @abstractmethod
    async def traverse_graph(self, node_properties: dict[str, Any], extracted_node_properties: list[str] | None = None, extracted_relationship_properties: list[str] | None = None, depth: int = 3) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
        '''Traverse the graph from a node with the specified properties, ignoring relationship direction, up to a given depth.

        Example:
            ```python
            nodes, relationships = await graph_data_store.traverse_graph(
                node_properties={"name": "John Doe"},
                extracted_node_properties=["name", "age"],
                extracted_relationship_properties=["since"],
                depth=1
            )
            ```
            Starting from the node whose `name` property equals "John Doe", this traverses
            the graph up to depth 1, extracting the `name` and `age` properties from nodes
            and the `since` property from relationships.

            ```python
            nodes, relationships = await graph_data_store.traverse_graph(
                node_properties={"name": "John Doe"},
                depth=2
            )
            ```
            Starting from the node whose `name` property equals "John Doe", this traverses
            the graph up to depth 2, extracting all properties from nodes and relationships.

        Args:
            node_properties (dict[str, Any]): The properties of the starting node.
            extracted_node_properties (list[str] | None, optional): The properties to extract from nodes during
                traversal. If None or an empty list, all node properties will be returned. Defaults to None.
            extracted_relationship_properties (list[str] | None, optional): The properties to extract from relationships
                during traversal. If None or an empty list, all relationship properties will be returned. Defaults to None.
            depth (int, optional): The depth of traversal. Defaults to 3.

        Returns:
            tuple[list[dict[str, Any]], list[dict[str, Any]]]: A tuple containing two lists:
                - List of nodes with their extracted properties.
                - List of relationships with their extracted properties.

            Example return value:
                nodes = [
                    {
                        "id": 1001,
                        "labels": ["Person"],
                        "properties": {
                            "name": "John Doe",
                            "age": 30,
                            "occupation": "Engineer"
                        }
                    },
                    {
                        "id": 2001,
                        "labels": ["Company"],
                        "properties": {
                            "name": "TechCorp",
                            "industry": "Technology",
                            "employees": 500
                        }
                    }
                ]

                relationships = [
                    {
                        "id": 5002,
                        "type": "FRIEND_OF",
                        "start_node": 1001,
                        "end_node": 1002,
                        "properties": {
                            "since": "2018-05-20",
                            "closeness": 8
                        }
                    }
                ]
        '''
    @abstractmethod
    async def close(self) -> None:
        """Close the graph data store."""
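Since the concrete graph store constructors are not shown in this diff, the sketch below assumes an already-constructed BaseGraphDataStore instance (for example one of the stores re-exported in the `__init__` above) and exercises the upsert, traversal, and close methods declared in the stub; labels, property names, and values are illustrative.

```python
# Usage sketch against a concrete BaseGraphDataStore implementation.
from typing import Any


async def build_and_traverse(store: Any) -> None:  # store: a concrete BaseGraphDataStore
    await store.upsert_node("Person", "name", "John Doe", {"age": 30})
    await store.upsert_node("Company", "name", "TechCorp", {"industry": "Technology"})
    await store.upsert_relationship("name", "John Doe", "WORKS_AT", "name", "TechCorp", {"since": "2020"})

    # Undirected traversal from John Doe, one hop out, keeping selected properties.
    nodes, relationships = await store.traverse_graph(
        node_properties={"name": "John Doe"},
        extracted_node_properties=["name"],
        extracted_relationship_properties=["since"],
        depth=1,
    )
    print(nodes, relationships)

    await store.close()
```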
@@ -0,0 +1,29 @@
from abc import ABC, abstractmethod
from typing import Any

class BaseGraphRAGDataStore(ABC):
    """Abstract base class for graph RAG data stores in the retrieval system.

    This class defines the interface for all graph-based Retrieval-Augmented
    Generation (RAG) implementations. It provides methods for querying the graph with
    natural language and managing document-related data.
    """
    @abstractmethod
    async def query(self, query: str, **kwargs: Any) -> list[dict[str, Any]]:
        """Query the graph RAG data store.

        Args:
            query (str): The query to be executed.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            list[dict[str, Any]]: The result of the query as a list of dictionaries.
        """
    @abstractmethod
    async def delete_by_document_id(self, document_id: str, **kwargs: Any) -> None:
        """Delete nodes and edges by document ID.

        Args:
            document_id (str): The document ID.
            **kwargs (Any): Additional keyword arguments.
        """
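Similarly, a brief sketch against an already-constructed BaseGraphRAGDataStore implementation (e.g. LightRAGPostgresDataStore from the file listing); construction details and the example query are assumptions, since they are not part of this diff.

```python
# Usage sketch against a concrete BaseGraphRAGDataStore implementation.
from typing import Any


async def ask_and_cleanup(rag_store: Any, document_id: str) -> None:
    # Natural-language query over the graph.
    for row in await rag_store.query("Who founded TechCorp?"):
        print(row)

    # Remove all nodes and edges derived from a single source document.
    await rag_store.delete_by_document_id(document_id)
```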