gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gllm-datastore-binary might be problematic. Click here for more details.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +5 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +7 -0
- gllm_datastore/data_store/base.pyi +138 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +202 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +154 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-311-darwin.so +0 -0
- gllm_datastore.pyi +156 -0
- gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
- gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
- gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_core.utils.retry import RetryConfig
|
|
3
|
+
from gllm_datastore.graph_data_store.graph_data_store import BaseGraphDataStore as BaseGraphDataStore
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
class Neo4jGraphDataStore(BaseGraphDataStore):
|
|
7
|
+
'''Implementation of BaseGraphDataStore for Neo4j.
|
|
8
|
+
|
|
9
|
+
This class provides an interface for graph-based Retrieval-Augmented Generation (RAG)
|
|
10
|
+
operations on Neo4j graph databases.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
driver (Driver): The Neo4j driver.
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
```python
|
|
17
|
+
store = Neo4jGraphDataStore(
|
|
18
|
+
uri="bolt://localhost:7687",
|
|
19
|
+
user="neo4j",
|
|
20
|
+
password="password"
|
|
21
|
+
)
|
|
22
|
+
# Perform async operations
|
|
23
|
+
results = await store.query("MATCH (n) RETURN n")
|
|
24
|
+
|
|
25
|
+
# Create a node
|
|
26
|
+
node = await store.upsert_node("Person", "name", "John", {"age": 30})
|
|
27
|
+
```
|
|
28
|
+
'''
|
|
29
|
+
driver: Incomplete
|
|
30
|
+
retry_config: Incomplete
|
|
31
|
+
def __init__(self, uri: str, user: str, password: str, max_connection_pool_size: int = 100, retry_config: RetryConfig | None = None, **kwargs: Any) -> None:
|
|
32
|
+
"""Initialize Neo4jGraphDataStore.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
uri (str): The URI of the graph store.
|
|
36
|
+
user (str): The user of the graph store.
|
|
37
|
+
password (str): The password of the graph store.
|
|
38
|
+
max_connection_pool_size (int, optional): The maximum size of the connection pool. Defaults to 100.
|
|
39
|
+
retry_config (RetryConfig | None, optional): Configuration for retry behavior. Defaults to None.
|
|
40
|
+
If provided, query operations will be retried according to the specified RetryConfig parameters.
|
|
41
|
+
When a database operation fails with a retryable exception (e.g., neo4j.exceptions.ServiceUnavailable),
|
|
42
|
+
the operation will be automatically retried based on the retry policy defined in the configuration.
|
|
43
|
+
**kwargs (Any): Additional keyword arguments for the driver.
|
|
44
|
+
"""
|
|
45
|
+
async def upsert_node(self, label: str, identifier_key: str, identifier_value: str, properties: dict[str, Any] | None = None) -> Any:
|
|
46
|
+
"""Upsert a node in the graph.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
label (str): The label of the node.
|
|
50
|
+
identifier_key (str): The key of the identifier.
|
|
51
|
+
identifier_value (str): The value of the identifier.
|
|
52
|
+
properties (dict[str, Any] | None, optional): The properties of the node. Defaults to None.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
Any: The result of the operation.
|
|
56
|
+
"""
|
|
57
|
+
async def upsert_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str, properties: dict[str, Any] | None = None) -> Any:
|
|
58
|
+
"""Upsert a relationship between two nodes in the graph.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
node_source_key (str): The key of the source node.
|
|
62
|
+
node_source_value (str): The value of the source node.
|
|
63
|
+
relation (str): The type of the relationship.
|
|
64
|
+
node_target_key (str): The key of the target node.
|
|
65
|
+
node_target_value (str): The value of the target node.
|
|
66
|
+
properties (dict[str, Any] | None, optional): The properties of the relationship. Defaults to None.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Any: The result of the operation.
|
|
70
|
+
"""
|
|
71
|
+
async def delete_node(self, label: str, identifier_key: str, identifier_value: str) -> Any:
|
|
72
|
+
"""Delete a node from the graph.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
label (str): The label of the node.
|
|
76
|
+
identifier_key (str): The key of the identifier.
|
|
77
|
+
identifier_value (str): The identifier of the node.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Any: The result of the operation.
|
|
81
|
+
"""
|
|
82
|
+
async def delete_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str) -> Any:
|
|
83
|
+
"""Delete a relationship between two nodes in the graph.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
node_source_key (str): The key of the source node.
|
|
87
|
+
node_source_value (str): The identifier of the source node.
|
|
88
|
+
relation (str): The type of the relationship.
|
|
89
|
+
node_target_key (str): The key of the target node.
|
|
90
|
+
node_target_value (str): The identifier of the target node.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
Any: The result of the operation.
|
|
94
|
+
"""
|
|
95
|
+
async def query(self, query: str, parameters: dict[str, Any] | None = None) -> list[dict[str, Any]]:
|
|
96
|
+
"""Query the graph store.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
query (str): The query to be executed.
|
|
100
|
+
parameters (dict[str, Any] | None, optional): The parameters of the query. Defaults to None.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
list[dict[str, Any]]: The result of the query.
|
|
104
|
+
"""
|
|
105
|
+
async def traverse_graph(self, node_properties: dict[str, Any], extracted_node_properties: list[str] | None = None, extracted_relationship_properties: list[str] | None = None, depth: int = 3) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
|
|
106
|
+
'''Traverse graph from a node with specified properties, ignoring relationship\'s direction, up to a given depth.
|
|
107
|
+
|
|
108
|
+
Example:
|
|
109
|
+
```python
|
|
110
|
+
nodes, relationships = await graph_data_store.traverse_graph(
|
|
111
|
+
node_properties={"name": "John Doe"},
|
|
112
|
+
extracted_node_properties=["name", "age"],
|
|
113
|
+
extracted_relationship_properties=["since"],
|
|
114
|
+
depth=1
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
Means starting from the node with property `name` equal to "John Doe", traverse
|
|
118
|
+
the graph up to depth 1, extracting the `name` and `age` properties from nodes
|
|
119
|
+
and the `since` property from relationships.
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
nodes, relationships = await graph_data_store.traverse_graph(
|
|
123
|
+
node_properties={"name": "John Doe"},
|
|
124
|
+
depth=2
|
|
125
|
+
)
|
|
126
|
+
```
|
|
127
|
+
Means starting from the node with property `name` equal to "John Doe", traverse
|
|
128
|
+
the graph up to depth 2, extracting all properties from nodes and relationships.
|
|
129
|
+
|
|
130
|
+
Args:
|
|
131
|
+
node_properties (dict[str, Any]): The properties of the starting node.
|
|
132
|
+
extracted_node_properties (list[str] | None, optional): The properties to extract from nodes during
|
|
133
|
+
traversal. If None or empty list, all node properties will be returned. Defaults to None.
|
|
134
|
+
extracted_relationship_properties (list[str] | None, optional): The properties to extract from relationships
|
|
135
|
+
during traversal. If None or empty list, all relationship properties will be returned. Defaults to None.
|
|
136
|
+
depth (int, optional): The depth of traversal. Defaults to 3.
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
tuple[list[dict[str, Any]], list[dict[str, Any]]]: A tuple containing two lists:
|
|
140
|
+
- List of nodes with their extracted properties (including the source node).
|
|
141
|
+
- List of relationships with their extracted properties.
|
|
142
|
+
|
|
143
|
+
Example return value:
|
|
144
|
+
nodes = [
|
|
145
|
+
{
|
|
146
|
+
"id": 1001,
|
|
147
|
+
"labels": ["Person"],
|
|
148
|
+
"properties": {
|
|
149
|
+
"name": "John Doe",
|
|
150
|
+
"age": 30,
|
|
151
|
+
"occupation": "Engineer"
|
|
152
|
+
}
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
"id": 2001,
|
|
156
|
+
"labels": ["Company"],
|
|
157
|
+
"properties": {
|
|
158
|
+
"name": "TechCorp",
|
|
159
|
+
"industry": "Technology",
|
|
160
|
+
"employees": 500
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
]
|
|
164
|
+
|
|
165
|
+
relationships = [
|
|
166
|
+
{
|
|
167
|
+
"id": 5002,
|
|
168
|
+
"type": "FRIEND_OF",
|
|
169
|
+
"start_node": 1001,
|
|
170
|
+
"end_node": 1002,
|
|
171
|
+
"properties": {
|
|
172
|
+
"since": "2018-05-20",
|
|
173
|
+
"closeness": 8
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
]
|
|
177
|
+
|
|
178
|
+
Raises:
|
|
179
|
+
ValueError: If node_properties is empty or depth is less than 1.
|
|
180
|
+
'''
|
|
181
|
+
async def close(self) -> None:
|
|
182
|
+
"""Close the graph data store."""
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from gllm_datastore.graph_data_store.utils.light_rag_em_invoker_adapter import LightRAGEMInvokerAdapter as LightRAGEMInvokerAdapter
|
|
2
|
+
from gllm_datastore.graph_data_store.utils.light_rag_lm_invoker_adapter import LightRAGLMInvokerAdapter as LightRAGLMInvokerAdapter
|
|
3
|
+
from gllm_datastore.graph_data_store.utils.llama_index_em_invoker_adapter import LlamaIndexEMInvokerAdapter as LlamaIndexEMInvokerAdapter
|
|
4
|
+
from gllm_datastore.graph_data_store.utils.llama_index_lm_invoker_adapter import LlamaIndexLMInvokerAdapter as LlamaIndexLMInvokerAdapter
|
|
5
|
+
|
|
6
|
+
__all__ = ['LightRAGEMInvokerAdapter', 'LightRAGLMInvokerAdapter', 'LlamaIndexEMInvokerAdapter', 'LlamaIndexLMInvokerAdapter']
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
class LightRAGKeys:
|
|
2
|
+
"""Keys used in LightRAG indexer."""
|
|
3
|
+
ENTITY_TYPE: str
|
|
4
|
+
ENTITY_ID: str
|
|
5
|
+
SOURCE_ID: str
|
|
6
|
+
ROLE: str
|
|
7
|
+
CONTENT: str
|
|
8
|
+
|
|
9
|
+
class LightRAGConstants:
|
|
10
|
+
"""Constants used in LightRAG indexer."""
|
|
11
|
+
CHUNK_TYPE: str
|
|
12
|
+
DEVELOPER_ROLE: str
|
|
13
|
+
EMBEDDING_PAYLOAD_TEST: str
|
|
14
|
+
FILE_TYPE: str
|
|
15
|
+
|
|
16
|
+
class LightRAGPostgresStorageConstants:
|
|
17
|
+
"""Constants used in LightRAG indexer with PostgreSQL storage."""
|
|
18
|
+
DOC_STATUS_STORAGE: str
|
|
19
|
+
GRAPH_STORAGE: str
|
|
20
|
+
KV_STORAGE: str
|
|
21
|
+
VECTOR_STORAGE: str
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.graph_data_store.utils.constants import LightRAGConstants as LightRAGConstants
|
|
3
|
+
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
|
|
4
|
+
from lightrag.base import EmbeddingFunc
|
|
5
|
+
|
|
6
|
+
class LightRAGEMInvokerAdapter(EmbeddingFunc):
|
|
7
|
+
"""Adapter for embedding model invokers to work with LightRAG.
|
|
8
|
+
|
|
9
|
+
This adapter wraps BaseEMInvoker instances to make them compatible
|
|
10
|
+
with LightRAG's expected interface.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
_em_invoker (BaseEMInvoker): The EM invoker to use.
|
|
14
|
+
func (callable): The embedding function.
|
|
15
|
+
embedding_dim (int): The embedding dimension. Defaults to 0.
|
|
16
|
+
"""
|
|
17
|
+
func: Incomplete
|
|
18
|
+
embedding_dim: int
|
|
19
|
+
def __init__(self, em_invoker: BaseEMInvoker) -> None:
|
|
20
|
+
"""Initialize the LightRAGEMInvokerAdapter.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
em_invoker (BaseEMInvoker): The EM invoker to use.
|
|
24
|
+
"""
|
|
25
|
+
async def ensure_initialized(self) -> None:
|
|
26
|
+
"""Ensure that the adapter is initialized.
|
|
27
|
+
|
|
28
|
+
This asynchronous method ensures that the embedding dimension is determined.
|
|
29
|
+
If the embedding dimension is 0, it will determine the dimension by calling
|
|
30
|
+
the embedding invoker with a test input. Raises an error if initialization fails.
|
|
31
|
+
|
|
32
|
+
Raises:
|
|
33
|
+
RuntimeError: If embedding dimension cannot be determined after initialization.
|
|
34
|
+
"""
|
|
35
|
+
def __deepcopy__(self, memo: dict) -> LightRAGEMInvokerAdapter:
|
|
36
|
+
"""Custom deepcopy implementation to handle non-serializable objects.
|
|
37
|
+
|
|
38
|
+
This method is called when copy.deepcopy() is invoked on this object.
|
|
39
|
+
We create a new instance without deep-copying the invoker object
|
|
40
|
+
which may contain non-serializable components.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
memo (dict): Memoization dictionary for deepcopy process
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
LightRAGEMInvokerAdapter: A new instance with the same invoker reference
|
|
47
|
+
"""
|
|
48
|
+
async def __call__(self, input: str | list[str]) -> list[list[float]]:
|
|
49
|
+
"""Make the adapter callable for compatibility with LightRAG.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
input (str | list[str]): The input text or list of texts to embed.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
list[list[float]]: The embeddings for the input texts.
|
|
56
|
+
"""
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from gllm_datastore.graph_data_store.utils.constants import LightRAGConstants as LightRAGConstants, LightRAGKeys as LightRAGKeys
|
|
2
|
+
from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
class LightRAGLMInvokerAdapter:
|
|
6
|
+
"""LMInvoker adapter for the LightRAG module.
|
|
7
|
+
|
|
8
|
+
This adapter is used to adapt the LMInvoker interface to the LightRAG module.
|
|
9
|
+
It handles the conversion between different prompt formats and manages
|
|
10
|
+
asynchronous invocation in a way that's compatible with nested event loops.
|
|
11
|
+
"""
|
|
12
|
+
def __init__(self, lm_invoker: BaseLMInvoker) -> None:
|
|
13
|
+
"""Initialize the LightRAGLMInvokerAdapter.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
lm_invoker (BaseLMInvoker): The LM invoker to use.
|
|
17
|
+
"""
|
|
18
|
+
def __deepcopy__(self, memo: dict) -> LightRAGLMInvokerAdapter:
|
|
19
|
+
"""Custom deepcopy implementation to handle non-serializable objects.
|
|
20
|
+
|
|
21
|
+
This method is called when copy.deepcopy() is invoked on this object.
|
|
22
|
+
We create a new instance without deep-copying the invoker object
|
|
23
|
+
which may contain non-serializable components.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
memo (dict): Memoization dictionary for deepcopy process
|
|
27
|
+
|
|
28
|
+
Returns:
|
|
29
|
+
LightRAGLMInvokerAdapter: A new instance with the same invoker reference
|
|
30
|
+
"""
|
|
31
|
+
async def __call__(self, prompt: str, system_prompt: str | None = None, history_messages: list[dict[str, Any]] | None = None, **kwargs: Any) -> str:
|
|
32
|
+
"""Make the adapter callable for compatibility with LightRAG.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
prompt (str): The prompt to invoke the LM invoker with.
|
|
36
|
+
system_prompt (str | None, optional): The system prompt to format in string format. Defaults to None.
|
|
37
|
+
history_messages (list[dict[str, Any]] | None, optional): The history messages to format in OpenAI format.
|
|
38
|
+
Defaults to None.
|
|
39
|
+
**kwargs (Any): Additional keyword arguments for the LM invoker.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
str: The response from the LM invoker.
|
|
43
|
+
"""
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
|
|
2
|
+
from llama_index.core.base.embeddings.base import BaseEmbedding
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
class LlamaIndexEMInvokerAdapter(BaseEmbedding):
|
|
6
|
+
"""Minimal EMInvoker adapter for the LlamaIndex BaseEmbedding interface.
|
|
7
|
+
|
|
8
|
+
This adapter wraps a BaseEMInvoker instance to provide compatibility with
|
|
9
|
+
LlamaIndex's BaseEmbedding interface. Embeddings from the underlying invoker
|
|
10
|
+
are returned directly without any conversion, assuming they are already in
|
|
11
|
+
the correct format (list of floats).
|
|
12
|
+
|
|
13
|
+
The adapter provides both synchronous and asynchronous methods for:
|
|
14
|
+
- Query embeddings: Single text embedding for search queries
|
|
15
|
+
- Text embeddings: Single or batch text embedding for documents
|
|
16
|
+
|
|
17
|
+
Attributes:
|
|
18
|
+
em_invoker (BaseEMInvoker): The underlying EM invoker instance.
|
|
19
|
+
model_name (str): The name of the embedding model (inherited from invoker).
|
|
20
|
+
embed_batch_size (int): The batch size for batch embedding operations.
|
|
21
|
+
|
|
22
|
+
Note:
|
|
23
|
+
Sync methods (_get_*) use asyncio.run internally to call async methods.
|
|
24
|
+
The implementation uses nest_asyncio to handle nested event loops if needed.
|
|
25
|
+
"""
|
|
26
|
+
em_invoker: BaseEMInvoker
|
|
27
|
+
def __init__(self, em_invoker: BaseEMInvoker, embed_batch_size: int = ..., **kwargs: Any) -> None:
|
|
28
|
+
"""Initialize the LlamaIndexEMInvokerAdapter.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
em_invoker (BaseEMInvoker): The EM invoker to wrap.
|
|
32
|
+
embed_batch_size (int, optional): The batch size for embedding operations.
|
|
33
|
+
Defaults to DEFAULT_EMBED_BATCH_SIZE from LlamaIndex.
|
|
34
|
+
**kwargs (Any): Additional keyword arguments passed to BaseEmbedding (e.g.,
|
|
35
|
+
callback_manager).
|
|
36
|
+
"""
|
|
37
|
+
@classmethod
|
|
38
|
+
def class_name(cls) -> str:
|
|
39
|
+
'''Get the class name (implements BaseEmbedding.class_name).
|
|
40
|
+
|
|
41
|
+
This is used by LlamaIndex for serialization and debugging.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
str: The class name "LlamaIndexEMInvokerAdapter".
|
|
45
|
+
'''
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker
|
|
3
|
+
from llama_index.core.base.llms.types import ChatMessage, ChatResponse, CompletionResponse, LLMMetadata
|
|
4
|
+
from llama_index.core.llms import LLM
|
|
5
|
+
from typing import Any, AsyncGenerator, Sequence
|
|
6
|
+
|
|
7
|
+
ROLE_MAPPING: Incomplete
|
|
8
|
+
|
|
9
|
+
class LlamaIndexLMInvokerAdapter(LLM):
|
|
10
|
+
"""Minimal LMInvoker adapter for the LlamaIndex LLM interface.
|
|
11
|
+
|
|
12
|
+
This adapter wraps a BaseLMInvoker instance to provide compatibility with
|
|
13
|
+
LlamaIndex's LLM interface. It handles conversion between GLLM message formats
|
|
14
|
+
and LlamaIndex ChatMessage formats.
|
|
15
|
+
|
|
16
|
+
Only chat functionality is implemented. Completion and streaming methods raise
|
|
17
|
+
NotImplementedError to keep the implementation minimal.
|
|
18
|
+
|
|
19
|
+
Attributes:
|
|
20
|
+
lm_invoker (BaseLMInvoker): The underlying LM invoker instance.
|
|
21
|
+
|
|
22
|
+
Note:
|
|
23
|
+
Message roles are converted using the ROLE_MAPPING constant, which maps
|
|
24
|
+
all LlamaIndex message roles (SYSTEM, DEVELOPER, USER, ASSISTANT, TOOL,
|
|
25
|
+
FUNCTION, CHATBOT, MODEL) to GLLM MessageRole values.
|
|
26
|
+
"""
|
|
27
|
+
lm_invoker: BaseLMInvoker
|
|
28
|
+
def __init__(self, lm_invoker: BaseLMInvoker, **kwargs: Any) -> None:
|
|
29
|
+
"""Initialize the LlamaIndexLMInvokerAdapter.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
lm_invoker (BaseLMInvoker): The LM invoker to wrap.
|
|
33
|
+
**kwargs (Any): Additional keyword arguments.
|
|
34
|
+
"""
|
|
35
|
+
@property
|
|
36
|
+
def metadata(self) -> LLMMetadata:
|
|
37
|
+
"""Get metadata about the language model.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
LLMMetadata: Metadata containing model information.
|
|
41
|
+
"""
|
|
42
|
+
def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
|
|
43
|
+
"""Synchronous chat endpoint (implements LlamaIndex LLM.chat).
|
|
44
|
+
|
|
45
|
+
This is a synchronous wrapper around the async achat() method.
|
|
46
|
+
It handles both scenarios: when called from within an event loop and when
|
|
47
|
+
called from synchronous code.
|
|
48
|
+
|
|
49
|
+
Converts LlamaIndex ChatMessage objects to GLLM Message format, invokes
|
|
50
|
+
the underlying LM invoker, and converts the response back to ChatResponse.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
messages (Sequence[ChatMessage]): The chat messages in LlamaIndex format.
|
|
54
|
+
**kwargs (Any): Additional keyword arguments. Supports:
|
|
55
|
+
- hyperparameters (dict, optional): Model hyperparameters like
|
|
56
|
+
temperature, max_tokens, etc.
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
ChatResponse: The chat response in LlamaIndex format with message content,
|
|
60
|
+
role, and optional metadata (token usage, finish details).
|
|
61
|
+
"""
|
|
62
|
+
def complete(self, prompt: str, formatted: bool = False, **kwargs: Any) -> CompletionResponse:
|
|
63
|
+
"""Synchronous completion endpoint.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
prompt (str): The prompt string.
|
|
67
|
+
formatted (bool, optional): Whether the prompt is already formatted. Defaults to False.
|
|
68
|
+
**kwargs (Any): Additional keyword arguments.
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
CompletionResponse: The completion response.
|
|
72
|
+
|
|
73
|
+
Raises:
|
|
74
|
+
NotImplementedError: Always raises this exception.
|
|
75
|
+
"""
|
|
76
|
+
def stream_chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> AsyncGenerator[ChatResponse, None]:
|
|
77
|
+
"""Streaming chat endpoint.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
messages (Sequence[ChatMessage]): The chat messages.
|
|
81
|
+
**kwargs (Any): Additional keyword arguments.
|
|
82
|
+
|
|
83
|
+
Yields:
|
|
84
|
+
ChatResponse: Streaming chat responses.
|
|
85
|
+
|
|
86
|
+
Raises:
|
|
87
|
+
NotImplementedError: Always raises this exception.
|
|
88
|
+
"""
|
|
89
|
+
def stream_complete(self, prompt: str, formatted: bool = False, **kwargs: Any) -> AsyncGenerator[CompletionResponse, None]:
|
|
90
|
+
"""Streaming completion endpoint.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
prompt (str): The prompt string.
|
|
94
|
+
formatted (bool, optional): Whether the prompt is already formatted. Defaults to False.
|
|
95
|
+
**kwargs (Any): Additional keyword arguments.
|
|
96
|
+
|
|
97
|
+
Yields:
|
|
98
|
+
CompletionResponse: Streaming completion responses.
|
|
99
|
+
|
|
100
|
+
Raises:
|
|
101
|
+
NotImplementedError: Always raises this exception.
|
|
102
|
+
"""
|
|
103
|
+
async def achat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
|
|
104
|
+
"""Asynchronous chat endpoint (implements LlamaIndex LLM.achat).
|
|
105
|
+
|
|
106
|
+
Converts LlamaIndex ChatMessage objects to GLLM Message format, invokes
|
|
107
|
+
the underlying LM invoker asynchronously, and converts the response back
|
|
108
|
+
to ChatResponse.
|
|
109
|
+
|
|
110
|
+
Args:
|
|
111
|
+
messages (Sequence[ChatMessage]): The chat messages in LlamaIndex format.
|
|
112
|
+
**kwargs (Any): Additional keyword arguments. Supports:
|
|
113
|
+
- hyperparameters (dict, optional): Model hyperparameters like
|
|
114
|
+
temperature, max_tokens, etc.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
ChatResponse: The chat response in LlamaIndex format with message content,
|
|
118
|
+
role, and optional metadata (token usage, finish details).
|
|
119
|
+
"""
|
|
120
|
+
async def acomplete(self, prompt: str, formatted: bool = False, **kwargs: Any) -> CompletionResponse:
|
|
121
|
+
"""Asynchronous completion endpoint.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
prompt (str): The prompt string.
|
|
125
|
+
formatted (bool, optional): Whether the prompt is already formatted. Defaults to False.
|
|
126
|
+
**kwargs (Any): Additional keyword arguments.
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
CompletionResponse: The completion response.
|
|
130
|
+
|
|
131
|
+
Raises:
|
|
132
|
+
NotImplementedError: Always raises this exception.
|
|
133
|
+
"""
|
|
134
|
+
def astream_chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> AsyncGenerator[ChatResponse, None]:
|
|
135
|
+
"""Asynchronous streaming chat endpoint.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
messages (Sequence[ChatMessage]): The chat messages.
|
|
139
|
+
**kwargs (Any): Additional keyword arguments.
|
|
140
|
+
|
|
141
|
+
Yields:
|
|
142
|
+
ChatResponse: Streaming chat responses.
|
|
143
|
+
|
|
144
|
+
Raises:
|
|
145
|
+
NotImplementedError: Always raises this exception.
|
|
146
|
+
"""
|
|
147
|
+
def astream_complete(self, prompt: str, formatted: bool = False, **kwargs: Any) -> AsyncGenerator[CompletionResponse, None]:
|
|
148
|
+
"""Asynchronous streaming completion endpoint.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
prompt (str): The prompt string.
|
|
152
|
+
formatted (bool, optional): Whether the prompt is already formatted. Defaults to False.
|
|
153
|
+
**kwargs (Any): Additional keyword arguments.
|
|
154
|
+
|
|
155
|
+
Yields:
|
|
156
|
+
CompletionResponse: Streaming completion responses.
|
|
157
|
+
|
|
158
|
+
Raises:
|
|
159
|
+
NotImplementedError: Always raises this exception.
|
|
160
|
+
"""
|
|
161
|
+
@classmethod
|
|
162
|
+
def class_name(cls) -> str:
|
|
163
|
+
'''Get the class name (implements LLM.class_name).
|
|
164
|
+
|
|
165
|
+
This is used by LlamaIndex for serialization and debugging.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
str: The class name "LlamaIndexLMInvokerAdapter".
|
|
169
|
+
'''
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
from gllm_datastore.sql_data_store.sqlalchemy_sql_data_store import SQLAlchemySQLDataStore as SQLAlchemySQLDataStore
|
|
2
|
+
from gllm_datastore.sql_data_store.types import QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
3
|
+
|
|
4
|
+
__all__ = ['SQLAlchemySQLDataStore', 'QueryFilter', 'QueryOptions']
|
|
File without changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from sqlalchemy.engine import Engine
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
class SQLAlchemyAdapter:
    """Sets up a SQLAlchemy engine and session for database access.

    Exposes a scoped session and a base query property used to interact with
    the database.

    Attributes:
        engine (Engine): The SQLAlchemy engine object.
        db (Session): The SQLAlchemy session object.
        base (DeclarativeMeta): The SQLAlchemy declarative base object.
    """
    engine: Incomplete
    db: Incomplete
    base: Incomplete
    @classmethod
    def initialize(cls, engine_or_url: Engine | str, pool_size: int = 10, max_overflow: int = 10, autocommit: bool = False, autoflush: bool = True, **kwargs: Any):
        """Create a fresh database engine and session.

        Either an existing engine or a database URL must be supplied. When a
        URL is given, the engine is built with the requested pooling
        configuration:

        1. SQLite URLs honor only the pool size, because the resulting engine
           uses SingletonThreadPool, which does not support max_overflow.
        2. Every other backend honors both the pool size and the max overflow.

        Args:
            engine_or_url (Engine | str): A SQLAlchemy engine object or a database URL.
            pool_size (int, optional): Number of database connections to keep open. Defaults to 10.
            max_overflow (int, optional): Maximum overflow size of the connection pool. Defaults to 10.
                Ignored when engine_or_url is a SQLite URL.
            autocommit (bool, optional): If True, every change is committed to the database immediately.
                Defaults to False.
            autoflush (bool, optional): If True, every change is flushed to the database immediately.
                Defaults to True.
            **kwargs (Any): Extra keyword arguments forwarded to SQLAlchemy's create_engine function.
                Only used when engine_or_url is a string URL.
        """
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from gllm_datastore.sql_data_store.types import QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
class BaseSQLDataStore(ABC):
    """Abstract base class for SQL data stores.

    Declares the interface that every SQL data store implementation must
    provide; concrete subclasses implement each abstract method below.
    """
    @abstractmethod
    async def query(self, query: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
        """Run a raw SQL query.

        Subclasses must implement this to execute a raw SQL statement.
        Intended for raw queries, complex queries, or queries generated by an
        LLM that the structured CRUD helpers cannot express.

        Args:
            query (str): The SQL statement to run.
            params (dict[str, Any] | None, optional): Bind parameters for the statement. Defaults to None.

        Returns:
            pd.DataFrame: The query results as a DataFrame.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def create(self, **kwargs: Any) -> None:
        """Create data from the information supplied in kwargs.

        Subclasses must implement this to insert data into the data store.

        Args:
            **kwargs (Any): A dictionary of information used to create data.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def read(self, filters: QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> pd.DataFrame:
        """Read data from the data store, optionally filtered and configured.

        Subclasses must implement this to fetch data from the data store.
        Intended for simple queries expressed through filters and options.

        Args:
            filters (QueryFilter | None, optional): Filters applied to the query. Defaults to None.
            options (QueryOptions | None, optional): Options applied to the query. Defaults to None.
            **kwargs (Any): A dictionary of extra information supporting the read.

        Returns:
            pd.DataFrame: The query results as a DataFrame.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def update(self, update_values: dict[str, Any], filters: QueryFilter | None = None, **kwargs: Any) -> None:
        """Update data in the data store with the given values and filters.

        Subclasses must implement this to modify data in the data store.

        Args:
            update_values (dict[str, Any]): Values written to the data store.
            filters (QueryFilter | None, optional): Filters applied to the query. Defaults to None.
            **kwargs (Any): A dictionary of extra information supporting the update.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def delete(self, filters: QueryFilter | None = None, allow_delete_all: bool = False, **kwargs: Any) -> None:
        """Delete data from the data store matching the given filters.

        Subclasses must implement this to remove data from the data store.

        Args:
            filters (QueryFilter | None, optional): Filters applied to the query. Defaults to None.
            allow_delete_all (bool, optional): Whether deleting every row is permitted. Defaults to False.
            **kwargs (Any): A dictionary of extra information supporting the delete.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
|