real-ladybug 0.13.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- real_ladybug/__init__.py +83 -0
- real_ladybug/_lbug.cpython-311-darwin.so +0 -0
- real_ladybug/async_connection.py +226 -0
- real_ladybug/connection.py +323 -0
- real_ladybug/constants.py +7 -0
- real_ladybug/database.py +307 -0
- real_ladybug/prepared_statement.py +51 -0
- real_ladybug/py.typed +0 -0
- real_ladybug/query_result.py +511 -0
- real_ladybug/torch_geometric_feature_store.py +185 -0
- real_ladybug/torch_geometric_graph_store.py +131 -0
- real_ladybug/torch_geometric_result_converter.py +282 -0
- real_ladybug/types.py +39 -0
- real_ladybug-0.13.0.dist-info/METADATA +98 -0
- real_ladybug-0.13.0.dist-info/RECORD +19 -0
- real_ladybug-0.13.0.dist-info/WHEEL +6 -0
- real_ladybug-0.13.0.dist-info/licenses/LICENSE +21 -0
- real_ladybug-0.13.0.dist-info/top_level.txt +1 -0
- real_ladybug-0.13.0.dist-info/zip-safe +1 -0
real_ladybug/database.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
from . import _lbug
|
|
7
|
+
from .types import Type
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
import sys
|
|
11
|
+
from types import TracebackType
|
|
12
|
+
|
|
13
|
+
from numpy.typing import NDArray
|
|
14
|
+
from torch_geometric.data.feature_store import IndexType
|
|
15
|
+
|
|
16
|
+
from .torch_geometric_feature_store import LbugFeatureStore
|
|
17
|
+
from .torch_geometric_graph_store import LbugGraphStore
|
|
18
|
+
|
|
19
|
+
if sys.version_info >= (3, 11):
|
|
20
|
+
from typing import Self
|
|
21
|
+
else:
|
|
22
|
+
from typing_extensions import Self
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Database:
    """
    Lbug database instance.

    Thin Python wrapper around the native ``_lbug.Database`` handle. The
    wrapper stores the configuration it was created with so that the native
    handle can be created lazily (see `init_database`) and so that the object
    can be pickled without the native handle.
    """

    def __init__(
        self,
        database_path: str | Path | None = None,
        *,
        buffer_pool_size: int = 0,
        max_num_threads: int = 0,
        compression: bool = True,
        lazy_init: bool = False,
        read_only: bool = False,
        max_db_size: int = (1 << 43),
        auto_checkpoint: bool = True,
        checkpoint_threshold: int = -1,
        throw_on_wal_replay_failure: bool = True,
        enable_checksums: bool = True,
    ):
        """
        Parameters
        ----------
        database_path : str, Path
            The path to database files. If the path is not specified, or empty, or equal to `:memory:`, the database
            will be created in memory.

        buffer_pool_size : int
            The maximum size of buffer pool in bytes. Defaults to ~80% of system memory.

        max_num_threads : int
            The maximum number of threads to use for executing queries.

        compression : bool
            Enable database compression.

        lazy_init : bool
            If True, the database will not be initialized until the first query.
            This is useful when the database is not used in the main thread or
            when the main process is forked.
            Defaults to False.

        read_only : bool
            If true, the database is opened read-only. No write transactions
            are allowed on the `Database` object. Multiple read-only `Database`
            objects can be created with the same database path. However, there
            cannot be multiple `Database` objects created with the same
            database path unless all of them are read-only.
            Defaults to False.

        max_db_size : int
            The maximum size of the database in bytes. Note that this is introduced
            temporarily for now to get around the default 8TB mmap address
            space limit in some environments. This will be removed once we implement
            a better solution later. The value defaults to 1 << 43 (8TB) under a 64-bit
            environment and 1GB under a 32-bit one.

        auto_checkpoint: bool
            If true, the database will automatically checkpoint when the size of
            the WAL file exceeds the checkpoint threshold.

        checkpoint_threshold: int
            The threshold of the WAL file size in bytes. When the size of the
            WAL file exceeds this threshold, the database will checkpoint if
            `auto_checkpoint` is true.

        throw_on_wal_replay_failure: bool
            If true, any WAL replaying failure when loading the database will throw an error.
            Otherwise, Lbug will silently ignore the failure and replay up to where the error
            occurred.

        enable_checksums: bool
            If true, the database will use checksums to detect corruption in the
            WAL file.

        """
        if database_path is None:
            database_path = ":memory:"
        if isinstance(database_path, Path):
            # The native constructor is handed a plain string path.
            database_path = str(database_path)

        self.database_path = database_path
        self.buffer_pool_size = buffer_pool_size
        self.max_num_threads = max_num_threads
        self.compression = compression
        self.read_only = read_only
        self.max_db_size = max_db_size
        self.auto_checkpoint = auto_checkpoint
        self.checkpoint_threshold = checkpoint_threshold
        self.throw_on_wal_replay_failure = throw_on_wal_replay_failure
        self.enable_checksums = enable_checksums
        self.is_closed = False

        self._database: Any = None  # (type: _lbug.Database from pybind11)
        if not lazy_init:
            self.init_database()

    def __enter__(self) -> Self:
        """Enter a ``with`` block; returns this database unchanged."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        exc_traceback: TracebackType | None,
    ) -> None:
        """Close the database when the ``with`` block exits; exceptions are not suppressed."""
        self.close()

    @staticmethod
    def get_version() -> str:
        """
        Get the version of the database.

        Returns
        -------
        str
            The version of the database.
        """
        return _lbug.Database.get_version()  # type: ignore[union-attr]

    @staticmethod
    def get_storage_version() -> int:
        """
        Get the storage version of the database.

        Returns
        -------
        int
            The storage version of the database.
        """
        return _lbug.Database.get_storage_version()  # type: ignore[union-attr]

    def __getstate__(self) -> dict[str, Any]:
        """
        Build the picklable state of this object.

        The native handle is never pickled (``_database`` is stored as None),
        so a restored copy re-creates it on demand through `init_database`.
        Every attribute that `init_database` and `check_for_database_close`
        read is included; without them a restored copy would fail with
        ``AttributeError`` on first use.

        Returns
        -------
        dict[str, Any]
            The configuration attributes, the closed flag and an empty
            ``_database`` slot.
        """
        return {
            "database_path": self.database_path,
            "buffer_pool_size": self.buffer_pool_size,
            "max_num_threads": self.max_num_threads,
            "compression": self.compression,
            "read_only": self.read_only,
            "max_db_size": self.max_db_size,
            "auto_checkpoint": self.auto_checkpoint,
            "checkpoint_threshold": self.checkpoint_threshold,
            "throw_on_wal_replay_failure": self.throw_on_wal_replay_failure,
            "enable_checksums": self.enable_checksums,
            "is_closed": self.is_closed,
            "_database": None,
        }

    def init_database(self) -> None:
        """
        Initialize the database.

        Creates the native handle from the stored configuration if it does not
        exist yet; calling this again afterwards is a no-op.

        Raises
        ------
        RuntimeError
            If the database has been closed.
        """
        self.check_for_database_close()
        if self._database is None:
            # Arguments are positional; the order must match the native constructor.
            self._database = _lbug.Database(  # type: ignore[union-attr]
                self.database_path,
                self.buffer_pool_size,
                self.max_num_threads,
                self.compression,
                self.read_only,
                self.max_db_size,
                self.auto_checkpoint,
                self.checkpoint_threshold,
                self.throw_on_wal_replay_failure,
                self.enable_checksums,
            )

    def get_torch_geometric_remote_backend(
        self, num_threads: int | None = None
    ) -> tuple[LbugFeatureStore, LbugGraphStore]:
        """
        Use the database as the remote backend for torch_geometric.

        For the interface of the remote backend, please refer to
        https://pytorch-geometric.readthedocs.io/en/latest/advanced/remote.html.
        The current implementation is read-only and does not support edge
        features. The IDs of the nodes are based on the internal IDs (i.e., node
        offsets). For the remote node IDs to be consistent with the positions in
        the output tensors, please ensure that no deletion has been performed
        on the node tables.

        The remote backend can also be plugged into the data loader of
        torch_geometric, which is useful for mini-batch training. For example:

        ```python
            loader_lbug = NeighborLoader(
                data=(feature_store, graph_store),
                num_neighbors={('paper', 'cites', 'paper'): [12, 12, 12]},
                batch_size=LOADER_BATCH_SIZE,
                input_nodes=('paper', input_nodes),
                num_workers=4,
                filter_per_worker=False,
            )
        ```

        Please note that the database instance is not fork-safe, so if more than
        one worker is used, `filter_per_worker` must be set to False.

        Parameters
        ----------
        num_threads : int
            Number of threads to use for data loading. Default to None, which
            means using the number of CPU cores.

        Returns
        -------
        feature_store : LbugFeatureStore
            Feature store compatible with torch_geometric.
        graph_store : LbugGraphStore
            Graph store compatible with torch_geometric.

        Raises
        ------
        RuntimeError
            If the database has been closed.
        """
        self.check_for_database_close()
        # Imported lazily so that torch_geometric is only required when this
        # method is actually used.
        from .torch_geometric_feature_store import LbugFeatureStore
        from .torch_geometric_graph_store import LbugGraphStore

        return (
            LbugFeatureStore(self, num_threads),
            LbugGraphStore(self, num_threads),
        )

    def _scan_node_table(
        self,
        table_name: str,
        prop_name: str,
        prop_type: str,
        dim: int,
        indices: IndexType,
        num_threads: int,
    ) -> NDArray[Any]:
        """
        Scan a node table from storage directly, bypassing query engine.

        Used internally by torch_geometric remote backend only.

        Parameters
        ----------
        table_name : str
            Name of the node table to scan.
        prop_name : str
            Name of the property to read.
        prop_type : str
            Type name of the property; must equal the value of one of
            `Type.INT64`, `Type.INT32`, `Type.INT16`, `Type.DOUBLE` or
            `Type.FLOAT`.
        dim : int
            Number of values per node; the output buffer holds
            ``len(indices) * dim`` elements.
        indices : IndexType
            Node offsets to scan; converted to a ``uint64`` array.
        num_threads : int
            Number of threads passed to the native scan.

        Returns
        -------
        NDArray[Any]
            Flat array filled by the native scan, with a dtype matching
            `prop_type`.

        Raises
        ------
        RuntimeError
            If the database has been closed.
        ValueError
            If `prop_type` is not one of the supported types.
        """
        self.check_for_database_close()
        import numpy as np

        self.init_database()
        indices_cast = np.array(indices, dtype=np.uint64)

        # Supported property type -> (output dtype, native scan method name).
        scanners = {
            Type.INT64.value: (np.int64, "scan_node_table_as_int64"),
            Type.INT32.value: (np.int32, "scan_node_table_as_int32"),
            Type.INT16.value: (np.int16, "scan_node_table_as_int16"),
            Type.DOUBLE.value: (np.float64, "scan_node_table_as_double"),
            Type.FLOAT.value: (np.float32, "scan_node_table_as_float"),
        }
        scanner = scanners.get(prop_type)
        if scanner is None:
            msg = f"Unsupported property type: {prop_type}"
            raise ValueError(msg)

        dtype, scan_method_name = scanner
        # The native scan writes into this preallocated buffer in place.
        result = np.empty(len(indices) * dim, dtype=dtype)
        scan = getattr(self._database, scan_method_name)
        scan(table_name, prop_name, indices_cast, result, num_threads)
        return result

    def close(self) -> None:
        """
        Close the database. Once the database is closed, the lock on the database
        files is released and the database can be opened in another process.

        Note: Call to this method is not required. The Python garbage collector
        will automatically close the database when no references to the database
        object exist. It is recommended not to call this method explicitly. If you
        decide to manually close the database, make sure that all the QueryResult
        and Connection objects are closed before calling this method.

        Calling this method on an already closed database does nothing.
        """
        if self.is_closed:
            return
        self.is_closed = True
        if self._database is not None:
            self._database.close()
            self._database = None

    def check_for_database_close(self) -> None:
        """
        Check if the database is closed and raise an exception if it is.

        Raises
        ------
        RuntimeError
            If the database is closed.

        """
        if not self.is_closed:
            return
        msg = "Database is closed"
        raise RuntimeError(msg)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from .connection import Connection
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PreparedStatement:
    """
    Parameterized query that has been prepared once on a connection, so that
    repeated executions do not have to plan the same query again.
    """

    def __init__(self, connection: Connection, query: str, parameters: dict[str, Any] | None = None):
        """
        Prepare `query` on the given connection.

        Parameters
        ----------
        connection : Connection
            Connection to a database.
        query : str
            Query to prepare.
        parameters : dict[str, Any]
            Parameters for the query. An empty dict is used when omitted.
        """
        # A fresh dict per call stands in for an omitted argument.
        bound_parameters = {} if parameters is None else parameters
        native_connection = connection._connection
        self._prepared_statement = native_connection.prepare(query, bound_parameters)
        self._connection = connection

    def is_success(self) -> bool:
        """
        Report whether preparing the statement succeeded.

        Returns
        -------
        bool
            True if the prepared statement is successfully prepared.
        """
        prepared = self._prepared_statement
        return prepared.is_success()

    def get_error_message(self) -> str:
        """
        Fetch the error message reported for a statement that failed to prepare.

        Returns
        -------
        str
            Error message.
        """
        prepared = self._prepared_statement
        return prepared.get_error_message()
|
real_ladybug/py.typed
ADDED
|
File without changes
|