scruby 0.17.0__py3-none-any.whl → 0.27.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scruby/__init__.py +17 -9
- scruby/aggregation.py +4 -0
- scruby/db.py +100 -643
- scruby/errors.py +8 -2
- scruby/mixins/__init__.py +25 -0
- scruby/mixins/collection.py +50 -0
- scruby/mixins/count.py +62 -0
- scruby/mixins/custom_task.py +75 -0
- scruby/mixins/delete.py +96 -0
- scruby/mixins/docs.py +168 -0
- scruby/mixins/find.py +149 -0
- scruby/mixins/update.py +99 -0
- scruby/settings.py +44 -0
- {scruby-0.17.0.dist-info → scruby-0.27.2.dist-info}/METADATA +114 -96
- scruby-0.27.2.dist-info/RECORD +18 -0
- {scruby-0.17.0.dist-info → scruby-0.27.2.dist-info}/WHEEL +1 -1
- scruby/constants.py +0 -31
- scruby-0.17.0.dist-info/RECORD +0 -10
- {scruby-0.17.0.dist-info → scruby-0.27.2.dist-info}/licenses/LICENSE +0 -0
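
The headline change in this release is the decomposition of scruby/db.py (+100 -643 per the summary above): key, find, count, delete, update, and custom-task logic moved into the new scruby/mixins/ package, and module-level configuration moved from the deleted scruby/constants.py into the new scruby/settings.py. A collection handle is now obtained through the asynchronous Scruby.collection() classmethod added in the diff below, rather than by constructing Scruby(class_model=...) directly. A minimal sketch of the new entry point, grounded in this diff; the User model is illustrative, and the import path is an assumption (scruby/__init__.py also changed and may re-export Scruby):

    import asyncio

    from pydantic import BaseModel

    from scruby.db import Scruby  # assumed import path; the package root may re-export it


    class User(BaseModel):
        """Illustrative model; collection() asserts a direct pydantic.BaseModel base."""

        name: str
        age: int


    async def main() -> None:
        # 0.27.x: async factory replaces 0.17.0's Scruby(class_model=User).
        users = await Scruby.collection(User)
        # Collection metadata (branch 0) is created on first access.
        meta = await users.get_meta()
        print(meta.collection_name, meta.counter_documents)  # -> User 0


    asyncio.run(main())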
scruby/db.py
CHANGED
@@ -1,97 +1,115 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Creation and management of the database."""

 from __future__ import annotations

 __all__ = ("Scruby",)

-import concurrent.futures
 import contextlib
 import logging
+import re
 import zlib
-from collections.abc import Callable
-from pathlib import Path as SyncPath
 from shutil import rmtree
-from typing import Any, Literal, Never, TypeVar, assert_never
+from typing import Any, Literal, Never, assert_never

-import orjson
-from anyio import Path, to_thread
+from anyio import Path
 from pydantic import BaseModel

-from scruby import constants
-from scruby.errors import (
-    KeyAlreadyExistsError,
-    KeyNotExistsError,
-)
-
-logger = logging.getLogger(__name__)
-
-T = TypeVar("T")
+from scruby import mixins, settings


 class _Meta(BaseModel):
     """Metadata of Collection."""

+    db_root: str
+    collection_name: str
+    hash_reduce_left: int
+    max_branch_number: int
     counter_documents: int


-class Scruby[T]:
-
-
-
-
-
+class Scruby(
+    mixins.Keys,
+    mixins.Find,
+    mixins.CustomTask,
+    mixins.Collection,
+    mixins.Count,
+    mixins.Delete,
+    mixins.Update,
+):
+    """Creation and management of database."""

     def __init__(  # noqa: D107
         self,
-        class_model: T,
     ) -> None:
-
-        self.
-        self.
-        self.
+        super().__init__()
+        self._meta = _Meta
+        self._db_root = settings.DB_ROOT
+        self._hash_reduce_left = settings.HASH_REDUCE_LEFT
+        self._max_workers = settings.MAX_WORKERS
         # The maximum number of branches.
-        match self.__hash_reduce_left:
+        match self._hash_reduce_left:
             case 0:
-                self.__max_branch_number = 4294967296
+                self._max_branch_number = 4294967296
             case 2:
-                self.__max_branch_number = 16777216
+                self._max_branch_number = 16777216
             case 4:
-                self.__max_branch_number = 65536
+                self._max_branch_number = 65536
             case 6:
-                self.__max_branch_number = 256
+                self._max_branch_number = 256
             case _ as unreachable:
                 msg: str = f"{unreachable} - Unacceptable value for HASH_REDUCE_LEFT."
-
-                assert_never(Never(unreachable))
-
+                logging.critical(msg)
+                assert_never(Never(unreachable))  # pyrefly: ignore[not-callable]
+
+    @classmethod
+    async def collection(cls, class_model: Any) -> Any:
+        """Get an object to access a collection.
+
+        Args:
+            class_model: Class of Model (pydantic.BaseModel).
+
+        Returns:
+            Instance of Scruby for access a collection.
+        """
+        assert BaseModel in class_model.__bases__, "`class_model` does not contain the base class `pydantic.BaseModel`!"
+
+        instance = cls()
+        instance.__dict__["_class_model"] = class_model
+        # Caching a path for metadata.
         # The zero branch is reserved for metadata.
         branch_number: int = 0
-        branch_number_as_hash: str = f"{branch_number:08x}"[
+        branch_number_as_hash: str = f"{branch_number:08x}"[settings.HASH_REDUCE_LEFT :]
         separated_hash: str = "/".join(list(branch_number_as_hash))
-
-
+        meta_dir_path_tuple = (
+            settings.DB_ROOT,
             class_model.__name__,
             separated_hash,
-            "meta.json",
         )
-
-
-
-            self.__db_root,
-            self.__class_model.__name__,
-            separated_hash,
-        ),
+        instance.__dict__["_meta_path"] = Path(
+            *meta_dir_path_tuple,
+            "meta.json",
        )
-        if
-
+        # Create metadata for collection, if missing.
+        branch_path = Path(*meta_dir_path_tuple)
+        if not await branch_path.exists():
+            await branch_path.mkdir(parents=True)
             meta = _Meta(
+                db_root=settings.DB_ROOT,
+                collection_name=class_model.__name__,
+                hash_reduce_left=settings.HASH_REDUCE_LEFT,
+                max_branch_number=instance.__dict__["_max_branch_number"],
                 counter_documents=0,
             )
             meta_json = meta.model_dump_json()
-            meta_path =
-            meta_path.write_text(meta_json, "utf-8")
+            meta_path = Path(*(branch_path, "meta.json"))
+            await meta_path.write_text(meta_json, "utf-8")
+        return instance

-    async def _get_meta(self) -> _Meta:
+    async def get_meta(self) -> _Meta:
         """Asynchronous method for getting metadata of collection.

         This method is for internal use.
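
A note on the match table in the hunk above: keys hash to eight hex digits (crc32), HASH_REDUCE_LEFT digits are cut from the left, and each remaining digit becomes one directory level, so the number of addressable branches is 16 ** (8 - HASH_REDUCE_LEFT). The four arms are exactly that function:

    >>> [(n, 16 ** (8 - n)) for n in (0, 2, 4, 6)]
    [(0, 4294967296), (2, 16777216), (4, 65536), (6, 256)]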
@@ -99,9 +117,8 @@ class Scruby[T]:
         Returns:
             Metadata object.
         """
-
-
-        meta: _Meta = self.__meta.model_validate_json(meta_json)
+        meta_json = await self._meta_path.read_text()
+        meta: _Meta = self._meta.model_validate_json(meta_json)
         return meta

     async def _set_meta(self, meta: _Meta) -> None:
@@ -109,66 +126,66 @@ class Scruby[T]:

         This method is for internal use.

+        Args:
+            meta (_Meta): Metadata of Collection.
+
         Returns:
             None.
         """
         meta_json = meta.model_dump_json()
-
-        await meta_path.write_text(meta_json, "utf-8")
+        await self._meta_path.write_text(meta_json, "utf-8")

     async def _counter_documents(self, step: Literal[1, -1]) -> None:
         """Asynchronous method for management of documents in metadata of collection.

         This method is for internal use.

+        Args:
+            step (Literal[1, -1]): Number of documents added or removed.
+
         Returns:
             None.
         """
-        meta_path =
+        meta_path = self._meta_path
         meta_json = await meta_path.read_text("utf-8")
-        meta: _Meta = self.__meta.model_validate_json(meta_json)
+        meta: _Meta = self._meta.model_validate_json(meta_json)
         meta.counter_documents += step
         meta_json = meta.model_dump_json()
         await meta_path.write_text(meta_json, "utf-8")

-    def _sync_counter_documents(self, number: int) -> None:
-        """Management of documents in metadata of collection.
-
-        This method is for internal use.
-        """
-        meta_path = SyncPath(*self.__meta_path_tuple)
-        meta_json = meta_path.read_text("utf-8")
-        meta: _Meta = self.__meta.model_validate_json(meta_json)
-        meta.counter_documents += number
-        meta_json = meta.model_dump_json()
-        meta_path.write_text(meta_json, "utf-8")
-
-    async def _get_leaf_path(self, key: str) -> Path:
+    async def _get_leaf_path(self, key: str) -> tuple[Path, str]:
         """Asynchronous method for getting path to collection cell by key.

         This method is for internal use.

         Args:
-            key: Key name.
+            key (str): Key name.

         Returns:
             Path to cell of collection.
         """
         if not isinstance(key, str):
-
-
-
-
-
+            msg = "The key is not a string."
+            logging.error(msg)
+            raise KeyError(msg)
+        # Prepare key.
+        # Removes spaces at the beginning and end of a string.
+        # Replaces all whitespace characters with a single space.
+        prepared_key = re.sub(r"\s+", " ", key).strip().lower()
+        # Check the key for an empty string.
+        if len(prepared_key) == 0:
+            msg = "The key should not be empty."
+            logging.error(msg)
+            raise KeyError(msg)
         # Key to crc32 sum.
-        key_as_hash: str = f"{zlib.crc32(
+        key_as_hash: str = f"{zlib.crc32(prepared_key.encode('utf-8')):08x}"[self._hash_reduce_left :]
         # Convert crc32 sum in the segment of path.
         separated_hash: str = "/".join(list(key_as_hash))
         # The path of the branch to the database.
         branch_path: Path = Path(
             *(
-                self.__db_root,
-                self.__class_model.__name__,
+                self._db_root,
+                self._class_model.__name__,
                 separated_hash,
             ),
         )
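
New in _get_leaf_path: the key is normalized before hashing (runs of whitespace collapsed, ends stripped, lowercased), so differently spaced or cased spellings of a key now address the same cell. A standalone sketch of the derivation; "ScrubyDB" and 2 are illustrative stand-ins for settings.DB_ROOT and settings.HASH_REDUCE_LEFT, whose real defaults live in the new scruby/settings.py and are not shown in this diff:

    import re
    import zlib


    def leaf_dir(key: str, db_root: str, collection: str, hash_reduce_left: int) -> str:
        """Mirror the branch-path derivation from _get_leaf_path (sketch only)."""
        prepared_key = re.sub(r"\s+", " ", key).strip().lower()
        key_as_hash = f"{zlib.crc32(prepared_key.encode('utf-8')):08x}"[hash_reduce_left:]
        # One directory level per remaining hex digit; the cell file is <dir>/leaf.json.
        return "/".join([db_root, collection, *key_as_hash])


    # Both spellings normalize to "john smith" and land in the same cell:
    assert leaf_dir("  John   Smith ", "ScrubyDB", "User", 2) == leaf_dir("john smith", "ScrubyDB", "User", 2)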
@@ -177,147 +194,11 @@ class Scruby[T]:
             await branch_path.mkdir(parents=True)
         # The path to the database cell.
         leaf_path: Path = Path(*(branch_path, "leaf.json"))
-        return leaf_path
-
-    async def add_key(
-        self,
-        key: str,
-        value: T,
-    ) -> None:
-        """Asynchronous method for adding key to collection.
-
-        Args:
-            key: Key name. Type `str`.
-            value: Value of key. Type `BaseModel`.
-
-        Returns:
-            None.
-        """
-        # The path to cell of collection.
-        leaf_path: Path = await self._get_leaf_path(key)
-        value_json: str = value.model_dump_json()
-        # Write key-value to collection.
-        if await leaf_path.exists():
-            # Add new key.
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            try:
-                data[key]
-            except KeyError:
-                data[key] = value_json
-                await leaf_path.write_bytes(orjson.dumps(data))
-            else:
-                err = KeyAlreadyExistsError()
-                logger.error(err.message)
-                raise err
-        else:
-            # Add new key to a blank leaf.
-            await leaf_path.write_bytes(orjson.dumps({key: value_json}))
-        await self._counter_documents(1)
-
-    async def update_key(
-        self,
-        key: str,
-        value: T,
-    ) -> None:
-        """Asynchronous method for updating key to collection.
-
-        Args:
-            key: Key name. Type `str`.
-            value: Value of key. Type `BaseModel`.
-
-        Returns:
-            None.
-        """
-        # The path to cell of collection.
-        leaf_path: Path = await self._get_leaf_path(key)
-        value_json: str = value.model_dump_json()
-        # Update the existing key.
-        if await leaf_path.exists():
-            # Update the existing key.
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            try:
-                data[key]
-                data[key] = value_json
-                await leaf_path.write_bytes(orjson.dumps(data))
-            except KeyError:
-                err = KeyNotExistsError()
-                logger.error(err.message)
-                raise err from None
-        else:
-            logger.error("The key not exists.")
-            raise KeyError()
-
-    async def get_key(self, key: str) -> T:
-        """Asynchronous method for getting value of key from collection.
-
-        Args:
-            key: Key name.
-
-        Returns:
-            Value of key or KeyError.
-        """
-        # The path to the database cell.
-        leaf_path: Path = await self._get_leaf_path(key)
-        # Get value of key.
-        if await leaf_path.exists():
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            obj: T = self.__class_model.model_validate_json(data[key])
-            return obj
-        msg: str = "`get_key` - The unacceptable key value."
-        logger.error(msg)
-        raise KeyError()
-
-    async def has_key(self, key: str) -> bool:
-        """Asynchronous method for checking presence of key in collection.
-
-        Args:
-            key: Key name.
-
-        Returns:
-            True, if the key is present.
-        """
-        # Get path to cell of collection.
-        leaf_path: Path = await self._get_leaf_path(key)
-        # Checking whether there is a key.
-        if await leaf_path.exists():
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            try:
-                data[key]
-                return True
-            except KeyError:
-                return False
-        return False
-
-    async def delete_key(self, key: str) -> None:
-        """Asynchronous method for deleting key from collection.
-
-        Args:
-            key: Key name.
-
-        Returns:
-            None.
-        """
-        # The path to the database cell.
-        leaf_path: Path = await self._get_leaf_path(key)
-        # Deleting key.
-        if await leaf_path.exists():
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            del data[key]
-            await leaf_path.write_bytes(orjson.dumps(data))
-            await self._counter_documents(-1)
-            return
-        msg: str = "`delete_key` - The unacceptable key value."
-        logger.error(msg)
-        raise KeyError()
+        return (leaf_path, prepared_key)

     @staticmethod
-
-        """
+    def napalm() -> None:
+        """Method for full database deletion.

         The main purpose is tests.

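
Two things happen in the hunk above: _get_leaf_path now returns the normalized key alongside the path (tuple[Path, str] instead of Path), and the per-key CRUD methods (add_key, update_key, get_key, has_key, delete_key) leave db.py; the new mixins.Keys base suggests where they went, though the mixin bodies are not part of this diff. A hypothetical caller under the new contract:

    # Inside a mixin method (sketch): unpack both parts and store under the
    # normalized key, so "  John   Smith " and "john smith" hit the same record.
    leaf_path, prepared_key = await self._get_leaf_path("  John   Smith ")
    # prepared_key == "john smith"; the cell file is leaf_path (.../leaf.json)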
@@ -328,429 +209,5 @@ class Scruby[T]:
             None.
         """
         with contextlib.suppress(FileNotFoundError):
-
+            rmtree(settings.DB_ROOT)
         return
-
-    @staticmethod
-    def _task_find(
-        branch_number: int,
-        filter_fn: Callable,
-        hash_reduce_left: str,
-        db_root: str,
-        class_model: T,
-    ) -> list[T] | None:
-        """Task for find documents.
-
-        This method is for internal use.
-
-        Returns:
-            List of documents or None.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        docs: list[T] = []
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            for _, val in data.items():
-                doc = class_model.model_validate_json(val)
-                if filter_fn(doc):
-                    docs.append(doc)
-        return docs or None
-
-    def find_one(
-        self,
-        filter_fn: Callable,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> T | None:
-        """Finds a single document matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            Document or None.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_find
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                docs = future.result(timeout)
-                if docs is not None:
-                    return docs[0]
-        return None
-
-    def find_many(
-        self,
-        filter_fn: Callable,
-        limit_docs: int = 1000,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> list[T] | None:
-        """Finds one or more documents matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            limit_docs: Limiting the number of documents. By default = 1000.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            List of documents or None.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_find
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        result: list[T] = []
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                if counter >= limit_docs:
-                    return result[:limit_docs]
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                docs = future.result(timeout)
-                if docs is not None:
-                    for doc in docs:
-                        if counter >= limit_docs:
-                            return result[:limit_docs]
-                        result.append(doc)
-                        counter += 1
-        return result or None
-
-    def collection_name(self) -> str:
-        """Get collection name.
-
-        Returns:
-            Collection name.
-        """
-        return self.__class_model.__name__
-
-    def collection_full_name(self) -> str:
-        """Get full name of collection.
-
-        Returns:
-            Full name of collection.
-        """
-        return f"{self.__db_root}/{self.__class_model.__name__}"
-
-    async def estimated_document_count(self) -> int:
-        """Get an estimate of the number of documents in this collection using collection metadata.
-
-        Returns:
-            The number of documents.
-        """
-        meta = await self._get_meta()
-        return meta.counter_documents
-
-    def count_documents(
-        self,
-        filter_fn: Callable,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> int:
-        """Count the number of documents a matching the filter in this collection.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            The number of documents.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_find
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                if future.result(timeout) is not None:
-                    counter += 1
-        return counter
-
-    @staticmethod
-    def _task_delete(
-        branch_number: int,
-        filter_fn: Callable,
-        hash_reduce_left: int,
-        db_root: str,
-        class_model: T,
-    ) -> int:
-        """Task for find and delete documents.
-
-        This method is for internal use.
-
-        Returns:
-            The number of deleted documents.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        counter: int = 0
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            new_state: dict[str, str] = {}
-            for key, val in data.items():
-                doc = class_model.model_validate_json(val)
-                if filter_fn(doc):
-                    counter -= 1
-                else:
-                    new_state[key] = val
-            leaf_path.write_bytes(orjson.dumps(new_state))
-        return counter
-
-    def delete_many(
-        self,
-        filter_fn: Callable,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> int:
-        """Delete one or more documents matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            The number of deleted documents.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_delete
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                counter += future.result(timeout)
-        if counter < 0:
-            self._sync_counter_documents(counter)
-        return abs(counter)
-
-    @staticmethod
-    def _task_get_docs(
-        branch_number: int,
-        hash_reduce_left: int,
-        db_root: str,
-        class_model: T,
-    ) -> list[Any]:
-        """Get documents for custom task.
-
-        This method is for internal use.
-
-        Returns:
-            List of documents.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        docs: list[str, T] = []
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            for _, val in data.items():
-                docs.append(class_model.model_validate_json(val))
-        return docs
-
-    def run_custom_task(self, custom_task_fn: Callable, limit_docs: int = 1000) -> Any:
-        """Running custom task.
-
-        This method running a task created on the basis of a quantum loop.
-        Effectiveness running task depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            custom_task_fn: A function that execute the custom task.
-            limit_docs: Limiting the number of documents. By default = 1000.
-
-        Returns:
-            The result of a custom task.
-        """
-        kwargs = {
-            "get_docs_fn": self._task_get_docs,
-            "branch_numbers": range(1, self.__max_branch_number),
-            "hash_reduce_left": self.__hash_reduce_left,
-            "db_root": self.__db_root,
-            "class_model": self.__class_model,
-            "limit_docs": limit_docs,
-        }
-        return custom_task_fn(**kwargs)
-
-    @staticmethod
-    def _task_update(
-        branch_number: int,
-        filter_fn: Callable,
-        hash_reduce_left: str,
-        db_root: str,
-        class_model: T,
-        new_data: dict[str, Any],
-    ) -> int:
-        """Task for find documents.
-
-        This method is for internal use.
-
-        Returns:
-            The number of updated documents.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        counter: int = 0
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            new_state: dict[str, str] = {}
-            for _, val in data.items():
-                doc = class_model.model_validate_json(val)
-                if filter_fn(doc):
-                    for key, value in new_data.items():
-                        doc.__dict__[key] = value
-                    new_state[key] = doc.model_dump_json()
-                    counter += 1
-            leaf_path.write_bytes(orjson.dumps(new_state))
-        return counter
-
-    def update_many(
-        self,
-        filter_fn: Callable,
-        new_data: dict[str, Any],
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> int:
-        """Updates one or more documents matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            new_data: New data for the fields that need to be updated.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            The number of updated documents.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        update_task_fn: Callable = self._task_update
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    update_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                    new_data,
-                )
-                counter += future.result(timeout)
-        return counter