scruby 0.24.4__py3-none-any.whl → 0.28.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of scruby might be problematic.
- scruby/__init__.py +14 -9
- scruby/aggregation.py +4 -0
- scruby/db.py +44 -25
- scruby/errors.py +4 -0
- scruby/mixins/__init__.py +4 -0
- scruby/mixins/collection.py +9 -8
- scruby/mixins/count.py +11 -12
- scruby/mixins/custom_task.py +8 -10
- scruby/mixins/delete.py +12 -16
- scruby/mixins/find.py +44 -40
- scruby/mixins/keys.py +52 -50
- scruby/mixins/update.py +12 -16
- scruby/{constants.py → settings.py} +13 -2
- {scruby-0.24.4.dist-info → scruby-0.28.3.dist-info}/METADATA +86 -54
- scruby-0.28.3.dist-info/RECORD +18 -0
- scruby-0.24.4.dist-info/RECORD +0 -18
- {scruby-0.24.4.dist-info → scruby-0.28.3.dist-info}/WHEEL +0 -0
- {scruby-0.24.4.dist-info → scruby-0.28.3.dist-info}/licenses/LICENSE +0 -0
scruby/__init__.py
CHANGED
```diff
@@ -1,3 +1,12 @@
+#
+# .dP"Y8 dP""b8 88""Yb 88 88 88""Yb Yb dP
+# `Ybo." dP `" 88__dP 88 88 88__dP YbdP
+# o.`Y8b Yb 88"Yb Y8 8P 88""Yb 8P
+# 8bodP' YboodP 88 Yb `YbodP' 88oodP dP
+#
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Asynchronous library for building and managing a hybrid database, by scheme of key-value.
 
 The library uses fractal-tree addressing and
@@ -16,14 +25,10 @@ requires a large number of processor threads.
 
 from __future__ import annotations
 
-__all__ = (
-
-
+__all__ = (
+    "settings",
+    "Scruby",
+)
 
+from scruby import settings
 from scruby.db import Scruby
-
-logging.basicConfig(
-    level=logging.INFO,
-    datefmt="%Y-%m-%d %H:%M:%S",
-    format="[%(asctime)s.%(msecs)03d] %(module)10s:%(lineno)-3d %(levelname)-7s - %(message)s",
-)
```
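Since the removed `logging.basicConfig(...)` call means the library no longer configures the root logger at import time, an application now sets up its own logging before using Scruby. A minimal sketch; the format string is simply the one the old `__init__.py` used, kept here only as an illustration:

```python
# Hypothetical application-side logging setup, mirroring the format
# string scruby's __init__.py configured before 0.28.3.
import logging

logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="[%(asctime)s.%(msecs)03d] %(module)10s:%(lineno)-3d %(levelname)-7s - %(message)s",
)

logger = logging.getLogger(__name__)
logger.info("Logging is now configured by the application, not by scruby.")
```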
scruby/aggregation.py
CHANGED
scruby/db.py
CHANGED
```diff
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Creation and management of the database."""
 
 from __future__ import annotations
@@ -6,18 +10,15 @@ __all__ = ("Scruby",)
 
 import contextlib
 import logging
+import re
 import zlib
 from shutil import rmtree
-from typing import Any, Literal, Never,
+from typing import Any, Literal, Never, assert_never
 
 from anyio import Path
 from pydantic import BaseModel
 
-from scruby import
-
-logger = logging.getLogger(__name__)
-
-T = TypeVar("T")
+from scruby import mixins, settings
@@ -30,7 +31,7 @@ class _Meta(BaseModel):
     counter_documents: int
 
 
-class Scruby
+class Scruby(
     mixins.Keys,
     mixins.Find,
     mixins.CustomTask,
@@ -46,8 +47,9 @@ class Scruby[T](
     ) -> None:
         super().__init__()
         self._meta = _Meta
-        self._db_root =
-        self._hash_reduce_left =
+        self._db_root = settings.DB_ROOT
+        self._hash_reduce_left = settings.HASH_REDUCE_LEFT
+        self._max_workers = settings.MAX_WORKERS
         # The maximum number of branches.
         match self._hash_reduce_left:
             case 0:
@@ -60,29 +62,30 @@
                 self._max_branch_number = 256
             case _ as unreachable:
                 msg: str = f"{unreachable} - Unacceptable value for HASH_REDUCE_LEFT."
-
-                assert_never(Never(unreachable))
+                logging.critical(msg)
+                assert_never(Never(unreachable))  # pyrefly: ignore[not-callable]
 
     @classmethod
-    async def
+    async def collection(cls, class_model: Any) -> Any:
         """Get an object to access a collection.
 
         Args:
-            class_model: Class of Model (
+            class_model: Class of Model (pydantic.BaseModel).
 
         Returns:
             Instance of Scruby for access a collection.
         """
         assert BaseModel in class_model.__bases__, "`class_model` does not contain the base class `pydantic.BaseModel`!"
+
         instance = cls()
         instance.__dict__["_class_model"] = class_model
         # Caching a pati for metadata.
         # The zero branch is reserved for metadata.
         branch_number: int = 0
-        branch_number_as_hash: str = f"{branch_number:08x}"[
+        branch_number_as_hash: str = f"{branch_number:08x}"[settings.HASH_REDUCE_LEFT :]
         separated_hash: str = "/".join(list(branch_number_as_hash))
         meta_dir_path_tuple = (
-
+            settings.DB_ROOT,
             class_model.__name__,
             separated_hash,
         )
@@ -95,9 +98,9 @@ class Scruby[T](
         if not await branch_path.exists():
             await branch_path.mkdir(parents=True)
             meta = _Meta(
-                db_root=
+                db_root=settings.DB_ROOT,
                 collection_name=class_model.__name__,
-                hash_reduce_left=
+                hash_reduce_left=settings.HASH_REDUCE_LEFT,
                 max_branch_number=instance.__dict__["_max_branch_number"],
                 counter_documents=0,
             )
@@ -123,6 +126,9 @@ class Scruby[T](
 
         This method is for internal use.
 
+        Args:
+            meta (_Meta): Metadata of Collection.
+
         Returns:
             None.
         """
@@ -134,6 +140,9 @@ class Scruby[T](
 
         This method is for internal use.
 
+        Args:
+            step (Literal[1, -1]): Number of documents added or removed.
+
         Returns:
             None.
         """
@@ -144,22 +153,32 @@ class Scruby[T](
         meta_json = meta.model_dump_json()
         await meta_path.write_text(meta_json, "utf-8")
 
-    async def _get_leaf_path(self, key: str) -> Path:
+    async def _get_leaf_path(self, key: str) -> tuple[Path, str]:
        """Asynchronous method for getting path to collection cell by key.
 
         This method is for internal use.
 
         Args:
-            key: Key name.
+            key (str): Key name.
 
         Returns:
             Path to cell of collection.
         """
-        if
-
-
+        if not isinstance(key, str):
+            msg = "The key is not a string."
+            logging.error(msg)
+            raise KeyError(msg)
+        # Prepare key.
+        # Removes spaces at the beginning and end of a string.
+        # Replaces all whitespace characters with a single space.
+        prepared_key = re.sub(r"\s+", " ", key).strip().lower()
+        # Check the key for an empty string.
+        if len(prepared_key) == 0:
+            msg = "The key should not be empty."
+            logging.error(msg)
+            raise KeyError(msg)
         # Key to crc32 sum.
-        key_as_hash: str = f"{zlib.crc32(
+        key_as_hash: str = f"{zlib.crc32(prepared_key.encode('utf-8')):08x}"[self._hash_reduce_left :]
         # Convert crc32 sum in the segment of path.
         separated_hash: str = "/".join(list(key_as_hash))
         # The path of the branch to the database.
@@ -175,7 +194,7 @@ class Scruby[T](
             await branch_path.mkdir(parents=True)
         # The path to the database cell.
         leaf_path: Path = Path(*(branch_path, "leaf.json"))
-        return leaf_path
+        return (leaf_path, prepared_key)
 
     @staticmethod
     def napalm() -> None:
@@ -190,5 +209,5 @@ class Scruby[T](
             None.
         """
         with contextlib.suppress(FileNotFoundError):
-            rmtree(
+            rmtree(settings.DB_ROOT)
         return
```
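The new `_get_leaf_path` normalizes keys before hashing, so keys that differ only in case or internal whitespace now deliberately map to the same leaf. A standalone sketch of the same derivation, using only the stdlib and the default reduction from `scruby.settings`:

```python
import re
import zlib

HASH_REDUCE_LEFT = 6  # default from scruby.settings


def leaf_segments(key: str, hash_reduce_left: int = HASH_REDUCE_LEFT) -> str:
    """Reproduce the branch path that _get_leaf_path derives from a key."""
    # Collapse inner whitespace, trim, and lowercase - as in the new code.
    prepared_key = re.sub(r"\s+", " ", key).strip().lower()
    # crc32 -> 8 hex characters, reduced from the left.
    key_as_hash = f"{zlib.crc32(prepared_key.encode('utf-8')):08x}"[hash_reduce_left:]
    # Each remaining hex character becomes one directory level.
    return "/".join(key_as_hash)


# Keys differing only in whitespace or case now collide on purpose:
assert leaf_segments("  John   DOE ") == leaf_segments("john doe")
print(leaf_segments("john doe"))  # two path segments with the default reduction of 6
```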
scruby/errors.py
CHANGED
scruby/mixins/__init__.py
CHANGED
scruby/mixins/collection.py
CHANGED
```diff
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Methods for working with collections."""
 
 from __future__ import annotations
@@ -5,16 +9,13 @@ from __future__ import annotations
 __all__ = ("Collection",)
 
 from shutil import rmtree
-from typing import TypeVar
 
 from anyio import Path, to_thread
 
-from scruby import
+from scruby import settings
 
-T = TypeVar("T")
 
-
-class Collection[T]:
+class Collection:
     """Methods for working with collections."""
 
     def collection_name(self) -> str:
@@ -28,7 +29,7 @@ class Collection[T]:
     @staticmethod
     async def collection_list() -> list[str]:
         """Get collection list."""
-        target_directory = Path(
+        target_directory = Path(settings.DB_ROOT)
         # Get all entries in the directory
         all_entries = Path.iterdir(target_directory)
         directory_names: list[str] = [entry.name async for entry in all_entries]
@@ -44,6 +45,6 @@ class Collection[T]:
         Returns:
             None.
         """
-        target_directory = f"{
-        await to_thread.run_sync(rmtree, target_directory)
+        target_directory = f"{settings.DB_ROOT}/{name}"
+        await to_thread.run_sync(rmtree, target_directory)  # pyrefly: ignore[bad-argument-type]
         return
```
scruby/mixins/count.py
CHANGED
```diff
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Methods for counting the number of documents."""
 
 from __future__ import annotations
@@ -6,12 +10,10 @@ __all__ = ("Count",)
 
 import concurrent.futures
 from collections.abc import Callable
-from typing import
+from typing import Any
 
-T = TypeVar("T")
 
-
-class Count[T]:
+class Count:
     """Methods for counting the number of documents."""
 
     async def estimated_document_count(self) -> int:
@@ -26,30 +28,27 @@ class Count[T]:
     async def count_documents(
         self,
         filter_fn: Callable,
-        max_workers: int | None = None,
     ) -> int:
         """Count the number of documents a matching the filter in this collection.
 
         The search is based on the effect of a quantum loop.
         The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
 
         Args:
             filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
 
         Returns:
             The number of documents.
         """
-
+        # Variable initialization
         search_task_fn: Callable = self._task_find
+        branch_numbers: range = range(1, self._max_branch_number)
         hash_reduce_left: int = self._hash_reduce_left
         db_root: str = self._db_root
-        class_model:
+        class_model: Any = self._class_model
         counter: int = 0
-
+        # Run quantum loop
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
             for branch_number in branch_numbers:
                 future = executor.submit(
                     search_task_fn,
```
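Note the API break here: `count_documents` (and, below, `delete_many` and the update mixin's bulk method) no longer accepts a `max_workers` argument; the pool size now comes from the module-wide `settings.MAX_WORKERS`. A hedged migration sketch — `user_coll` and the `year` field are placeholders, not part of the package:

```python
# Before 0.28.3 (per the removed parameter and docstring):
# total = await user_coll.count_documents(filter_fn=lambda doc: doc.year == 2025, max_workers=8)

# From 0.28.3, configure the pool size globally before creating collections:
from scruby import settings

settings.MAX_WORKERS = 8  # None => as many workers as the machine has processors


async def count_recent(user_coll) -> int:
    # The per-call max_workers argument is gone.
    return await user_coll.count_documents(filter_fn=lambda doc: doc.year == 2025)
```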
scruby/mixins/custom_task.py
CHANGED
```diff
@@ -1,22 +1,21 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Quantum methods for running custom tasks."""
 
 from __future__ import annotations
 
 __all__ = ("CustomTask",)
 
-import logging
 from collections.abc import Callable
-from typing import Any
+from typing import Any
 
 import orjson
 from anyio import Path
 
-logger = logging.getLogger(__name__)
 
-
-
-
-class CustomTask[T]:
+class CustomTask:
     """Quantum methods for running custom tasks."""
 
     @staticmethod
@@ -24,7 +23,7 @@ class CustomTask[T]:
         branch_number: int,
         hash_reduce_left: int,
         db_root: str,
-        class_model:
+        class_model: Any,
     ) -> list[Any]:
         """Get documents for custom task.
 
@@ -43,7 +42,7 @@ class CustomTask[T]:
                 "leaf.json",
             ),
         )
-        docs: list[
+        docs: list[Any] = []
         if await leaf_path.exists():
             data_json: bytes = await leaf_path.read_bytes()
             data: dict[str, str] = orjson.loads(data_json) or {}
@@ -56,7 +55,6 @@ class CustomTask[T]:
 
         This method running a task created on the basis of a quantum loop.
         Effectiveness running task depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
 
         Args:
             custom_task_fn: A function that execute the custom task.
```
scruby/mixins/delete.py
CHANGED
```diff
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Methods for deleting documents."""
 
 from __future__ import annotations
@@ -5,19 +9,14 @@ from __future__ import annotations
 __all__ = ("Delete",)
 
 import concurrent.futures
-import logging
 from collections.abc import Callable
-from typing import
+from typing import Any
 
 import orjson
 from anyio import Path
 
-logger = logging.getLogger(__name__)
 
-
-
-
-class Delete[T]:
+class Delete:
     """Methods for deleting documents."""
 
     @staticmethod
@@ -26,7 +25,7 @@ class Delete[T]:
         filter_fn: Callable,
         hash_reduce_left: int,
         db_root: str,
-        class_model:
+        class_model: Any,
     ) -> int:
         """Task for find and delete documents.
 
@@ -62,30 +61,27 @@ class Delete[T]:
     async def delete_many(
         self,
         filter_fn: Callable,
-        max_workers: int | None = None,
     ) -> int:
         """Delete one or more documents matching the filter.
 
         The search is based on the effect of a quantum loop.
         The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
 
         Args:
             filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
 
         Returns:
             The number of deleted documents.
         """
-
+        # Variable initialization
         search_task_fn: Callable = self._task_delete
+        branch_numbers: range = range(1, self._max_branch_number)
         hash_reduce_left: int = self._hash_reduce_left
         db_root: str = self._db_root
-        class_model:
+        class_model: Any = self._class_model
         counter: int = 0
-
+        # Run quantum loop
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
             for branch_number in branch_numbers:
                 future = executor.submit(
                     search_task_fn,
```
scruby/mixins/find.py
CHANGED
```diff
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Quantum methods for searching documents."""
 
 from __future__ import annotations
@@ -5,19 +9,14 @@ from __future__ import annotations
 __all__ = ("Find",)
 
 import concurrent.futures
-import logging
 from collections.abc import Callable
-from typing import
+from typing import Any
 
 import orjson
 from anyio import Path
 
-logger = logging.getLogger(__name__)
 
-
-
-
-class Find[T]:
+class Find:
     """Quantum methods for searching documents."""
 
     @staticmethod
@@ -26,8 +25,8 @@ class Find[T]:
         filter_fn: Callable,
         hash_reduce_left: str,
         db_root: str,
-        class_model:
-    ) -> list[
+        class_model: Any,
+    ) -> list[Any] | None:
         """Task for find documents.
 
         This method is for internal use.
@@ -35,6 +34,7 @@ class Find[T]:
         Returns:
             List of documents or None.
         """
+        # Variable initialization
         branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
         separated_hash: str = "/".join(list(branch_number_as_hash))
         leaf_path: Path = Path(
@@ -45,7 +45,7 @@ class Find[T]:
                 "leaf.json",
             ),
         )
-        docs: list[
+        docs: list[Any] = []
         if await leaf_path.exists():
             data_json: bytes = await leaf_path.read_bytes()
             data: dict[str, str] = orjson.loads(data_json) or {}
@@ -58,29 +58,26 @@ class Find[T]:
     async def find_one(
         self,
         filter_fn: Callable,
-
-
-        """Finds a single document matching the filter.
+    ) -> Any | None:
+        """Find one document matching the filter.
 
         The search is based on the effect of a quantum loop.
         The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
 
         Args:
-            filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
+            filter_fn (Callable): A function that execute the conditions of filtering.
 
         Returns:
             Document or None.
         """
-
+        # Variable initialization
         search_task_fn: Callable = self._task_find
+        branch_numbers: range = range(1, self._max_branch_number)
         hash_reduce_left: int = self._hash_reduce_left
         db_root: str = self._db_root
-        class_model:
-
+        class_model: Any = self._class_model
+        # Run quantum loop
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
             for branch_number in branch_numbers:
                 future = executor.submit(
                     search_task_fn,
@@ -97,36 +94,40 @@ class Find[T]:
 
     async def find_many(
         self,
-        filter_fn: Callable,
+        filter_fn: Callable = lambda _: True,
         limit_docs: int = 1000,
-
-    ) -> list[
-        """
+        page_number: int = 1,
+    ) -> list[Any] | None:
+        """Find many documents matching the filter.
 
         The search is based on the effect of a quantum loop.
         The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
 
         Args:
-            filter_fn: A function that execute the conditions of filtering.
-
-
-
-
+            filter_fn (Callable): A function that execute the conditions of filtering.
+                By default it searches for all documents.
+            limit_docs (int): Limiting the number of documents. By default = 1000.
+            page_number (int): For pagination output. By default = 1.
+                Number of documents per page = limit_docs.
 
         Returns:
             List of documents or None.
         """
-
+        # The `page_number` parameter must not be less than one
+        assert page_number > 0, "`find_many` => The `page_number` parameter must not be less than one."
+        # Variable initialization
         search_task_fn: Callable = self._task_find
+        branch_numbers: range = range(1, self._max_branch_number)
         hash_reduce_left: int = self._hash_reduce_left
         db_root: str = self._db_root
-        class_model:
+        class_model: Any = self._class_model
         counter: int = 0
-
-
+        number_docs_skippe: int = limit_docs * (page_number - 1) if page_number > 1 else 0
+        result: list[Any] = []
+        # Run quantum loop
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
             for branch_number in branch_numbers:
-                if counter >= limit_docs:
+                if number_docs_skippe == 0 and counter >= limit_docs:
                     return result[:limit_docs]
                 future = executor.submit(
                     search_task_fn,
@@ -139,8 +140,11 @@ class Find[T]:
                 docs = await future.result()
                 if docs is not None:
                     for doc in docs:
-                        if
-
-
-
+                        if number_docs_skippe == 0:
+                            if counter >= limit_docs:
+                                return result[:limit_docs]
+                            result.append(doc)
+                            counter += 1
+                        else:
+                            number_docs_skippe -= 1
         return result or None
```
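The new pagination in `find_many` skips `limit_docs * (page_number - 1)` matching documents before collecting a page. A small sketch of that arithmetic, independent of the database:

```python
def page_window(limit_docs: int, page_number: int) -> tuple[int, int]:
    """First-skipped count and page size, as find_many computes them."""
    assert page_number > 0, "page_number must not be less than one"
    skipped = limit_docs * (page_number - 1) if page_number > 1 else 0
    return skipped, limit_docs


# limit_docs=5, page_number=2 -> skip the first 5 matches, return matches 6-10.
assert page_window(5, 2) == (5, 5)
assert page_window(1000, 1) == (0, 1000)
```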
scruby/mixins/keys.py
CHANGED
```diff
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Methods for working with keys."""
 
 from __future__ import annotations
@@ -5,98 +9,99 @@ from __future__ import annotations
 __all__ = ("Keys",)
 
 import logging
-import
-from typing import TypeVar
+from typing import Any
 
 import orjson
-from anyio import Path
 
 from scruby.errors import (
     KeyAlreadyExistsError,
     KeyNotExistsError,
 )
 
-logger = logging.getLogger(__name__)
 
-
-
-
-class Keys[T]:
+class Keys:
     """Methods for working with keys."""
 
-    async def
-
-        key: str,
-        value: T,
-    ) -> None:
-        """Asynchronous method for adding key to collection.
+    async def add_doc(self, doc: Any) -> None:
+        """Asynchronous method for adding document to collection.
 
         Args:
-
-            value: Value of key. Type `BaseModel`.
+            doc: Value of key. Type, derived from `BaseModel`.
 
         Returns:
             None.
         """
-
+        # Check if the Model matches the collection
+        if not isinstance(doc, self._class_model):
+            doc_class_name = doc.__class__.__name__
+            collection_name = self._class_model.__name__
+            msg = (
+                f"(add_doc) Parameter `doc` => Model `{doc_class_name}` does not match collection `{collection_name}`!"
+            )
+            logging.error(msg)
+            raise TypeError(msg)
         # The path to cell of collection.
-        leaf_path
-
+        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
+        doc_json: str = doc.model_dump_json()
         # Write key-value to collection.
         if await leaf_path.exists():
             # Add new key.
             data_json: bytes = await leaf_path.read_bytes()
             data: dict = orjson.loads(data_json) or {}
             try:
-                data[
+                data[prepared_key]
             except KeyError:
-                data[
+                data[prepared_key] = doc_json
                 await leaf_path.write_bytes(orjson.dumps(data))
             else:
                 err = KeyAlreadyExistsError()
-
+                logging.error(err.message)
                 raise err
         else:
-            # Add new
-            await leaf_path.write_bytes(orjson.dumps({
+            # Add new document to a blank leaf.
+            await leaf_path.write_bytes(orjson.dumps({prepared_key: doc_json}))
         await self._counter_documents(1)
 
-    async def
-        self,
-        key: str,
-        value: T,
-    ) -> None:
+    async def update_doc(self, doc: Any) -> None:
         """Asynchronous method for updating key to collection.
 
         Args:
-
-            value: Value of key. Type `BaseModel`.
+            doc: Value of key. Type `BaseModel`.
 
         Returns:
             None.
         """
-
+        # Check if the Model matches the collection
+        if not isinstance(doc, self._class_model):
+            doc_class_name = doc.__class__.__name__
+            collection_name = self._class_model.__name__
+            msg = (
+                f"(update_doc) Parameter `doc` => Model `{doc_class_name}` "
+                f"does not match collection `{collection_name}`!"
+            )
+            logging.error(msg)
+            raise TypeError(msg)
         # The path to cell of collection.
-        leaf_path
-
+        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
+        doc_json: str = doc.model_dump_json()
         # Update the existing key.
         if await leaf_path.exists():
             # Update the existing key.
             data_json: bytes = await leaf_path.read_bytes()
             data: dict = orjson.loads(data_json) or {}
             try:
-                data[
-                data[
+                data[prepared_key]
+                data[prepared_key] = doc_json
                 await leaf_path.write_bytes(orjson.dumps(data))
             except KeyError:
                 err = KeyNotExistsError()
-
+                logging.error(err.message)
                 raise err from None
         else:
-
+            logging.error("The key not exists.")
             raise KeyError()
 
-    async def get_key(self, key: str) ->
+    async def get_key(self, key: str) -> Any:
         """Asynchronous method for getting value of key from collection.
 
         Args:
@@ -105,17 +110,16 @@ class Keys[T]:
         Returns:
             Value of key or KeyError.
         """
-        key = re.sub(r"\s+", " ", key.strip())
         # The path to the database cell.
-        leaf_path
+        leaf_path, prepared_key = await self._get_leaf_path(key)
         # Get value of key.
         if await leaf_path.exists():
             data_json: bytes = await leaf_path.read_bytes()
             data: dict = orjson.loads(data_json) or {}
-            obj:
+            obj: Any = self._class_model.model_validate_json(data[prepared_key])
             return obj
         msg: str = "`get_key` - The unacceptable key value."
-
+        logging.error(msg)
         raise KeyError()
 
     async def has_key(self, key: str) -> bool:
@@ -127,15 +131,14 @@ class Keys[T]:
         Returns:
             True, if the key is present.
         """
-        key = re.sub(r"\s+", " ", key.strip())
         # Get path to cell of collection.
-        leaf_path
+        leaf_path, prepared_key = await self._get_leaf_path(key)
         # Checking whether there is a key.
         if await leaf_path.exists():
             data_json: bytes = await leaf_path.read_bytes()
             data: dict = orjson.loads(data_json) or {}
             try:
-                data[
+                data[prepared_key]
                 return True
             except KeyError:
                 return False
@@ -150,17 +153,16 @@ class Keys[T]:
         Returns:
             None.
         """
-        key = re.sub(r"\s+", " ", key.strip())
         # The path to the database cell.
-        leaf_path
+        leaf_path, prepared_key = await self._get_leaf_path(key)
         # Deleting key.
         if await leaf_path.exists():
             data_json: bytes = await leaf_path.read_bytes()
             data: dict = orjson.loads(data_json) or {}
-            del data[
+            del data[prepared_key]
             await leaf_path.write_bytes(orjson.dumps(data))
             await self._counter_documents(-1)
             return
         msg: str = "`delete_key` - The unacceptable key value."
-
+        logging.error(msg)
         raise KeyError()
```
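The separate-key API (methods taking `key: str` and `value: T`) is replaced by document methods that read the key from a `key` field on the model itself. A migration sketch, assuming a minimal hypothetical model (`Note` and `note_coll` are placeholders):

```python
from pydantic import BaseModel, Field


class Note(BaseModel):
    """Hypothetical model for illustration."""
    text: str
    # New in 0.28.3: the model carries its own key field.
    key: str = Field(frozen=True)


async def migrate_example(note_coll) -> None:
    note = Note(text="hello", key="note-1")
    # Before: await note_coll.add_key("note-1", note)
    await note_coll.add_doc(note)     # key is taken from note.key
    await note_coll.update_doc(note)  # same document-based signature
```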
scruby/mixins/update.py
CHANGED
```diff
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Methods for updating documents."""
 
 from __future__ import annotations
@@ -5,19 +9,14 @@ from __future__ import annotations
 __all__ = ("Update",)
 
 import concurrent.futures
-import logging
 from collections.abc import Callable
-from typing import Any
+from typing import Any
 
 import orjson
 from anyio import Path
 
-logger = logging.getLogger(__name__)
 
-
-
-
-class Update[T]:
+class Update:
     """Methods for updating documents."""
 
     @staticmethod
@@ -26,7 +25,7 @@ class Update[T]:
         filter_fn: Callable,
         hash_reduce_left: str,
         db_root: str,
-        class_model:
+        class_model: Any,
         new_data: dict[str, Any],
     ) -> int:
         """Task for find documents.
 
@@ -65,31 +64,28 @@ class Update[T]:
         self,
         filter_fn: Callable,
         new_data: dict[str, Any],
-        max_workers: int | None = None,
     ) -> int:
         """Updates one or more documents matching the filter.
 
         The search is based on the effect of a quantum loop.
         The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
 
         Args:
             filter_fn: A function that execute the conditions of filtering.
             new_data: New data for the fields that need to be updated.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
 
         Returns:
             The number of updated documents.
         """
-
+        # Variable initialization
         update_task_fn: Callable = self._task_update
+        branch_numbers: range = range(1, self._max_branch_number)
         hash_reduce_left: int = self._hash_reduce_left
         db_root: str = self._db_root
-        class_model:
+        class_model: Any = self._class_model
         counter: int = 0
-
+        # Run quantum loop
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
             for branch_number in branch_numbers:
                 future = executor.submit(
                     update_task_fn,
```
scruby/{constants.py → settings.py}
RENAMED
```diff
@@ -1,6 +1,10 @@
-
+# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Database settings.
 
-The module contains the following
+The module contains the following parameters:
 
 - `DB_ROOT` - Path to root directory of database. `By default = "ScrubyDB" (in root of project)`.
 - `HASH_REDUCE_LEFT` - The length of the hash reduction on the left side.
@@ -8,6 +12,7 @@ The module contains the following variables:
 - `2` - 16777216 branches in collection.
 - `4` - 65536 branches in collection.
 - `6` - 256 branches in collection (by default).
+- `MAX_WORKERS` - The maximum number of processes that can be used `By default = None`.
 """
 
 from __future__ import annotations
@@ -15,6 +20,7 @@ from __future__ import annotations
 __all__ = (
     "DB_ROOT",
     "HASH_REDUCE_LEFT",
+    "MAX_WORKERS",
 )
 
 from typing import Literal
@@ -31,3 +37,8 @@ DB_ROOT: str = "ScrubyDB"
 # Number of branches is number of requests to the hard disk during quantum operations.
 # Quantum operations: find_one, find_many, count_documents, delete_many, run_custom_task.
 HASH_REDUCE_LEFT: Literal[0, 2, 4, 6] = 6
+
+# The maximum number of processes that can be used to execute the given calls.
+# If None, then as many worker processes will be
+# created as the machine has processors.
+MAX_WORKERS: int | None = None
```
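The branch counts documented for `HASH_REDUCE_LEFT` follow one formula: a crc32 hash is 8 hex characters, and dropping `HASH_REDUCE_LEFT` of them from the left leaves `16 ** (8 - HASH_REDUCE_LEFT)` possible branches — which also reproduces the README's `16**8=4294967296` maximum. A quick check:

```python
def branches(hash_reduce_left: int) -> int:
    """Branches per collection for a given left-side hash reduction."""
    return 16 ** (8 - hash_reduce_left)


assert branches(0) == 4294967296  # full 8-character crc32 hash
assert branches(2) == 16777216
assert branches(4) == 65536
assert branches(6) == 256         # the default
```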
{scruby-0.24.4.dist-info → scruby-0.28.3.dist-info}/METADATA
CHANGED
````diff
@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: scruby
-Version: 0.24.4
-Summary:
-Project-URL: Homepage, https://github.
+Version: 0.28.3
+Summary: Asynchronous library for building and managing a hybrid database, by scheme of key-value.
+Project-URL: Homepage, https://kebasyaty.github.io/scruby/
 Project-URL: Repository, https://github.com/kebasyaty/scruby
 Project-URL: Source, https://github.com/kebasyaty/scruby
 Project-URL: Bug Tracker, https://github.com/kebasyaty/scruby/issues
@@ -17,6 +17,7 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Classifier: Operating System :: POSIX
+Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: 3.12
@@ -30,7 +31,7 @@ Requires-Dist: anyio>=4.10.0
 Requires-Dist: orjson>=3.11.3
 Requires-Dist: phonenumbers>=9.0.13
 Requires-Dist: pydantic-extra-types>=2.10.5
-Requires-Dist: pydantic[email]>=2.11.7
+Requires-Dist: pydantic[email,timezone]>=2.11.7
 Description-Content-Type: text/markdown
 
 <div align="center">
@@ -52,7 +53,7 @@ Description-Content-Type: text/markdown
   <a href="https://pypi.python.org/pypi/scruby/" alt="PyPI status"><img src="https://img.shields.io/pypi/status/scruby.svg" alt="PyPI status"></a>
   <a href="https://pypi.python.org/pypi/scruby/" alt="PyPI version fury.io"><img src="https://badge.fury.io/py/scruby.svg" alt="PyPI version fury.io"></a>
   <br>
-  <a href="https://
+  <a href="https://pyrefly.org/" alt="Types: Pyrefly"><img src="https://img.shields.io/badge/types-Pyrefly-FFB74D.svg" alt="Types: Pyrefly"></a>
   <a href="https://docs.astral.sh/ruff/" alt="Code style: Ruff"><img src="https://img.shields.io/badge/code%20style-Ruff-FDD835.svg" alt="Code style: Ruff"></a>
   <a href="https://pypi.org/project/scruby"><img src="https://img.shields.io/pypi/format/scruby" alt="Format"></a>
   <a href="https://pepy.tech/projects/scruby"><img src="https://static.pepy.tech/badge/scruby" alt="PyPI Downloads"></a>
@@ -65,11 +66,11 @@ Description-Content-Type: text/markdown
 <br>
 The database consists of collections.
 <br>
-The maximum size of the one collection is 16
+The maximum size of the one collection is <b>16**8=4294967296</b> branches,
 <br>
 each branch can store one or more keys.
 <br>
-The value of any key in collection can be obtained in 8 steps,
+The value of any key in collection can be obtained maximum in <b>8</b> steps,
 <br>
 thereby achieving high performance.
 <br>
@@ -108,26 +109,33 @@ See more examples here [https://kebasyaty.github.io/scruby/latest/pages/usage/](
 import anyio
 import datetime
 from typing import Annotated
-from pydantic import BaseModel, EmailStr
+from pydantic import BaseModel, EmailStr, Field
 from pydantic_extra_types.phone_numbers import PhoneNumber, PhoneNumberValidator
-from scruby import Scruby,
+from scruby import Scruby, settings
 
-
-
+settings.DB_ROOT = "ScrubyDB"  # By default = "ScrubyDB"
+settings.HASH_REDUCE_LEFT = 6  # By default = 6
+settings.MAX_WORKERS = None  # By default = None
 
 class User(BaseModel):
-    """
-    first_name: str
-    last_name: str
-    birthday: datetime.datetime
-    email: EmailStr
-    phone: Annotated[PhoneNumber, PhoneNumberValidator(number_format="E164")]
+    """User model."""
+    first_name: str = Field(strict=True)
+    last_name: str = Field(strict=True)
+    birthday: datetime.datetime = Field(strict=True)
+    email: EmailStr = Field(strict=True)
+    phone: Annotated[PhoneNumber, PhoneNumberValidator(number_format="E164")] = Field(frozen=True)
+    # The key is always at the bottom
+    key: str = Field(
+        strict=True,
+        frozen=True,
+        default_factory=lambda data: data["phone"],
+    )
 
 
 async def main() -> None:
     """Example."""
-    # Get collection
-    user_coll = await Scruby.
+    # Get collection `User`.
+    user_coll = await Scruby.collection(User)
 
     user = User(
         first_name="John",
@@ -137,9 +145,9 @@ async def main() -> None:
         phone="+447986123456",
     )
 
-    await user_coll.
+    await user_coll.add_doc(user)
 
-    await user_coll.
+    await user_coll.update_doc(user)
 
     await user_coll.get_key("+447986123456")  # => user
     await user_coll.get_key("key missing")  # => KeyError
@@ -161,36 +169,42 @@ if __name__ == "__main__":
 ```
 
 ```python
-"""Find
+"""Find one document matching the filter.
 
 The search is based on the effect of a quantum loop.
 The search effectiveness depends on the number of processor threads.
-Ideally, hundreds and even thousands of threads are required.
 """
 
 import anyio
 import datetime
 from typing import Annotated
-from pydantic import BaseModel
-from scruby import Scruby,
+from pydantic import BaseModel, Field
+from scruby import Scruby, settings
 from pprint import pprint as pp
 
-
-
+settings.DB_ROOT = "ScrubyDB"  # By default = "ScrubyDB"
+settings.HASH_REDUCE_LEFT = 6  # By default = 6
+settings.MAX_WORKERS = None  # By default = None
 
 
 class Phone(BaseModel):
     """Phone model."""
-    brand: str
-    model: str
-    screen_diagonal: float
-    matrix_type: str
+    brand: str = Field(strict=True, frozen=True)
+    model: str = Field(strict=True, frozen=True)
+    screen_diagonal: float = Field(strict=True)
+    matrix_type: str = Field(strict=True)
+    # The key is always at the bottom
+    key: str = Field(
+        strict=True,
+        frozen=True,
+        default_factory=lambda data: f"{data['brand']}:{data['model']}",
+    )
 
 
 async def main() -> None:
     """Example."""
-    # Get collection
-    phone_coll = await Scruby.
+    # Get collection `Phone`.
+    phone_coll = await Scruby.collection(Phone)
 
     # Create phone.
     phone = Phone(
@@ -201,8 +215,7 @@ async def main() -> None:
     )
 
     # Add phone to collection.
-
-    await phone_coll.add_key(key, phone)
+    await phone_coll.add_doc(phone)
 
     # Find phone by brand.
     phone_details: Phone | None = await phone_coll.find_one(
@@ -232,60 +245,79 @@ if __name__ == "__main__":
 ```
 
 ```python
-"""Find
+"""Find many documents matching the filter.
 
 The search is based on the effect of a quantum loop.
 The search effectiveness depends on the number of processor threads.
-Ideally, hundreds and even thousands of threads are required.
 """
 
 import anyio
 import datetime
 from typing import Annotated
-from pydantic import BaseModel
-from scruby import Scruby,
+from pydantic import BaseModel, Field
+from scruby import Scruby, settings
 from pprint import pprint as pp
 
-
-
+settings.DB_ROOT = "ScrubyDB"  # By default = "ScrubyDB"
+settings.HASH_REDUCE_LEFT = 6  # By default = 6
+settings.MAX_WORKERS = None  # By default = None
 
 
 class Car(BaseModel):
     """Car model."""
-    brand: str
-    model: str
-    year: int
-    power_reserve: int
+    brand: str = Field(strict=True, frozen=True)
+    model: str = Field(strict=True, frozen=True)
+    year: int = Field(strict=True)
+    power_reserve: int = Field(strict=True)
+    # The key is always at the bottom
+    key: str = Field(
+        strict=True,
+        frozen=True,
+        default_factory=lambda data: f"{data['brand']}:{data['model']}",
+    )
 
 
 async def main() -> None:
     """Example."""
-    # Get collection
-    car_coll = await Scruby.
+    # Get collection `Car`.
+    car_coll = await Scruby.collection(Car)
 
     # Create cars.
-    for
+    for num in range(1, 10):
         car = Car(
             brand="Mazda",
             model=f"EZ-6 {num}",
             year=2025,
             power_reserve=600,
         )
-
-        await car_coll.add_key(key, car)
+        await car_coll.add_doc(car)
 
     # Find cars by brand and year.
     car_list: list[Car] | None = await car_coll.find_many(
-        filter_fn=lambda doc: doc.brand == "Mazda"
+        filter_fn=lambda doc: doc.brand == "Mazda" and doc.year == 2025,
     )
     if car_list is not None:
         pp(car_list)
     else:
         print("No cars!")
 
-    #
-
-
+    # Find all cars.
+    car_list: list[Car] | None = await car_coll.find_many()
+    if car_list is not None:
+        pp(car_list)
+    else:
+        print("No cars!")
+
+    # For pagination output.
+    car_list: list[Car] | None = await car_coll.find_many(
+        filter_fn=lambda doc: doc.brand == "Mazda",
+        limit_docs=5,
+        page_number=2,
+    )
+    if car_list is not None:
+        pp(car_list)
+    else:
+        print("No cars!")
 
     # Full database deletion.
     # Hint: The main purpose is tests.
````
scruby-0.28.3.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,18 @@
+scruby/__init__.py,sha256=bw2Le5ULYlf2nFQT_617rmEumu66Ll-QCLCxqDFERWw,1014
+scruby/aggregation.py,sha256=bd70J1Xye6faNHD8LS3lVQoHWKtPdPV_cqT_i7oui38,3491
+scruby/db.py,sha256=djo4JkfuKCcV3jRbd2L3mIwENS3ptqJBt7SlAuiRhGY,6794
+scruby/errors.py,sha256=D0jisudUsZk9iXp4nRSymaSMwyqHPVshsSlxx4HDVVk,1297
+scruby/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scruby/settings.py,sha256=_uVdZIGWoi6q9zcu0c2PS51OBEBNASRRrxfzaF7Nwy0,1580
+scruby/mixins/__init__.py,sha256=XPMjJvOZN7dLpTE1FfGMBGQ_0421HXug-0rWKMU5fRQ,627
+scruby/mixins/collection.py,sha256=coF-IOhicV_EihDwnYf6SW5Mfi3nOFR0gAhCc619NmI,1382
+scruby/mixins/count.py,sha256=PGzRtgLvseQnHg6wt-A81s30pnsdY1d8cr-EQRnbfyU,2050
+scruby/mixins/custom_task.py,sha256=ZhvCDiYnJ8BTIWlnRu6cTH-9G9o7dHSixjMIsxAtDpw,2316
+scruby/mixins/delete.py,sha256=B2loiowj8ToO0euumDRxpHUVrLQx0iTcRys0jszn-rA,3046
+scruby/mixins/find.py,sha256=gnHjnm0MZbzMHmWOBUJbMm8LZFBqdJ6yWA6Pxfap51Q,5340
+scruby/mixins/keys.py,sha256=OUByWbHfNVWJVkUrhCsJZdVqf0zez_an6Gti2n5iKnM,5671
+scruby/mixins/update.py,sha256=YkUiz1gcVtNXdgf7Mmda-0g3vJm3jL09v-msGy2tAWg,3229
+scruby-0.28.3.dist-info/METADATA,sha256=DoDzTEj_wrZu3EfAhBcKBtCfbr9pb74FS3spyDpFfzY,10849
+scruby-0.28.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+scruby-0.28.3.dist-info/licenses/LICENSE,sha256=mS0Wz0yGNB63gEcWEnuIb_lldDYV0sjRaO-o_GL6CWE,1074
+scruby-0.28.3.dist-info/RECORD,,
```
scruby-0.24.4.dist-info/RECORD
DELETED
```diff
@@ -1,18 +0,0 @@
-scruby/__init__.py,sha256=elrW_AWMyl3kuTpEqGPaYFSpF8iVzjpivF6MxVNlqoQ,855
-scruby/aggregation.py,sha256=SYGcnMy2eq9vJb-pW3xR9LLAQIQ55TK-LGW_yKQ-7sU,3318
-scruby/constants.py,sha256=KInSZ_4dsQNXilrs7DvtQXevKEYibnNzl69a7XiWG4k,1099
-scruby/db.py,sha256=ggYW4dQPtr7m9-GM4QeYMMDZm5eUYN5bTAdz2Tj0hlw,5980
-scruby/errors.py,sha256=aj1zQlfxGwZC-bZZ07DRX2vHx31SpyWPqXHMpQ9kRVY,1124
-scruby/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-scruby/mixins/__init__.py,sha256=-rRZE-JZwGmEkC0wS_X0hs8OXsEYyvgSNIfil8wmjFA,454
-scruby/mixins/collection.py,sha256=eMnfHFdzk7LWILMmDbzugcOYSeIKp0DlEEqCmmGQRwA,1222
-scruby/mixins/count.py,sha256=Wcn6CeWrYSgsTTmYQ4J-CEiM4630rUSwRP9iKwbCl6c,2193
-scruby/mixins/custom_task.py,sha256=Ib1G1I7NyDGbow4SeafkYd9C0r6u6EDgUK0NxjhsEa0,2297
-scruby/mixins/delete.py,sha256=BmfQH68iX7kzC20w16xzFcLO3uLxYKdNyqZqIbXb1M0,3240
-scruby/mixins/find.py,sha256=va1hTm6Poua7_TMcZW2iqI-xmL1HcCUOx8pkKvTvu6U,5063
-scruby/mixins/keys.py,sha256=Hbb0AX68ph--fA43AXDWoM72PzSmS48h3iVwlQwQH0c,4971
-scruby/mixins/update.py,sha256=A9V4PjA3INnqLTGoBxIvC8y8Wo-nLxlFejkPUhsebzQ,3428
-scruby-0.24.4.dist-info/METADATA,sha256=RDE0Fa_IXd2hx2MDjdsI-5iwGhZOCZ9zOqqQuOe8k5g,9643
-scruby-0.24.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-scruby-0.24.4.dist-info/licenses/LICENSE,sha256=mS0Wz0yGNB63gEcWEnuIb_lldDYV0sjRaO-o_GL6CWE,1074
-scruby-0.24.4.dist-info/RECORD,,
```
{scruby-0.24.4.dist-info → scruby-0.28.3.dist-info}/WHEEL
File without changes

{scruby-0.24.4.dist-info → scruby-0.28.3.dist-info}/licenses/LICENSE
File without changes