scruby 0.10.4__py3-none-any.whl → 0.26.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of scruby might be problematic.
- scruby/__init__.py +43 -29
- scruby/aggregation.py +148 -0
- scruby/constants.py +33 -31
- scruby/db.py +204 -437
- scruby/errors.py +41 -23
- scruby/mixins/__init__.py +21 -0
- scruby/mixins/collection.py +49 -0
- scruby/mixins/count.py +64 -0
- scruby/mixins/custom_task.py +76 -0
- scruby/mixins/delete.py +101 -0
- scruby/mixins/docs.py +166 -0
- scruby/mixins/find.py +146 -0
- scruby/mixins/update.py +104 -0
- {scruby-0.10.4.dist-info → scruby-0.26.0.dist-info}/METADATA +127 -105
- scruby-0.26.0.dist-info/RECORD +18 -0
- {scruby-0.10.4.dist-info → scruby-0.26.0.dist-info}/WHEEL +1 -1
- {scruby-0.10.4.dist-info → scruby-0.26.0.dist-info}/licenses/LICENSE +21 -21
- scruby-0.10.4.dist-info/RECORD +0 -9
scruby/errors.py
CHANGED
@@ -1,23 +1,41 @@
-"""Scruby Exceptions."""
-
-from __future__ import annotations
-
-__all__ = (
-    "ScrubyException",
-    "MetadataValueError",
+"""Scruby Exceptions."""
+
+from __future__ import annotations
+
+__all__ = (
+    "ScrubyException",
+    "MetadataValueError",
+    "KeyAlreadyExistsError",
+    "KeyNotExistsError",
+)
+
+
+class ScrubyException(Exception):
+    """Root custom exception."""
+
+    def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D107
+        super().__init__(*args, **kwargs)
+
+
+class MetadataValueError(ScrubyException):
+    """Exception raised if the value of a variable in metadata does not match the expected one."""
+
+    def __init__(self, message: str) -> None:  # noqa: D107
+        self.message = message
+        super().__init__(self.message)
+
+
+class KeyAlreadyExistsError(ScrubyException):
+    """Exception raised if the key already exists."""
+
+    def __init__(self) -> None:  # noqa: D107
+        self.message = "The key already exists."
+        super().__init__(self.message)
+
+
+class KeyNotExistsError(ScrubyException):
+    """Exception raised if the key does not exist."""
+
+    def __init__(self) -> None:  # noqa: D107
+        self.message = "The key not exists."
+        super().__init__(self.message)
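Since all four exceptions derive from `ScrubyException`, callers can handle the whole family with a single except clause. A minimal sketch, assuming `collection` is a handle exposing `add_doc` (the handle's construction lives in scruby/db.py, outside this section):

from scruby.errors import KeyAlreadyExistsError

async def add_if_absent(collection, doc) -> None:
    # Hypothetical helper: treat a duplicate key as "already stored".
    try:
        await collection.add_doc(doc)
    except KeyAlreadyExistsError:
        pass  # the key is already present; nothing to do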
scruby/mixins/__init__.py
ADDED
@@ -0,0 +1,21 @@
+"""Mixins."""
+
+from __future__ import annotations
+
+__all__ = (
+    "Collection",
+    "Count",
+    "CustomTask",
+    "Delete",
+    "Find",
+    "Docs",
+    "Update",
+)
+
+from scruby.mixins.collection import Collection
+from scruby.mixins.count import Count
+from scruby.mixins.custom_task import CustomTask
+from scruby.mixins.delete import Delete
+from scruby.mixins.docs import Docs
+from scruby.mixins.find import Find
+from scruby.mixins.update import Update
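These exports suggest the package's client class is assembled from the mixins. A hypothetical composition (the real class lives in scruby/db.py, which this section does not show):

from scruby.mixins import Collection, Count, CustomTask, Delete, Docs, Find, Update

class Client(Collection, Count, CustomTask, Delete, Docs, Find, Update):
    """Sketch only: the actual class must also provide _class_model, _db_root,
    _max_branch_number, _hash_reduce_left and the helpers the mixins call."""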
scruby/mixins/collection.py
ADDED
@@ -0,0 +1,49 @@
+"""Methods for working with collections."""
+
+from __future__ import annotations
+
+__all__ = ("Collection",)
+
+from shutil import rmtree
+from typing import TypeVar
+
+from anyio import Path, to_thread
+
+from scruby import constants
+
+T = TypeVar("T")
+
+
+class Collection[T]:
+    """Methods for working with collections."""
+
+    def collection_name(self) -> str:
+        """Get the collection name.
+
+        Returns:
+            Collection name.
+        """
+        return self._class_model.__name__
+
+    @staticmethod
+    async def collection_list() -> list[str]:
+        """Get the list of collections."""
+        target_directory = Path(constants.DB_ROOT)
+        # Get all entries in the directory
+        all_entries = Path.iterdir(target_directory)
+        directory_names: list[str] = [entry.name async for entry in all_entries]
+        return directory_names
+
+    @staticmethod
+    async def delete_collection(name: str) -> None:
+        """Asynchronous method for deleting a collection by its name.
+
+        Args:
+            name (str): Collection name.
+
+        Returns:
+            None.
+        """
+        target_directory = f"{constants.DB_ROOT}/{name}"
+        await to_thread.run_sync(rmtree, target_directory)  # pyrefly: ignore[bad-argument-type]
+        return
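A minimal usage sketch, assuming `client` is any object that mixes in `Collection` (the names below are illustrative, not from the diff):

async def drop_collection_if_present(client, name: str) -> None:
    # collection_list() returns the directory names under constants.DB_ROOT
    names = await client.collection_list()
    if name in names:
        # delete_collection() runs shutil.rmtree in a worker thread
        await client.delete_collection(name)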
scruby/mixins/count.py
ADDED
@@ -0,0 +1,64 @@
+"""Methods for counting the number of documents."""
+
+from __future__ import annotations
+
+__all__ = ("Count",)
+
+import concurrent.futures
+from collections.abc import Callable
+from typing import TypeVar
+
+T = TypeVar("T")
+
+
+class Count[T]:
+    """Methods for counting the number of documents."""
+
+    async def estimated_document_count(self) -> int:
+        """Get an estimate of the number of documents in this collection using collection metadata.
+
+        Returns:
+            The number of documents.
+        """
+        meta = await self.get_meta()
+        return meta.counter_documents
+
+    async def count_documents(
+        self,
+        filter_fn: Callable,
+        max_workers: int | None = None,
+    ) -> int:
+        """Count the number of documents matching the filter in this collection.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn: A function that executes the conditions of filtering.
+            max_workers: The maximum number of workers that can be used to
+                execute the given calls. If None or not given, as many
+                workers will be created as the machine has processors.
+
+        Returns:
+            The number of documents.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_find
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: T = self._class_model
+        counter: int = 0
+        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+            for branch_number in branch_numbers:
+                future = executor.submit(
                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                )
+                if await future.result() is not None:
+                    counter += 1
+        return counter
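A minimal usage sketch for `count_documents`, assuming `users` is a collection handle bound to a Pydantic model with an `age` field (the model and handle names are illustrative):

from pydantic import BaseModel

class User(BaseModel):
    key: str
    age: int

async def count_adults(users) -> int:
    # filter_fn receives each deserialized document and returns a bool
    return await users.count_documents(filter_fn=lambda doc: doc.age >= 18)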
scruby/mixins/custom_task.py
ADDED
@@ -0,0 +1,76 @@
+"""Quantum methods for running custom tasks."""
+
+from __future__ import annotations
+
+__all__ = ("CustomTask",)
+
+import logging
+from collections.abc import Callable
+from typing import Any, TypeVar
+
+import orjson
+from anyio import Path
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+
+
+class CustomTask[T]:
+    """Quantum methods for running custom tasks."""
+
+    @staticmethod
+    async def _task_get_docs(
+        branch_number: int,
+        hash_reduce_left: int,
+        db_root: str,
+        class_model: T,
+    ) -> list[Any]:
+        """Get documents for a custom task.
+
+        This method is for internal use.
+
+        Returns:
+            List of documents.
+        """
+        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+        separated_hash: str = "/".join(list(branch_number_as_hash))
+        leaf_path: Path = Path(
+            *(
+                db_root,
+                class_model.__name__,
+                separated_hash,
+                "leaf.json",
+            ),
+        )
+        docs: list[Any] = []
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict[str, str] = orjson.loads(data_json) or {}
+            for _, val in data.items():
+                docs.append(class_model.model_validate_json(val))
+        return docs
+
+    async def run_custom_task(self, custom_task_fn: Callable, limit_docs: int = 1000) -> Any:
+        """Run a custom task.
+
+        This method runs a task built on the effect of a quantum loop.
+        The effectiveness of the task depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            custom_task_fn: A function that executes the custom task.
+            limit_docs: Limit on the number of documents. Defaults to 1000.
+
+        Returns:
+            The result of the custom task.
+        """
+        kwargs = {
+            "get_docs_fn": self._task_get_docs,
+            "branch_numbers": range(1, self._max_branch_number),
+            "hash_reduce_left": self._hash_reduce_left,
+            "db_root": self._db_root,
+            "class_model": self._class_model,
+            "limit_docs": limit_docs,
+        }
+        return await custom_task_fn(**kwargs)
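`run_custom_task` passes a fixed set of keyword arguments to the supplied coroutine, so a custom task must accept exactly those names. A hypothetical task that collects document keys across all branches:

async def collect_keys(get_docs_fn, branch_numbers, hash_reduce_left, db_root, class_model, limit_docs):
    # Walk every branch, deserialize its documents, and gather their keys.
    keys = []
    for branch_number in branch_numbers:
        docs = await get_docs_fn(branch_number, hash_reduce_left, db_root, class_model)
        for doc in docs:
            if len(keys) >= limit_docs:
                return keys
            keys.append(doc.key)
    return keys

# result = await collection.run_custom_task(collect_keys)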
scruby/mixins/delete.py
ADDED
@@ -0,0 +1,101 @@
+"""Methods for deleting documents."""
+
+from __future__ import annotations
+
+__all__ = ("Delete",)
+
+import concurrent.futures
+import logging
+from collections.abc import Callable
+from typing import TypeVar
+
+import orjson
+from anyio import Path
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+
+
+class Delete[T]:
+    """Methods for deleting documents."""
+
+    @staticmethod
+    async def _task_delete(
+        branch_number: int,
+        filter_fn: Callable,
+        hash_reduce_left: int,
+        db_root: str,
+        class_model: T,
+    ) -> int:
+        """Task to find and delete documents.
+
+        This method is for internal use.
+
+        Returns:
+            The number of deleted documents.
+        """
+        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+        separated_hash: str = "/".join(list(branch_number_as_hash))
+        leaf_path: Path = Path(
+            *(
+                db_root,
+                class_model.__name__,
+                separated_hash,
+                "leaf.json",
+            ),
+        )
+        counter: int = 0
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict[str, str] = orjson.loads(data_json) or {}
+            new_state: dict[str, str] = {}
+            for key, val in data.items():
+                doc = class_model.model_validate_json(val)
+                if filter_fn(doc):
+                    counter -= 1
+                else:
+                    new_state[key] = val
+            await leaf_path.write_bytes(orjson.dumps(new_state))
+        return counter
+
+    async def delete_many(
+        self,
+        filter_fn: Callable,
+        max_workers: int | None = None,
+    ) -> int:
+        """Delete one or more documents matching the filter.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn: A function that executes the conditions of filtering.
+            max_workers: The maximum number of workers that can be used to
+                execute the given calls. If None or not given, as many
+                workers will be created as the machine has processors.
+
+        Returns:
+            The number of deleted documents.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_delete
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: T = self._class_model
+        counter: int = 0
+        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+            for branch_number in branch_numbers:
+                future = executor.submit(
+                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                )
+                counter += await future.result()
+        if counter < 0:
+            await self._counter_documents(counter)
+        return abs(counter)
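A minimal usage sketch, assuming `users` is a collection handle for a model with a boolean `active` field (illustrative names):

async def purge_inactive(users) -> int:
    # Documents for which filter_fn returns True are dropped from their leaves;
    # the collection's document counter is decremented accordingly.
    return await users.delete_many(filter_fn=lambda doc: not doc.active)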
scruby/mixins/docs.py
ADDED
@@ -0,0 +1,166 @@
+"""Methods for working with keys."""
+
+from __future__ import annotations
+
+__all__ = ("Docs",)
+
+import logging
+from typing import Any
+
+import orjson
+
+from scruby.errors import (
+    KeyAlreadyExistsError,
+    KeyNotExistsError,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class Docs:
+    """Methods for working with documents."""
+
+    async def add_doc(self, doc: Any) -> None:
+        """Asynchronous method for adding a document to the collection.
+
+        Args:
+            doc: Value of the key. A type derived from `BaseModel`.
+
+        Returns:
+            None.
+        """
+        # Check if the Model matches the collection
+        if not isinstance(doc, self._class_model):
+            doc_class_name = doc.__class__.__name__
+            collection_name = self._class_model.__name__
+            msg = (
+                f"(add_doc) Parameter `doc` => Model `{doc_class_name}` does not match collection `{collection_name}`!"
+            )
+            logger.error(msg)
+            raise TypeError(msg)
+        # The path to the cell of the collection.
+        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
+        doc_json: str = doc.model_dump_json()
+        # Write the key-value pair to the collection.
+        if await leaf_path.exists():
+            # Add the new key.
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            try:
+                data[prepared_key]
+            except KeyError:
+                data[prepared_key] = doc_json
+                await leaf_path.write_bytes(orjson.dumps(data))
+            else:
+                err = KeyAlreadyExistsError()
+                logger.error(err.message)
+                raise err
+        else:
+            # Add the new document to a blank leaf.
+            await leaf_path.write_bytes(orjson.dumps({prepared_key: doc_json}))
+        await self._counter_documents(1)
+
+    async def update_doc(self, doc: Any) -> None:
+        """Asynchronous method for updating a key in the collection.
+
+        Args:
+            doc: Value of the key. Type `BaseModel`.
+
+        Returns:
+            None.
+        """
+        # Check if the Model matches the collection
+        if not isinstance(doc, self._class_model):
+            doc_class_name = doc.__class__.__name__
+            collection_name = self._class_model.__name__
+            msg = (
+                f"(update_doc) Parameter `doc` => Model `{doc_class_name}` "
+                f"does not match collection `{collection_name}`!"
+            )
+            logger.error(msg)
+            raise TypeError(msg)
+        # The path to the cell of the collection.
+        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
+        doc_json: str = doc.model_dump_json()
+        # Update the existing key.
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            try:
+                data[prepared_key]
+                data[prepared_key] = doc_json
+                await leaf_path.write_bytes(orjson.dumps(data))
+            except KeyError:
+                err = KeyNotExistsError()
+                logger.error(err.message)
+                raise err from None
+        else:
+            logger.error("The key not exists.")
+            raise KeyError()
+
+    async def get_key(self, key: str) -> Any:
+        """Asynchronous method for getting the value of a key from the collection.
+
+        Args:
+            key: Key name.
+
+        Returns:
+            Value of the key, or raises KeyError.
+        """
+        # The path to the database cell.
+        leaf_path, prepared_key = await self._get_leaf_path(key)
+        # Get the value of the key.
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            obj: Any = self._class_model.model_validate_json(data[prepared_key])
+            return obj
+        msg: str = "`get_key` - The unacceptable key value."
+        logger.error(msg)
+        raise KeyError()
+
+    async def has_key(self, key: str) -> bool:
+        """Asynchronous method for checking the presence of a key in the collection.
+
+        Args:
+            key: Key name.
+
+        Returns:
+            True, if the key is present.
+        """
+        # Get the path to the cell of the collection.
+        leaf_path, prepared_key = await self._get_leaf_path(key)
+        # Check whether the key exists.
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            try:
+                data[prepared_key]
+                return True
+            except KeyError:
+                return False
+        return False
+
+    async def delete_key(self, key: str) -> None:
+        """Asynchronous method for deleting a key from the collection.
+
+        Args:
+            key: Key name.
+
+        Returns:
+            None.
+        """
+        # The path to the database cell.
+        leaf_path, prepared_key = await self._get_leaf_path(key)
+        # Delete the key.
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            del data[prepared_key]
+            await leaf_path.write_bytes(orjson.dumps(data))
+            await self._counter_documents(-1)
+            return
+        msg: str = "`delete_key` - The unacceptable key value."
+        logger.error(msg)
+        raise KeyError()
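A minimal lifecycle sketch for the key methods, assuming `users` is a collection handle bound to the `User` model below (handle construction is outside this section):

from pydantic import BaseModel

class User(BaseModel):
    key: str
    name: str

async def lifecycle(users) -> None:
    user = User(key="alice", name="Alice")
    await users.add_doc(user)            # KeyAlreadyExistsError if "alice" is taken
    if await users.has_key("alice"):
        stored = await users.get_key("alice")
        stored.name = "Alice B."
        await users.update_doc(stored)   # KeyNotExistsError if the key vanished
    await users.delete_key("alice")      # KeyError for an unknown key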
scruby/mixins/find.py
ADDED
@@ -0,0 +1,146 @@
+"""Quantum methods for searching documents."""
+
+from __future__ import annotations
+
+__all__ = ("Find",)
+
+import concurrent.futures
+import logging
+from collections.abc import Callable
+from typing import TypeVar
+
+import orjson
+from anyio import Path
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+
+
+class Find[T]:
+    """Quantum methods for searching documents."""
+
+    @staticmethod
+    async def _task_find(
+        branch_number: int,
+        filter_fn: Callable,
+        hash_reduce_left: int,
+        db_root: str,
+        class_model: T,
+    ) -> list[T] | None:
+        """Task to find documents.
+
+        This method is for internal use.
+
+        Returns:
+            List of documents or None.
+        """
+        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+        separated_hash: str = "/".join(list(branch_number_as_hash))
+        leaf_path: Path = Path(
+            *(
+                db_root,
+                class_model.__name__,
+                separated_hash,
+                "leaf.json",
+            ),
+        )
+        docs: list[T] = []
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict[str, str] = orjson.loads(data_json) or {}
+            for _, val in data.items():
+                doc = class_model.model_validate_json(val)
+                if filter_fn(doc):
+                    docs.append(doc)
+        return docs or None
+
+    async def find_one(
+        self,
+        filter_fn: Callable,
+        max_workers: int | None = None,
+    ) -> T | None:
+        """Find a single document matching the filter.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn: A function that executes the conditions of filtering.
+            max_workers: The maximum number of workers that can be used to
+                execute the given calls. If None or not given, as many
+                workers will be created as the machine has processors.
+
+        Returns:
+            Document or None.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_find
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: T = self._class_model
+        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+            for branch_number in branch_numbers:
+                future = executor.submit(
+                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                )
+                docs = await future.result()
+                if docs is not None:
+                    return docs[0]
+        return None
+
+    async def find_many(
+        self,
+        filter_fn: Callable,
+        limit_docs: int = 1000,
+        max_workers: int | None = None,
+    ) -> list[T] | None:
+        """Find one or more documents matching the filter.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn: A function that executes the conditions of filtering.
+            limit_docs: Limit on the number of documents. Defaults to 1000.
+            max_workers: The maximum number of workers that can be used to
+                execute the given calls. If None or not given, as many
+                workers will be created as the machine has processors.
+
+        Returns:
+            List of documents or None.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_find
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: T = self._class_model
+        counter: int = 0
+        result: list[T] = []
+        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+            for branch_number in branch_numbers:
+                if counter >= limit_docs:
+                    return result[:limit_docs]
+                future = executor.submit(
+                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                )
+                docs = await future.result()
+                if docs is not None:
+                    for doc in docs:
+                        if counter >= limit_docs:
+                            return result[:limit_docs]
+                        result.append(doc)
+                        counter += 1
+        return result or None
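A minimal usage sketch for the search methods, assuming `users` is a collection handle whose model has `name` and `age` fields (illustrative):

async def search(users):
    # find_one returns the first match it encounters, or None
    alice = await users.find_one(filter_fn=lambda doc: doc.name == "Alice")
    # find_many caps the result at limit_docs matches
    over_30 = await users.find_many(filter_fn=lambda doc: doc.age > 30, limit_docs=100)
    return alice, over_30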