scruby 0.10.3__py3-none-any.whl → 0.24.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of scruby might be problematic.

scruby/errors.py CHANGED
@@ -1,20 +1,41 @@
- """XLOT Exceptions."""
-
- from __future__ import annotations
-
- __all__ = ("MetadataValueError",)
-
-
- class ScrubyException(Exception):
-     """Root Custom Exception."""
-
-     def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
-         super().__init__(*args, **kwargs)
-
-
- class MetadataValueError(ScrubyException):
-     """Exception is raised if value of variable in metadata does not matching expected."""
-
-     def __init__(self, message: str) -> None:  # noqa: D107
-         self.message = message
-         super().__init__(self.message)
+ """Scruby Exceptions."""
+
+ from __future__ import annotations
+
+ __all__ = (
+     "ScrubyException",
+     "MetadataValueError",
+     "KeyAlreadyExistsError",
+     "KeyNotExistsError",
+ )
+
+
+ class ScrubyException(Exception):
+     """Root Custom Exception."""
+
+     def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]  # noqa: D107
+         super().__init__(*args, **kwargs)
+
+
+ class MetadataValueError(ScrubyException):
+     """Exception raised when the value of a metadata variable does not match the expected one."""
+
+     def __init__(self, message: str) -> None:  # noqa: D107
+         self.message = message
+         super().__init__(self.message)
+
+
+ class KeyAlreadyExistsError(ScrubyException):
+     """Exception raised if the key already exists."""
+
+     def __init__(self) -> None:  # noqa: D107
+         self.message = "The key already exists."
+         super().__init__(self.message)
+
+
+ class KeyNotExistsError(ScrubyException):
+     """Exception raised if the key does not exist."""
+
+     def __init__(self) -> None:  # noqa: D107
+         self.message = "The key does not exist."
+         super().__init__(self.message)
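
For context, a minimal sketch of how the new exceptions surface to callers; `db` stands in for a client object that mixes in the `Keys` methods shown later in this diff:

    from scruby.errors import KeyAlreadyExistsError

    async def upsert(db, key, value):
        # add_key raises KeyAlreadyExistsError for a duplicate key;
        # update_key is the companion call for keys that already exist.
        try:
            await db.add_key(key, value)
        except KeyAlreadyExistsError:
            await db.update_key(key, value)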
scruby/mixins/__init__.py ADDED
@@ -0,0 +1,21 @@
+ """Mixins."""
+
+ from __future__ import annotations
+
+ __all__ = (
+     "Collection",
+     "Count",
+     "CustomTask",
+     "Delete",
+     "Find",
+     "Keys",
+     "Update",
+ )
+
+ from scruby.mixins.collection import Collection
+ from scruby.mixins.count import Count
+ from scruby.mixins.custom_task import CustomTask
+ from scruby.mixins.delete import Delete
+ from scruby.mixins.find import Find
+ from scruby.mixins.keys import Keys
+ from scruby.mixins.update import Update
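
A plausible composition of these mixins into a client class (a sketch; the `Client` name is an assumption, and `Update` is exported in `__all__` but its module is not shown in this diff):

    from scruby.mixins import Collection, Count, CustomTask, Delete, Find, Keys

    class Client[T](Collection[T], Count[T], CustomTask[T], Delete[T], Find[T], Keys[T]):
        # The mixins expect the host class to provide attributes such as
        # _class_model, _db_root, _hash_reduce_left and _max_branch_number.
        ...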
scruby/mixins/collection.py ADDED
@@ -0,0 +1,49 @@
+ """Methods for working with collections."""
+
+ from __future__ import annotations
+
+ __all__ = ("Collection",)
+
+ from shutil import rmtree
+ from typing import TypeVar
+
+ from anyio import Path, to_thread
+
+ from scruby import constants
+
+ T = TypeVar("T")
+
+
+ class Collection[T]:
+     """Methods for working with collections."""
+
+     def collection_name(self) -> str:
+         """Get collection name.
+
+         Returns:
+             Collection name.
+         """
+         return self._class_model.__name__
+
+     @staticmethod
+     async def collection_list() -> list[str]:
+         """Get collection list."""
+         target_directory = Path(constants.DB_ROOT)
+         # Get all entries in the directory.
+         all_entries = target_directory.iterdir()
+         directory_names: list[str] = [entry.name async for entry in all_entries]
+         return directory_names
+
+     @staticmethod
+     async def delete_collection(name: str) -> None:
+         """Asynchronous method for deleting a collection by its name.
+
+         Args:
+             name (str): Collection name.
+
+         Returns:
+             None.
+         """
+         target_directory = f"{constants.DB_ROOT}/{name}"
+         await to_thread.run_sync(rmtree, target_directory)
+         return
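
A usage sketch, assuming `constants.DB_ROOT` points at the on-disk database root and `client` mixes in `Collection`:

    async def drop_all(client) -> None:
        # collection_list() returns the directory names under DB_ROOT;
        # delete_collection() removes one directory tree in a worker thread.
        for name in await client.collection_list():
            await client.delete_collection(name)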
scruby/mixins/count.py ADDED
@@ -0,0 +1,64 @@
+ """Methods for counting the number of documents."""
+
+ from __future__ import annotations
+
+ __all__ = ("Count",)
+
+ import concurrent.futures
+ from collections.abc import Callable
+ from typing import TypeVar
+
+ T = TypeVar("T")
+
+
+ class Count[T]:
+     """Methods for counting the number of documents."""
+
+     async def estimated_document_count(self) -> int:
+         """Get an estimate of the number of documents in this collection using collection metadata.
+
+         Returns:
+             The number of documents.
+         """
+         meta = await self.get_meta()
+         return meta.counter_documents
+
+     async def count_documents(
+         self,
+         filter_fn: Callable,
+         max_workers: int | None = None,
+     ) -> int:
+         """Count the number of documents matching the filter in this collection.
+
+         The search is based on the effect of a quantum loop.
+         The search effectiveness depends on the number of processor threads.
+         Ideally, hundreds and even thousands of threads are required.
+
+         Args:
+             filter_fn: A function that executes the filtering conditions.
+             max_workers: The maximum number of threads that can be used to
+                 execute the given calls. If None or not given, as many
+                 worker threads will be created as the machine has processors.
+
+         Returns:
+             The number of documents.
+         """
+         branch_numbers: range = range(1, self._max_branch_number)
+         search_task_fn: Callable = self._task_find
+         hash_reduce_left: int = self._hash_reduce_left
+         db_root: str = self._db_root
+         class_model: T = self._class_model
+         counter: int = 0
+         with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+             for branch_number in branch_numbers:
+                 future = executor.submit(
+                     search_task_fn,
+                     branch_number,
+                     filter_fn,
+                     hash_reduce_left,
+                     db_root,
+                     class_model,
+                 )
+                 docs = await future.result()
+                 if docs is not None:
+                     counter += len(docs)
+         return counter
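
A hedged example; the `role` field belongs to an illustrative Pydantic model, and `client` is assumed to mix in `Count`:

    async def count_admins(client) -> int:
        # filter_fn receives a validated model instance and returns a bool.
        return await client.count_documents(lambda user: user.role == "admin")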
scruby/mixins/custom_task.py ADDED
@@ -0,0 +1,76 @@
+ """Quantum methods for running custom tasks."""
+
+ from __future__ import annotations
+
+ __all__ = ("CustomTask",)
+
+ import logging
+ from collections.abc import Callable
+ from typing import Any, TypeVar
+
+ import orjson
+ from anyio import Path
+
+ logger = logging.getLogger(__name__)
+
+ T = TypeVar("T")
+
+
+ class CustomTask[T]:
+     """Quantum methods for running custom tasks."""
+
+     @staticmethod
+     async def _task_get_docs(
+         branch_number: int,
+         hash_reduce_left: int,
+         db_root: str,
+         class_model: T,
+     ) -> list[Any]:
+         """Get documents for a custom task.
+
+         This method is for internal use.
+
+         Returns:
+             List of documents.
+         """
+         branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+         separated_hash: str = "/".join(list(branch_number_as_hash))
+         leaf_path: Path = Path(
+             *(
+                 db_root,
+                 class_model.__name__,
+                 separated_hash,
+                 "leaf.json",
+             ),
+         )
+         docs: list[T] = []
+         if await leaf_path.exists():
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict[str, str] = orjson.loads(data_json) or {}
+             for val in data.values():
+                 docs.append(class_model.model_validate_json(val))
+         return docs
+
+     async def run_custom_task(self, custom_task_fn: Callable, limit_docs: int = 1000) -> Any:
+         """Run a custom task.
+
+         This method runs a task built on the quantum-loop approach.
+         The task's effectiveness depends on the number of processor threads.
+         Ideally, hundreds and even thousands of threads are required.
+
+         Args:
+             custom_task_fn: A function that executes the custom task.
+             limit_docs: Limit on the number of documents. Defaults to 1000.
+
+         Returns:
+             The result of the custom task.
+         """
+         kwargs = {
+             "get_docs_fn": self._task_get_docs,
+             "branch_numbers": range(1, self._max_branch_number),
+             "hash_reduce_left": self._hash_reduce_left,
+             "db_root": self._db_root,
+             "class_model": self._class_model,
+             "limit_docs": limit_docs,
+         }
+         return await custom_task_fn(**kwargs)
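
A sketch of a custom task that honors the keyword contract passed through by run_custom_task (the parameter names come from the kwargs dict above; the `name` field is an assumption about the model):

    async def collect_names(
        get_docs_fn, branch_numbers, hash_reduce_left, db_root, class_model, limit_docs
    ):
        # Walk every branch leaf and gather document names, up to limit_docs.
        names: list[str] = []
        for branch_number in branch_numbers:
            docs = await get_docs_fn(branch_number, hash_reduce_left, db_root, class_model)
            for doc in docs:
                if len(names) >= limit_docs:
                    return names
                names.append(doc.name)
        return names

    # result = await client.run_custom_task(collect_names)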
scruby/mixins/delete.py ADDED
@@ -0,0 +1,101 @@
+ """Methods for deleting documents."""
+
+ from __future__ import annotations
+
+ __all__ = ("Delete",)
+
+ import concurrent.futures
+ import logging
+ from collections.abc import Callable
+ from typing import TypeVar
+
+ import orjson
+ from anyio import Path
+
+ logger = logging.getLogger(__name__)
+
+ T = TypeVar("T")
+
+
+ class Delete[T]:
+     """Methods for deleting documents."""
+
+     @staticmethod
+     async def _task_delete(
+         branch_number: int,
+         filter_fn: Callable,
+         hash_reduce_left: int,
+         db_root: str,
+         class_model: T,
+     ) -> int:
+         """Task to find and delete documents.
+
+         This method is for internal use.
+
+         Returns:
+             The number of deleted documents, as a negative count.
+         """
+         branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+         separated_hash: str = "/".join(list(branch_number_as_hash))
+         leaf_path: Path = Path(
+             *(
+                 db_root,
+                 class_model.__name__,
+                 separated_hash,
+                 "leaf.json",
+             ),
+         )
+         counter: int = 0
+         if await leaf_path.exists():
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict[str, str] = orjson.loads(data_json) or {}
+             new_state: dict[str, str] = {}
+             for key, val in data.items():
+                 doc = class_model.model_validate_json(val)
+                 if filter_fn(doc):
+                     counter -= 1
+                 else:
+                     new_state[key] = val
+             await leaf_path.write_bytes(orjson.dumps(new_state))
+         return counter
+
+     async def delete_many(
+         self,
+         filter_fn: Callable,
+         max_workers: int | None = None,
+     ) -> int:
+         """Delete one or more documents matching the filter.
+
+         The search is based on the effect of a quantum loop.
+         The search effectiveness depends on the number of processor threads.
+         Ideally, hundreds and even thousands of threads are required.
+
+         Args:
+             filter_fn: A function that executes the filtering conditions.
+             max_workers: The maximum number of threads that can be used to
+                 execute the given calls. If None or not given, as many
+                 worker threads will be created as the machine has processors.
+
+         Returns:
+             The number of deleted documents.
+         """
+         branch_numbers: range = range(1, self._max_branch_number)
+         search_task_fn: Callable = self._task_delete
+         hash_reduce_left: int = self._hash_reduce_left
+         db_root: str = self._db_root
+         class_model: T = self._class_model
+         counter: int = 0
+         with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+             for branch_number in branch_numbers:
+                 future = executor.submit(
+                     search_task_fn,
+                     branch_number,
+                     filter_fn,
+                     hash_reduce_left,
+                     db_root,
+                     class_model,
+                 )
+                 counter += await future.result()
+         if counter < 0:
+             await self._counter_documents(counter)
+         return abs(counter)
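
A usage sketch for bulk deletion; the `is_active` field is an assumption about the model, and `client` is assumed to mix in `Delete`:

    async def purge_inactive(client) -> int:
        # Removes every document for which the filter returns True and
        # reports how many were deleted; the document counter is adjusted
        # internally via _counter_documents.
        return await client.delete_many(lambda doc: not doc.is_active)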
scruby/mixins/find.py ADDED
@@ -0,0 +1,146 @@
+ """Quantum methods for searching documents."""
+
+ from __future__ import annotations
+
+ __all__ = ("Find",)
+
+ import concurrent.futures
+ import logging
+ from collections.abc import Callable
+ from typing import TypeVar
+
+ import orjson
+ from anyio import Path
+
+ logger = logging.getLogger(__name__)
+
+ T = TypeVar("T")
+
+
+ class Find[T]:
+     """Quantum methods for searching documents."""
+
+     @staticmethod
+     async def _task_find(
+         branch_number: int,
+         filter_fn: Callable,
+         hash_reduce_left: int,
+         db_root: str,
+         class_model: T,
+     ) -> list[T] | None:
+         """Task to find documents.
+
+         This method is for internal use.
+
+         Returns:
+             List of documents or None.
+         """
+         branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+         separated_hash: str = "/".join(list(branch_number_as_hash))
+         leaf_path: Path = Path(
+             *(
+                 db_root,
+                 class_model.__name__,
+                 separated_hash,
+                 "leaf.json",
+             ),
+         )
+         docs: list[T] = []
+         if await leaf_path.exists():
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict[str, str] = orjson.loads(data_json) or {}
+             for val in data.values():
+                 doc = class_model.model_validate_json(val)
+                 if filter_fn(doc):
+                     docs.append(doc)
+         return docs or None
+
+     async def find_one(
+         self,
+         filter_fn: Callable,
+         max_workers: int | None = None,
+     ) -> T | None:
+         """Find a single document matching the filter.
+
+         The search is based on the effect of a quantum loop.
+         The search effectiveness depends on the number of processor threads.
+         Ideally, hundreds and even thousands of threads are required.
+
+         Args:
+             filter_fn: A function that executes the filtering conditions.
+             max_workers: The maximum number of threads that can be used to
+                 execute the given calls. If None or not given, as many
+                 worker threads will be created as the machine has processors.
+
+         Returns:
+             Document or None.
+         """
+         branch_numbers: range = range(1, self._max_branch_number)
+         search_task_fn: Callable = self._task_find
+         hash_reduce_left: int = self._hash_reduce_left
+         db_root: str = self._db_root
+         class_model: T = self._class_model
+         with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+             for branch_number in branch_numbers:
+                 future = executor.submit(
+                     search_task_fn,
+                     branch_number,
+                     filter_fn,
+                     hash_reduce_left,
+                     db_root,
+                     class_model,
+                 )
+                 docs = await future.result()
+                 if docs is not None:
+                     return docs[0]
+         return None
+
+     async def find_many(
+         self,
+         filter_fn: Callable,
+         limit_docs: int = 1000,
+         max_workers: int | None = None,
+     ) -> list[T] | None:
+         """Find one or more documents matching the filter.
+
+         The search is based on the effect of a quantum loop.
+         The search effectiveness depends on the number of processor threads.
+         Ideally, hundreds and even thousands of threads are required.
+
+         Args:
+             filter_fn: A function that executes the filtering conditions.
+             limit_docs: Limit on the number of documents. Defaults to 1000.
+             max_workers: The maximum number of threads that can be used to
+                 execute the given calls. If None or not given, as many
+                 worker threads will be created as the machine has processors.
+
+         Returns:
+             List of documents or None.
+         """
+         branch_numbers: range = range(1, self._max_branch_number)
+         search_task_fn: Callable = self._task_find
+         hash_reduce_left: int = self._hash_reduce_left
+         db_root: str = self._db_root
+         class_model: T = self._class_model
+         counter: int = 0
+         result: list[T] = []
+         with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+             for branch_number in branch_numbers:
+                 if counter >= limit_docs:
+                     return result[:limit_docs]
+                 future = executor.submit(
+                     search_task_fn,
+                     branch_number,
+                     filter_fn,
+                     hash_reduce_left,
+                     db_root,
+                     class_model,
+                 )
+                 docs = await future.result()
+                 if docs is not None:
+                     for doc in docs:
+                         if counter >= limit_docs:
+                             return result[:limit_docs]
+                         result.append(doc)
+                         counter += 1
+         return result or None
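
A hedged example of both search entry points; the model fields are assumptions:

    async def demo(client):
        # find_one returns the first matching document, or None.
        admin = await client.find_one(lambda u: u.role == "admin")
        # find_many collects matches up to limit_docs, or returns None.
        adults = await client.find_many(lambda u: u.age >= 18, limit_docs=50)
        return admin, adults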
scruby/mixins/keys.py ADDED
@@ -0,0 +1,166 @@
+ """Methods for working with keys."""
+
+ from __future__ import annotations
+
+ __all__ = ("Keys",)
+
+ import logging
+ import re
+ from typing import TypeVar
+
+ import orjson
+ from anyio import Path
+
+ from scruby.errors import (
+     KeyAlreadyExistsError,
+     KeyNotExistsError,
+ )
+
+ logger = logging.getLogger(__name__)
+
+ T = TypeVar("T")
+
+
+ class Keys[T]:
+     """Methods for working with keys."""
+
+     async def add_key(
+         self,
+         key: str,
+         value: T,
+     ) -> None:
+         """Asynchronous method for adding a key to the collection.
+
+         Args:
+             key: Key name. Type `str`.
+             value: Value of key. Type `BaseModel`.
+
+         Returns:
+             None.
+         """
+         key = re.sub(r"\s+", " ", key.strip())
+         # The path to the cell of the collection.
+         leaf_path: Path = await self._get_leaf_path(key)
+         value_json: str = value.model_dump_json()
+         # Write the key-value pair to the collection.
+         if await leaf_path.exists():
+             # Add a new key to an existing leaf.
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict = orjson.loads(data_json) or {}
+             if key in data:
+                 err = KeyAlreadyExistsError()
+                 logger.error(err.message)
+                 raise err
+             data[key] = value_json
+             await leaf_path.write_bytes(orjson.dumps(data))
+         else:
+             # Add a new key to a blank leaf.
+             await leaf_path.write_bytes(orjson.dumps({key: value_json}))
+         await self._counter_documents(1)
+
+     async def update_key(
+         self,
+         key: str,
+         value: T,
+     ) -> None:
+         """Asynchronous method for updating a key in the collection.
+
+         Args:
+             key: Key name. Type `str`.
+             value: Value of key. Type `BaseModel`.
+
+         Returns:
+             None.
+         """
+         key = re.sub(r"\s+", " ", key.strip())
+         # The path to the cell of the collection.
+         leaf_path: Path = await self._get_leaf_path(key)
+         value_json: str = value.model_dump_json()
+         # Update the existing key.
+         if await leaf_path.exists():
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict = orjson.loads(data_json) or {}
+             if key not in data:
+                 err = KeyNotExistsError()
+                 logger.error(err.message)
+                 raise err
+             data[key] = value_json
+             await leaf_path.write_bytes(orjson.dumps(data))
+         else:
+             err = KeyNotExistsError()
+             logger.error(err.message)
+             raise err
+
+     async def get_key(self, key: str) -> T:
+         """Asynchronous method for getting the value of a key from the collection.
+
+         Args:
+             key: Key name.
+
+         Returns:
+             Value of the key, or raises KeyError.
+         """
+         key = re.sub(r"\s+", " ", key.strip())
+         # The path to the database cell.
+         leaf_path: Path = await self._get_leaf_path(key)
+         # Get the value of the key.
+         if await leaf_path.exists():
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict = orjson.loads(data_json) or {}
+             obj: T = self._class_model.model_validate_json(data[key])
+             return obj
+         msg: str = "`get_key` - The key does not exist."
+         logger.error(msg)
+         raise KeyError(msg)
+
+     async def has_key(self, key: str) -> bool:
+         """Asynchronous method for checking the presence of a key in the collection.
+
+         Args:
+             key: Key name.
+
+         Returns:
+             True if the key is present, otherwise False.
+         """
+         key = re.sub(r"\s+", " ", key.strip())
+         # Get the path to the cell of the collection.
+         leaf_path: Path = await self._get_leaf_path(key)
+         # Check whether the key exists.
+         if await leaf_path.exists():
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict = orjson.loads(data_json) or {}
+             return key in data
+         return False
+
+     async def delete_key(self, key: str) -> None:
+         """Asynchronous method for deleting a key from the collection.
+
+         Args:
+             key: Key name.
+
+         Returns:
+             None.
+         """
+         key = re.sub(r"\s+", " ", key.strip())
+         # The path to the database cell.
+         leaf_path: Path = await self._get_leaf_path(key)
+         # Delete the key.
+         if await leaf_path.exists():
+             data_json: bytes = await leaf_path.read_bytes()
+             data: dict = orjson.loads(data_json) or {}
+             del data[key]
+             await leaf_path.write_bytes(orjson.dumps(data))
+             await self._counter_documents(-1)
+             return
+         msg: str = "`delete_key` - The key does not exist."
+         logger.error(msg)
+         raise KeyError(msg)
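
A minimal end-to-end sketch of the key API, assuming a Pydantic `BaseModel` subclass as the collection's `class_model` (the `User` model and `client` wiring are illustrative):

    from pydantic import BaseModel

    class User(BaseModel):
        name: str
        age: int

    async def demo(client) -> None:
        await client.add_key("user:1", User(name="Ada", age=36))  # KeyAlreadyExistsError on duplicates
        if await client.has_key("user:1"):
            user = await client.get_key("user:1")  # returns a User instance
            await client.update_key("user:1", User(name="Ada", age=37))
        await client.delete_key("user:1")  # decrements the document counter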