scruby 0.9.3__py3-none-any.whl → 0.27.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scruby/db.py CHANGED
@@ -1,301 +1,213 @@
- """Creation and management of the database."""
-
- from __future__ import annotations
-
- __all__ = ("Scruby",)
-
- import concurrent.futures
- import contextlib
- import logging
- import zlib
- from collections.abc import Callable
- from pathlib import Path as SyncPath
- from shutil import rmtree
- from typing import Never, TypeVar, assert_never
-
- import orjson
- from anyio import Path, to_thread
-
- from scruby import constants
-
- logger = logging.getLogger(__name__)
-
- T = TypeVar("T")
-
-
- class Scruby[T]:
-     """Creation and management of database.
-
-     Args:
-         class_model: Class of Model (Pydantic).
-     """
-
-     def __init__(  # noqa: D107
-         self,
-         class_model: T,
-     ) -> None:
-         self.__class_model = class_model
-         self.__db_root = constants.DB_ROOT
-         self.__length_reduction_hash = constants.LENGTH_REDUCTION_HASH
-         # The maximum number of keys.
-         match self.__length_reduction_hash:
-             case 0:
-                 self.__max_num_keys = 4294967296
-             case 2:
-                 self.__max_num_keys = 16777216
-             case 4:
-                 self.__max_num_keys = 65536
-             case 6:
-                 self.__max_num_keys = 256
-             case _ as unreachable:
-                 msg: str = f"{unreachable} - Unacceptable value for LENGTH_REDUCTION_HASH."
-                 logger.critical(msg)
-                 assert_never(Never(unreachable))
-
-     async def get_leaf_path(self, key: str) -> Path:
-         """Asynchronous method for getting path to collection cell by key.
-
-         Args:
-             key: Key name.
-         """
-         if not isinstance(key, str):
-             logger.error("The key is not of type `str`.")
-             raise KeyError("The key is not of type `str`.")
-         if len(key) == 0:
-             logger.error("The key should not be empty.")
-             raise KeyError("The key should not be empty.")
-         # Key to crc32 sum.
-         key_as_hash: str = f"{zlib.crc32(key.encode('utf-8')):08x}"[self.__length_reduction_hash :]
-         # Convert the crc32 sum into path segments.
-         separated_hash: str = "/".join(list(key_as_hash))
-         # The path of the branch in the database.
-         branch_path: Path = Path(
-             *(
-                 self.__db_root,
-                 self.__class_model.__name__,
-                 separated_hash,
-             ),
-         )
-         # If the branch does not exist, create it.
-         if not await branch_path.exists():
-             await branch_path.mkdir(parents=True)
-         # The path to the database cell.
-         leaf_path: Path = Path(*(branch_path, "leaf.json"))
-         return leaf_path
-
-     async def set_key(
-         self,
-         key: str,
-         value: T,
-     ) -> None:
-         """Asynchronous method for adding and updating keys in a collection.
-
-         Args:
-             key: Key name.
-             value: Value of key.
-         """
-         # The path to the database cell.
-         leaf_path: Path = await self.get_leaf_path(key)
-         value_json: str = value.model_dump_json()
-         # Write key-value to the database.
-         if await leaf_path.exists():
-             # Add new key or update existing.
-             data_json: bytes = await leaf_path.read_bytes()
-             data: dict = orjson.loads(data_json) or {}
-             data[key] = value_json
-             await leaf_path.write_bytes(orjson.dumps(data))
-         else:
-             # Add new key to a blank leaf.
-             await leaf_path.write_bytes(orjson.dumps({key: value_json}))
-
-     async def get_key(self, key: str) -> T:
-         """Asynchronous method for getting value of key from collection.
-
-         Args:
-             key: Key name.
-         """
-         # The path to the database cell.
-         leaf_path: Path = await self.get_leaf_path(key)
-         # Get value of key.
-         if await leaf_path.exists():
-             data_json: bytes = await leaf_path.read_bytes()
-             data: dict = orjson.loads(data_json) or {}
-             obj: T = self.__class_model.model_validate_json(data[key])
-             return obj
-         msg: str = "`get_key` - The key does not exist."
-         logger.error(msg)
-         raise KeyError(msg)
-
-     async def has_key(self, key: str) -> bool:
-         """Asynchronous method for checking presence of key in collection.
-
-         Args:
-             key: Key name.
-         """
-         # The path to the database cell.
-         leaf_path: Path = await self.get_leaf_path(key)
-         # Check whether the key exists.
-         if await leaf_path.exists():
-             data_json: bytes = await leaf_path.read_bytes()
-             data: dict = orjson.loads(data_json) or {}
-             try:
-                 data[key]
-                 return True
-             except KeyError:
-                 return False
-         return False
-
-     async def delete_key(self, key: str) -> None:
-         """Asynchronous method for deleting key from collection.
-
-         Args:
-             key: Key name.
-         """
-         # The path to the database cell.
-         leaf_path: Path = await self.get_leaf_path(key)
-         # Deleting key.
-         if await leaf_path.exists():
-             data_json: bytes = await leaf_path.read_bytes()
-             data: dict = orjson.loads(data_json) or {}
-             del data[key]
-             await leaf_path.write_bytes(orjson.dumps(data))
-             return
-         msg: str = "`delete_key` - The key does not exist."
-         logger.error(msg)
-         raise KeyError(msg)
-
-     @staticmethod
-     async def napalm() -> None:
-         """Asynchronous method for full database deletion.
-
-         The main purpose is tests.
-
-         Warning:
-             - `Be careful, this will remove all keys.`
-         """
-         with contextlib.suppress(FileNotFoundError):
-             await to_thread.run_sync(rmtree, constants.DB_ROOT)
-         return
-
-     @staticmethod
-     def search_task(
-         key: int,
-         filter_fn: Callable,
-         length_reduction_hash: int,
-         db_root: str,
-         class_model: T,
-     ) -> T | None:
-         """Search task."""
-         key_as_hash: str = f"{key:08x}"[length_reduction_hash:]
-         separated_hash: str = "/".join(list(key_as_hash))
-         leaf_path: SyncPath = SyncPath(
-             *(
-                 db_root,
-                 class_model.__name__,
-                 separated_hash,
-                 "leaf.json",
-             ),
-         )
-         if leaf_path.exists():
-             data_json: bytes = leaf_path.read_bytes()
-             data: dict[str, str] = orjson.loads(data_json) or {}
-             for val in data.values():
-                 doc = class_model.model_validate_json(val)
-                 if filter_fn(doc):
-                     return doc
-         return None
-
-     def find_one(
-         self,
-         filter_fn: Callable,
-         max_workers: int | None = None,
-         timeout: float | None = None,
-     ) -> T | None:
-         """Find a single document matching the filter.
-
-         The search is based on the effect of a quantum loop.
-         The search effectiveness depends on the number of processor threads.
-         Ideally, hundreds and even thousands of threads are required.
-
-         Args:
-             filter_fn: A function that applies the filtering conditions.
-             max_workers: The maximum number of threads that can be used to
-                 execute the given calls. If None or not given, as many
-                 worker threads will be created as the machine has processors.
-             timeout: The number of seconds to wait for the result if the future isn't done.
-                 If None, there is no limit on the wait time.
-         """
-         keys: range = range(1, self.__max_num_keys)
-         search_task_fn: Callable = self.search_task
-         length_reduction_hash: int = self.__length_reduction_hash
-         db_root: str = self.__db_root
-         class_model: T = self.__class_model
-         with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-             for key in keys:
-                 future = executor.submit(
-                     search_task_fn,
-                     key,
-                     filter_fn,
-                     length_reduction_hash,
-                     db_root,
-                     class_model,
-                 )
-                 doc = future.result(timeout)
-                 if doc is not None:
-                     return doc
-         return None
-
-     def find(
-         self,
-         filter_fn: Callable,
-         db_query_docs_limit: int = 1000,
-         max_workers: int | None = None,
-         timeout: float | None = None,
-     ) -> list[T] | None:
-         """Find one or more documents matching the filter.
-
-         The search is based on the effect of a quantum loop.
-         The search effectiveness depends on the number of processor threads.
-         Ideally, hundreds and even thousands of threads are required.
-
-         Args:
-             filter_fn: A function that applies the filtering conditions.
-             db_query_docs_limit: Limit on the number of query results. By default = 1000.
-             max_workers: The maximum number of threads that can be used to
-                 execute the given calls. If None or not given, as many
-                 worker threads will be created as the machine has processors.
-             timeout: The number of seconds to wait for the result if the future isn't done.
-                 If None, there is no limit on the wait time.
-         """
-         keys: range = range(1, self.__max_num_keys)
-         search_task_fn: Callable = self.search_task
-         length_reduction_hash: int = self.__length_reduction_hash
-         db_root: str = self.__db_root
-         class_model: T = self.__class_model
-         counter: int = 0
-         with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-             results = []
-             for key in keys:
-                 if counter == db_query_docs_limit:
-                     break
-                 future = executor.submit(
-                     search_task_fn,
-                     key,
-                     filter_fn,
-                     length_reduction_hash,
-                     db_root,
-                     class_model,
-                 )
-                 doc = future.result(timeout)
-                 if doc is not None:
-                     results.append(doc)
-                     counter += 1
-         return results or None
-
-     def collection_name(self) -> str:
-         """Get collection name."""
-         return self.__class_model.__name__
-
-     def collection_full_name(self) -> str:
-         """Get full name of collection."""
-         return f"{self.__db_root}/{self.__class_model.__name__}"
+ # Scruby - Asynchronous library for building and managing a hybrid key-value database.
+ # Copyright (c) 2025 Gennady Kostyunin
+ # SPDX-License-Identifier: MIT
+ #
+ """Creation and management of the database."""
+
+ from __future__ import annotations
+
+ __all__ = ("Scruby",)
+
+ import contextlib
+ import logging
+ import re
+ import zlib
+ from shutil import rmtree
+ from typing import Any, Literal, Never, assert_never
+
+ from anyio import Path
+ from pydantic import BaseModel
+
+ from scruby import mixins, settings
+
+
+ class _Meta(BaseModel):
+     """Metadata of Collection."""
+
+     db_root: str
+     collection_name: str
+     hash_reduce_left: int
+     max_branch_number: int
+     counter_documents: int
+
+
+ class Scruby(
+     mixins.Keys,
+     mixins.Find,
+     mixins.CustomTask,
+     mixins.Collection,
+     mixins.Count,
+     mixins.Delete,
+     mixins.Update,
+ ):
+     """Creation and management of database."""
+
+     def __init__(  # noqa: D107
+         self,
+     ) -> None:
+         super().__init__()
+         self._meta = _Meta
+         self._db_root = settings.DB_ROOT
+         self._hash_reduce_left = settings.HASH_REDUCE_LEFT
+         self._max_workers = settings.MAX_WORKERS
+         # The maximum number of branches.
+         match self._hash_reduce_left:
+             case 0:
+                 self._max_branch_number = 4294967296
+             case 2:
+                 self._max_branch_number = 16777216
+             case 4:
+                 self._max_branch_number = 65536
+             case 6:
+                 self._max_branch_number = 256
+             case _ as unreachable:
+                 msg: str = f"{unreachable} - Unacceptable value for HASH_REDUCE_LEFT."
+                 logging.critical(msg)
+                 assert_never(Never(unreachable))  # pyrefly: ignore[not-callable]
+
+     @classmethod
+     async def collection(cls, class_model: Any) -> Any:
+         """Get an object to access a collection.
+
+         Args:
+             class_model: Class of Model (pydantic.BaseModel).
+
+         Returns:
+             Instance of Scruby for accessing a collection.
+         """
+         assert BaseModel in class_model.__bases__, "`class_model` does not contain the base class `pydantic.BaseModel`!"
+
+         instance = cls()
+         instance.__dict__["_class_model"] = class_model
+         # Caching the path to the metadata.
+         # The zero branch is reserved for metadata.
+         branch_number: int = 0
+         branch_number_as_hash: str = f"{branch_number:08x}"[settings.HASH_REDUCE_LEFT :]
+         separated_hash: str = "/".join(list(branch_number_as_hash))
+         meta_dir_path_tuple = (
+             settings.DB_ROOT,
+             class_model.__name__,
+             separated_hash,
+         )
+         instance.__dict__["_meta_path"] = Path(
+             *meta_dir_path_tuple,
+             "meta.json",
+         )
+         # Create metadata for collection, if missing.
+         branch_path = Path(*meta_dir_path_tuple)
+         if not await branch_path.exists():
+             await branch_path.mkdir(parents=True)
+             meta = _Meta(
+                 db_root=settings.DB_ROOT,
+                 collection_name=class_model.__name__,
+                 hash_reduce_left=settings.HASH_REDUCE_LEFT,
+                 max_branch_number=instance.__dict__["_max_branch_number"],
+                 counter_documents=0,
+             )
+             meta_json = meta.model_dump_json()
+             meta_path = Path(*(branch_path, "meta.json"))
+             await meta_path.write_text(meta_json, "utf-8")
+         return instance
+
+     async def get_meta(self) -> _Meta:
+         """Asynchronous method for getting metadata of collection.
+
+         This method is for internal use.
+
+         Returns:
+             Metadata object.
+         """
+         meta_json = await self._meta_path.read_text()
+         meta: _Meta = self._meta.model_validate_json(meta_json)
+         return meta
+
+     async def _set_meta(self, meta: _Meta) -> None:
+         """Asynchronous method for updating metadata of collection.
+
+         This method is for internal use.
+
+         Args:
+             meta (_Meta): Metadata of Collection.
+
+         Returns:
+             None.
+         """
+         meta_json = meta.model_dump_json()
+         await self._meta_path.write_text(meta_json, "utf-8")
+
+     async def _counter_documents(self, step: Literal[1, -1]) -> None:
+         """Asynchronous method for managing the document counter in collection metadata.
+
+         This method is for internal use.
+
+         Args:
+             step (Literal[1, -1]): Number of documents added or removed.
+
+         Returns:
+             None.
+         """
+         meta_path = self._meta_path
+         meta_json = await meta_path.read_text("utf-8")
+         meta: _Meta = self._meta.model_validate_json(meta_json)
+         meta.counter_documents += step
+         meta_json = meta.model_dump_json()
+         await meta_path.write_text(meta_json, "utf-8")
+
+     async def _get_leaf_path(self, key: str) -> tuple[Path, str]:
+         """Asynchronous method for getting path to collection cell by key.
+
+         This method is for internal use.
+
+         Args:
+             key (str): Key name.
+
+         Returns:
+             Path to cell of collection and the prepared key.
+         """
+         if not isinstance(key, str):
+             msg = "The key is not a string."
+             logging.error(msg)
+             raise KeyError(msg)
+         # Prepare key:
+         # strip spaces at the beginning and end of the string,
+         # collapse whitespace runs to a single space, lowercase.
+         prepared_key = re.sub(r"\s+", " ", key).strip().lower()
+         # Check the key for an empty string.
+         if len(prepared_key) == 0:
+             msg = "The key should not be empty."
+             logging.error(msg)
+             raise KeyError(msg)
+         # Key to crc32 sum.
+         key_as_hash: str = f"{zlib.crc32(prepared_key.encode('utf-8')):08x}"[self._hash_reduce_left :]
+         # Convert the crc32 sum into path segments.
+         separated_hash: str = "/".join(list(key_as_hash))
+         # The path of the branch in the database.
+         branch_path: Path = Path(
+             *(
+                 self._db_root,
+                 self._class_model.__name__,
+                 separated_hash,
+             ),
+         )
+         # If the branch does not exist, create it.
+         if not await branch_path.exists():
+             await branch_path.mkdir(parents=True)
+         # The path to the database cell.
+         leaf_path: Path = Path(*(branch_path, "leaf.json"))
+         return (leaf_path, prepared_key)
+
+     @staticmethod
+     def napalm() -> None:
+         """Method for full database deletion.
+
+         The main purpose is tests.
+
+         Warning:
+             - `Be careful, this will remove all keys.`
+
+         Returns:
+             None.
+         """
+         with contextlib.suppress(FileNotFoundError):
+             rmtree(settings.DB_ROOT)
+         return
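
For orientation, the cell addressing used throughout the new db.py can be reproduced in a few lines: the key is normalized (whitespace collapsed, lowercased), hashed with crc32, the 8-digit hex digest is truncated from the left by HASH_REDUCE_LEFT, and each remaining hex digit becomes one directory level. A minimal sketch, assuming the illustrative values HASH_REDUCE_LEFT = 6 and DB_ROOT = "ScrubyDB" (the real values come from scruby.settings, which is not part of this diff):

import re
import zlib

HASH_REDUCE_LEFT = 6  # assumed setting: keep only the last 2 hex digits
DB_ROOT = "ScrubyDB"  # assumed setting


def leaf_path_for(key: str, collection: str) -> str:
    # Same normalization and hashing as Scruby._get_leaf_path above.
    prepared = re.sub(r"\s+", " ", key).strip().lower()
    digest = f"{zlib.crc32(prepared.encode('utf-8')):08x}"[HASH_REDUCE_LEFT:]
    # One directory level per remaining hex digit, then the leaf file.
    return "/".join((DB_ROOT, collection, *digest, "leaf.json"))


# "John  Doe" and "john doe" normalize to the same cell.
print(leaf_path_for("John  Doe", "User"))

With HASH_REDUCE_LEFT = 6 the digest keeps two hex digits, giving the 256 branches listed in the match table in __init__, with branch 0 reserved for meta.json.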
scruby/errors.py ADDED
@@ -0,0 +1,45 @@
+ # Scruby - Asynchronous library for building and managing a hybrid key-value database.
+ # Copyright (c) 2025 Gennady Kostyunin
+ # SPDX-License-Identifier: MIT
+ #
+ """Scruby Exceptions."""
+
+ from __future__ import annotations
+
+ __all__ = (
+     "ScrubyException",
+     "MetadataValueError",
+     "KeyAlreadyExistsError",
+     "KeyNotExistsError",
+ )
+
+
+ class ScrubyException(Exception):
+     """Root Custom Exception."""
+
+     def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D107
+         super().__init__(*args, **kwargs)
+
+
+ class MetadataValueError(ScrubyException):
+     """Exception raised if a metadata value does not match the expected one."""
+
+     def __init__(self, message: str) -> None:  # noqa: D107
+         self.message = message
+         super().__init__(self.message)
+
+
+ class KeyAlreadyExistsError(ScrubyException):
+     """Exception raised if the key already exists."""
+
+     def __init__(self) -> None:  # noqa: D107
+         self.message = "The key already exists."
+         super().__init__(self.message)
+
+
+ class KeyNotExistsError(ScrubyException):
+     """Exception raised if the key does not exist."""
+
+     def __init__(self) -> None:  # noqa: D107
+         self.message = "The key does not exist."
+         super().__init__(self.message)
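
All four exceptions derive from ScrubyException, so callers can handle them individually or through the root class. The diff does not show which Scruby methods raise them, so the sketch below only demonstrates the hierarchy:

from scruby.errors import KeyNotExistsError, ScrubyException

try:
    raise KeyNotExistsError()  # illustrative; the real raise sites are not in this diff
except ScrubyException as err:
    # The subclass is still caught via the root class.
    print(type(err).__name__, err.message)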
scruby/mixins/__init__.py ADDED
@@ -0,0 +1,25 @@
+ # Scruby - Asynchronous library for building and managing a hybrid key-value database.
+ # Copyright (c) 2025 Gennady Kostyunin
+ # SPDX-License-Identifier: MIT
+ #
+ """Mixins."""
+
+ from __future__ import annotations
+
+ __all__ = (
+     "Collection",
+     "Count",
+     "CustomTask",
+     "Delete",
+     "Find",
+     "Keys",
+     "Update",
+ )
+
+ from scruby.mixins.collection import Collection
+ from scruby.mixins.count import Count
+ from scruby.mixins.custom_task import CustomTask
+ from scruby.mixins.delete import Delete
+ from scruby.mixins.docs import Keys
+ from scruby.mixins.find import Find
+ from scruby.mixins.update import Update
scruby/mixins/collection.py ADDED
@@ -0,0 +1,50 @@
+ # Scruby - Asynchronous library for building and managing a hybrid key-value database.
+ # Copyright (c) 2025 Gennady Kostyunin
+ # SPDX-License-Identifier: MIT
+ #
+ """Methods for working with collections."""
+
+ from __future__ import annotations
+
+ __all__ = ("Collection",)
+
+ from shutil import rmtree
+
+ from anyio import Path, to_thread
+
+ from scruby import settings
+
+
+ class Collection:
+     """Methods for working with collections."""
+
+     def collection_name(self) -> str:
+         """Get collection name.
+
+         Returns:
+             Collection name.
+         """
+         return self._class_model.__name__
+
+     @staticmethod
+     async def collection_list() -> list[str]:
+         """Get collection list."""
+         target_directory = Path(settings.DB_ROOT)
+         # Get all entries in the directory.
+         all_entries = Path.iterdir(target_directory)
+         directory_names: list[str] = [entry.name async for entry in all_entries]
+         return directory_names
+
+     @staticmethod
+     async def delete_collection(name: str) -> None:
+         """Asynchronous method for deleting a collection by its name.
+
+         Args:
+             name (str): Collection name.
+
+         Returns:
+             None.
+         """
+         target_directory = f"{settings.DB_ROOT}/{name}"
+         await to_thread.run_sync(rmtree, target_directory)  # pyrefly: ignore[bad-argument-type]
+         return
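
Because collection_list and delete_collection are static coroutines, they can be called on the Scruby class itself. A minimal maintenance sketch built only from the methods shown above (anyio.run is used since the library is anyio-based):

import anyio

from scruby.db import Scruby


async def drop_all_collections() -> None:
    # Each top-level directory under DB_ROOT is one collection.
    for name in await Scruby.collection_list():
        await Scruby.delete_collection(name)


anyio.run(drop_all_collections)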