scruby 0.9.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scruby/db.py CHANGED
@@ -1,290 +1,756 @@
1
- """Creation and management of the database."""
2
-
3
- from __future__ import annotations
4
-
5
- __all__ = ("Scruby",)
6
-
7
- import concurrent.futures
8
- import contextlib
9
- import zlib
10
- from collections.abc import Callable
11
- from pathlib import Path as SyncPath
12
- from shutil import rmtree
13
- from typing import Any, Never, TypeVar, assert_never
14
-
15
- import orjson
16
- from anyio import Path, to_thread
17
-
18
- from scruby import constants
19
-
20
- T = TypeVar("T")
21
-
22
-
23
- class Scruby[T]:
24
- """Creation and management of database.
25
-
26
- Args:
27
- class_model: Class of Model (Pydantic).
28
- """
29
-
30
- def __init__( # noqa: D107
31
- self,
32
- class_model: T,
33
- ) -> None:
34
- self.__class_model = class_model
35
- self.__db_root = constants.DB_ROOT
36
- self.__length_reduction_hash = constants.LENGTH_REDUCTION_HASH
37
- # The maximum number of keys.
38
- match self.__length_reduction_hash:
39
- case 0:
40
- self.__max_num_keys = 4294967296
41
- case 2:
42
- self.__max_num_keys = 16777216
43
- case 4:
44
- self.__max_num_keys = 65536
45
- case 6:
46
- self.__max_num_keys = 256
47
- case _ as unreachable:
48
- assert_never(Never(unreachable))
49
-
50
- async def get_leaf_path(self, key: str) -> Path:
51
- """Asynchronous method for getting path to collection cell by key.
52
-
53
- Args:
54
- key: Key name.
55
- """
56
- if not isinstance(key, str):
57
- raise KeyError("The key is not a type of `str`.")
58
- if len(key) == 0:
59
- raise KeyError("The key should not be empty.")
60
- # Key to crc32 sum.
61
- key_as_hash: str = f"{zlib.crc32(key.encode('utf-8')):08x}"[self.__length_reduction_hash :]
62
- # Convert crc32 sum in the segment of path.
63
- separated_hash: str = "/".join(list(key_as_hash))
64
- # The path of the branch to the database.
65
- branch_path: Path = Path(
66
- *(
67
- self.__db_root,
68
- self.__class_model.__name__,
69
- separated_hash,
70
- ),
71
- )
72
- # If the branch does not exist, need to create it.
73
- if not await branch_path.exists():
74
- await branch_path.mkdir(parents=True)
75
- # The path to the database cell.
76
- leaf_path: Path = Path(*(branch_path, "leaf.json"))
77
- return leaf_path
78
-
79
- async def set_key(
80
- self,
81
- key: str,
82
- value: T,
83
- ) -> None:
84
- """Asynchronous method for adding and updating keys to collection.
85
-
86
- Args:
87
- key: Key name.
88
- value: Value of key.
89
- """
90
- # The path to the database cell.
91
- leaf_path: Path = await self.get_leaf_path(key)
92
- value_json: str = value.model_dump_json()
93
- # Write key-value to the database.
94
- if await leaf_path.exists():
95
- # Add new key or update existing.
96
- data_json: bytes = await leaf_path.read_bytes()
97
- data: dict = orjson.loads(data_json) or {}
98
- data[key] = value_json
99
- await leaf_path.write_bytes(orjson.dumps(data))
100
- else:
101
- # Add new key to a blank leaf.
102
- await leaf_path.write_bytes(orjson.dumps({key: value_json}))
103
-
104
- async def get_key(self, key: str) -> T:
105
- """Asynchronous method for getting value of key from collection.
106
-
107
- Args:
108
- key: Key name.
109
- """
110
- # The path to the database cell.
111
- leaf_path: Path = await self.get_leaf_path(key)
112
- # Get value of key.
113
- if await leaf_path.exists():
114
- data_json: bytes = await leaf_path.read_bytes()
115
- data: dict = orjson.loads(data_json) or {}
116
- obj: T = self.__class_model.model_validate_json(data[key])
117
- return obj
118
- raise KeyError()
119
-
120
- async def has_key(self, key: str) -> bool:
121
- """Asynchronous method for checking presence of key in collection.
122
-
123
- Args:
124
- key: Key name.
125
- """
126
- # The path to the database cell.
127
- leaf_path: Path = await self.get_leaf_path(key)
128
- # Checking whether there is a key.
129
- if await leaf_path.exists():
130
- data_json: bytes = await leaf_path.read_bytes()
131
- data: dict = orjson.loads(data_json) or {}
132
- try:
133
- data[key]
134
- return True
135
- except KeyError:
136
- return False
137
- return False
138
-
139
- async def delete_key(self, key: str) -> None:
140
- """Asynchronous method for deleting key from collection.
141
-
142
- Args:
143
- key: Key name.
144
- """
145
- # The path to the database cell.
146
- leaf_path: Path = await self.get_leaf_path(key)
147
- # Deleting key.
148
- if await leaf_path.exists():
149
- data_json: bytes = await leaf_path.read_bytes()
150
- data: dict = orjson.loads(data_json) or {}
151
- del data[key]
152
- await leaf_path.write_bytes(orjson.dumps(data))
153
- return
154
- raise KeyError()
155
-
156
- @staticmethod
157
- async def napalm() -> None:
158
- """Asynchronous method for full database deletion.
159
-
160
- The main purpose is tests.
161
-
162
- Warning:
163
- - `Be careful, this will remove all keys.`
164
- """
165
- with contextlib.suppress(FileNotFoundError):
166
- await to_thread.run_sync(rmtree, constants.DB_ROOT)
167
- return
168
-
169
- @staticmethod
170
- def search_task(
171
- key: int,
172
- filter_fn: Callable,
173
- length_reduction_hash: str,
174
- db_root: str,
175
- class_model: T,
176
- ) -> dict[str, Any] | None:
177
- """Search task."""
178
- key_as_hash: str = f"{key:08x}"[length_reduction_hash:]
179
- separated_hash: str = "/".join(list(key_as_hash))
180
- leaf_path: SyncPath = SyncPath(
181
- *(
182
- db_root,
183
- class_model.__name__,
184
- separated_hash,
185
- "leaf.json",
186
- ),
187
- )
188
- if leaf_path.exists():
189
- data_json: bytes = leaf_path.read_bytes()
190
- data: dict[str, str] = orjson.loads(data_json) or {}
191
- for _, val in data.items():
192
- doc = class_model.model_validate_json(val)
193
- if filter_fn(doc):
194
- return doc
195
- return None
196
-
197
- def find_one(
198
- self,
199
- filter_fn: Callable,
200
- max_workers: int | None = None,
201
- timeout: float | None = None,
202
- ) -> T | None:
203
- """Find a single document.
204
-
205
- The search is based on the effect of a quantum loop.
206
- The search effectiveness depends on the number of processor threads.
207
- Ideally, hundreds and even thousands of threads are required.
208
-
209
- Args:
210
- filter_fn: A function that execute the conditions of filtering.
211
- max_workers: The maximum number of processes that can be used to
212
- execute the given calls. If None or not given then as many
213
- worker processes will be created as the machine has processors.
214
- timeout: The number of seconds to wait for the result if the future isn't done.
215
- If None, then there is no limit on the wait time.
216
- """
217
- keys: range = range(1, self.__max_num_keys)
218
- search_task_fn: Callable = self.search_task
219
- length_reduction_hash: int = self.__length_reduction_hash
220
- db_root: str = self.__db_root
221
- class_model: T = self.__class_model
222
- with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
223
- for key in keys:
224
- future = executor.submit(
225
- search_task_fn,
226
- key,
227
- filter_fn,
228
- length_reduction_hash,
229
- db_root,
230
- class_model,
231
- )
232
- doc = future.result(timeout)
233
- if doc is not None:
234
- return doc
235
- return None
236
-
237
- def find_many(
238
- self,
239
- filter_fn: Callable,
240
- db_query_docs_limit: int = 1000,
241
- max_workers: int | None = None,
242
- timeout: float | None = None,
243
- ) -> list[T] | None:
244
- """Find documents.
245
-
246
- The search is based on the effect of a quantum loop.
247
- The search effectiveness depends on the number of processor threads.
248
- Ideally, hundreds and even thousands of threads are required.
249
-
250
- Args:
251
- filter_fn: A function that execute the conditions of filtering.
252
- db_query_docs_limit: Limiting the number of request results. By default = 1000.
253
- max_workers: The maximum number of processes that can be used to
254
- execute the given calls. If None or not given then as many
255
- worker processes will be created as the machine has processors.
256
- timeout: The number of seconds to wait for the result if the future isn't done.
257
- If None, then there is no limit on the wait time.
258
- """
259
- keys: range = range(1, self.__max_num_keys)
260
- search_task_fn: Callable = self.search_task
261
- length_reduction_hash: int = self.__length_reduction_hash
262
- db_root: str = self.__db_root
263
- class_model: T = self.__class_model
264
- counter: int = 0
265
- with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
266
- results = []
267
- for key in keys:
268
- if counter == db_query_docs_limit:
269
- break
270
- future = executor.submit(
271
- search_task_fn,
272
- key,
273
- filter_fn,
274
- length_reduction_hash,
275
- db_root,
276
- class_model,
277
- )
278
- doc = future.result(timeout)
279
- if doc is not None:
280
- results.append(doc)
281
- counter += 1
282
- return results or None
283
-
284
- def collection_name(self) -> str:
285
- """Get collection name."""
286
- return self.__class_model.__name__
287
-
288
- def collection_full_name(self) -> str:
289
- """Get full name of collection."""
290
- return f"{self.__db_root}/{self.__class_model.__name__}"
1
+ """Creation and management of the database."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __all__ = ("Scruby",)
6
+
7
+ import concurrent.futures
8
+ import contextlib
9
+ import logging
10
+ import zlib
11
+ from collections.abc import Callable
12
+ from pathlib import Path as SyncPath
13
+ from shutil import rmtree
14
+ from typing import Any, Literal, TypeVar, assert_never
15
+
16
+ import orjson
17
+ from anyio import Path, to_thread
18
+ from pydantic import BaseModel
19
+
20
+ from scruby import constants
21
+ from scruby.errors import (
22
+ KeyAlreadyExistsError,
23
+ KeyNotExistsError,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ T = TypeVar("T")
29
+
30
+
31
+ class _Meta(BaseModel):
32
+ """Metadata of Collection."""
33
+
34
+ counter_documents: int
35
+
36
+
37
+ class Scruby[T]:
38
+ """Creation and management of database.
39
+
40
+ Args:
41
+ class_model: Class of Model (Pydantic).
42
+ """
43
+
44
+ def __init__( # noqa: D107
45
+ self,
46
+ class_model: T,
47
+ ) -> None:
48
+ self.__meta = _Meta
49
+ self.__class_model = class_model
50
+ self.__db_root = constants.DB_ROOT
51
+ self.__hash_reduce_left = constants.HASH_REDUCE_LEFT
52
+ # The maximum number of branches.
53
+ match self.__hash_reduce_left:
54
+ case 0:
55
+ self.__max_branch_number = 4294967296
56
+ case 2:
57
+ self.__max_branch_number = 16777216
58
+ case 4:
59
+ self.__max_branch_number = 65536
60
+ case 6:
61
+ self.__max_branch_number = 256
62
+ case _ as unreachable:
63
+ msg: str = f"{unreachable} - Unacceptable value for HASH_REDUCE_LEFT."
64
+ logger.critical(msg)
65
+ assert_never(unreachable)
66
+ # Cache the path to the metadata file as a tuple.
67
+ # The zero branch is reserved for metadata.
68
+ branch_number: int = 0
69
+ branch_number_as_hash: str = f"{branch_number:08x}"[constants.HASH_REDUCE_LEFT :]
70
+ separated_hash: str = "/".join(list(branch_number_as_hash))
71
+ self.__meta_path_tuple = (
72
+ constants.DB_ROOT,
73
+ class_model.__name__,
74
+ separated_hash,
75
+ "meta.json",
76
+ )
77
+ # Create metadata for collection, if required.
78
+ branch_path = SyncPath(
79
+ *(
80
+ self.__db_root,
81
+ self.__class_model.__name__,
82
+ separated_hash,
83
+ ),
84
+ )
85
+ if not branch_path.exists():
86
+ branch_path.mkdir(parents=True)
87
+ meta = _Meta(
88
+ counter_documents=0,
89
+ )
90
+ meta_json = meta.model_dump_json()
91
+ meta_path = SyncPath(*(branch_path, "meta.json"))
92
+ meta_path.write_text(meta_json, "utf-8")
93
+
94
+ async def _get_meta(self) -> _Meta:
95
+ """Asynchronous method for getting metadata of collection.
96
+
97
+ This method is for internal use.
98
+
99
+ Returns:
100
+ Metadata object.
101
+ """
102
+ meta_path = Path(*self.__meta_path_tuple)
103
+ meta_json = await meta_path.read_text()
104
+ meta: _Meta = self.__meta.model_validate_json(meta_json)
105
+ return meta
106
+
107
+ async def _set_meta(self, meta: _Meta) -> None:
108
+ """Asynchronous method for updating metadata of collection.
109
+
110
+ This method is for internal use.
111
+
112
+ Returns:
113
+ None.
114
+ """
115
+ meta_json = meta.model_dump_json()
116
+ meta_path = Path(*self.__meta_path_tuple)
117
+ await meta_path.write_text(meta_json, "utf-8")
118
+
119
+ async def _counter_documents(self, step: Literal[1, -1]) -> None:
120
+ """Asynchronous method for management of documents in metadata of collection.
121
+
122
+ This method is for internal use.
123
+
124
+ Returns:
125
+ None.
126
+ """
127
+ meta_path = Path(*self.__meta_path_tuple)
128
+ meta_json = await meta_path.read_text("utf-8")
129
+ meta: _Meta = self.__meta.model_validate_json(meta_json)
130
+ meta.counter_documents += step
131
+ meta_json = meta.model_dump_json()
132
+ await meta_path.write_text(meta_json, "utf-8")
133
+
134
+ def _sync_counter_documents(self, number: int) -> None:
135
+ """Management of documents in metadata of collection.
136
+
137
+ This method is for internal use.
138
+ """
139
+ meta_path = SyncPath(*self.__meta_path_tuple)
140
+ meta_json = meta_path.read_text("utf-8")
141
+ meta: _Meta = self.__meta.model_validate_json(meta_json)
142
+ meta.counter_documents += number
143
+ meta_json = meta.model_dump_json()
144
+ meta_path.write_text(meta_json, "utf-8")
145
+
146
+ async def _get_leaf_path(self, key: str) -> Path:
147
+ """Asynchronous method for getting path to collection cell by key.
148
+
149
+ This method is for internal use.
150
+
151
+ Args:
152
+ key: Key name.
153
+
154
+ Returns:
155
+ Path to cell of collection.
156
+ """
157
+ if not isinstance(key, str):
158
+ logger.error("The key is not a type of `str`.")
159
+ raise KeyError("The key is not a type of `str`.")
160
+ if len(key) == 0:
161
+ logger.error("The key should not be empty.")
162
+ raise KeyError("The key should not be empty.")
163
+ # Key to crc32 sum.
164
+ key_as_hash: str = f"{zlib.crc32(key.encode('utf-8')):08x}"[self.__hash_reduce_left :]
165
+ # Convert crc32 sum in the segment of path.
166
+ separated_hash: str = "/".join(list(key_as_hash))
167
+ # The path of the branch to the database.
168
+ branch_path: Path = Path(
169
+ *(
170
+ self.__db_root,
171
+ self.__class_model.__name__,
172
+ separated_hash,
173
+ ),
174
+ )
175
+ # If the branch does not exist, need to create it.
176
+ if not await branch_path.exists():
177
+ await branch_path.mkdir(parents=True)
178
+ # The path to the database cell.
179
+ leaf_path: Path = Path(*(branch_path, "leaf.json"))
180
+ return leaf_path
181
+
182
+ async def add_key(
183
+ self,
184
+ key: str,
185
+ value: T,
186
+ ) -> None:
187
+ """Asynchronous method for adding key to collection.
188
+
189
+ Args:
190
+ key: Key name. Type `str`.
191
+ value: Value of key. Type `BaseModel`.
192
+
193
+ Returns:
194
+ None.
195
+ """
196
+ # The path to cell of collection.
197
+ leaf_path: Path = await self._get_leaf_path(key)
198
+ value_json: str = value.model_dump_json()
199
+ # Write key-value to collection.
200
+ if await leaf_path.exists():
201
+ # Add new key.
202
+ data_json: bytes = await leaf_path.read_bytes()
203
+ data: dict = orjson.loads(data_json) or {}
204
+ try:
205
+ data[key]
206
+ except KeyError:
207
+ data[key] = value_json
208
+ await leaf_path.write_bytes(orjson.dumps(data))
209
+ else:
210
+ err = KeyAlreadyExistsError()
211
+ logger.error(err.message)
212
+ raise err
213
+ else:
214
+ # Add new key to a blank leaf.
215
+ await leaf_path.write_bytes(orjson.dumps({key: value_json}))
216
+ await self._counter_documents(1)
217
+
218
+ async def update_key(
219
+ self,
220
+ key: str,
221
+ value: T,
222
+ ) -> None:
223
+ """Asynchronous method for updating key to collection.
224
+
225
+ Args:
226
+ key: Key name. Type `str`.
227
+ value: Value of key. Type `BaseModel`.
228
+
229
+ Returns:
230
+ None.
231
+ """
232
+ # The path to cell of collection.
233
+ leaf_path: Path = await self._get_leaf_path(key)
234
+ value_json: str = value.model_dump_json()
235
+ # Write the updated value to the collection.
236
+ if await leaf_path.exists():
237
+ # Update the existing key.
238
+ data_json: bytes = await leaf_path.read_bytes()
239
+ data: dict = orjson.loads(data_json) or {}
240
+ try:
241
+ data[key]
242
+ data[key] = value_json
243
+ await leaf_path.write_bytes(orjson.dumps(data))
244
+ except KeyError:
245
+ err = KeyNotExistsError()
246
+ logger.error(err.message)
247
+ raise err from None
248
+ else:
249
+ logger.error("The key not exists.")
250
+ raise KeyError("The key does not exist.")
251
+
252
+ async def get_key(self, key: str) -> T:
253
+ """Asynchronous method for getting value of key from collection.
254
+
255
+ Args:
256
+ key: Key name.
257
+
258
+ Returns:
259
+ Value of the key. Raises KeyError if the key does not exist.
260
+ """
261
+ # The path to the database cell.
262
+ leaf_path: Path = await self._get_leaf_path(key)
263
+ # Get value of key.
264
+ if await leaf_path.exists():
265
+ data_json: bytes = await leaf_path.read_bytes()
266
+ data: dict = orjson.loads(data_json) or {}
267
+ obj: T = self.__class_model.model_validate_json(data[key])
268
+ return obj
269
+ msg: str = "`get_key` - The unacceptable key value."
270
+ logger.error(msg)
271
+ raise KeyError(msg)
272
+
273
+ async def has_key(self, key: str) -> bool:
274
+ """Asynchronous method for checking presence of key in collection.
275
+
276
+ Args:
277
+ key: Key name.
278
+
279
+ Returns:
280
+ True if the key is present, otherwise False.
281
+ """
282
+ # Get path to cell of collection.
283
+ leaf_path: Path = await self._get_leaf_path(key)
284
+ # Checking whether there is a key.
285
+ if await leaf_path.exists():
286
+ data_json: bytes = await leaf_path.read_bytes()
287
+ data: dict = orjson.loads(data_json) or {}
288
+ try:
289
+ data[key]
290
+ return True
291
+ except KeyError:
292
+ return False
293
+ return False
294
+
295
+ async def delete_key(self, key: str) -> None:
296
+ """Asynchronous method for deleting key from collection.
297
+
298
+ Args:
299
+ key: Key name.
300
+
301
+ Returns:
302
+ None.
303
+ """
304
+ # The path to the database cell.
305
+ leaf_path: Path = await self._get_leaf_path(key)
306
+ # Deleting key.
307
+ if await leaf_path.exists():
308
+ data_json: bytes = await leaf_path.read_bytes()
309
+ data: dict = orjson.loads(data_json) or {}
310
+ del data[key]
311
+ await leaf_path.write_bytes(orjson.dumps(data))
312
+ await self._counter_documents(-1)
313
+ return
314
+ msg: str = "`delete_key` - The unacceptable key value."
315
+ logger.error(msg)
316
+ raise KeyError(msg)
317
+
318
+ @staticmethod
319
+ async def napalm() -> None:
320
+ """Asynchronous method for full database deletion.
321
+
322
+ The main purpose is tests.
323
+
324
+ Warning:
325
+ - `Be careful, this will remove all keys.`
326
+
327
+ Returns:
328
+ None.
329
+ """
330
+ with contextlib.suppress(FileNotFoundError):
331
+ await to_thread.run_sync(rmtree, constants.DB_ROOT)
332
+ return
333
+
334
+ @staticmethod
335
+ def _task_find(
336
+ branch_number: int,
337
+ filter_fn: Callable,
338
+ hash_reduce_left: int,
339
+ db_root: str,
340
+ class_model: T,
341
+ ) -> list[T] | None:
342
+ """Task for find documents.
343
+
344
+ This method is for internal use.
345
+
346
+ Returns:
347
+ List of documents or None.
348
+ """
349
+ branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
350
+ separated_hash: str = "/".join(list(branch_number_as_hash))
351
+ leaf_path: SyncPath = SyncPath(
352
+ *(
353
+ db_root,
354
+ class_model.__name__,
355
+ separated_hash,
356
+ "leaf.json",
357
+ ),
358
+ )
359
+ docs: list[T] = []
360
+ if leaf_path.exists():
361
+ data_json: bytes = leaf_path.read_bytes()
362
+ data: dict[str, str] = orjson.loads(data_json) or {}
363
+ for _, val in data.items():
364
+ doc = class_model.model_validate_json(val)
365
+ if filter_fn(doc):
366
+ docs.append(doc)
367
+ return docs or None
368
+
369
+ def find_one(
370
+ self,
371
+ filter_fn: Callable,
372
+ max_workers: int | None = None,
373
+ timeout: float | None = None,
374
+ ) -> T | None:
375
+ """Finds a single document matching the filter.
376
+
377
+ The search is based on the effect of a quantum loop.
378
+ The search effectiveness depends on the number of processor threads.
379
+ Ideally, hundreds and even thousands of threads are required.
380
+
381
+ Args:
382
+ filter_fn: A function that execute the conditions of filtering.
383
+ max_workers: The maximum number of processes that can be used to
384
+ execute the given calls. If None or not given then as many
385
+ worker processes will be created as the machine has processors.
386
+ timeout: The number of seconds to wait for the result if the future isn't done.
387
+ If None, then there is no limit on the wait time.
388
+
389
+ Returns:
390
+ Document or None.
391
+ """
392
+ branch_numbers: range = range(1, self.__max_branch_number)
393
+ search_task_fn: Callable = self._task_find
394
+ hash_reduce_left: int = self.__hash_reduce_left
395
+ db_root: str = self.__db_root
396
+ class_model: T = self.__class_model
397
+ with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
398
+ for branch_number in branch_numbers:
399
+ future = executor.submit(
400
+ search_task_fn,
401
+ branch_number,
402
+ filter_fn,
403
+ hash_reduce_left,
404
+ db_root,
405
+ class_model,
406
+ )
407
+ docs = future.result(timeout)
408
+ if docs is not None:
409
+ return docs[0]
410
+ return None
411
+
412
+ def find_many(
413
+ self,
414
+ filter_fn: Callable,
415
+ limit_docs: int = 1000,
416
+ max_workers: int | None = None,
417
+ timeout: float | None = None,
418
+ ) -> list[T] | None:
419
+ """Finds one or more documents matching the filter.
420
+
421
+ The search is based on the effect of a quantum loop.
422
+ The search effectiveness depends on the number of processor threads.
423
+ Ideally, hundreds and even thousands of threads are required.
424
+
425
+ Args:
426
+ filter_fn: A function that implements the filtering conditions.
427
+ limit_docs: Maximum number of documents to return. By default = 1000.
428
+ max_workers: The maximum number of threads that can be used to
429
+ execute the given calls. If None or not given, a default
430
+ based on the machine's processor count is used.
431
+ timeout: The number of seconds to wait for the result if the future isn't done.
432
+ If None, then there is no limit on the wait time.
433
+
434
+ Returns:
435
+ List of documents or None.
436
+ """
437
+ branch_numbers: range = range(1, self.__max_branch_number)
438
+ search_task_fn: Callable = self._task_find
439
+ hash_reduce_left: int = self.__hash_reduce_left
440
+ db_root: str = self.__db_root
441
+ class_model: T = self.__class_model
442
+ counter: int = 0
443
+ result: list[T] = []
444
+ with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
445
+ for branch_number in branch_numbers:
446
+ if counter >= limit_docs:
447
+ return result[:limit_docs]
448
+ future = executor.submit(
449
+ search_task_fn,
450
+ branch_number,
451
+ filter_fn,
452
+ hash_reduce_left,
453
+ db_root,
454
+ class_model,
455
+ )
456
+ docs = future.result(timeout)
457
+ if docs is not None:
458
+ for doc in docs:
459
+ if counter >= limit_docs:
460
+ return result[:limit_docs]
461
+ result.append(doc)
462
+ counter += 1
463
+ return result or None
464
+
465
+ def collection_name(self) -> str:
466
+ """Get collection name.
467
+
468
+ Returns:
469
+ Collection name.
470
+ """
471
+ return self.__class_model.__name__
472
+
473
+ def collection_full_name(self) -> str:
474
+ """Get full name of collection.
475
+
476
+ Returns:
477
+ Full name of collection.
478
+ """
479
+ return f"{self.__db_root}/{self.__class_model.__name__}"
480
+
481
+ async def estimated_document_count(self) -> int:
482
+ """Get an estimate of the number of documents in this collection using collection metadata.
483
+
484
+ Returns:
485
+ The number of documents.
486
+ """
487
+ meta = await self._get_meta()
488
+ return meta.counter_documents
489
+
490
+ def count_documents(
491
+ self,
492
+ filter_fn: Callable,
493
+ max_workers: int | None = None,
494
+ timeout: float | None = None,
495
+ ) -> int:
496
+ """Count the number of documents a matching the filter in this collection.
497
+
498
+ The search is based on the effect of a quantum loop.
499
+ The search effectiveness depends on the number of processor threads.
500
+ Ideally, hundreds and even thousands of threads are required.
501
+
502
+ Args:
503
+ filter_fn: A function that implements the filtering conditions.
504
+ max_workers: The maximum number of threads that can be used to
505
+ execute the given calls. If None or not given, a default
506
+ based on the machine's processor count is used.
507
+ timeout: The number of seconds to wait for the result if the future isn't done.
508
+ If None, then there is no limit on the wait time.
509
+
510
+ Returns:
511
+ The number of documents.
512
+ """
513
+ branch_numbers: range = range(1, self.__max_branch_number)
514
+ search_task_fn: Callable = self._task_find
515
+ hash_reduce_left: int = self.__hash_reduce_left
516
+ db_root: str = self.__db_root
517
+ class_model: T = self.__class_model
518
+ counter: int = 0
519
+ with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
520
+ for branch_number in branch_numbers:
521
+ future = executor.submit(
522
+ search_task_fn,
523
+ branch_number,
524
+ filter_fn,
525
+ hash_reduce_left,
526
+ db_root,
527
+ class_model,
528
+ )
529
+ docs = future.result(timeout)
530
+ counter += len(docs or [])
531
+ return counter
532
+
533
+ @staticmethod
534
+ def _task_delete(
535
+ branch_number: int,
536
+ filter_fn: Callable,
537
+ hash_reduce_left: int,
538
+ db_root: str,
539
+ class_model: T,
540
+ ) -> int:
541
+ """Task for find and delete documents.
542
+
543
+ This method is for internal use.
544
+
545
+ Returns:
546
+ The number of deleted documents.
547
+ """
548
+ branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
549
+ separated_hash: str = "/".join(list(branch_number_as_hash))
550
+ leaf_path: SyncPath = SyncPath(
551
+ *(
552
+ db_root,
553
+ class_model.__name__,
554
+ separated_hash,
555
+ "leaf.json",
556
+ ),
557
+ )
558
+ counter: int = 0
559
+ if leaf_path.exists():
560
+ data_json: bytes = leaf_path.read_bytes()
561
+ data: dict[str, str] = orjson.loads(data_json) or {}
562
+ new_state: dict[str, str] = {}
563
+ for key, val in data.items():
564
+ doc = class_model.model_validate_json(val)
565
+ if filter_fn(doc):
566
+ counter -= 1
567
+ else:
568
+ new_state[key] = val
569
+ leaf_path.write_bytes(orjson.dumps(new_state))
570
+ return counter
571
+
572
+ def delete_many(
573
+ self,
574
+ filter_fn: Callable,
575
+ max_workers: int | None = None,
576
+ timeout: float | None = None,
577
+ ) -> int:
578
+ """Delete one or more documents matching the filter.
579
+
580
+ The search is based on the effect of a quantum loop.
581
+ The search effectiveness depends on the number of processor threads.
582
+ Ideally, hundreds and even thousands of threads are required.
583
+
584
+ Args:
585
+ filter_fn: A function that implements the filtering conditions.
586
+ max_workers: The maximum number of threads that can be used to
587
+ execute the given calls. If None or not given, a default
588
+ based on the machine's processor count is used.
589
+ timeout: The number of seconds to wait for the result if the future isn't done.
590
+ If None, then there is no limit on the wait time.
591
+
592
+ Returns:
593
+ The number of deleted documents.
594
+ """
595
+ branch_numbers: range = range(1, self.__max_branch_number)
596
+ search_task_fn: Callable = self._task_delete
597
+ hash_reduce_left: int = self.__hash_reduce_left
598
+ db_root: str = self.__db_root
599
+ class_model: T = self.__class_model
600
+ counter: int = 0
601
+ with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
602
+ for branch_number in branch_numbers:
603
+ future = executor.submit(
604
+ search_task_fn,
605
+ branch_number,
606
+ filter_fn,
607
+ hash_reduce_left,
608
+ db_root,
609
+ class_model,
610
+ )
611
+ counter += future.result(timeout)
612
+ if counter < 0:
613
+ self._sync_counter_documents(counter)
614
+ return abs(counter)
615
+
616
+ @staticmethod
617
+ def _task_get_docs(
618
+ branch_number: int,
619
+ hash_reduce_left: int,
620
+ db_root: str,
621
+ class_model: T,
622
+ ) -> list[Any]:
623
+ """Get documents for custom task.
624
+
625
+ This method is for internal use.
626
+
627
+ Returns:
628
+ List of documents.
629
+ """
630
+ branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
631
+ separated_hash: str = "/".join(list(branch_number_as_hash))
632
+ leaf_path: SyncPath = SyncPath(
633
+ *(
634
+ db_root,
635
+ class_model.__name__,
636
+ separated_hash,
637
+ "leaf.json",
638
+ ),
639
+ )
640
+ docs: list[T] = []
641
+ if leaf_path.exists():
642
+ data_json: bytes = leaf_path.read_bytes()
643
+ data: dict[str, str] = orjson.loads(data_json) or {}
644
+ for _, val in data.items():
645
+ docs.append(class_model.model_validate_json(val))
646
+ return docs
647
+
648
+ def run_custom_task(self, custom_task_fn: Callable, limit_docs: int = 1000) -> Any:
649
+ """Running custom task.
650
+
651
+ This method runs a task built on the basis of a quantum loop.
652
+ The effectiveness of the task depends on the number of processor threads.
653
+ Ideally, hundreds and even thousands of threads are required.
654
+
655
+ Args:
656
+ custom_task_fn: A function that executes the custom task.
657
+ limit_docs: Maximum number of documents to process. By default = 1000.
658
+
659
+ Returns:
660
+ The result of a custom task.
661
+ """
662
+ kwargs = {
663
+ "get_docs_fn": self._task_get_docs,
664
+ "branch_numbers": range(1, self.__max_branch_number),
665
+ "hash_reduce_left": self.__hash_reduce_left,
666
+ "db_root": self.__db_root,
667
+ "class_model": self.__class_model,
668
+ "limit_docs": limit_docs,
669
+ }
670
+ return custom_task_fn(**kwargs)
671
+
672
+ @staticmethod
673
+ def _task_update(
674
+ branch_number: int,
675
+ filter_fn: Callable,
676
+ hash_reduce_left: int,
677
+ db_root: str,
678
+ class_model: T,
679
+ new_data: dict[str, Any],
680
+ ) -> int:
681
+ """Task for find documents.
682
+
683
+ This method is for internal use.
684
+
685
+ Returns:
686
+ The number of updated documents.
687
+ """
688
+ branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
689
+ separated_hash: str = "/".join(list(branch_number_as_hash))
690
+ leaf_path: SyncPath = SyncPath(
691
+ *(
692
+ db_root,
693
+ class_model.__name__,
694
+ separated_hash,
695
+ "leaf.json",
696
+ ),
697
+ )
698
+ counter: int = 0
699
+ if leaf_path.exists():
700
+ data_json: bytes = leaf_path.read_bytes()
701
+ data: dict[str, str] = orjson.loads(data_json) or {}
702
+ new_state: dict[str, str] = {}
703
+ for doc_key, val in data.items():
704
+ doc = class_model.model_validate_json(val)
705
+ if filter_fn(doc):
706
+ doc.__dict__.update(new_data)
707
+ val = doc.model_dump_json()
708
+ counter += 1
709
+ new_state[doc_key] = val
710
+ leaf_path.write_bytes(orjson.dumps(new_state))
711
+ return counter
712
+
713
+ def update_many(
714
+ self,
715
+ filter_fn: Callable,
716
+ new_data: dict[str, Any],
717
+ max_workers: int | None = None,
718
+ timeout: float | None = None,
719
+ ) -> int:
720
+ """Updates one or more documents matching the filter.
721
+
722
+ The search is based on the effect of a quantum loop.
723
+ The search effectiveness depends on the number of processor threads.
724
+ Ideally, hundreds and even thousands of threads are required.
725
+
726
+ Args:
727
+ filter_fn: A function that implements the filtering conditions.
728
+ new_data: New data for the fields that need to be updated.
729
+ max_workers: The maximum number of threads that can be used to
730
+ execute the given calls. If None or not given, a default
731
+ based on the machine's processor count is used.
732
+ timeout: The number of seconds to wait for the result if the future isn't done.
733
+ If None, then there is no limit on the wait time.
734
+
735
+ Returns:
736
+ The number of updated documents.
737
+ """
738
+ branch_numbers: range = range(1, self.__max_branch_number)
739
+ update_task_fn: Callable = self._task_update
740
+ hash_reduce_left: int = self.__hash_reduce_left
741
+ db_root: str = self.__db_root
742
+ class_model: T = self.__class_model
743
+ counter: int = 0
744
+ with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
745
+ for branch_number in branch_numbers:
746
+ future = executor.submit(
747
+ update_task_fn,
748
+ branch_number,
749
+ filter_fn,
750
+ hash_reduce_left,
751
+ db_root,
752
+ class_model,
753
+ new_data,
754
+ )
755
+ counter += future.result(timeout)
756
+ return counter
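
The new version keeps the same key-to-path scheme as 0.9.0: a key is hashed with crc32, rendered as eight hex digits, truncated from the left by HASH_REDUCE_LEFT, and each remaining digit becomes one directory level. A minimal sketch of that derivation, assuming HASH_REDUCE_LEFT = 6 and an illustrative DB_ROOT (the real values come from scruby.constants):

import zlib

DB_ROOT = "ScrubyDB"    # assumption: the real value comes from scruby.constants
HASH_REDUCE_LEFT = 6    # assumption: one of 0, 2, 4, 6

def leaf_path_for(key: str, collection: str) -> str:
    # crc32 of the key as 8 hex digits, truncated from the left.
    key_as_hash = f"{zlib.crc32(key.encode('utf-8')):08x}"[HASH_REDUCE_LEFT:]
    # Each remaining hex digit becomes one directory level.
    separated_hash = "/".join(key_as_hash)
    return f"{DB_ROOT}/{collection}/{separated_hash}/leaf.json"

print(leaf_path_for("user-1", "User"))  # e.g. ScrubyDB/User/<d1>/<d2>/leaf.json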
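
For orientation, a hedged usage sketch of the renamed API (set_key from 0.9.0 is split into add_key and update_key in this release); the User model and its fields are illustrative, not part of the package:

import asyncio
from pydantic import BaseModel
from scruby.db import Scruby

class User(BaseModel):  # illustrative model, not part of scruby
    name: str
    age: int

async def main() -> None:
    users = Scruby(User)
    await users.add_key("user-1", User(name="Ada", age=36))     # KeyAlreadyExistsError on repeat
    await users.update_key("user-1", User(name="Ada", age=37))  # KeyNotExistsError if absent
    print(await users.get_key("user-1"))
    print(await users.estimated_document_count())
    # find_many is synchronous; it fans branch scans out to a thread pool.
    print(users.find_many(lambda doc: doc.age >= 18, limit_docs=10))

asyncio.run(main())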
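
run_custom_task hands a fixed set of kwargs to the supplied function. A sketch of a compatible task, matching the kwargs the new version passes in (get_docs_fn, branch_numbers, hash_reduce_left, db_root, class_model, limit_docs); the counting logic itself is illustrative:

def count_all_docs(
    get_docs_fn,      # Scruby._task_get_docs: (branch_number, hash_reduce_left, db_root, class_model) -> list
    branch_numbers,   # range(1, max_branch_number)
    hash_reduce_left,
    db_root,
    class_model,
    limit_docs,
) -> int:
    # Walk every branch, counting documents until limit_docs is reached.
    total = 0
    for branch_number in branch_numbers:
        docs = get_docs_fn(branch_number, hash_reduce_left, db_root, class_model)
        total += len(docs)
        if total >= limit_docs:
            return limit_docs
    return total

# Scruby(User).run_custom_task(count_all_docs, limit_docs=5000)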