scruby 0.17.0__py3-none-any.whl → 0.27.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scruby/db.py CHANGED
@@ -1,97 +1,115 @@
+# Scruby - Asynchronous library for building and managing a hybrid database, based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Creation and management of the database."""

 from __future__ import annotations

 __all__ = ("Scruby",)

-import concurrent.futures
 import contextlib
 import logging
+import re
 import zlib
-from collections.abc import Callable
-from pathlib import Path as SyncPath
 from shutil import rmtree
-from typing import Any, Literal, Never, TypeVar, assert_never
+from typing import Any, Literal, Never, assert_never

-import orjson
-from anyio import Path, to_thread
+from anyio import Path
 from pydantic import BaseModel

-from scruby import constants
-from scruby.errors import (
-    KeyAlreadyExistsError,
-    KeyNotExistsError,
-)
-
-logger = logging.getLogger(__name__)
-
-T = TypeVar("T")
+from scruby import mixins, settings


 class _Meta(BaseModel):
     """Metadata of Collection."""

+    db_root: str
+    collection_name: str
+    hash_reduce_left: int
+    max_branch_number: int
     counter_documents: int


-class Scruby[T]:
-    """Creation and management of database.
-
-    Args:
-        class_model: Class of Model (Pydantic).
-    """
+class Scruby(
+    mixins.Keys,
+    mixins.Find,
+    mixins.CustomTask,
+    mixins.Collection,
+    mixins.Count,
+    mixins.Delete,
+    mixins.Update,
+):
+    """Creation and management of database."""

     def __init__(  # noqa: D107
         self,
-        class_model: T,
     ) -> None:
-        self.__meta = _Meta
-        self.__class_model = class_model
-        self.__db_root = constants.DB_ROOT
-        self.__hash_reduce_left = constants.HASH_REDUCE_LEFT
+        super().__init__()
+        self._meta = _Meta
+        self._db_root = settings.DB_ROOT
+        self._hash_reduce_left = settings.HASH_REDUCE_LEFT
+        self._max_workers = settings.MAX_WORKERS
         # The maximum number of branches.
-        match self.__hash_reduce_left:
+        match self._hash_reduce_left:
             case 0:
-                self.__max_branch_number = 4294967296
+                self._max_branch_number = 4294967296
             case 2:
-                self.__max_branch_number = 16777216
+                self._max_branch_number = 16777216
             case 4:
-                self.__max_branch_number = 65536
+                self._max_branch_number = 65536
             case 6:
-                self.__max_branch_number = 256
+                self._max_branch_number = 256
             case _ as unreachable:
                 msg: str = f"{unreachable} - Unacceptable value for HASH_REDUCE_LEFT."
-                logger.critical(msg)
-                assert_never(Never(unreachable))
-        # Caching a pati for metadata in the form of a tuple.
+                logging.critical(msg)
+                assert_never(Never(unreachable))  # pyrefly: ignore[not-callable]
+
+    @classmethod
+    async def collection(cls, class_model: Any) -> Any:
+        """Get an object to access a collection.
+
+        Args:
+            class_model: Class of Model (pydantic.BaseModel).
+
+        Returns:
+            Instance of Scruby for accessing a collection.
+        """
+        assert BaseModel in class_model.__bases__, "`class_model` does not contain the base class `pydantic.BaseModel`!"
+
+        instance = cls()
+        instance.__dict__["_class_model"] = class_model
+        # Caching a path for metadata.
         # The zero branch is reserved for metadata.
         branch_number: int = 0
-        branch_number_as_hash: str = f"{branch_number:08x}"[constants.HASH_REDUCE_LEFT :]
+        branch_number_as_hash: str = f"{branch_number:08x}"[settings.HASH_REDUCE_LEFT :]
         separated_hash: str = "/".join(list(branch_number_as_hash))
-        self.__meta_path_tuple = (
-            constants.DB_ROOT,
+        meta_dir_path_tuple = (
+            settings.DB_ROOT,
             class_model.__name__,
             separated_hash,
-            "meta.json",
         )
-        # Create metadata for collection, if required.
-        branch_path = SyncPath(
-            *(
-                self.__db_root,
-                self.__class_model.__name__,
-                separated_hash,
-            ),
+        instance.__dict__["_meta_path"] = Path(
+            *meta_dir_path_tuple,
+            "meta.json",
         )
-        if not branch_path.exists():
-            branch_path.mkdir(parents=True)
+        # Create metadata for collection, if missing.
+        branch_path = Path(*meta_dir_path_tuple)
+        if not await branch_path.exists():
+            await branch_path.mkdir(parents=True)
             meta = _Meta(
+                db_root=settings.DB_ROOT,
+                collection_name=class_model.__name__,
+                hash_reduce_left=settings.HASH_REDUCE_LEFT,
+                max_branch_number=instance.__dict__["_max_branch_number"],
                 counter_documents=0,
             )
             meta_json = meta.model_dump_json()
-            meta_path = SyncPath(*(branch_path, "meta.json"))
-            meta_path.write_text(meta_json, "utf-8")
+            meta_path = Path(*(branch_path, "meta.json"))
+            await meta_path.write_text(meta_json, "utf-8")
+        return instance

-    async def _get_meta(self) -> _Meta:
+    async def get_meta(self) -> _Meta:
         """Asynchronous method for getting metadata of collection.

         This method is for internal use.
@@ -99,9 +117,8 @@ class Scruby[T]:
         Returns:
             Metadata object.
         """
-        meta_path = Path(*self.__meta_path_tuple)
-        meta_json = await meta_path.read_text()
-        meta: _Meta = self.__meta.model_validate_json(meta_json)
+        meta_json = await self._meta_path.read_text()
+        meta: _Meta = self._meta.model_validate_json(meta_json)
         return meta

     async def _set_meta(self, meta: _Meta) -> None:
@@ -109,66 +126,66 @@ class Scruby[T]:

         This method is for internal use.

+        Args:
+            meta (_Meta): Metadata of Collection.
+
         Returns:
             None.
         """
         meta_json = meta.model_dump_json()
-        meta_path = Path(*self.__meta_path_tuple)
-        await meta_path.write_text(meta_json, "utf-8")
+        await self._meta_path.write_text(meta_json, "utf-8")

     async def _counter_documents(self, step: Literal[1, -1]) -> None:
         """Asynchronous method for management of documents in metadata of collection.

         This method is for internal use.

+        Args:
+            step (Literal[1, -1]): Number of documents added or removed.
+
         Returns:
             None.
         """
-        meta_path = Path(*self.__meta_path_tuple)
+        meta_path = self._meta_path
         meta_json = await meta_path.read_text("utf-8")
-        meta: _Meta = self.__meta.model_validate_json(meta_json)
+        meta: _Meta = self._meta.model_validate_json(meta_json)
         meta.counter_documents += step
         meta_json = meta.model_dump_json()
         await meta_path.write_text(meta_json, "utf-8")

-    def _sync_counter_documents(self, number: int) -> None:
-        """Management of documents in metadata of collection.
-
-        This method is for internal use.
-        """
-        meta_path = SyncPath(*self.__meta_path_tuple)
-        meta_json = meta_path.read_text("utf-8")
-        meta: _Meta = self.__meta.model_validate_json(meta_json)
-        meta.counter_documents += number
-        meta_json = meta.model_dump_json()
-        meta_path.write_text(meta_json, "utf-8")
-
-    async def _get_leaf_path(self, key: str) -> Path:
+    async def _get_leaf_path(self, key: str) -> tuple[Path, str]:
         """Asynchronous method for getting path to collection cell by key.

         This method is for internal use.

         Args:
-            key: Key name.
+            key (str): Key name.

         Returns:
             Path to cell of collection.
         """
         if not isinstance(key, str):
-            logger.error("The key is not a type of `str`.")
-            raise KeyError("The key is not a type of `str`.")
-        if len(key) == 0:
-            logger.error("The key should not be empty.")
-            raise KeyError("The key should not be empty.")
+            msg = "The key is not a string."
+            logging.error(msg)
+            raise KeyError(msg)
+        # Prepare the key:
+        # strip spaces at the beginning and end of the string,
+        # collapse runs of whitespace into a single space, and lowercase.
+        prepared_key = re.sub(r"\s+", " ", key).strip().lower()
+        # Check the key for an empty string.
+        if len(prepared_key) == 0:
+            msg = "The key should not be empty."
+            logging.error(msg)
+            raise KeyError(msg)
         # Key to crc32 sum.
-        key_as_hash: str = f"{zlib.crc32(key.encode('utf-8')):08x}"[self.__hash_reduce_left :]
+        key_as_hash: str = f"{zlib.crc32(prepared_key.encode('utf-8')):08x}"[self._hash_reduce_left :]
         # Convert crc32 sum into a segment of the path.
         separated_hash: str = "/".join(list(key_as_hash))
         # The path of the branch to the database.
         branch_path: Path = Path(
             *(
-                self.__db_root,
-                self.__class_model.__name__,
+                self._db_root,
+                self._class_model.__name__,
                 separated_hash,
             ),
         )
@@ -177,147 +194,11 @@ class Scruby[T]:
             await branch_path.mkdir(parents=True)
         # The path to the database cell.
         leaf_path: Path = Path(*(branch_path, "leaf.json"))
-        return leaf_path
-
-    async def add_key(
-        self,
-        key: str,
-        value: T,
-    ) -> None:
-        """Asynchronous method for adding key to collection.
-
-        Args:
-            key: Key name. Type `str`.
-            value: Value of key. Type `BaseModel`.
-
-        Returns:
-            None.
-        """
-        # The path to cell of collection.
-        leaf_path: Path = await self._get_leaf_path(key)
-        value_json: str = value.model_dump_json()
-        # Write key-value to collection.
-        if await leaf_path.exists():
-            # Add new key.
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            try:
-                data[key]
-            except KeyError:
-                data[key] = value_json
-                await leaf_path.write_bytes(orjson.dumps(data))
-            else:
-                err = KeyAlreadyExistsError()
-                logger.error(err.message)
-                raise err
-        else:
-            # Add new key to a blank leaf.
-            await leaf_path.write_bytes(orjson.dumps({key: value_json}))
-        await self._counter_documents(1)
-
-    async def update_key(
-        self,
-        key: str,
-        value: T,
-    ) -> None:
-        """Asynchronous method for updating key to collection.
-
-        Args:
-            key: Key name. Type `str`.
-            value: Value of key. Type `BaseModel`.
-
-        Returns:
-            None.
-        """
-        # The path to cell of collection.
-        leaf_path: Path = await self._get_leaf_path(key)
-        value_json: str = value.model_dump_json()
-        # Update the existing key.
-        if await leaf_path.exists():
-            # Update the existing key.
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            try:
-                data[key]
-                data[key] = value_json
-                await leaf_path.write_bytes(orjson.dumps(data))
-            except KeyError:
-                err = KeyNotExistsError()
-                logger.error(err.message)
-                raise err from None
-        else:
-            logger.error("The key not exists.")
-            raise KeyError()
-
-    async def get_key(self, key: str) -> T:
-        """Asynchronous method for getting value of key from collection.
-
-        Args:
-            key: Key name.
-
-        Returns:
-            Value of key or KeyError.
-        """
-        # The path to the database cell.
-        leaf_path: Path = await self._get_leaf_path(key)
-        # Get value of key.
-        if await leaf_path.exists():
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            obj: T = self.__class_model.model_validate_json(data[key])
-            return obj
-        msg: str = "`get_key` - The unacceptable key value."
-        logger.error(msg)
-        raise KeyError()
-
-    async def has_key(self, key: str) -> bool:
-        """Asynchronous method for checking presence of key in collection.
-
-        Args:
-            key: Key name.
-
-        Returns:
-            True, if the key is present.
-        """
-        # Get path to cell of collection.
-        leaf_path: Path = await self._get_leaf_path(key)
-        # Checking whether there is a key.
-        if await leaf_path.exists():
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            try:
-                data[key]
-                return True
-            except KeyError:
-                return False
-        return False
-
-    async def delete_key(self, key: str) -> None:
-        """Asynchronous method for deleting key from collection.
-
-        Args:
-            key: Key name.
-
-        Returns:
-            None.
-        """
-        # The path to the database cell.
-        leaf_path: Path = await self._get_leaf_path(key)
-        # Deleting key.
-        if await leaf_path.exists():
-            data_json: bytes = await leaf_path.read_bytes()
-            data: dict = orjson.loads(data_json) or {}
-            del data[key]
-            await leaf_path.write_bytes(orjson.dumps(data))
-            await self._counter_documents(-1)
-            return
-        msg: str = "`delete_key` - The unacceptable key value."
-        logger.error(msg)
-        raise KeyError()
+        return (leaf_path, prepared_key)

     @staticmethod
-    async def napalm() -> None:
-        """Asynchronous method for full database deletion.
+    def napalm() -> None:
+        """Method for full database deletion.

         The main purpose is tests.

@@ -328,429 +209,5 @@ class Scruby[T]:
             None.
         """
         with contextlib.suppress(FileNotFoundError):
-            await to_thread.run_sync(rmtree, constants.DB_ROOT)
+            rmtree(settings.DB_ROOT)
         return
-
-    @staticmethod
-    def _task_find(
-        branch_number: int,
-        filter_fn: Callable,
-        hash_reduce_left: str,
-        db_root: str,
-        class_model: T,
-    ) -> list[T] | None:
-        """Task for find documents.
-
-        This method is for internal use.
-
-        Returns:
-            List of documents or None.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        docs: list[T] = []
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            for _, val in data.items():
-                doc = class_model.model_validate_json(val)
-                if filter_fn(doc):
-                    docs.append(doc)
-        return docs or None
-
-    def find_one(
-        self,
-        filter_fn: Callable,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> T | None:
-        """Finds a single document matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            Document or None.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_find
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                docs = future.result(timeout)
-                if docs is not None:
-                    return docs[0]
-        return None
-
-    def find_many(
-        self,
-        filter_fn: Callable,
-        limit_docs: int = 1000,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> list[T] | None:
-        """Finds one or more documents matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            limit_docs: Limiting the number of documents. By default = 1000.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            List of documents or None.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_find
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        result: list[T] = []
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                if counter >= limit_docs:
-                    return result[:limit_docs]
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                docs = future.result(timeout)
-                if docs is not None:
-                    for doc in docs:
-                        if counter >= limit_docs:
-                            return result[:limit_docs]
-                        result.append(doc)
-                        counter += 1
-        return result or None
-
-    def collection_name(self) -> str:
-        """Get collection name.
-
-        Returns:
-            Collection name.
-        """
-        return self.__class_model.__name__
-
-    def collection_full_name(self) -> str:
-        """Get full name of collection.
-
-        Returns:
-            Full name of collection.
-        """
-        return f"{self.__db_root}/{self.__class_model.__name__}"
-
-    async def estimated_document_count(self) -> int:
-        """Get an estimate of the number of documents in this collection using collection metadata.
-
-        Returns:
-            The number of documents.
-        """
-        meta = await self._get_meta()
-        return meta.counter_documents
-
-    def count_documents(
-        self,
-        filter_fn: Callable,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> int:
-        """Count the number of documents a matching the filter in this collection.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            The number of documents.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_find
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                if future.result(timeout) is not None:
-                    counter += 1
-        return counter
-
-    @staticmethod
-    def _task_delete(
-        branch_number: int,
-        filter_fn: Callable,
-        hash_reduce_left: int,
-        db_root: str,
-        class_model: T,
-    ) -> int:
-        """Task for find and delete documents.
-
-        This method is for internal use.
-
-        Returns:
-            The number of deleted documents.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        counter: int = 0
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            new_state: dict[str, str] = {}
-            for key, val in data.items():
-                doc = class_model.model_validate_json(val)
-                if filter_fn(doc):
-                    counter -= 1
-                else:
-                    new_state[key] = val
-            leaf_path.write_bytes(orjson.dumps(new_state))
-        return counter
-
-    def delete_many(
-        self,
-        filter_fn: Callable,
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> int:
-        """Delete one or more documents matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            The number of deleted documents.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        search_task_fn: Callable = self._task_delete
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    search_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                )
-                counter += future.result(timeout)
-        if counter < 0:
-            self._sync_counter_documents(counter)
-        return abs(counter)
-
-    @staticmethod
-    def _task_get_docs(
-        branch_number: int,
-        hash_reduce_left: int,
-        db_root: str,
-        class_model: T,
-    ) -> list[Any]:
-        """Get documents for custom task.
-
-        This method is for internal use.
-
-        Returns:
-            List of documents.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        docs: list[str, T] = []
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            for _, val in data.items():
-                docs.append(class_model.model_validate_json(val))
-        return docs
-
-    def run_custom_task(self, custom_task_fn: Callable, limit_docs: int = 1000) -> Any:
-        """Running custom task.
-
-        This method running a task created on the basis of a quantum loop.
-        Effectiveness running task depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            custom_task_fn: A function that execute the custom task.
-            limit_docs: Limiting the number of documents. By default = 1000.
-
-        Returns:
-            The result of a custom task.
-        """
-        kwargs = {
-            "get_docs_fn": self._task_get_docs,
-            "branch_numbers": range(1, self.__max_branch_number),
-            "hash_reduce_left": self.__hash_reduce_left,
-            "db_root": self.__db_root,
-            "class_model": self.__class_model,
-            "limit_docs": limit_docs,
-        }
-        return custom_task_fn(**kwargs)
-
-    @staticmethod
-    def _task_update(
-        branch_number: int,
-        filter_fn: Callable,
-        hash_reduce_left: str,
-        db_root: str,
-        class_model: T,
-        new_data: dict[str, Any],
-    ) -> int:
-        """Task for find documents.
-
-        This method is for internal use.
-
-        Returns:
-            The number of updated documents.
-        """
-        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
-        separated_hash: str = "/".join(list(branch_number_as_hash))
-        leaf_path: SyncPath = SyncPath(
-            *(
-                db_root,
-                class_model.__name__,
-                separated_hash,
-                "leaf.json",
-            ),
-        )
-        counter: int = 0
-        if leaf_path.exists():
-            data_json: bytes = leaf_path.read_bytes()
-            data: dict[str, str] = orjson.loads(data_json) or {}
-            new_state: dict[str, str] = {}
-            for _, val in data.items():
-                doc = class_model.model_validate_json(val)
-                if filter_fn(doc):
-                    for key, value in new_data.items():
-                        doc.__dict__[key] = value
-                    new_state[key] = doc.model_dump_json()
-                    counter += 1
-            leaf_path.write_bytes(orjson.dumps(new_state))
-        return counter
-
-    def update_many(
-        self,
-        filter_fn: Callable,
-        new_data: dict[str, Any],
-        max_workers: int | None = None,
-        timeout: float | None = None,
-    ) -> int:
-        """Updates one or more documents matching the filter.
-
-        The search is based on the effect of a quantum loop.
-        The search effectiveness depends on the number of processor threads.
-        Ideally, hundreds and even thousands of threads are required.
-
-        Args:
-            filter_fn: A function that execute the conditions of filtering.
-            new_data: New data for the fields that need to be updated.
-            max_workers: The maximum number of processes that can be used to
-                execute the given calls. If None or not given then as many
-                worker processes will be created as the machine has processors.
-            timeout: The number of seconds to wait for the result if the future isn't done.
-                If None, then there is no limit on the wait time.
-
-        Returns:
-            The number of updated documents.
-        """
-        branch_numbers: range = range(1, self.__max_branch_number)
-        update_task_fn: Callable = self._task_update
-        hash_reduce_left: int = self.__hash_reduce_left
-        db_root: str = self.__db_root
-        class_model: T = self.__class_model
-        counter: int = 0
-        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            for branch_number in branch_numbers:
-                future = executor.submit(
-                    update_task_fn,
-                    branch_number,
-                    filter_fn,
-                    hash_reduce_left,
-                    db_root,
-                    class_model,
-                    new_data,
-                )
-                counter += future.result(timeout)
-        return counter
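
Taken together, construction moves from `Scruby(class_model)` to the awaitable `Scruby.collection()` classmethod, `_Meta` gains its configuration fields, and the key/find/update/delete operations migrate to `scruby.mixins`. Below is a minimal usage sketch based only on what this diff shows; the `User` model is hypothetical, and the import path simply follows the module location `scruby/db.py`.

```python
# A sketch under stated assumptions: `User` is an illustrative model, and the
# key-level methods now come from scruby.mixins, which this diff does not show.
import asyncio

from pydantic import BaseModel

from scruby.db import Scruby


class User(BaseModel):
    name: str


async def main() -> None:
    # 0.17.0 constructed collections directly: Scruby(User).
    # 0.27.2 builds them via the awaitable classmethod, which also
    # creates meta.json in the reserved zero branch if it is missing.
    users = await Scruby.collection(User)
    meta = await users.get_meta()
    print(meta.collection_name, meta.counter_documents)


asyncio.run(main())
```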
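
One behavioral change worth noting in `_get_leaf_path`: keys are now normalized (whitespace collapsed, lowercased) before the crc32 hash, so keys that differ only in case or spacing resolve to the same cell. A self-contained sketch of that mapping, with `hash_reduce_left=6` as an illustrative stand-in for `settings.HASH_REDUCE_LEFT`:

```python
import re
import zlib


def branch_of(key: str, hash_reduce_left: int = 6) -> str:
    """Mirror the 0.27.2 key preparation and crc32-to-path mapping."""
    # Collapse whitespace runs, strip the ends, lowercase, as in _get_leaf_path.
    prepared_key = re.sub(r"\s+", " ", key).strip().lower()
    # crc32 as 8 hex digits, truncated from the left; one path segment per char.
    key_as_hash = f"{zlib.crc32(prepared_key.encode('utf-8')):08x}"[hash_reduce_left:]
    return "/".join(key_as_hash)


# Keys that 0.17.0 treated as distinct now resolve to the same cell:
assert branch_of("  Hello   World ") == branch_of("hello world")
```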