scruby 0.14.2__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry, and is provided for informational purposes only.
scruby/db.py
CHANGED
@@ -91,6 +91,9 @@ class Scruby[T]:
         """Asynchronous method for getting metadata of collection.
 
         This method is for internal use.
+
+        Returns:
+            Metadata object.
         """
         meta_path = Path(*self.__meta_path_tuple)
         meta_json = await meta_path.read_text()
@@ -101,6 +104,9 @@ class Scruby[T]:
         """Asynchronous method for updating metadata of collection.
 
         This method is for internal use.
+
+        Returns:
+            None.
         """
         meta_json = meta.model_dump_json()
         meta_path = Path(*self.__meta_path_tuple)
@@ -110,6 +116,9 @@ class Scruby[T]:
         """Asynchronous method for management of documents in metadata of collection.
 
         This method is for internal use.
+
+        Returns:
+            None.
         """
         meta_path = Path(*self.__meta_path_tuple)
         meta_json = await meta_path.read_text("utf-8")
@@ -137,6 +146,9 @@ class Scruby[T]:
 
         Args:
             key: Key name.
+
+        Returns:
+            Path to cell of collection.
         """
         if not isinstance(key, str):
             logger.error("The key is not a type of `str`.")
@@ -173,6 +185,9 @@ class Scruby[T]:
         Args:
             key: Key name.
             value: Value of key.
+
+        Returns:
+            None.
         """
         # The path to the database cell.
         leaf_path: Path = await self._get_leaf_path(key)
@@ -196,6 +211,9 @@ class Scruby[T]:
 
         Args:
             key: Key name.
+
+        Returns:
+            Value of key or KeyError.
         """
         # The path to the database cell.
         leaf_path: Path = await self._get_leaf_path(key)
@@ -214,6 +232,9 @@ class Scruby[T]:
 
         Args:
             key: Key name.
+
+        Returns:
+            True, if the key is present.
         """
         # The path to the database cell.
         leaf_path: Path = await self._get_leaf_path(key)
@@ -233,6 +254,9 @@ class Scruby[T]:
 
         Args:
             key: Key name.
+
+        Returns:
+            None.
         """
         # The path to the database cell.
         leaf_path: Path = await self._get_leaf_path(key)
@@ -256,6 +280,9 @@ class Scruby[T]:
 
         Warning:
             - `Be careful, this will remove all keys.`
+
+        Returns:
+            None.
         """
         with contextlib.suppress(FileNotFoundError):
             await to_thread.run_sync(rmtree, constants.DB_ROOT)
@@ -268,10 +295,13 @@ class Scruby[T]:
         hash_reduce_left: str,
         db_root: str,
         class_model: T,
-    ) -> T | None:
+    ) -> list[T] | None:
         """Task for find documents.
 
         This method is for internal use.
+
+        Returns:
+            List of documents or None.
         """
         branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
         separated_hash: str = "/".join(list(branch_number_as_hash))
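For context on the unchanged lines above: a branch number is rendered as zero-padded hex, trimmed from the left by hash_reduce_left, and split into one directory per character to form the leaf path. A worked example, assuming hash_reduce_left = 4:

    branch_number = 255
    branch_hash = f"{branch_number:08x}"[4:]   # "000000ff" -> "00ff"
    separated = "/".join(list(branch_hash))    # "0/0/f/f"
    # leaf file: <db_root>/<ModelName>/0/0/f/f/leaf.json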
@@ -283,14 +313,15 @@ class Scruby[T]:
                 "leaf.json",
             ),
         )
+        docs: list[T] = []
         if leaf_path.exists():
             data_json: bytes = leaf_path.read_bytes()
             data: dict[str, str] = orjson.loads(data_json) or {}
             for _, val in data.items():
                 doc = class_model.model_validate_json(val)
                 if filter_fn(doc):
-                    return doc
-        return None
+                    docs.append(doc)
+        return docs or None
 
     def find_one(
         self,
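This is the behavioral core of the release: `_task_find` no longer stops at the first match in a branch but accumulates every matching document, mapping an empty list to None via `docs or None`. A minimal sketch of the new per-branch contract, using a hypothetical Pydantic model and filter:

    from pydantic import BaseModel

    class Item(BaseModel):  # hypothetical document model
        name: str
        price: int

    branch_docs = [Item(name="a", price=5), Item(name="b", price=50)]
    # Collect all matches in the branch; [] becomes None, as in _task_find.
    matches = [d for d in branch_docs if d.price > 1] or None
    assert matches == branch_docs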
@@ -311,6 +342,9 @@ class Scruby[T]:
                 worker processes will be created as the machine has processors.
             timeout: The number of seconds to wait for the result if the future isn't done.
                 If None, then there is no limit on the wait time.
+
+        Returns:
+            Document or None.
         """
         branch_numbers: range = range(1, self.__max_branch_number)
         search_task_fn: Callable = self._task_find
@@ -327,9 +361,9 @@ class Scruby[T]:
                     db_root,
                     class_model,
                 )
-                doc = future.result(timeout)
-                if doc is not None:
-                    return doc
+                docs = future.result(timeout)
+                if docs is not None:
+                    return docs[0]
         return None
 
     def find_many(
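`find_one` now unwraps the list returned by `_task_find` and yields its first element. Hypothetical usage, assuming a collection over a Pydantic model and the filter_fn/max_workers/timeout parameters described in the docstrings (the `name` field is invented):

    user = collection.find_one(
        filter_fn=lambda doc: doc.name == "Ann",
        max_workers=4,
        timeout=10.0,
    )
    # user is the first matching document, or None if nothing matched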
@@ -353,6 +387,9 @@ class Scruby[T]:
                 worker processes will be created as the machine has processors.
             timeout: The number of seconds to wait for the result if the future isn't done.
                 If None, then there is no limit on the wait time.
+
+        Returns:
+            List of documents or None.
         """
         branch_numbers: range = range(1, self.__max_branch_number)
         search_task_fn: Callable = self._task_find
@@ -360,11 +397,11 @@ class Scruby[T]:
         db_root: str = self.__db_root
         class_model: T = self.__class_model
         counter: int = 0
+        result: list[T] = []
         with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
-            results = []
             for branch_number in branch_numbers:
-                if counter
-
+                if counter >= limit_docs:
+                    return result[:limit_docs]
                 future = executor.submit(
                     search_task_fn,
                     branch_number,
@@ -373,22 +410,37 @@ class Scruby[T]:
                     db_root,
                     class_model,
                 )
-
-                if
-
-
-
+                docs = future.result(timeout)
+                if docs is not None:
+                    for doc in docs:
+                        if counter >= limit_docs:
+                            return result[:limit_docs]
+                        result.append(doc)
+                        counter += 1
+        return result or None
 
     def collection_name(self) -> str:
-        """Get collection name."""
+        """Get collection name.
+
+        Returns:
+            Collection name.
+        """
         return self.__class_model.__name__
 
     def collection_full_name(self) -> str:
-        """Get full name of collection."""
+        """Get full name of collection.
+
+        Returns:
+            Full name of collection.
+        """
         return f"{self.__db_root}/{self.__class_model.__name__}"
 
     async def estimated_document_count(self) -> int:
-        """Get an estimate of the number of documents in this collection using collection metadata."""
+        """Get an estimate of the number of documents in this collection using collection metadata.
+
+        Returns:
+            The number of documents.
+        """
         meta = await self._get_meta()
         return meta.counter_documents
 
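The rewritten `find_many` loop caps the result at limit_docs both before submitting a branch task and while draining a branch's documents, then maps an empty result to None. The capping pattern in isolation, with stand-in values instead of real branch I/O:

    limit_docs = 3                              # hypothetical limit
    per_branch = [[1, 2], [3, 4], None, [5]]    # stand-in for _task_find results
    result, counter = [], 0
    for docs in per_branch:
        if counter >= limit_docs:
            break
        if docs is not None:
            for doc in docs:
                if counter >= limit_docs:
                    break
                result.append(doc)
                counter += 1
    print(result or None)                       # [1, 2, 3]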
@@ -411,6 +463,9 @@ class Scruby[T]:
                 worker processes will be created as the machine has processors.
             timeout: The number of seconds to wait for the result if the future isn't done.
                 If None, then there is no limit on the wait time.
+
+        Returns:
+            The number of documents.
         """
         branch_numbers: range = range(1, self.__max_branch_number)
         search_task_fn: Callable = self._task_find
@@ -443,6 +498,9 @@ class Scruby[T]:
         """Task for find and delete documents.
 
         This method is for internal use.
+
+        Returns:
+            The number of deleted documents.
         """
         branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
         separated_hash: str = "/".join(list(branch_number_as_hash))
@@ -458,14 +516,14 @@ class Scruby[T]:
         if leaf_path.exists():
             data_json: bytes = leaf_path.read_bytes()
             data: dict[str, str] = orjson.loads(data_json) or {}
-
+            new_state: dict[str, str] = {}
             for key, val in data.items():
                 doc = class_model.model_validate_json(val)
                 if filter_fn(doc):
                     counter -= 1
                 else:
-
-            leaf_path.write_bytes(orjson.dumps(
+                    new_state[key] = val
+            leaf_path.write_bytes(orjson.dumps(new_state))
         return counter
 
     def delete_many(
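`_task_delete` now rebuilds the leaf as new_state, copying over only the entries whose documents do not match the filter, then writes that complete dict back (the removed write call is truncated in this diff). The surviving-keys idea in isolation:

    data = {"k1": "doc1", "k2": "doc2", "k3": "doc3"}
    matched = {"k2"}  # hypothetical keys whose documents match the delete filter
    new_state = {k: v for k, v in data.items() if k not in matched}
    assert new_state == {"k1": "doc1", "k3": "doc3"}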
@@ -487,6 +545,9 @@ class Scruby[T]:
                 worker processes will be created as the machine has processors.
             timeout: The number of seconds to wait for the result if the future isn't done.
                 If None, then there is no limit on the wait time.
+
+        Returns:
+            The number of deleted documents.
         """
         branch_numbers: range = range(1, self.__max_branch_number)
         search_task_fn: Callable = self._task_delete
@@ -519,6 +580,9 @@ class Scruby[T]:
         """Get documents for custom task.
 
         This method is for internal use.
+
+        Returns:
+            List of documents.
         """
         branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
         separated_hash: str = "/".join(list(branch_number_as_hash))
@@ -530,7 +594,7 @@ class Scruby[T]:
                 "leaf.json",
             ),
         )
-        docs = []
+        docs: list[str, T] = []
         if leaf_path.exists():
             data_json: bytes = leaf_path.read_bytes()
             data: dict[str, str] = orjson.loads(data_json) or {}
@@ -548,6 +612,9 @@ class Scruby[T]:
         Args:
             custom_task_fn: A function that execute the custom task.
             limit_docs: Limiting the number of documents. By default = 1000.
+
+        Returns:
+            The result of a custom task.
         """
         kwargs = {
             "get_docs_fn": self._task_get_docs,
@@ -558,3 +625,89 @@ class Scruby[T]:
             "limit_docs": limit_docs,
         }
         return custom_task_fn(**kwargs)
+
+    @staticmethod
+    def _task_update(
+        branch_number: int,
+        filter_fn: Callable,
+        hash_reduce_left: str,
+        db_root: str,
+        class_model: T,
+        new_data: dict[str, Any],
+    ) -> int:
+        """Task for find documents.
+
+        This method is for internal use.
+
+        Returns:
+            The number of updated documents.
+        """
+        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+        separated_hash: str = "/".join(list(branch_number_as_hash))
+        leaf_path: SyncPath = SyncPath(
+            *(
+                db_root,
+                class_model.__name__,
+                separated_hash,
+                "leaf.json",
+            ),
+        )
+        counter: int = 0
+        if leaf_path.exists():
+            data_json: bytes = leaf_path.read_bytes()
+            data: dict[str, str] = orjson.loads(data_json) or {}
+            new_state: dict[str, str] = {}
+            for _, val in data.items():
+                doc = class_model.model_validate_json(val)
+                if filter_fn(doc):
+                    for key, value in new_data.items():
+                        doc.__dict__[key] = value
+                    new_state[key] = doc.model_dump_json()
+                    counter += 1
+            leaf_path.write_bytes(orjson.dumps(new_state))
+        return counter
+
+    def update_many(
+        self,
+        filter_fn: Callable,
+        new_data: dict[str, Any],
+        max_workers: int | None = None,
+        timeout: float | None = None,
+    ) -> int:
+        """Updates one or more documents matching the filter.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn: A function that execute the conditions of filtering.
+            new_data: New data for the fields that need to be updated.
+            max_workers: The maximum number of processes that can be used to
+                execute the given calls. If None or not given then as many
+                worker processes will be created as the machine has processors.
+            timeout: The number of seconds to wait for the result if the future isn't done.
+                If None, then there is no limit on the wait time.
+
+        Returns:
+            The number of updated documents.
+        """
+        branch_numbers: range = range(1, self.__max_branch_number)
+        update_task_fn: Callable = self._task_update
+        hash_reduce_left: int = self.__hash_reduce_left
+        db_root: str = self.__db_root
+        class_model: T = self.__class_model
+        counter: int = 0
+        with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
+            for branch_number in branch_numbers:
+                future = executor.submit(
+                    update_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                    new_data,
+                )
+                counter += future.result(timeout)
+        return counter
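The new `update_many`/`_task_update` pair follows the same fan-out pattern as `find_many` and `delete_many`: one `_task_update` per branch, each rewriting its leaf and returning a count that `update_many` sums. Hypothetical usage, assuming a collection over a Pydantic model with an `age` field:

    updated: int = collection.update_many(
        filter_fn=lambda doc: doc.age < 18,
        new_data={"age": 18},
        max_workers=4,
        timeout=30.0,
    )
    print(f"Updated {updated} document(s)")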
scruby-0.14.2.dist-info/RECORD → scruby-0.15.0.dist-info/RECORD
CHANGED
@@ -1,10 +1,10 @@
 scruby/__init__.py,sha256=GOVcjXmcOEDBbJQJDJlQq-x3M-VGJaMSN278EXsl2po,884
 scruby/aggregation.py,sha256=x_9ZJQHJHDISxRvddS5A2Hb0saIcfPTh1Veyf2KgX8A,2919
 scruby/constants.py,sha256=3LZfcxcuRqwzoB0-iogLMjKBZRdxfWJmTbyPwVRhQgY,1007
-scruby/db.py,sha256=
+scruby/db.py,sha256=FNW_o2JDd_RnGpOdsEfSubMH8kcO7CSoej52y9vJwnc,25769
 scruby/errors.py,sha256=aHQri4LNcFVQrSHwjyzb1fL8O49SwjYEU4QgMOo4uyA,622
 scruby/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-scruby-0.
-scruby-0.
-scruby-0.
-scruby-0.
+scruby-0.15.0.dist-info/METADATA,sha256=0-3PkQkRh7wCbeXnYaqvmE_BPeiKx3CrWT2VpC_CBPc,10925
+scruby-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+scruby-0.15.0.dist-info/licenses/LICENSE,sha256=2zZINd6m_jNYlowdQImlEizyhSui5cBAJZRhWQURcEc,1095
+scruby-0.15.0.dist-info/RECORD,,