persidict 0.36.11__py3-none-any.whl → 0.37.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- persidict/file_dir_dict.py +11 -35
- persidict/jokers.py +2 -2
- persidict/persi_dict.py +84 -30
- persidict/s3_dict.py +168 -176
- {persidict-0.36.11.dist-info → persidict-0.37.1.dist-info}/METADATA +1 -1
- {persidict-0.36.11.dist-info → persidict-0.37.1.dist-info}/RECORD +7 -7
- {persidict-0.36.11.dist-info → persidict-0.37.1.dist-info}/WHEEL +0 -0
persidict/file_dir_dict.py
CHANGED
|
@@ -27,8 +27,7 @@ from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
|
|
|
27
27
|
from .safe_chars import replace_unsafe_chars
|
|
28
28
|
from .safe_str_tuple import SafeStrTuple
|
|
29
29
|
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
30
|
-
from .persi_dict import PersiDict, PersiDictKey
|
|
31
|
-
|
|
30
|
+
from .persi_dict import PersiDict, PersiDictKey, non_empty_persidict_key
|
|
32
31
|
|
|
33
32
|
jsonpickle_numpy.register_handlers()
|
|
34
33
|
jsonpickle_pandas.register_handlers()
|
|
@@ -69,6 +68,7 @@ class FileDirDict(PersiDict):
|
|
|
69
68
|
or deleted.
|
|
70
69
|
digest_len (int): Length of a hash suffix appended to each key path
|
|
71
70
|
element to avoid case-insensitive collisions. Use 0 to disable.
|
|
71
|
+
If you decide to enable it (not 0), we recommend at least 4.
|
|
72
72
|
base_class_for_values (Optional[type]): Optional base class that all
|
|
73
73
|
stored values must be instances of. If provided and not ``str``,
|
|
74
74
|
then file_type must be either "pkl" or "json".
|
|
@@ -456,7 +456,7 @@ class FileDirDict(PersiDict):
|
|
|
456
456
|
Returns:
|
|
457
457
|
bool: True if a file for the key exists; False otherwise.
|
|
458
458
|
"""
|
|
459
|
-
key =
|
|
459
|
+
key = non_empty_persidict_key(key)
|
|
460
460
|
filename = self._build_full_path(key)
|
|
461
461
|
return os.path.isfile(filename)
|
|
462
462
|
|
|
@@ -478,7 +478,7 @@ class FileDirDict(PersiDict):
|
|
|
478
478
|
TypeError: If the deserialized value does not match base_class_for_values
|
|
479
479
|
when it is set.
|
|
480
480
|
"""
|
|
481
|
-
key =
|
|
481
|
+
key = non_empty_persidict_key(key)
|
|
482
482
|
filename = self._build_full_path(key)
|
|
483
483
|
if not os.path.isfile(filename):
|
|
484
484
|
raise KeyError(f"File {filename} does not exist")
|
|
@@ -509,28 +509,13 @@ class FileDirDict(PersiDict):
|
|
|
509
509
|
base_class_for_values when it is set.
|
|
510
510
|
"""
|
|
511
511
|
|
|
512
|
-
|
|
512
|
+
key = non_empty_persidict_key(key)
|
|
513
|
+
PersiDict.__setitem__(self, key, value)
|
|
514
|
+
if isinstance(value, Joker):
|
|
515
|
+
# processed by base class
|
|
513
516
|
return
|
|
514
517
|
|
|
515
|
-
if value is DELETE_CURRENT:
|
|
516
|
-
self.delete_if_exists(key)
|
|
517
|
-
return
|
|
518
|
-
|
|
519
|
-
if isinstance(value, PersiDict):
|
|
520
|
-
raise TypeError(
|
|
521
|
-
f"You are not allowed to store a PersiDict "
|
|
522
|
-
+ f"inside another PersiDict.")
|
|
523
|
-
|
|
524
|
-
if self.base_class_for_values is not None:
|
|
525
|
-
if not isinstance(value, self.base_class_for_values):
|
|
526
|
-
raise TypeError(
|
|
527
|
-
f"Value must be of type {self.base_class_for_values},"
|
|
528
|
-
+ f"but it is {type(value)} instead.")
|
|
529
|
-
|
|
530
|
-
key = SafeStrTuple(key)
|
|
531
518
|
filename = self._build_full_path(key, create_subdirs=True)
|
|
532
|
-
if self.immutable_items and os.path.exists(filename):
|
|
533
|
-
raise KeyError("Can't modify an immutable item")
|
|
534
519
|
self._save_to_file(filename, value)
|
|
535
520
|
|
|
536
521
|
|
|
@@ -543,9 +528,7 @@ class FileDirDict(PersiDict):
|
|
|
543
528
|
Raises:
|
|
544
529
|
KeyError: If immutable_items is True or if the key does not exist.
|
|
545
530
|
"""
|
|
546
|
-
key =
|
|
547
|
-
if self.immutable_items:
|
|
548
|
-
raise KeyError("Can't delete immutable items")
|
|
531
|
+
key = non_empty_persidict_key(key)
|
|
549
532
|
filename = self._build_full_path(key)
|
|
550
533
|
if not os.path.isfile(filename):
|
|
551
534
|
raise KeyError(f"File {filename} does not exist")
|
|
@@ -574,15 +557,8 @@ class FileDirDict(PersiDict):
|
|
|
574
557
|
TypeError: If result_type is not a set.
|
|
575
558
|
ValueError: If result_type is empty or contains unsupported labels.
|
|
576
559
|
"""
|
|
577
|
-
if not isinstance(result_type, set):
|
|
578
|
-
raise TypeError("result_type must be a set")
|
|
579
|
-
if not (1 <= len(result_type) <= 3):
|
|
580
|
-
raise ValueError("result_type must be a non-empty subset of {'keys','values','timestamps'}")
|
|
581
|
-
allowed = {"keys", "values", "timestamps"}
|
|
582
|
-
invalid = result_type - allowed
|
|
583
|
-
if invalid:
|
|
584
|
-
raise ValueError(f"Unsupported result_type entries: {sorted(invalid)}; allowed: {sorted(allowed)}")
|
|
585
560
|
|
|
561
|
+
PersiDict._generic_iter(self, result_type)
|
|
586
562
|
walk_results = os.walk(self._base_dir)
|
|
587
563
|
ext_len = len(self.file_type) + 1
|
|
588
564
|
|
|
@@ -649,7 +625,7 @@ class FileDirDict(PersiDict):
|
|
|
649
625
|
Raises:
|
|
650
626
|
FileNotFoundError: If the key does not exist.
|
|
651
627
|
"""
|
|
652
|
-
key =
|
|
628
|
+
key = non_empty_persidict_key(key)
|
|
653
629
|
filename = self._build_full_path(key)
|
|
654
630
|
return os.path.getmtime(filename)
|
|
655
631
|
|
persidict/jokers.py
CHANGED
|
@@ -92,8 +92,8 @@ class DeleteCurrentFlag(Joker):
|
|
|
92
92
|
register_parameterizable_class(KeepCurrentFlag)
|
|
93
93
|
register_parameterizable_class(DeleteCurrentFlag)
|
|
94
94
|
|
|
95
|
-
|
|
95
|
+
_KeepCurrent = KeepCurrentFlag()
|
|
96
96
|
KEEP_CURRENT = KeepCurrentFlag()
|
|
97
97
|
|
|
98
|
-
|
|
98
|
+
_DeleteCurrent = DeleteCurrentFlag()
|
|
99
99
|
DELETE_CURRENT = DeleteCurrentFlag()
|
persidict/persi_dict.py
CHANGED
|
@@ -4,10 +4,11 @@ PersiDict defines a unified interface for persistent dictionaries. The API is
|
|
|
4
4
|
similar to Python's built-in dict with some differences (e.g., insertion order
|
|
5
5
|
is not guaranteed) and several additional convenience methods.
|
|
6
6
|
|
|
7
|
-
Keys are sequences of URL/filename-safe strings
|
|
8
|
-
Plain strings or sequences of strings are accepted
|
|
9
|
-
SafeStrTuple. Values can be
|
|
10
|
-
restricts them
|
|
7
|
+
Keys are non-empty sequences of URL/filename-safe strings
|
|
8
|
+
represented by SafeStrTuple. Plain strings or sequences of strings are accepted
|
|
9
|
+
and automatically coerced to SafeStrTuple. Values can be
|
|
10
|
+
arbitrary Python objects unless an implementation restricts them
|
|
11
|
+
via `base_class_for_values`.
|
|
11
12
|
|
|
12
13
|
Persistence means items are stored durably (e.g., in local files or cloud
|
|
13
14
|
objects) and remain accessible across process lifetimes.
|
|
@@ -35,6 +36,24 @@ If a string (or a sequence of strings) is passed to a PersiDict as a key,
|
|
|
35
36
|
it will be automatically converted into SafeStrTuple.
|
|
36
37
|
"""
|
|
37
38
|
|
|
39
|
+
|
|
40
|
+
def non_empty_persidict_key(*args) -> SafeStrTuple:
|
|
41
|
+
"""Create a non-empty SafeStrTuple from the given arguments.
|
|
42
|
+
This is a convenience function that ensures the resulting SafeStrTuple is
|
|
43
|
+
not empty, raising a KeyError if it is.
|
|
44
|
+
Args:
|
|
45
|
+
*args: Arguments to pass to SafeStrTuple constructor.
|
|
46
|
+
Returns:
|
|
47
|
+
SafeStrTuple: A non-empty SafeStrTuple instance.
|
|
48
|
+
Raises:
|
|
49
|
+
KeyError: If the resulting SafeStrTuple is empty.
|
|
50
|
+
"""
|
|
51
|
+
result = SafeStrTuple(*args)
|
|
52
|
+
if len(result) == 0:
|
|
53
|
+
raise KeyError("Key cannot be empty")
|
|
54
|
+
return result
|
|
55
|
+
|
|
56
|
+
|
|
38
57
|
class PersiDict(MutableMapping, ParameterizableClass):
|
|
39
58
|
"""Abstract dict-like interface for durable key-value stores.
|
|
40
59
|
|
|
@@ -100,10 +119,10 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
100
119
|
This supports the Parameterizable API and is absent in the
|
|
101
120
|
built-in dict.
|
|
102
121
|
"""
|
|
103
|
-
params =
|
|
104
|
-
immutable_items=self.immutable_items
|
|
105
|
-
|
|
106
|
-
|
|
122
|
+
params = dict(
|
|
123
|
+
immutable_items=self.immutable_items,
|
|
124
|
+
digest_len=self.digest_len,
|
|
125
|
+
base_class_for_values=self.base_class_for_values
|
|
107
126
|
)
|
|
108
127
|
sorted_params = sort_dict_by_keys(params)
|
|
109
128
|
return sorted_params
|
|
@@ -168,7 +187,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
168
187
|
Returns:
|
|
169
188
|
bool: True if key exists, False otherwise.
|
|
170
189
|
"""
|
|
171
|
-
|
|
190
|
+
if type(self) is PersiDict:
|
|
191
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
192
|
+
" and cannot check items directly")
|
|
172
193
|
|
|
173
194
|
|
|
174
195
|
@abstractmethod
|
|
@@ -181,7 +202,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
181
202
|
Returns:
|
|
182
203
|
Any: The stored value.
|
|
183
204
|
"""
|
|
184
|
-
|
|
205
|
+
if type(self) is PersiDict:
|
|
206
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
207
|
+
" and cannot retrieve items directly")
|
|
185
208
|
|
|
186
209
|
|
|
187
210
|
def __setitem__(self, key:PersiDictKey, value:Any):
|
|
@@ -201,12 +224,24 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
201
224
|
"""
|
|
202
225
|
if value is KEEP_CURRENT:
|
|
203
226
|
return
|
|
204
|
-
elif value is DELETE_CURRENT:
|
|
205
|
-
self.delete_if_exists(key)
|
|
206
227
|
elif self.immutable_items:
|
|
207
228
|
if key in self:
|
|
208
229
|
raise KeyError("Can't modify an immutable key-value pair")
|
|
209
|
-
|
|
230
|
+
|
|
231
|
+
key = non_empty_persidict_key(key)
|
|
232
|
+
|
|
233
|
+
if value is DELETE_CURRENT:
|
|
234
|
+
self.delete_if_exists(key)
|
|
235
|
+
return
|
|
236
|
+
|
|
237
|
+
if self.base_class_for_values is not None:
|
|
238
|
+
if not isinstance(value, self.base_class_for_values):
|
|
239
|
+
raise TypeError(f"Value must be an instance of"
|
|
240
|
+
f" {self.base_class_for_values.__name__}")
|
|
241
|
+
|
|
242
|
+
if type(self) is PersiDict:
|
|
243
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
244
|
+
" and cannot store items directly")
|
|
210
245
|
|
|
211
246
|
|
|
212
247
|
def __delitem__(self, key:PersiDictKey):
|
|
@@ -221,7 +256,14 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
221
256
|
"""
|
|
222
257
|
if self.immutable_items:
|
|
223
258
|
raise KeyError("Can't delete an immutable key-value pair")
|
|
224
|
-
|
|
259
|
+
if type(self) is PersiDict:
|
|
260
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
261
|
+
" and cannot delete items directly")
|
|
262
|
+
|
|
263
|
+
key = non_empty_persidict_key(key)
|
|
264
|
+
|
|
265
|
+
if key not in self:
|
|
266
|
+
raise KeyError(f"Key {key} not found")
|
|
225
267
|
|
|
226
268
|
|
|
227
269
|
@abstractmethod
|
|
@@ -231,7 +273,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
231
273
|
Returns:
|
|
232
274
|
int: Number of key-value pairs.
|
|
233
275
|
"""
|
|
234
|
-
|
|
276
|
+
if type(self) is PersiDict:
|
|
277
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
278
|
+
" and cannot count items directly")
|
|
235
279
|
|
|
236
280
|
|
|
237
281
|
@abstractmethod
|
|
@@ -260,7 +304,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
260
304
|
raise ValueError("result_type can only contain 'keys', 'values', 'timestamps'")
|
|
261
305
|
if not (1 <= len(result_type & allowed) <= 3):
|
|
262
306
|
raise ValueError("result_type must include at least one of 'keys', 'values', 'timestamps'")
|
|
263
|
-
|
|
307
|
+
if type(self) is PersiDict:
|
|
308
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
309
|
+
" and cannot iterate items directly")
|
|
264
310
|
|
|
265
311
|
|
|
266
312
|
def __iter__(self):
|
|
@@ -278,7 +324,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
278
324
|
Returns:
|
|
279
325
|
Iterator[SafeStrTuple]: Keys iterator.
|
|
280
326
|
"""
|
|
281
|
-
return
|
|
327
|
+
return self._generic_iter({"keys"})
|
|
282
328
|
|
|
283
329
|
|
|
284
330
|
def keys_and_timestamps(self):
|
|
@@ -385,7 +431,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
385
431
|
Raises:
|
|
386
432
|
TypeError: Always raised; PersiDict instances are not pickleable.
|
|
387
433
|
"""
|
|
388
|
-
|
|
434
|
+
if type(self) is PersiDict:
|
|
435
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
436
|
+
" and cannot be pickled directly")
|
|
389
437
|
|
|
390
438
|
|
|
391
439
|
def __setstate__(self, state):
|
|
@@ -394,7 +442,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
394
442
|
Raises:
|
|
395
443
|
TypeError: Always raised; PersiDict instances are not pickleable.
|
|
396
444
|
"""
|
|
397
|
-
|
|
445
|
+
if type(self) is PersiDict:
|
|
446
|
+
raise TypeError("PersiDict is an abstract base class"
|
|
447
|
+
" and cannot be unpickled directly")
|
|
398
448
|
|
|
399
449
|
|
|
400
450
|
def clear(self) -> None:
|
|
@@ -409,7 +459,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
409
459
|
for k in self.keys():
|
|
410
460
|
try:
|
|
411
461
|
del self[k]
|
|
412
|
-
except:
|
|
462
|
+
except KeyError:
|
|
413
463
|
pass
|
|
414
464
|
|
|
415
465
|
|
|
@@ -431,7 +481,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
431
481
|
if self.immutable_items:
|
|
432
482
|
raise KeyError("Can't delete an immutable key-value pair")
|
|
433
483
|
|
|
434
|
-
key =
|
|
484
|
+
key = non_empty_persidict_key(key)
|
|
435
485
|
|
|
436
486
|
if key in self:
|
|
437
487
|
try:
|
|
@@ -464,7 +514,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
464
514
|
NotImplementedError: Must be implemented by subclasses that support
|
|
465
515
|
hierarchical key spaces.
|
|
466
516
|
"""
|
|
467
|
-
|
|
517
|
+
if type(self) is PersiDict:
|
|
518
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
519
|
+
" and cannot create sub-dictionaries directly")
|
|
468
520
|
|
|
469
521
|
|
|
470
522
|
def subdicts(self) -> dict[str, PersiDict]:
|
|
@@ -528,7 +580,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
528
580
|
Raises:
|
|
529
581
|
NotImplementedError: Must be implemented by subclasses.
|
|
530
582
|
"""
|
|
531
|
-
|
|
583
|
+
if type(self) is PersiDict:
|
|
584
|
+
raise NotImplementedError("PersiDict is an abstract base class"
|
|
585
|
+
" and cannot provide timestamps directly")
|
|
532
586
|
|
|
533
587
|
|
|
534
588
|
def oldest_keys(self, max_n=None):
|
|
@@ -553,9 +607,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
553
607
|
return []
|
|
554
608
|
else:
|
|
555
609
|
# Use heapq.nsmallest for efficient partial sorting without loading all keys into memory
|
|
556
|
-
smallest_pairs = heapq.nsmallest(max_n
|
|
557
|
-
|
|
558
|
-
|
|
610
|
+
smallest_pairs = heapq.nsmallest(max_n,
|
|
611
|
+
self.keys_and_timestamps(),
|
|
612
|
+
key=lambda x: x[1])
|
|
559
613
|
return [key for key,_ in smallest_pairs]
|
|
560
614
|
|
|
561
615
|
|
|
@@ -591,15 +645,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
591
645
|
if max_n is None:
|
|
592
646
|
# If we need all keys, sort them all by timestamp in reverse order
|
|
593
647
|
key_timestamp_pairs = list(self.keys_and_timestamps())
|
|
594
|
-
key_timestamp_pairs.sort(key=lambda x:x[1], reverse=True)
|
|
648
|
+
key_timestamp_pairs.sort(key=lambda x: x[1], reverse=True)
|
|
595
649
|
return [key for key,_ in key_timestamp_pairs]
|
|
596
650
|
elif max_n <= 0:
|
|
597
651
|
return []
|
|
598
652
|
else:
|
|
599
653
|
# Use heapq.nlargest for efficient partial sorting without loading all keys into memory
|
|
600
|
-
largest_pairs = heapq.nlargest(max_n
|
|
601
|
-
|
|
602
|
-
|
|
654
|
+
largest_pairs = heapq.nlargest(max_n,
|
|
655
|
+
self.keys_and_timestamps(),
|
|
656
|
+
key=lambda item: item[1])
|
|
603
657
|
return [key for key,_ in largest_pairs]
|
|
604
658
|
|
|
605
659
|
|
persidict/s3_dict.py
CHANGED
|
@@ -15,26 +15,30 @@ from parameterizable.dict_sorter import sort_dict_by_keys
|
|
|
15
15
|
from .safe_str_tuple import SafeStrTuple
|
|
16
16
|
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
17
17
|
from .persi_dict import PersiDict
|
|
18
|
-
from .jokers import KEEP_CURRENT, DELETE_CURRENT
|
|
19
|
-
from .file_dir_dict import FileDirDict, PersiDictKey
|
|
18
|
+
from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
|
|
19
|
+
from .file_dir_dict import FileDirDict, PersiDictKey, non_empty_persidict_key
|
|
20
20
|
from .overlapping_multi_dict import OverlappingMultiDict
|
|
21
21
|
|
|
22
22
|
S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
|
|
23
23
|
|
|
24
24
|
class S3Dict(PersiDict):
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
A key
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
S3Dict
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
25
|
+
"""A persistent dictionary that stores key-value pairs as S3 objects.
|
|
26
|
+
|
|
27
|
+
Each key-value pair is stored as a separate S3 object in the specified bucket.
|
|
28
|
+
|
|
29
|
+
A key can be either a string (object name without file extension) or a sequence
|
|
30
|
+
of strings representing a hierarchical path (folder structure ending with an
|
|
31
|
+
object name). Values can be instances of any Python type and are serialized
|
|
32
|
+
to S3 objects.
|
|
33
|
+
|
|
34
|
+
S3Dict supports multiple serialization formats:
|
|
35
|
+
- Binary storage using pickle ('pkl' format)
|
|
36
|
+
- Human-readable text using jsonpickle ('json' format)
|
|
37
|
+
- Plain text for string values (other formats)
|
|
38
|
+
|
|
39
|
+
Note:
|
|
40
|
+
Unlike native Python dictionaries, insertion order is not preserved.
|
|
41
|
+
Operations may incur S3 API costs and network latency.
|
|
38
42
|
"""
|
|
39
43
|
region: str
|
|
40
44
|
bucket_name: str
|
|
@@ -42,40 +46,47 @@ class S3Dict(PersiDict):
|
|
|
42
46
|
file_type: str
|
|
43
47
|
_base_dir: str
|
|
44
48
|
|
|
45
|
-
def __init__(self, bucket_name:str = "my_bucket"
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
def __init__(self, bucket_name: str = "my_bucket",
|
|
50
|
+
region: str = None,
|
|
51
|
+
root_prefix: str = "",
|
|
52
|
+
base_dir: str = S3DICT_DEFAULT_BASE_DIR,
|
|
53
|
+
file_type: str = "pkl",
|
|
54
|
+
immutable_items: bool = False,
|
|
55
|
+
digest_len: int = 8,
|
|
56
|
+
base_class_for_values: Optional[type] = None,
|
|
57
|
+
*args, **kwargs):
|
|
54
58
|
"""Initialize an S3-backed persistent dictionary.
|
|
55
59
|
|
|
56
60
|
Args:
|
|
57
|
-
bucket_name
|
|
58
|
-
created if it does not
|
|
59
|
-
region
|
|
60
|
-
client region
|
|
61
|
-
root_prefix
|
|
62
|
-
stored. A trailing slash is added if missing.
|
|
63
|
-
base_dir
|
|
64
|
-
|
|
65
|
-
file_type
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
digest_len
|
|
70
|
-
elements to
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
must
|
|
74
|
-
|
|
75
|
-
|
|
61
|
+
bucket_name: Name of the S3 bucket to use. The bucket will be
|
|
62
|
+
created automatically if it does not exist and permissions allow.
|
|
63
|
+
region: AWS region for the bucket. If None, uses the default
|
|
64
|
+
client region from AWS configuration.
|
|
65
|
+
root_prefix: Common S3 key prefix under which all objects are
|
|
66
|
+
stored. A trailing slash is automatically added if missing.
|
|
67
|
+
base_dir: Local directory path used for temporary files and
|
|
68
|
+
local caching of S3 objects.
|
|
69
|
+
file_type: File extension/format for stored values. Supported formats:
|
|
70
|
+
'pkl' (pickle), 'json' (jsonpickle), or custom text formats.
|
|
71
|
+
immutable_items: If True, prevents modification of existing items
|
|
72
|
+
after they are initially stored.
|
|
73
|
+
digest_len: Number of base32 MD5 hash characters appended to key
|
|
74
|
+
elements to prevent case-insensitive filename collisions.
|
|
75
|
+
Set to 0 to disable collision prevention.
|
|
76
|
+
base_class_for_values: Optional base class that all stored values
|
|
77
|
+
must inherit from. When specified (and not str), file_type
|
|
78
|
+
must be 'pkl' or 'json' for proper serialization.
|
|
79
|
+
*args: Additional positional arguments (ignored, reserved for compatibility).
|
|
80
|
+
**kwargs: Additional keyword arguments (ignored, reserved for compatibility).
|
|
81
|
+
|
|
82
|
+
Note:
|
|
83
|
+
The S3 bucket will be created if it doesn't exist and AWS permissions
|
|
84
|
+
allow. Network connectivity and valid AWS credentials are required.
|
|
76
85
|
"""
|
|
77
86
|
|
|
78
|
-
super().__init__(immutable_items = immutable_items
|
|
87
|
+
super().__init__(immutable_items = immutable_items
|
|
88
|
+
, digest_len = digest_len
|
|
89
|
+
, base_class_for_values=base_class_for_values)
|
|
79
90
|
self.file_type = file_type
|
|
80
91
|
self.etag_file_type = f"{file_type}_etag"
|
|
81
92
|
|
|
@@ -107,24 +118,25 @@ class S3Dict(PersiDict):
|
|
|
107
118
|
except ClientError as e:
|
|
108
119
|
error_code = e.response['Error']['Code']
|
|
109
120
|
if error_code == '404' or error_code == 'NotFound':
|
|
110
|
-
#
|
|
121
|
+
# Bucket does not exist, attempt to create it
|
|
111
122
|
try:
|
|
112
123
|
self.s3_client.create_bucket(Bucket=bucket_name)
|
|
113
124
|
except ClientError as create_e:
|
|
114
125
|
create_error_code = create_e.response['Error']['Code']
|
|
115
|
-
#
|
|
126
|
+
# Handle race condition where bucket was created by another process
|
|
127
|
+
# or the bucket name is already taken by another AWS account
|
|
116
128
|
if ( create_error_code == 'BucketAlreadyOwnedByYou'
|
|
117
129
|
or create_error_code == 'BucketAlreadyExists'):
|
|
118
130
|
pass
|
|
119
131
|
else:
|
|
120
|
-
raise create_e # Re-raise other unexpected creation errors
|
|
132
|
+
raise create_e # Re-raise other unexpected creation errors
|
|
121
133
|
elif error_code == '403' or error_code == 'Forbidden':
|
|
122
|
-
#
|
|
123
|
-
#
|
|
124
|
-
#
|
|
134
|
+
# Bucket exists but access is forbidden - likely a cross-account
|
|
135
|
+
# bucket with policy granting limited access. Operations may still
|
|
136
|
+
# work if the policy allows the required S3 permissions.
|
|
125
137
|
pass
|
|
126
138
|
else:
|
|
127
|
-
raise e # Re-raise other unexpected
|
|
139
|
+
raise e # Re-raise other unexpected head_bucket errors
|
|
128
140
|
|
|
129
141
|
self.bucket_name = bucket_name
|
|
130
142
|
|
|
@@ -134,15 +146,15 @@ class S3Dict(PersiDict):
|
|
|
134
146
|
|
|
135
147
|
|
|
136
148
|
def get_params(self):
|
|
137
|
-
"""Return configuration parameters
|
|
149
|
+
"""Return configuration parameters as a dictionary.
|
|
138
150
|
|
|
139
|
-
This method
|
|
140
|
-
|
|
151
|
+
This method supports the Parameterizable API and is not part of
|
|
152
|
+
the standard Python dictionary interface.
|
|
141
153
|
|
|
142
154
|
Returns:
|
|
143
155
|
dict: A mapping of parameter names to their configured values,
|
|
144
|
-
including region, bucket_name,
|
|
145
|
-
parameters from the local cache.
|
|
156
|
+
including S3-specific parameters (region, bucket_name, root_prefix)
|
|
157
|
+
combined with parameters from the local cache, sorted by key names.
|
|
146
158
|
"""
|
|
147
159
|
params = self.main_cache.get_params()
|
|
148
160
|
params["region"] = self.region
|
|
@@ -156,51 +168,57 @@ class S3Dict(PersiDict):
|
|
|
156
168
|
def base_url(self):
|
|
157
169
|
"""Return the S3 URL prefix of this dictionary.
|
|
158
170
|
|
|
159
|
-
This property is
|
|
171
|
+
This property is not part of the standard Python dictionary interface.
|
|
160
172
|
|
|
161
173
|
Returns:
|
|
162
|
-
str: The base S3 URL in the
|
|
174
|
+
str: The base S3 URL in the format "s3://<bucket>/<root_prefix>".
|
|
163
175
|
"""
|
|
164
176
|
return f"s3://{self.bucket_name}/{self.root_prefix}"
|
|
165
177
|
|
|
166
178
|
|
|
167
179
|
@property
|
|
168
180
|
def base_dir(self) -> str:
|
|
169
|
-
"""Return dictionary's base directory in the local filesystem.
|
|
181
|
+
"""Return the dictionary's base directory in the local filesystem.
|
|
170
182
|
|
|
171
|
-
This property is
|
|
183
|
+
This property is not part of the standard Python dictionary interface.
|
|
172
184
|
|
|
173
185
|
Returns:
|
|
174
|
-
str: Path to the local
|
|
186
|
+
str: Path to the local cache directory used for temporary files
|
|
187
|
+
and caching S3 objects.
|
|
175
188
|
"""
|
|
176
189
|
return self.main_cache.base_dir
|
|
177
190
|
|
|
178
191
|
|
|
179
|
-
def _build_full_objectname(self, key:PersiDictKey) -> str:
|
|
180
|
-
"""Convert a key into a full S3 object key
|
|
192
|
+
def _build_full_objectname(self, key: PersiDictKey) -> str:
|
|
193
|
+
"""Convert a key into a full S3 object key.
|
|
181
194
|
|
|
182
195
|
Args:
|
|
183
|
-
key
|
|
196
|
+
key: Dictionary key (string or sequence of strings) or SafeStrTuple.
|
|
184
197
|
|
|
185
198
|
Returns:
|
|
186
|
-
str: The
|
|
199
|
+
str: The complete S3 object key including root_prefix and file_type
|
|
200
|
+
extension, with digest-based collision prevention applied if enabled.
|
|
187
201
|
"""
|
|
188
|
-
key =
|
|
202
|
+
key = non_empty_persidict_key(key)
|
|
189
203
|
key = sign_safe_str_tuple(key, self.digest_len)
|
|
190
204
|
objectname = self.root_prefix + "/".join(key)+ "." + self.file_type
|
|
191
205
|
return objectname
|
|
192
206
|
|
|
193
207
|
|
|
194
|
-
def __contains__(self, key:PersiDictKey) -> bool:
|
|
195
|
-
"""
|
|
208
|
+
def __contains__(self, key: PersiDictKey) -> bool:
|
|
209
|
+
"""Check if the specified key exists in the dictionary.
|
|
210
|
+
|
|
211
|
+
For immutable dictionaries, checks the local cache first. Otherwise,
|
|
212
|
+
performs a HEAD request to S3 to verify object existence.
|
|
196
213
|
|
|
197
214
|
Args:
|
|
198
|
-
key
|
|
215
|
+
key: Dictionary key (string or sequence of strings) or SafeStrTuple.
|
|
199
216
|
|
|
200
217
|
Returns:
|
|
201
|
-
bool: True if the
|
|
218
|
+
bool: True if the key exists in S3 (or local cache for immutable
|
|
219
|
+
items), False otherwise.
|
|
202
220
|
"""
|
|
203
|
-
key =
|
|
221
|
+
key = non_empty_persidict_key(key)
|
|
204
222
|
if self.immutable_items and key in self.main_cache:
|
|
205
223
|
return True
|
|
206
224
|
try:
|
|
@@ -216,21 +234,24 @@ class S3Dict(PersiDict):
|
|
|
216
234
|
raise
|
|
217
235
|
|
|
218
236
|
|
|
219
|
-
def __getitem__(self, key:PersiDictKey) -> Any:
|
|
220
|
-
"""Retrieve the value stored for a key
|
|
237
|
+
def __getitem__(self, key: PersiDictKey) -> Any:
|
|
238
|
+
"""Retrieve the value stored for a key.
|
|
221
239
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
240
|
+
For immutable dictionaries with cached values, returns the cached copy.
|
|
241
|
+
Otherwise, fetches from S3 using conditional requests (ETags) when
|
|
242
|
+
available to minimize unnecessary downloads.
|
|
225
243
|
|
|
226
244
|
Args:
|
|
227
|
-
key
|
|
245
|
+
key: Dictionary key (string or sequence of strings) or SafeStrTuple.
|
|
228
246
|
|
|
229
247
|
Returns:
|
|
230
|
-
Any: The stored
|
|
248
|
+
Any: The deserialized value stored for the key.
|
|
249
|
+
|
|
250
|
+
Raises:
|
|
251
|
+
KeyError: If the key does not exist in S3.
|
|
231
252
|
"""
|
|
232
253
|
|
|
233
|
-
key =
|
|
254
|
+
key = non_empty_persidict_key(key)
|
|
234
255
|
|
|
235
256
|
if self.immutable_items and key in self.main_cache:
|
|
236
257
|
return self.main_cache[key]
|
|
@@ -252,8 +273,7 @@ class S3Dict(PersiDict):
|
|
|
252
273
|
s3_etag = response.get("ETag")
|
|
253
274
|
body = response['Body']
|
|
254
275
|
|
|
255
|
-
#
|
|
256
|
-
|
|
276
|
+
# Deserialize and cache the S3 object content
|
|
257
277
|
if self.file_type == 'json':
|
|
258
278
|
deserialized_value = jsonpickle.loads(body.read().decode('utf-8'))
|
|
259
279
|
elif self.file_type == 'pkl':
|
|
@@ -266,110 +286,86 @@ class S3Dict(PersiDict):
|
|
|
266
286
|
|
|
267
287
|
except ClientError as e:
|
|
268
288
|
if e.response['ResponseMetadata']['HTTPStatusCode'] == 304:
|
|
269
|
-
# 304 Not Modified:
|
|
270
|
-
# The value will be read from cache at the end of the function.
|
|
289
|
+
# HTTP 304 Not Modified: cached version is current, no download needed
|
|
271
290
|
pass
|
|
272
291
|
elif e.response.get("Error", {}).get("Code") == 'NoSuchKey':
|
|
273
292
|
raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
|
|
274
293
|
else:
|
|
275
|
-
# Re-raise other client errors (
|
|
294
|
+
# Re-raise other client errors (permissions, throttling, etc.)
|
|
276
295
|
raise
|
|
277
296
|
|
|
278
297
|
return self.main_cache[key]
|
|
279
298
|
|
|
280
299
|
|
|
281
|
-
def __setitem__(self, key:PersiDictKey, value:Any):
|
|
282
|
-
"""Store a value for a key in S3 and
|
|
300
|
+
def __setitem__(self, key: PersiDictKey, value: Any):
|
|
301
|
+
"""Store a value for a key in both S3 and local cache.
|
|
283
302
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
S3 ETag
|
|
303
|
+
Handles special joker values (KEEP_CURRENT, DELETE_CURRENT) for
|
|
304
|
+
conditional operations. Validates value types against base_class_for_values
|
|
305
|
+
if specified, then stores locally and uploads to S3. Attempts to cache
|
|
306
|
+
the S3 ETag for efficient future retrievals.
|
|
288
307
|
|
|
289
308
|
Args:
|
|
290
|
-
key
|
|
291
|
-
value
|
|
309
|
+
key: Dictionary key (string or sequence of strings) or SafeStrTuple.
|
|
310
|
+
value: Value to store, or a joker command (KEEP_CURRENT or
|
|
292
311
|
DELETE_CURRENT from the jokers module).
|
|
293
312
|
|
|
294
313
|
Raises:
|
|
295
314
|
KeyError: If attempting to modify an existing item when
|
|
296
315
|
immutable_items is True.
|
|
297
|
-
TypeError: If value is a PersiDict or does not match
|
|
298
|
-
base_class_for_values when
|
|
316
|
+
TypeError: If value is a PersiDict instance or does not match
|
|
317
|
+
the required base_class_for_values when specified.
|
|
299
318
|
"""
|
|
300
319
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
self.delete_if_exists(key)
|
|
320
|
+
key = non_empty_persidict_key(key)
|
|
321
|
+
PersiDict.__setitem__(self, key, value)
|
|
322
|
+
if isinstance(value, Joker):
|
|
323
|
+
# Joker values (KEEP_CURRENT, DELETE_CURRENT) are handled by base class
|
|
306
324
|
return
|
|
307
325
|
|
|
308
|
-
if isinstance(value, PersiDict):
|
|
309
|
-
raise TypeError(
|
|
310
|
-
f"You are not allowed to store a PersiDict "
|
|
311
|
-
+ f"inside another PersiDict.")
|
|
312
|
-
|
|
313
|
-
if self.base_class_for_values is not None:
|
|
314
|
-
if not isinstance(value, self.base_class_for_values):
|
|
315
|
-
raise TypeError(
|
|
316
|
-
f"Value must be of type {self.base_class_for_values},"
|
|
317
|
-
+ f"but it is {type(value)} instead." )
|
|
318
|
-
|
|
319
|
-
key = SafeStrTuple(key)
|
|
320
|
-
|
|
321
|
-
if self.immutable_items and key in self:
|
|
322
|
-
raise KeyError("Can't modify an immutable item")
|
|
323
|
-
|
|
324
326
|
obj_name = self._build_full_objectname(key)
|
|
325
327
|
|
|
326
328
|
# Store in local cache first
|
|
327
329
|
self.main_cache[key] = value
|
|
328
330
|
|
|
329
|
-
#
|
|
331
|
+
# Upload the serialized file from local cache to S3
|
|
330
332
|
file_path = self.main_cache._build_full_path(key)
|
|
331
333
|
self.s3_client.upload_file(file_path, self.bucket_name, obj_name)
|
|
332
334
|
|
|
333
335
|
try:
|
|
336
|
+
# Cache the S3 ETag for efficient conditional requests on future reads
|
|
334
337
|
head = self.s3_client.head_object(
|
|
335
338
|
Bucket=self.bucket_name, Key=obj_name)
|
|
336
339
|
self.etag_cache[key] = head.get("ETag")
|
|
337
340
|
except ClientError:
|
|
338
|
-
#
|
|
339
|
-
# to force a re-download on the next __getitem__ call.
|
|
341
|
+
# Remove stale ETag on failure to force fresh downloads later
|
|
340
342
|
self.etag_cache.delete_if_exists(key)
|
|
341
343
|
|
|
342
344
|
|
|
343
|
-
def __delitem__(self, key:PersiDictKey):
|
|
344
|
-
"""Delete the stored value for a key from S3 and local cache.
|
|
345
|
+
def __delitem__(self, key: PersiDictKey):
|
|
346
|
+
"""Delete the stored value for a key from both S3 and local cache.
|
|
345
347
|
|
|
346
348
|
Args:
|
|
347
|
-
key
|
|
349
|
+
key: Dictionary key (string or sequence of strings) or SafeStrTuple.
|
|
348
350
|
|
|
349
351
|
Raises:
|
|
350
|
-
KeyError: If immutable_items is True, or if the key does not exist
|
|
352
|
+
KeyError: If immutable_items is True, or if the key does not exist.
|
|
351
353
|
"""
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
if self.immutable_items:
|
|
355
|
-
raise KeyError("Can't delete an immutable item")
|
|
356
|
-
|
|
357
|
-
if key not in self:
|
|
358
|
-
raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
|
|
359
|
-
|
|
354
|
+
key = non_empty_persidict_key(key)
|
|
355
|
+
PersiDict.__delitem__(self, key)
|
|
360
356
|
obj_name = self._build_full_objectname(key)
|
|
361
|
-
|
|
362
357
|
self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
|
|
363
358
|
self.etag_cache.delete_if_exists(key)
|
|
364
359
|
self.main_cache.delete_if_exists(key)
|
|
365
360
|
|
|
366
361
|
|
|
367
362
|
def __len__(self) -> int:
|
|
368
|
-
"""Return
|
|
363
|
+
"""Return the number of key-value pairs in the dictionary.
|
|
369
364
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
365
|
+
Warning:
|
|
366
|
+
This operation can be very slow and expensive on large S3 buckets
|
|
367
|
+
as it must paginate through all objects under the dictionary's prefix.
|
|
368
|
+
Avoid using in performance-critical code.
|
|
373
369
|
|
|
374
370
|
Returns:
|
|
375
371
|
int: Number of stored items under this dictionary's root_prefix.
|
|
@@ -395,56 +391,47 @@ class S3Dict(PersiDict):
|
|
|
395
391
|
|
|
396
392
|
|
|
397
393
|
def _generic_iter(self, result_type: set[str]):
|
|
398
|
-
"""Underlying implementation for
|
|
394
|
+
"""Underlying implementation for items(), keys(), and values() iterators.
|
|
399
395
|
|
|
400
|
-
|
|
396
|
+
Paginates through S3 objects under the configured root_prefix and yields
|
|
401
397
|
keys, values, and/or timestamps according to the requested result_type.
|
|
402
|
-
|
|
403
|
-
|
|
398
|
+
S3 object keys are converted to SafeStrTuple instances by removing the
|
|
399
|
+
file extension and reversing digest-based signing if enabled.
|
|
404
400
|
|
|
405
401
|
Args:
|
|
406
|
-
result_type
|
|
407
|
-
|
|
402
|
+
result_type: Non-empty subset of {"keys", "values", "timestamps"}
|
|
403
|
+
specifying which fields to yield from each dictionary entry.
|
|
408
404
|
|
|
409
405
|
Returns:
|
|
410
|
-
Iterator: A generator
|
|
406
|
+
Iterator: A generator that yields:
|
|
411
407
|
- SafeStrTuple if result_type == {"keys"}
|
|
412
|
-
- Any if result_type == {"values"}
|
|
408
|
+
- Any if result_type == {"values"}
|
|
413
409
|
- tuple[SafeStrTuple, Any] if result_type == {"keys", "values"}
|
|
414
|
-
- tuple
|
|
410
|
+
- tuple including float timestamp if "timestamps" requested
|
|
415
411
|
|
|
416
412
|
Raises:
|
|
417
|
-
ValueError: If result_type is not a set or contains
|
|
418
|
-
|
|
413
|
+
ValueError: If result_type is invalid (empty, not a set, or contains
|
|
414
|
+
unsupported field names).
|
|
419
415
|
"""
|
|
420
416
|
|
|
421
|
-
|
|
422
|
-
raise ValueError(
|
|
423
|
-
"result_type must be a set containing one to three of: 'keys', 'values', 'timestamps'"
|
|
424
|
-
)
|
|
425
|
-
if not (1 <= len(result_type) <= 3):
|
|
426
|
-
raise ValueError("result_type must be a non-empty set with at most three elements")
|
|
427
|
-
allowed = {"keys", "values", "timestamps"}
|
|
428
|
-
if not result_type.issubset(allowed):
|
|
429
|
-
invalid = ", ".join(sorted(result_type - allowed))
|
|
430
|
-
raise ValueError(f"result_type contains invalid entries: {invalid}. Allowed: {sorted(allowed)}")
|
|
431
|
-
# Intersections/length checks are implied by the above conditions.
|
|
417
|
+
PersiDict._generic_iter(self, result_type)
|
|
432
418
|
|
|
433
419
|
suffix = "." + self.file_type
|
|
434
420
|
ext_len = len(self.file_type) + 1
|
|
435
421
|
prefix_len = len(self.root_prefix)
|
|
436
422
|
|
|
437
423
|
def splitter(full_name: str) -> SafeStrTuple:
|
|
438
|
-
"""Convert an S3 object key into a SafeStrTuple without the
|
|
424
|
+
"""Convert an S3 object key into a SafeStrTuple without the file extension.
|
|
439
425
|
|
|
440
426
|
Args:
|
|
441
|
-
full_name
|
|
427
|
+
full_name: Complete S3 object key including root_prefix and extension.
|
|
442
428
|
|
|
443
429
|
Returns:
|
|
444
|
-
SafeStrTuple: The parsed key
|
|
430
|
+
SafeStrTuple: The parsed key components with digest signatures intact.
|
|
445
431
|
|
|
446
432
|
Raises:
|
|
447
|
-
ValueError: If the
|
|
433
|
+
ValueError: If the object key does not start with this dictionary's
|
|
434
|
+
root_prefix (indicating it's outside the dictionary's scope).
|
|
448
435
|
"""
|
|
449
436
|
if not full_name.startswith(self.root_prefix):
|
|
450
437
|
raise ValueError(
|
|
@@ -454,7 +441,11 @@ class S3Dict(PersiDict):
|
|
|
454
441
|
return SafeStrTuple(result)
|
|
455
442
|
|
|
456
443
|
def step():
|
|
457
|
-
"""Generator that
|
|
444
|
+
"""Generator that paginates through S3 objects and yields requested data.
|
|
445
|
+
|
|
446
|
+
Yields dictionary entries (keys, values, timestamps) according to the
|
|
447
|
+
result_type specification from the parent _generic_iter method.
|
|
448
|
+
"""
|
|
458
449
|
paginator = self.s3_client.get_paginator("list_objects_v2")
|
|
459
450
|
page_iterator = paginator.paginate(
|
|
460
451
|
Bucket=self.bucket_name, Prefix = self.root_prefix)
|
|
@@ -491,19 +482,20 @@ class S3Dict(PersiDict):
|
|
|
491
482
|
return step()
|
|
492
483
|
|
|
493
484
|
|
|
494
|
-
def get_subdict(self, key:PersiDictKey) -> S3Dict:
|
|
495
|
-
"""
|
|
485
|
+
def get_subdict(self, key: PersiDictKey) -> S3Dict:
|
|
486
|
+
"""Create a subdictionary scoped to items with the specified prefix.
|
|
496
487
|
|
|
497
|
-
|
|
498
|
-
This method is
|
|
488
|
+
Returns an empty subdictionary if no items exist under the prefix.
|
|
489
|
+
This method is not part of the standard Python dictionary interface.
|
|
499
490
|
|
|
500
491
|
Args:
|
|
501
492
|
key (PersiDictKey): A common prefix (string or sequence of strings)
|
|
502
493
|
used to scope items stored under this dictionary.
|
|
503
494
|
|
|
504
495
|
Returns:
|
|
505
|
-
S3Dict: A new S3Dict instance
|
|
506
|
-
the
|
|
496
|
+
S3Dict: A new S3Dict instance with root_prefix extended by the given
|
|
497
|
+
key, sharing the parent's bucket, region, file_type, and other
|
|
498
|
+
configuration settings.
|
|
507
499
|
"""
|
|
508
500
|
|
|
509
501
|
key = SafeStrTuple(key)
|
|
@@ -529,23 +521,23 @@ class S3Dict(PersiDict):
|
|
|
529
521
|
return new_dict
|
|
530
522
|
|
|
531
523
|
|
|
532
|
-
def timestamp(self,key:PersiDictKey) -> float:
|
|
533
|
-
"""Get last modification
|
|
524
|
+
def timestamp(self, key: PersiDictKey) -> float:
|
|
525
|
+
"""Get the last modification timestamp for a key.
|
|
534
526
|
|
|
535
|
-
This method is
|
|
527
|
+
This method is not part of the standard Python dictionary interface.
|
|
536
528
|
|
|
537
529
|
Args:
|
|
538
|
-
key
|
|
530
|
+
key: Dictionary key (string or sequence of strings) or SafeStrTuple.
|
|
539
531
|
|
|
540
532
|
Returns:
|
|
541
|
-
float: POSIX timestamp (seconds since
|
|
542
|
-
modification time as reported by S3
|
|
543
|
-
|
|
533
|
+
float: POSIX timestamp (seconds since Unix epoch) of the last
|
|
534
|
+
modification time as reported by S3. The timestamp is timezone-aware
|
|
535
|
+
and converted to UTC.
|
|
544
536
|
|
|
545
537
|
Raises:
|
|
546
538
|
KeyError: If the key does not exist in S3.
|
|
547
539
|
"""
|
|
548
|
-
key =
|
|
540
|
+
key = non_empty_persidict_key(key)
|
|
549
541
|
obj_name = self._build_full_objectname(key)
|
|
550
542
|
response = self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
|
|
551
543
|
return response["LastModified"].timestamp()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.36.11
|
|
3
|
+
Version: 0.37.1
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
|
|
2
2
|
persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
|
|
3
|
-
persidict/file_dir_dict.py,sha256=
|
|
4
|
-
persidict/jokers.py,sha256=
|
|
3
|
+
persidict/file_dir_dict.py,sha256=2McFaDdr03g-PXlCIiG3fPb7h59LXW_3hDo0xLA17DE,24804
|
|
4
|
+
persidict/jokers.py,sha256=gTu7g2l2MIgBc3-hjvUrcwcgWs6tcbLyxB0u57M3bfU,3012
|
|
5
5
|
persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
|
|
6
|
-
persidict/persi_dict.py,sha256=
|
|
7
|
-
persidict/s3_dict.py,sha256=
|
|
6
|
+
persidict/persi_dict.py,sha256=CKVHy8YELLRVgLWgo0Akbd8RznCVxqt8JHszIjqA_sI,23176
|
|
7
|
+
persidict/s3_dict.py,sha256=F5N7DlpZBkEEUsDk7OQXiqACY-mJ2SOMNrh3AlHC9qo,21454
|
|
8
8
|
persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
|
|
9
9
|
persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
|
|
10
10
|
persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
|
|
11
11
|
persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
|
|
12
|
-
persidict-0.36.11.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
|
|
13
|
-
persidict-0.
|
|
14
|
-
persidict-0.36.11.dist-info/RECORD,,
|
|
12
|
+
persidict-0.37.1.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
|
|
13
|
+
persidict-0.37.1.dist-info/METADATA,sha256=M2EVewTSqdjfqdMi-7VHkSUKHqlL-ohlXYrcVRj7ViA,12387
|
|
14
|
+
persidict-0.37.1.dist-info/RECORD,,
|
|
File without changes
|