persidict 0.36.8__py3-none-any.whl → 0.36.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- persidict/file_dir_dict.py +4 -8
- persidict/jokers.py +11 -3
- persidict/overlapping_multi_dict.py +39 -27
- persidict/persi_dict.py +47 -38
- persidict/s3_dict.py +76 -116
- persidict/safe_chars.py +11 -0
- persidict/safe_str_tuple_signing.py +1 -1
- persidict/write_once_dict.py +13 -5
- {persidict-0.36.8.dist-info → persidict-0.36.9.dist-info}/METADATA +1 -1
- persidict-0.36.9.dist-info/RECORD +14 -0
- persidict-0.36.8.dist-info/RECORD +0 -14
- {persidict-0.36.8.dist-info → persidict-0.36.9.dist-info}/WHEEL +0 -0
persidict/file_dir_dict.py
CHANGED
|
@@ -45,7 +45,7 @@ class FileDirDict(PersiDict):
|
|
|
45
45
|
Insertion order is not preserved.
|
|
46
46
|
|
|
47
47
|
FileDirDict can store objects in binary files or in human-readable
|
|
48
|
-
text files (either in
|
|
48
|
+
text files (either in JSON format or as plain text).
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
51
|
_base_dir:str
|
|
@@ -74,9 +74,9 @@ class FileDirDict(PersiDict):
|
|
|
74
74
|
then file_type must be either "pkl" or "json".
|
|
75
75
|
|
|
76
76
|
Raises:
|
|
77
|
-
ValueError: If
|
|
78
|
-
if
|
|
79
|
-
|
|
77
|
+
ValueError: If file_type contains unsafe characters; or
|
|
78
|
+
if configuration is inconsistent (e.g., non-str values
|
|
79
|
+
with unsupported file_type).
|
|
80
80
|
RuntimeError: If base_dir cannot be created or is not a directory.
|
|
81
81
|
"""
|
|
82
82
|
|
|
@@ -87,10 +87,6 @@ class FileDirDict(PersiDict):
|
|
|
87
87
|
if file_type != replace_unsafe_chars(file_type, ""):
|
|
88
88
|
raise ValueError("file_type contains unsafe characters")
|
|
89
89
|
self.file_type = file_type
|
|
90
|
-
if self.file_type == "__etag__":
|
|
91
|
-
raise ValueError(
|
|
92
|
-
"file_type cannot be 'etag' as it is a reserved"
|
|
93
|
-
" extension for S3 caching.")
|
|
94
90
|
|
|
95
91
|
if (base_class_for_values is None or
|
|
96
92
|
not issubclass(base_class_for_values,str)):
|
persidict/jokers.py
CHANGED
|
@@ -29,8 +29,9 @@ class Joker(ParameterizableClass):
|
|
|
29
29
|
parameterizable framework. Subclasses represent value-less commands that
|
|
30
30
|
alter persistence behavior when assigned to a key.
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
Note:
|
|
33
|
+
This class uses a singleton pattern where each subclass maintains
|
|
34
|
+
exactly one instance that is returned on every instantiation.
|
|
34
35
|
"""
|
|
35
36
|
_instances: dict[type, "Joker"] = {}
|
|
36
37
|
|
|
@@ -43,7 +44,14 @@ class Joker(ParameterizableClass):
|
|
|
43
44
|
return {}
|
|
44
45
|
|
|
45
46
|
def __new__(cls):
|
|
46
|
-
"""Create or return the singleton instance for the subclass.
|
|
47
|
+
"""Create or return the singleton instance for the subclass.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
cls: The class for which to create or retrieve the singleton instance.
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
Joker: The singleton instance for the specified class.
|
|
54
|
+
"""
|
|
47
55
|
if cls not in Joker._instances:
|
|
48
56
|
Joker._instances[cls] = super().__new__(cls)
|
|
49
57
|
return Joker._instances[cls]
|
persidict/overlapping_multi_dict.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Dict, Type
|
|
3
|
+
from typing import Any, Dict, List, Type
|
|
4
4
|
|
|
5
5
|
from .persi_dict import PersiDict
|
|
6
6
|
|
|
@@ -14,17 +14,17 @@ class OverlappingMultiDict:
|
|
|
14
14
|
bucket and differ only in how items are materialized by file type.
|
|
15
15
|
|
|
16
16
|
Attributes:
|
|
17
|
-
dict_type (
|
|
18
|
-
|
|
19
|
-
shared_subdicts_params (
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
individual_subdicts_params (
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
subdicts_names (
|
|
27
|
-
|
|
17
|
+
dict_type (Type[PersiDict]): A subclass of PersiDict used to create each
|
|
18
|
+
sub-dictionary.
|
|
19
|
+
shared_subdicts_params (Dict[str, Any]): Parameters applied to every
|
|
20
|
+
created sub-dictionary (e.g., base_dir, bucket, immutable_items,
|
|
21
|
+
digest_len).
|
|
22
|
+
individual_subdicts_params (Dict[str, Dict[str, Any]]): Mapping from
|
|
23
|
+
file_type (attribute name) to a dict of parameters that are specific
|
|
24
|
+
to that sub-dictionary. These override or extend shared_subdicts_params
|
|
25
|
+
for the given file_type.
|
|
26
|
+
subdicts_names (List[str]): The list of file_type names (i.e., attribute
|
|
27
|
+
names) created.
|
|
28
28
|
|
|
29
29
|
Raises:
|
|
30
30
|
TypeError: If pickling is attempted or item access is used on the
|
|
@@ -37,17 +37,16 @@ class OverlappingMultiDict:
|
|
|
37
37
|
"""Initialize the container and create sub-dictionaries.
|
|
38
38
|
|
|
39
39
|
Args:
|
|
40
|
-
dict_type (
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
resulting dict also receives file_type=<key>.
|
|
40
|
+
dict_type (Type[PersiDict]): A subclass of PersiDict that will be
|
|
41
|
+
instantiated for each file_type provided via individual_subdicts_params.
|
|
42
|
+
shared_subdicts_params (Dict[str, Any]): Parameters shared by all
|
|
43
|
+
sub-dicts (e.g., base_dir, bucket).
|
|
44
|
+
**individual_subdicts_params (Dict[str, Dict[str, Any]]): Keyword
|
|
45
|
+
arguments where each key is a file_type (also the attribute name
|
|
46
|
+
to be created) and each value is a dict of parameters specific to
|
|
47
|
+
that sub-dict. These are merged with shared_subdicts_params when
|
|
48
|
+
constructing the sub-dict. The resulting dict also receives
|
|
49
|
+
file_type=<key>.
|
|
51
50
|
|
|
52
51
|
Raises:
|
|
53
52
|
TypeError: If dict_type is not a PersiDict subclass, or if
|
|
@@ -67,9 +66,9 @@ class OverlappingMultiDict:
|
|
|
67
66
|
raise TypeError(
|
|
68
67
|
f"Params for subdict {subdict_name!r} must be a dict")
|
|
69
68
|
self.__dict__[subdict_name] = dict_type(
|
|
70
|
-
**{**shared_subdicts_params
|
|
71
|
-
|
|
72
|
-
|
|
69
|
+
**{**shared_subdicts_params,
|
|
70
|
+
**individual_subdicts_params[subdict_name],
|
|
71
|
+
"file_type": subdict_name})
|
|
73
72
|
|
|
74
73
|
def __getstate__(self):
|
|
75
74
|
"""Prevent pickling.
|
|
@@ -82,6 +81,9 @@ class OverlappingMultiDict:
|
|
|
82
81
|
def __setstate__(self, state):
|
|
83
82
|
"""Prevent unpickling.
|
|
84
83
|
|
|
84
|
+
Args:
|
|
85
|
+
state: The state dictionary that would be used for unpickling (ignored).
|
|
86
|
+
|
|
85
87
|
Raises:
|
|
86
88
|
TypeError: Always raised; this object is not pickleable.
|
|
87
89
|
"""
|
|
@@ -93,6 +95,9 @@ class OverlappingMultiDict:
|
|
|
93
95
|
Suggest accessing items through the sub-dictionaries exposed as
|
|
94
96
|
attributes (e.g., obj.json[key]).
|
|
95
97
|
|
|
98
|
+
Args:
|
|
99
|
+
key: The key that would be accessed (ignored).
|
|
100
|
+
|
|
96
101
|
Raises:
|
|
97
102
|
TypeError: Always raised to indicate unsupported operation.
|
|
98
103
|
"""
|
|
@@ -104,6 +109,10 @@ class OverlappingMultiDict:
|
|
|
104
109
|
def __setitem__(self, key, value):
|
|
105
110
|
"""Disallow item assignment on the container itself.
|
|
106
111
|
|
|
112
|
+
Args:
|
|
113
|
+
key: The key that would be assigned (ignored).
|
|
114
|
+
value: The value that would be assigned (ignored).
|
|
115
|
+
|
|
107
116
|
Raises:
|
|
108
117
|
TypeError: Always raised to indicate unsupported operation.
|
|
109
118
|
"""
|
|
@@ -115,10 +124,13 @@ class OverlappingMultiDict:
|
|
|
115
124
|
def __delitem__(self, key):
|
|
116
125
|
"""Disallow item deletion on the container itself.
|
|
117
126
|
|
|
127
|
+
Args:
|
|
128
|
+
key: The key that would be deleted (ignored).
|
|
129
|
+
|
|
118
130
|
Raises:
|
|
119
131
|
TypeError: Always raised to indicate unsupported operation.
|
|
120
132
|
"""
|
|
121
133
|
raise TypeError(
|
|
122
134
|
"OverlappingMultiDict does not support item deletion by key. "
|
|
123
|
-
"Individual items can be
|
|
135
|
+
"Individual items can be deleted through nested dicts, "
|
|
124
136
|
f"which are available via attributes {self.subdicts_names}")
|
persidict/persi_dict.py
CHANGED
|
@@ -26,9 +26,9 @@ from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
|
|
|
26
26
|
from .safe_str_tuple import SafeStrTuple
|
|
27
27
|
|
|
28
28
|
PersiDictKey = SafeStrTuple | Sequence[str] | str
|
|
29
|
-
"""
|
|
29
|
+
"""A value which can be used as a key for PersiDict.
|
|
30
30
|
|
|
31
|
-
PersiDict
|
|
31
|
+
PersiDict instances accept keys in the form of SafeStrTuple,
|
|
32
32
|
or a string, or a sequence of strings.
|
|
33
33
|
The characters within strings must be URL/filename-safe.
|
|
34
34
|
If a string (or a sequence of strings) is passed to a PersiDict as a key,
|
|
@@ -60,21 +60,26 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
60
60
|
immutable_items:bool
|
|
61
61
|
base_class_for_values:Optional[type]
|
|
62
62
|
|
|
63
|
-
def __init__(self
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
"""Initialize base parameters shared by all persistent
|
|
63
|
+
def __init__(self,
|
|
64
|
+
immutable_items: bool = False,
|
|
65
|
+
digest_len: int = 8,
|
|
66
|
+
base_class_for_values: Optional[type] = None,
|
|
67
|
+
*args, **kwargs):
|
|
68
|
+
"""Initialize base parameters shared by all persistent dictionaries.
|
|
69
69
|
|
|
70
70
|
Args:
|
|
71
|
-
immutable_items: If True, items cannot be modified or deleted.
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
71
|
+
immutable_items (bool): If True, items cannot be modified or deleted.
|
|
72
|
+
Defaults to False.
|
|
73
|
+
digest_len (int): Number of hash characters to append to key components
|
|
74
|
+
to avoid case-insensitive collisions. Must be non-negative.
|
|
75
|
+
Defaults to 8.
|
|
76
|
+
base_class_for_values (Optional[type]): Optional base class that values
|
|
77
|
+
must inherit from. If None, values are not type-restricted.
|
|
78
|
+
Defaults to None.
|
|
79
|
+
*args: Additional positional arguments (ignored in base class, reserved
|
|
80
|
+
for subclasses).
|
|
81
|
+
**kwargs: Additional keyword arguments (ignored in base class, reserved
|
|
82
|
+
for subclasses).
|
|
78
83
|
|
|
79
84
|
Raises:
|
|
80
85
|
ValueError: If digest_len is negative.
|
|
@@ -91,9 +96,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
91
96
|
"""Return configuration parameters of this dictionary.
|
|
92
97
|
|
|
93
98
|
Returns:
|
|
94
|
-
dict: A sorted
|
|
99
|
+
dict: A sorted dictionary of parameters used to reconstruct the instance.
|
|
95
100
|
This supports the Parameterizable API and is absent in the
|
|
96
|
-
|
|
101
|
+
built-in dict.
|
|
97
102
|
"""
|
|
98
103
|
params = dict(
|
|
99
104
|
immutable_items=self.immutable_items
|
|
@@ -321,20 +326,23 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
321
326
|
return self._generic_iter({"keys", "values", "timestamps"})
|
|
322
327
|
|
|
323
328
|
|
|
324
|
-
def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
|
|
325
|
-
"""Insert key with default if absent; return the value.
|
|
329
|
+
def setdefault(self, key: PersiDictKey, default: Any = None) -> Any:
|
|
330
|
+
"""Insert key with default value if absent; return the current value.
|
|
331
|
+
|
|
332
|
+
Behaves like the built-in dict.setdefault() method: if the key exists,
|
|
333
|
+
return its current value; otherwise, set the key to the default value
|
|
334
|
+
and return that default.
|
|
326
335
|
|
|
327
336
|
Args:
|
|
328
|
-
key: Key (string
|
|
329
|
-
default: Value to insert if the key is not present.
|
|
337
|
+
key (PersiDictKey): Key (string, sequence of strings, or SafeStrTuple).
|
|
338
|
+
default (Any): Value to insert if the key is not present. Defaults to None.
|
|
330
339
|
|
|
331
340
|
Returns:
|
|
332
|
-
Any: Existing value if present; otherwise the provided default.
|
|
341
|
+
Any: Existing value if key is present; otherwise the provided default value.
|
|
333
342
|
|
|
334
343
|
Raises:
|
|
335
344
|
TypeError: If default is a Joker command (KEEP_CURRENT/DELETE_CURRENT).
|
|
336
345
|
"""
|
|
337
|
-
# TODO: check edge cases to ensure the same semantics as standard dicts
|
|
338
346
|
key = SafeStrTuple(key)
|
|
339
347
|
if isinstance(default, Joker):
|
|
340
348
|
raise TypeError("default must be a regular value, not a Joker command")
|
|
@@ -345,19 +353,20 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
345
353
|
return default
|
|
346
354
|
|
|
347
355
|
|
|
348
|
-
def __eq__(self, other:PersiDict) -> bool:
|
|
356
|
+
def __eq__(self, other: PersiDict) -> bool:
|
|
349
357
|
"""Compare dictionaries for equality.
|
|
350
358
|
|
|
351
|
-
If other is a PersiDict,
|
|
352
|
-
compare as mapping by keys and values.
|
|
359
|
+
If other is a PersiDict instance, compares portable parameters for equality.
|
|
360
|
+
Otherwise, attempts to compare as a mapping by comparing all keys and values.
|
|
353
361
|
|
|
354
362
|
Args:
|
|
355
|
-
other: Another dictionary-like object.
|
|
363
|
+
other (PersiDict): Another dictionary-like object to compare against.
|
|
356
364
|
|
|
357
365
|
Returns:
|
|
358
|
-
bool: True if considered equal, False otherwise.
|
|
366
|
+
bool: True if the dictionaries are considered equal, False otherwise.
|
|
359
367
|
"""
|
|
360
368
|
if isinstance(other, PersiDict):
|
|
369
|
+
#TODO: decide whether to keep this semantics
|
|
361
370
|
return self.get_portable_params() == other.get_portable_params()
|
|
362
371
|
try:
|
|
363
372
|
if len(self) != len(other):
|
|
@@ -525,15 +534,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
525
534
|
def oldest_keys(self, max_n=None):
|
|
526
535
|
"""Return up to max_n oldest keys in the dictionary.
|
|
527
536
|
|
|
537
|
+
This method is absent in the original Python dict API.
|
|
538
|
+
|
|
528
539
|
Args:
|
|
529
540
|
max_n (int | None): Maximum number of keys to return. If None,
|
|
530
541
|
return all keys sorted by age (oldest first). Values <= 0
|
|
531
|
-
yield an empty list.
|
|
542
|
+
yield an empty list. Defaults to None.
|
|
532
543
|
|
|
533
544
|
Returns:
|
|
534
545
|
list[SafeStrTuple]: The oldest keys, oldest first.
|
|
535
|
-
|
|
536
|
-
This method is absent in the original Python dict API.
|
|
537
546
|
"""
|
|
538
547
|
if max_n is None:
|
|
539
548
|
# If we need all keys, sort them all by timestamp
|
|
@@ -553,6 +562,8 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
553
562
|
def oldest_values(self, max_n=None):
|
|
554
563
|
"""Return up to max_n oldest values in the dictionary.
|
|
555
564
|
|
|
565
|
+
This method is absent in the original Python dict API.
|
|
566
|
+
|
|
556
567
|
Args:
|
|
557
568
|
max_n (int | None): Maximum number of values to return. If None,
|
|
558
569
|
return values for all keys sorted by age (oldest first). Values
|
|
@@ -560,8 +571,6 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
560
571
|
|
|
561
572
|
Returns:
|
|
562
573
|
list[Any]: Values corresponding to the oldest keys.
|
|
563
|
-
|
|
564
|
-
This method is absent in the original Python dict API.
|
|
565
574
|
"""
|
|
566
575
|
return [self[k] for k in self.oldest_keys(max_n)]
|
|
567
576
|
|
|
@@ -569,15 +578,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
569
578
|
def newest_keys(self, max_n=None):
|
|
570
579
|
"""Return up to max_n newest keys in the dictionary.
|
|
571
580
|
|
|
581
|
+
This method is absent in the original Python dict API.
|
|
582
|
+
|
|
572
583
|
Args:
|
|
573
584
|
max_n (int | None): Maximum number of keys to return. If None,
|
|
574
585
|
return all keys sorted by age (newest first). Values <= 0
|
|
575
|
-
yield an empty list.
|
|
586
|
+
yield an empty list. Defaults to None.
|
|
576
587
|
|
|
577
588
|
Returns:
|
|
578
589
|
list[SafeStrTuple]: The newest keys, newest first.
|
|
579
|
-
|
|
580
|
-
This method is absent in the original Python dict API.
|
|
581
590
|
"""
|
|
582
591
|
if max_n is None:
|
|
583
592
|
# If we need all keys, sort them all by timestamp in reverse order
|
|
@@ -597,6 +606,8 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
597
606
|
def newest_values(self, max_n=None):
|
|
598
607
|
"""Return up to max_n newest values in the dictionary.
|
|
599
608
|
|
|
609
|
+
This method is absent in the original Python dict API.
|
|
610
|
+
|
|
600
611
|
Args:
|
|
601
612
|
max_n (int | None): Maximum number of values to return. If None,
|
|
602
613
|
return values for all keys sorted by age (newest first). Values
|
|
@@ -604,7 +615,5 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
604
615
|
|
|
605
616
|
Returns:
|
|
606
617
|
list[Any]: Values corresponding to the newest keys.
|
|
607
|
-
|
|
608
|
-
This method is absent in the original Python dict API.
|
|
609
618
|
"""
|
|
610
619
|
return [self[k] for k in self.newest_keys(max_n)]
|
persidict/s3_dict.py
CHANGED
|
@@ -5,6 +5,8 @@ import tempfile
|
|
|
5
5
|
from typing import Any, Optional
|
|
6
6
|
|
|
7
7
|
import boto3
|
|
8
|
+
import joblib
|
|
9
|
+
import jsonpickle
|
|
8
10
|
from botocore.exceptions import ClientError
|
|
9
11
|
|
|
10
12
|
import parameterizable
|
|
@@ -15,6 +17,7 @@ from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
|
15
17
|
from .persi_dict import PersiDict
|
|
16
18
|
from .jokers import KEEP_CURRENT, DELETE_CURRENT
|
|
17
19
|
from .file_dir_dict import FileDirDict, PersiDictKey
|
|
20
|
+
from .overlapping_multi_dict import OverlappingMultiDict
|
|
18
21
|
|
|
19
22
|
S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
|
|
20
23
|
|
|
@@ -70,24 +73,28 @@ class S3Dict(PersiDict):
|
|
|
70
73
|
must be "pkl" or "json".
|
|
71
74
|
*args: Ignored; reserved for compatibility.
|
|
72
75
|
**kwargs: Ignored; reserved for compatibility.
|
|
73
|
-
|
|
74
|
-
Raises:
|
|
75
|
-
ValueError: If file_type is "__etag__" (reserved) or configuration
|
|
76
|
-
is inconsistent with base_class_for_values.
|
|
77
76
|
"""
|
|
78
77
|
|
|
79
|
-
super().__init__(immutable_items = immutable_items, digest_len =
|
|
78
|
+
super().__init__(immutable_items = immutable_items, digest_len = digest_len)
|
|
80
79
|
self.file_type = file_type
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
,
|
|
80
|
+
self.etag_file_type = f"{file_type}_etag"
|
|
81
|
+
|
|
82
|
+
self.local_cache = OverlappingMultiDict(
|
|
83
|
+
dict_type=FileDirDict,
|
|
84
|
+
shared_subdicts_params={
|
|
85
|
+
"base_dir": base_dir,
|
|
86
|
+
"immutable_items": immutable_items,
|
|
87
|
+
"base_class_for_values": base_class_for_values,
|
|
88
|
+
"digest_len": digest_len
|
|
89
|
+
},
|
|
90
|
+
**{
|
|
91
|
+
self.file_type: {},
|
|
92
|
+
self.etag_file_type: {"base_class_for_values": str}
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
self.main_cache = getattr(self.local_cache, self.file_type)
|
|
97
|
+
self.etag_cache = getattr(self.local_cache, self.etag_file_type)
|
|
91
98
|
|
|
92
99
|
self.region = region
|
|
93
100
|
if region is None:
|
|
@@ -118,7 +125,7 @@ class S3Dict(PersiDict):
|
|
|
118
125
|
including region, bucket_name, and root_prefix combined with
|
|
119
126
|
parameters from the local cache.
|
|
120
127
|
"""
|
|
121
|
-
params = self.
|
|
128
|
+
params = self.main_cache.get_params()
|
|
122
129
|
params["region"] = self.region
|
|
123
130
|
params["bucket_name"] = self.bucket_name
|
|
124
131
|
params["root_prefix"] = self.root_prefix
|
|
@@ -147,7 +154,7 @@ class S3Dict(PersiDict):
|
|
|
147
154
|
Returns:
|
|
148
155
|
str: Path to the local on-disk cache directory used by S3Dict.
|
|
149
156
|
"""
|
|
150
|
-
return self.
|
|
157
|
+
return self.main_cache.base_dir
|
|
151
158
|
|
|
152
159
|
|
|
153
160
|
def _build_full_objectname(self, key:PersiDictKey) -> str:
|
|
@@ -175,50 +182,19 @@ class S3Dict(PersiDict):
|
|
|
175
182
|
bool: True if the object exists (or is cached when immutable), else False.
|
|
176
183
|
"""
|
|
177
184
|
key = SafeStrTuple(key)
|
|
178
|
-
if self.immutable_items:
|
|
179
|
-
file_name = self.local_cache._build_full_path(
|
|
180
|
-
key, create_subdirs=True)
|
|
181
|
-
if os.path.exists(file_name):
|
|
185
|
+
if self.immutable_items and key in self.main_cache:
|
|
182
186
|
return True
|
|
183
187
|
try:
|
|
184
188
|
obj_name = self._build_full_objectname(key)
|
|
185
189
|
self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
|
|
186
190
|
return True
|
|
187
|
-
except:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
Args:
|
|
195
|
-
file_name (str): Path to the cached data file (without the ETag suffix).
|
|
196
|
-
etag (str): The S3 ETag value to persist alongside the cached file.
|
|
197
|
-
"""
|
|
198
|
-
if not etag:
|
|
199
|
-
return
|
|
200
|
-
etag_file_name = file_name + ".__etag__"
|
|
201
|
-
dir_name = os.path.dirname(etag_file_name)
|
|
202
|
-
# Write to a temporary file and then rename for atomicity
|
|
203
|
-
fd, temp_path = tempfile.mkstemp(dir=dir_name)
|
|
204
|
-
try:
|
|
205
|
-
with os.fdopen(fd, "w") as f:
|
|
206
|
-
f.write(etag)
|
|
207
|
-
f.flush()
|
|
208
|
-
os.fsync(f.fileno())
|
|
209
|
-
os.replace(temp_path, etag_file_name)
|
|
210
|
-
try:
|
|
211
|
-
if os.name == 'posix':
|
|
212
|
-
dir_fd = os.open(dir_name, os.O_RDONLY)
|
|
213
|
-
try:
|
|
214
|
-
os.fsync(dir_fd)
|
|
215
|
-
finally:
|
|
216
|
-
os.close(dir_fd)
|
|
217
|
-
except OSError:
|
|
218
|
-
pass
|
|
219
|
-
except:
|
|
220
|
-
os.remove(temp_path)
|
|
221
|
-
raise
|
|
191
|
+
except ClientError as e:
|
|
192
|
+
if e.response['ResponseMetadata']['HTTPStatusCode'] == 404:
|
|
193
|
+
self.main_cache.delete_if_exists(key)
|
|
194
|
+
self.etag_cache.delete_if_exists(key)
|
|
195
|
+
return False
|
|
196
|
+
else:
|
|
197
|
+
raise
|
|
222
198
|
|
|
223
199
|
|
|
224
200
|
def __getitem__(self, key:PersiDictKey) -> Any:
|
|
@@ -236,19 +212,15 @@ class S3Dict(PersiDict):
|
|
|
236
212
|
"""
|
|
237
213
|
|
|
238
214
|
key = SafeStrTuple(key)
|
|
239
|
-
file_name = self.local_cache._build_full_path(key, create_subdirs=True)
|
|
240
215
|
|
|
241
|
-
if self.immutable_items and
|
|
242
|
-
return self.
|
|
216
|
+
if self.immutable_items and key in self.main_cache:
|
|
217
|
+
return self.main_cache[key]
|
|
243
218
|
|
|
244
219
|
obj_name = self._build_full_objectname(key)
|
|
245
220
|
|
|
246
221
|
cached_etag = None
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
etag_file_name):
|
|
250
|
-
with open(etag_file_name, "r") as f:
|
|
251
|
-
cached_etag = f.read()
|
|
222
|
+
if not self.immutable_items and key in self.main_cache and key in self.etag_cache:
|
|
223
|
+
cached_etag = self.etag_cache[key]
|
|
252
224
|
|
|
253
225
|
try:
|
|
254
226
|
get_kwargs = {'Bucket': self.bucket_name, 'Key': obj_name}
|
|
@@ -261,37 +233,23 @@ class S3Dict(PersiDict):
|
|
|
261
233
|
s3_etag = response.get("ETag")
|
|
262
234
|
body = response['Body']
|
|
263
235
|
|
|
264
|
-
|
|
265
|
-
fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".__tmp__")
|
|
266
|
-
|
|
267
|
-
try:
|
|
268
|
-
with os.fdopen(fd, 'wb') as f:
|
|
269
|
-
# Stream body to file to avoid loading all in memory
|
|
270
|
-
for chunk in body.iter_chunks():
|
|
271
|
-
f.write(chunk)
|
|
272
|
-
f.flush()
|
|
273
|
-
os.fsync(f.fileno())
|
|
274
|
-
os.replace(temp_path, file_name)
|
|
275
|
-
try:
|
|
276
|
-
if os.name == 'posix':
|
|
277
|
-
dir_fd = os.open(dir_name, os.O_RDONLY)
|
|
278
|
-
try:
|
|
279
|
-
os.fsync(dir_fd)
|
|
280
|
-
finally:
|
|
281
|
-
os.close(dir_fd)
|
|
282
|
-
except OSError:
|
|
283
|
-
pass
|
|
284
|
-
except:
|
|
285
|
-
os.remove(temp_path) # Clean up temp file on failure
|
|
286
|
-
raise
|
|
236
|
+
# Read all data into memory and store in cache
|
|
287
237
|
|
|
288
|
-
self.
|
|
238
|
+
if self.file_type == 'json':
|
|
239
|
+
deserialized_value = jsonpickle.loads(body.read().decode('utf-8'))
|
|
240
|
+
elif self.file_type == 'pkl':
|
|
241
|
+
deserialized_value = joblib.load(body)
|
|
242
|
+
else:
|
|
243
|
+
deserialized_value = body.read().decode('utf-8')
|
|
244
|
+
|
|
245
|
+
self.main_cache[key] = deserialized_value
|
|
246
|
+
self.etag_cache[key] = s3_etag
|
|
289
247
|
|
|
290
248
|
except ClientError as e:
|
|
291
249
|
error_code = e.response.get("Error", {}).get("Code")
|
|
292
250
|
if e.response['ResponseMetadata']['HTTPStatusCode'] == 304:
|
|
293
251
|
# 304 Not Modified: our cached version is up-to-date.
|
|
294
|
-
# The
|
|
252
|
+
# The value will be read from cache at the end of the function.
|
|
295
253
|
pass
|
|
296
254
|
elif e.response.get("Error", {}).get("Code") == 'NoSuchKey':
|
|
297
255
|
raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
|
|
@@ -299,20 +257,21 @@ class S3Dict(PersiDict):
|
|
|
299
257
|
# Re-raise other client errors (e.g., permissions, throttling)
|
|
300
258
|
raise
|
|
301
259
|
|
|
302
|
-
return self.
|
|
260
|
+
return self.main_cache[key]
|
|
303
261
|
|
|
304
262
|
|
|
305
263
|
def __setitem__(self, key:PersiDictKey, value:Any):
|
|
306
264
|
"""Store a value for a key in S3 and update the local cache.
|
|
307
265
|
|
|
308
|
-
Interprets joker values KEEP_CURRENT and DELETE_CURRENT
|
|
309
|
-
Validates
|
|
310
|
-
local cache and uploads to S3. If possible, caches the
|
|
311
|
-
enable conditional GETs later.
|
|
266
|
+
Interprets special joker values: KEEP_CURRENT (no-op) and DELETE_CURRENT
|
|
267
|
+
(deletes the key). Validates value type if base_class_for_values is set,
|
|
268
|
+
then writes to the local cache and uploads to S3. If possible, caches the
|
|
269
|
+
S3 ETag locally to enable conditional GETs later.
|
|
312
270
|
|
|
313
271
|
Args:
|
|
314
272
|
key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
|
|
315
|
-
value (Any): Value to store, or a joker command
|
|
273
|
+
value (Any): Value to store, or a joker command (KEEP_CURRENT or
|
|
274
|
+
DELETE_CURRENT from the jokers module).
|
|
316
275
|
|
|
317
276
|
Raises:
|
|
318
277
|
KeyError: If attempting to modify an existing item when
|
|
@@ -344,23 +303,23 @@ class S3Dict(PersiDict):
|
|
|
344
303
|
if self.immutable_items and key in self:
|
|
345
304
|
raise KeyError("Can't modify an immutable item")
|
|
346
305
|
|
|
347
|
-
file_name = self.local_cache._build_full_path(key, create_subdirs=True)
|
|
348
306
|
obj_name = self._build_full_objectname(key)
|
|
349
307
|
|
|
350
|
-
|
|
351
|
-
self.
|
|
308
|
+
# Store in local cache first
|
|
309
|
+
self.main_cache[key] = value
|
|
310
|
+
|
|
311
|
+
# Get the file path from the cache to upload to S3
|
|
312
|
+
file_path = self.main_cache._build_full_path(key)
|
|
313
|
+
self.s3_client.upload_file(file_path, self.bucket_name, obj_name)
|
|
352
314
|
|
|
353
315
|
try:
|
|
354
316
|
head = self.s3_client.head_object(
|
|
355
317
|
Bucket=self.bucket_name, Key=obj_name)
|
|
356
|
-
|
|
357
|
-
self._write_etag_file(file_name, s3_etag)
|
|
318
|
+
self.etag_cache[key] = head.get("ETag")
|
|
358
319
|
except ClientError:
|
|
359
|
-
# If we can't get ETag, we should remove any existing etag
|
|
320
|
+
# If we can't get ETag, we should remove any existing etag
|
|
360
321
|
# to force a re-download on the next __getitem__ call.
|
|
361
|
-
|
|
362
|
-
if os.path.exists(etag_file_name):
|
|
363
|
-
os.remove(etag_file_name)
|
|
322
|
+
self.etag_cache.delete_if_exists(key)
|
|
364
323
|
|
|
365
324
|
|
|
366
325
|
def __delitem__(self, key:PersiDictKey):
|
|
@@ -370,20 +329,19 @@ class S3Dict(PersiDict):
|
|
|
370
329
|
key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
|
|
371
330
|
|
|
372
331
|
Raises:
|
|
373
|
-
KeyError: If immutable_items is True.
|
|
332
|
+
KeyError: If immutable_items is True, or if the key does not exist in S3.
|
|
374
333
|
"""
|
|
375
334
|
|
|
376
335
|
key = SafeStrTuple(key)
|
|
377
336
|
if self.immutable_items:
|
|
378
337
|
raise KeyError("Can't delete an immutable item")
|
|
338
|
+
|
|
379
339
|
obj_name = self._build_full_objectname(key)
|
|
340
|
+
|
|
380
341
|
self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
etag_file_name = file_name + ".__etag__"
|
|
385
|
-
if os.path.isfile(etag_file_name):
|
|
386
|
-
os.remove(etag_file_name)
|
|
342
|
+
self.etag_cache.delete_if_exists(key)
|
|
343
|
+
self.main_cache.delete_if_exists(key)
|
|
344
|
+
|
|
387
345
|
|
|
388
346
|
def __len__(self) -> int:
|
|
389
347
|
"""Return len(self).
|
|
@@ -415,7 +373,7 @@ class S3Dict(PersiDict):
|
|
|
415
373
|
return num_files
|
|
416
374
|
|
|
417
375
|
|
|
418
|
-
def _generic_iter(self, result_type: str):
|
|
376
|
+
def _generic_iter(self, result_type: set[str]):
|
|
419
377
|
"""Underlying implementation for .items()/.keys()/.values() iterators.
|
|
420
378
|
|
|
421
379
|
Iterates over S3 objects under the configured root_prefix and yields
|
|
@@ -529,13 +487,12 @@ class S3Dict(PersiDict):
|
|
|
529
487
|
|
|
530
488
|
key = SafeStrTuple(key)
|
|
531
489
|
if len(key):
|
|
532
|
-
key = SafeStrTuple(key)
|
|
533
490
|
key = sign_safe_str_tuple(key, self.digest_len)
|
|
534
491
|
full_root_prefix = self.root_prefix + "/".join(key)
|
|
535
492
|
else:
|
|
536
493
|
full_root_prefix = self.root_prefix
|
|
537
494
|
|
|
538
|
-
new_dir_path = self.
|
|
495
|
+
new_dir_path = self.main_cache._build_full_path(
|
|
539
496
|
key, create_subdirs = True, is_file_path = False)
|
|
540
497
|
|
|
541
498
|
new_dict = S3Dict(
|
|
@@ -561,9 +518,12 @@ class S3Dict(PersiDict):
|
|
|
561
518
|
|
|
562
519
|
Returns:
|
|
563
520
|
float: POSIX timestamp (seconds since the Unix epoch) of the last
|
|
564
|
-
modification time as reported by S3 for the object.
|
|
521
|
+
modification time as reported by S3 for the object. The timestamp
|
|
522
|
+
is timezone-aware and converted to UTC.
|
|
523
|
+
|
|
524
|
+
Raises:
|
|
525
|
+
KeyError: If the key does not exist in S3.
|
|
565
526
|
"""
|
|
566
|
-
# TODO: check work with timezones
|
|
567
527
|
key = SafeStrTuple(key)
|
|
568
528
|
obj_name = self._build_full_objectname(key)
|
|
569
529
|
response = self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
|
persidict/safe_chars.py
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
|
+
"""Safe character handling utilities for URL and filesystem compatibility.
|
|
2
|
+
|
|
3
|
+
This module defines character sets and length constraints for building strings
|
|
4
|
+
that are safe for use in URLs, filenames, and other contexts where character
|
|
5
|
+
restrictions apply.
|
|
6
|
+
"""
|
|
1
7
|
import string
|
|
2
8
|
|
|
9
|
+
# Set of characters considered safe for filenames and URL components.
|
|
10
|
+
# Includes ASCII letters (a-z, A-Z), digits (0-9), and special chars: ()_-~.=
|
|
3
11
|
SAFE_CHARS_SET = set(string.ascii_letters + string.digits + "()_-~.=")
|
|
12
|
+
|
|
13
|
+
# Maximum length for safe strings to ensure compatibility with various filesystems
|
|
14
|
+
# and URL length limitations. Set to 254 to stay well under most system limits.
|
|
4
15
|
SAFE_STRING_MAX_LENGTH = 254
|
|
5
16
|
|
|
6
17
|
def get_safe_chars() -> set[str]:
|
persidict/safe_str_tuple_signing.py
CHANGED
|
@@ -114,7 +114,7 @@ def _add_all_suffixes_if_absent(
|
|
|
114
114
|
|
|
115
115
|
new_seq = []
|
|
116
116
|
for s in str_seq:
|
|
117
|
-
new_seq.append(_add_signature_suffix_if_absent(s,digest_len))
|
|
117
|
+
new_seq.append(_add_signature_suffix_if_absent(s, digest_len))
|
|
118
118
|
|
|
119
119
|
new_seq = SafeStrTuple(*new_seq)
|
|
120
120
|
|
persidict/write_once_dict.py
CHANGED
|
@@ -268,7 +268,7 @@ class WriteOnceDict(PersiDict):
|
|
|
268
268
|
"""Delegate iteration to the wrapped dict.
|
|
269
269
|
|
|
270
270
|
Args:
|
|
271
|
-
iter_type:
|
|
271
|
+
iter_type: Type of iterator: 'items' and/or 'keys' and/or 'timestamps'.
|
|
272
272
|
|
|
273
273
|
Returns:
|
|
274
274
|
Any: Iterator from the wrapped dictionary.
|
|
@@ -299,13 +299,21 @@ class WriteOnceDict(PersiDict):
|
|
|
299
299
|
return getattr(self._wrapped_dict, name)
|
|
300
300
|
|
|
301
301
|
@property
|
|
302
|
-
def base_dir(self):
|
|
303
|
-
"""Base directory of the wrapped dict (if applicable)."""
|
|
302
|
+
def base_dir(self) -> str|None:
|
|
303
|
+
"""Base directory of the wrapped dict (if applicable).
|
|
304
|
+
|
|
305
|
+
Returns:
|
|
306
|
+
str | None: The base directory path, or None if not applicable.
|
|
307
|
+
"""
|
|
304
308
|
return self._wrapped_dict.base_dir
|
|
305
309
|
|
|
306
310
|
@property
|
|
307
|
-
def base_url(self):
|
|
308
|
-
"""Base URL of the wrapped dict (if applicable)."""
|
|
311
|
+
def base_url(self) -> str|None:
|
|
312
|
+
"""Base URL of the wrapped dict (if applicable).
|
|
313
|
+
|
|
314
|
+
Returns:
|
|
315
|
+
str | None: The base URL, or None if not applicable.
|
|
316
|
+
"""
|
|
309
317
|
return self._wrapped_dict.base_url
|
|
310
318
|
|
|
311
319
|
def get_subdict(self, prefix_key: PersiDictKey) -> WriteOnceDict:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.36.8
|
|
3
|
+
Version: 0.36.9
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
|
|
2
|
+
persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
|
|
3
|
+
persidict/file_dir_dict.py,sha256=JJ5oEyaqwTm9g_tUrVfut0IYI7bd5B2lhxrLzadTohA,25541
|
|
4
|
+
persidict/jokers.py,sha256=7ibh0ccfkEm3EvKIOhH9ShfZ0_MBKYMvKa1rwqHg1hk,3010
|
|
5
|
+
persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
|
|
6
|
+
persidict/persi_dict.py,sha256=Q7fGs9LFPxSLtC0jJwDOP1AVD9_t01SnwdN4RVBMZtg,20660
|
|
7
|
+
persidict/s3_dict.py,sha256=GOFTpSwFESoGxEykS7TVjkw0VRIxRon-xXytrnwAuTY,19905
|
|
8
|
+
persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
|
|
9
|
+
persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
|
|
10
|
+
persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
|
|
11
|
+
persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
|
|
12
|
+
persidict-0.36.9.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
|
|
13
|
+
persidict-0.36.9.dist-info/METADATA,sha256=h4j6Waop0pzsEVTRDj-Sx2NMa-GZfs2AGnbrh7gxeC8,12387
|
|
14
|
+
persidict-0.36.9.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
|
|
2
|
-
persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
|
|
3
|
-
persidict/file_dir_dict.py,sha256=IDRb6a3YQvM7Gf0jbqKkTi4VuSPecTw6Ca6HZ947Qj8,25784
|
|
4
|
-
persidict/jokers.py,sha256=Ow4tWOTTMGKvolJyVuEF-oEgE_u3vDZtA9UFwTdhNV4,2731
|
|
5
|
-
persidict/overlapping_multi_dict.py,sha256=gBiHaCb5pTGNW3ZrakgaiGDid6oCfoP7Vq1rxXGnFWg,5476
|
|
6
|
-
persidict/persi_dict.py,sha256=DIMQaY4gE8NSYTlHlk9rfOJJEYUuLV8kmQ-gc474py4,20052
|
|
7
|
-
persidict/s3_dict.py,sha256=VKDqY9sASffeXtfbavVWk8-umrioIG5Xq57Qqg1wPH4,21522
|
|
8
|
-
persidict/safe_chars.py,sha256=9Qy24fu2dmiJOdmCF8mKZULfQaRp7H4oxfgDXeLgogI,1160
|
|
9
|
-
persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
|
|
10
|
-
persidict/safe_str_tuple_signing.py,sha256=RQAj4fnpRVaOe0KpwLler1UTaeNOgXCQpU3t80ixtxg,7493
|
|
11
|
-
persidict/write_once_dict.py,sha256=-lPQ_yuU62pczHT0BYO6SFbiZBKFq8Tj9ln3jCzNDzA,11443
|
|
12
|
-
persidict-0.36.8.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
|
|
13
|
-
persidict-0.36.8.dist-info/METADATA,sha256=816s1lWkpNdgJMfVS16sDaYDyHFzaLZRDHVVMs86Slo,12387
|
|
14
|
-
persidict-0.36.8.dist-info/RECORD,,
|
|
File without changes
|