persidict 0.26.0__tar.gz → 0.31.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- {persidict-0.26.0 → persidict-0.31.0}/PKG-INFO +1 -1
- {persidict-0.26.0 → persidict-0.31.0}/pyproject.toml +1 -1
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/__init__.py +4 -2
- persidict-0.31.0/src/persidict/overlapping_multi_dict.py +57 -0
- persidict-0.31.0/src/persidict/write_once_dict.py +180 -0
- {persidict-0.26.0 → persidict-0.31.0}/README.md +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/.DS_Store +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/file_dir_dict.py +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/jokers.py +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/persi_dict.py +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/s3_dict.py +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/safe_chars.py +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/safe_str_tuple.py +0 -0
- {persidict-0.26.0 → persidict-0.31.0}/src/persidict/safe_str_tuple_signing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.26.0
|
|
3
|
+
Version: 0.31.0
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "persidict"
|
|
7
|
-
version = "0.26.0"
|
|
7
|
+
version = "0.31.0"
|
|
8
8
|
description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -24,8 +24,10 @@ and replace_unsafe_chars(), which replaces forbidden characters in a string.
|
|
|
24
24
|
"""
|
|
25
25
|
from .safe_chars import get_safe_chars, replace_unsafe_chars
|
|
26
26
|
from .safe_str_tuple import SafeStrTuple
|
|
27
|
-
from .persi_dict import PersiDict
|
|
27
|
+
from .persi_dict import PersiDict, PersiDictKey
|
|
28
28
|
from .file_dir_dict import FileDirDict
|
|
29
29
|
from .s3_dict import S3Dict
|
|
30
|
+
from .write_once_dict import WriteOnceDict
|
|
30
31
|
from .jokers import Joker, KeepCurrentFlag, DeleteCurrentFlag
|
|
31
|
-
from .jokers import KEEP_CURRENT, DELETE_CURRENT
|
|
32
|
+
from .jokers import KEEP_CURRENT, DELETE_CURRENT
|
|
33
|
+
from .overlapping_multi_dict import OverlappingMultiDict
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from .persi_dict import PersiDict
|
|
2
|
+
|
|
3
|
+
class OverlappingMultiDict:
    """A class that holds several PersiDict objects with different file_type-s.

    The class is designed to be used as a container for several PersiDict
    objects that have different file_type-s. Every inner PersiDict object is
    built from the same shared parameters (for example, the same dir_name),
    optionally overridden by per-dict parameters. Each inner PersiDict object
    is accessible as an attribute of the OverlappingMultiDict object.
    The attribute name is the same as the file_type
    of the inner PersiDict object.

    OverlappingMultiDict allows to store several PersiDict objects
    in a single object, which can be useful for managing multiple types of
    data in a single file directory or in an s3 bucket.

    Item access by key (``obj[key]``) and pickling are intentionally
    unsupported and raise TypeError.
    """

    def __init__(self, dict_type: type, shared_subdicts_params: dict,
                 **individual_subdicts_params):
        """Create one inner dict per keyword argument.

        Args:
            dict_type: A PersiDict subclass used to build every inner dict.
            shared_subdicts_params: Constructor keyword arguments common to
                all inner dicts.
            **individual_subdicts_params: Maps a file_type (which also
                becomes the attribute name) to a dict of constructor keyword
                arguments specific to that inner dict. These override the
                shared ones; "file_type" itself is always set to the
                keyword name.

        Raises:
            AssertionError: If dict_type is not a PersiDict subclass, or if
                any of the parameter containers is not a dict.
        """
        assert issubclass(dict_type, PersiDict), (
            "dict_type must be a subclass of PersiDict")
        assert isinstance(shared_subdicts_params, dict), (
            "shared_subdicts_params must be a dict")
        self.dict_type = dict_type
        self.shared_subdicts_params = shared_subdicts_params
        self.individual_subdicts_params = individual_subdicts_params
        self.subdicts_names = list(individual_subdicts_params)
        for subdict_name, subdict_params in individual_subdicts_params.items():
            assert isinstance(subdict_params, dict), (
                f"Parameters for subdict {subdict_name!r} must be a dict")
            # Later entries win: individual params override shared ones,
            # and file_type is always forced to the attribute name.
            merged_params = {**shared_subdicts_params,
                             **subdict_params,
                             "file_type": subdict_name}
            setattr(self, subdict_name, dict_type(**merged_params))

    def __getstate__(self):
        """Refuse pickling: always raises TypeError."""
        raise TypeError("OverlappingMultiDict cannot be pickled.")

    def __setstate__(self, state):
        """Refuse unpickling: always raises TypeError."""
        raise TypeError("OverlappingMultiDict cannot be pickled.")

    def __getitem__(self, key):
        """Refuse item lookup: always raises TypeError."""
        raise TypeError(
            "OverlappingMultiDict does not support item access by key. "
            "Individual items should be accessed through nested dicts, "
            f"which are available via attributes {self.subdicts_names}")

    def __setitem__(self, key, value):
        """Refuse item assignment: always raises TypeError."""
        raise TypeError(
            "OverlappingMultiDict does not support item assignment by key. "
            "Individual items should be accessed through nested dicts, "
            f"which are available via attributes {self.subdicts_names}")

    def __delitem__(self, key):
        """Refuse item deletion: always raises TypeError."""
        raise TypeError(
            "OverlappingMultiDict does not support item deletion by key. "
            "Individual items can be deleted through nested dicts, "
            f"which are available via attributes {self.subdicts_names}")
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
from deepdiff import DeepDiff
|
|
6
|
+
from parameterizable import register_parameterizable_class
|
|
7
|
+
from .jokers import KEEP_CURRENT, KeepCurrentFlag
|
|
8
|
+
from .persi_dict import PersiDict
|
|
9
|
+
from .file_dir_dict import FileDirDict
|
|
10
|
+
import random
|
|
11
|
+
import sys
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import joblib.hashing
|
|
15
|
+
from .persi_dict import PersiDictKey
|
|
16
|
+
|
|
17
|
+
def _get_md5_signature(x: Any) -> str:
    """Compute an MD5 signature of an arbitrary object, as a string.

    The hashing is delegated to joblib: NumpyHasher is picked when the
    numpy module has already been imported in this process, otherwise the
    plain Hasher is used. Both are created with hash_name='md5'.
    """
    # Only look at sys.modules, so numpy is never imported as a side effect.
    numpy_is_loaded = 'numpy' in sys.modules
    hasher_class = (joblib.hashing.NumpyHasher if numpy_is_loaded
                    else joblib.hashing.Hasher)
    return str(hasher_class(hash_name='md5').hash(x))
|
|
29
|
+
|
|
30
|
+
class WriteOnceDict(PersiDict):
    """A dictionary that always keeps the first value assigned to a key.

    All data is stored in a wrapped PersiDict whose items are immutable.
    Assigning to a key that is already set never overwrites the stored
    value. Instead, with probability `p_consistency_checks`, the new value
    is compared against the value that was first set. If the new value is
    different, a ValueError is raised. One can control the frequency of
    these checks or even completely disable them by setting
    `p_consistency_checks` to 0.
    """
    _wrapped_dict: PersiDict
    _p_consistency_checks: float | None
    _consistency_checks_attempted: int
    _consistency_checks_passed: int

    def __init__(self,
                 wrapped_dict: PersiDict | None = None,
                 p_consistency_checks: float | None = None):
        """Wrap an immutable-items PersiDict.

        Args:
            wrapped_dict: The underlying store; must have
                immutable_items == True. When None, a
                FileDirDict(immutable_items=True) is created.
            p_consistency_checks: Probability in [0, 1] of verifying a
                repeated assignment against the stored value.
                None means 0.0 (no checks).

        Raises:
            AssertionError: If wrapped_dict is not a PersiDict or does not
                have immutable items.
            ValueError: If p_consistency_checks is outside [0, 1] or is
                KEEP_CURRENT.
        """
        if wrapped_dict is None:
            wrapped_dict = FileDirDict(immutable_items=True)
        assert isinstance(wrapped_dict, PersiDict)
        assert wrapped_dict.immutable_items == True
        self.p_consistency_checks = p_consistency_checks
        PersiDict.__init__(
            self,
            base_class_for_values=wrapped_dict.base_class_for_values,
            immutable_items=True,
            digest_len=wrapped_dict.digest_len)
        self._wrapped_dict = wrapped_dict
        self._consistency_checks_passed = 0
        self._consistency_checks_attempted = 0

    @property
    def p_consistency_checks(self) -> float:
        """ Probability of checking the value against the first value set. """
        return self._p_consistency_checks

    @p_consistency_checks.setter
    def p_consistency_checks(self, value: float | None | KeepCurrentFlag) -> None:
        """Validate and store the probability.

        KEEP_CURRENT leaves an already-set probability untouched, and
        None is stored as 0.0.

        Raises:
            ValueError: If value is outside [0, 1], or if KEEP_CURRENT is
                used before any probability has been set.
        """
        if value is KEEP_CURRENT:
            if hasattr(self, '_p_consistency_checks'):
                return
            raise ValueError(
                "KEEP_CURRENT can't be used to initialize p_consistency_checks.")
        if value is None:
            value = 0.0
        if not (0 <= value <= 1):
            raise ValueError(
                f"p_consistency_checks must be in [0, 1], "
                f"got {value}.")
        self._p_consistency_checks = value

    @property
    def consistency_checks_failed(self) -> int:
        """ Returns the number of failed consistency checks. """
        return self._consistency_checks_attempted - self._consistency_checks_passed

    @property
    def consistency_checks_attempted(self) -> int:
        """ Returns the number of attempted consistency checks. """
        return self._consistency_checks_attempted

    @property
    def consistency_checks_passed(self) -> int:
        """ Returns the number of successful consistency checks. """
        return self._consistency_checks_passed

    def get_params(self):
        """Return the constructor arguments as a dict sorted by name."""
        params = dict(
            wrapped_dict=self._wrapped_dict,
            p_consistency_checks=self.p_consistency_checks)
        return dict(sorted(params.items()))

    def _store_if_absent(self, key, value) -> bool:
        """Write value under key unless the key exists; return True if it existed."""
        if key in self._wrapped_dict:
            return True
        self._wrapped_dict[key] = value
        return False

    def __setitem__(self, key, value):
        """ Set the value of a key if it is not already set.

        If the key is already set, the stored value is kept and, with
        probability `p_consistency_checks`, the new value is compared
        against it.

        Raises:
            KeyError: If the key is still missing from the wrapped dict
                after the write attempt.
            ValueError: If a consistency check finds that the new value
                differs from the stored one.
        """
        try:  # extra protections to better handle concurrent writes
            check_needed = self._store_if_absent(key, value)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are not swallowed. Retry once after a short
            # random pause; a second failure propagates to the caller.
            time.sleep(random.random() / random.randint(1, 5))
            check_needed = self._store_if_absent(key, value)

        if key not in self._wrapped_dict:
            raise KeyError(
                f"Key {key} was not set in the wrapped dict "
                f"{self._wrapped_dict}. This should not happen.")

        if check_needed and self.p_consistency_checks > 0:
            if random.random() < self.p_consistency_checks:
                self._consistency_checks_attempted += 1
                # Read the stored value once: each lookup goes to the
                # wrapped persistent store.
                stored_value = self._wrapped_dict[key]
                signature_old = _get_md5_signature(stored_value)
                signature_new = _get_md5_signature(value)
                if signature_old != signature_new:
                    diff_dict = DeepDiff(stored_value, value)
                    raise ValueError(
                        f"Key {key} is already set "
                        f"to {stored_value} "
                        f"and the new value {value} is different, "
                        f"which is not allowed. Details here: {diff_dict} ")
                self._consistency_checks_passed += 1

    def __contains__(self, item):
        """Report whether item is a key in the wrapped dict."""
        return item in self._wrapped_dict

    def __getitem__(self, key):
        """Return the value stored under key in the wrapped dict."""
        return self._wrapped_dict[key]

    def __len__(self):
        """Return the number of items in the wrapped dict."""
        return len(self._wrapped_dict)

    def _generic_iter(self, iter_type: str):
        """Delegate iteration to the wrapped dict's _generic_iter."""
        return self._wrapped_dict._generic_iter(iter_type)

    def timestamp(self, key: PersiDictKey) -> float:
        """Return the wrapped dict's timestamp for key."""
        return self._wrapped_dict.timestamp(key)

    def __getattr__(self, name):
        """Forward unknown attribute access to the wrapped object.

        Raises:
            AttributeError: If the wrapped dict has not been set yet, or if
                it lacks the requested attribute.
        """
        # __getattr__ runs only after normal lookup fails. Fetch
        # _wrapped_dict without re-entering __getattr__; otherwise a lookup
        # made before __init__ assigns it would recurse without bound.
        try:
            wrapped = object.__getattribute__(self, "_wrapped_dict")
        except AttributeError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        return getattr(wrapped, name)

    @property
    def base_dir(self):
        """The wrapped dict's base_dir."""
        return self._wrapped_dict.base_dir

    @property
    def base_url(self):
        """The wrapped dict's base_url."""
        return self._wrapped_dict.base_url

    def get_subdict(self, prefix_key: PersiDictKey) -> WriteOnceDict:
        """Return a WriteOnceDict over the wrapped dict's sub-dictionary.

        The new object wraps `self._wrapped_dict.get_subdict(prefix_key)`,
        uses the same p_consistency_checks, and starts with fresh
        consistency-check counters.
        """
        subdict = self._wrapped_dict.get_subdict(prefix_key)
        return WriteOnceDict(subdict, self.p_consistency_checks)
|
|
179
|
+
|
|
180
|
+
# Register WriteOnceDict with the `parameterizable` package.
# NOTE(review): presumably this lets instances be described/recreated from
# their get_params() output — confirm against the parameterizable docs.
register_parameterizable_class(WriteOnceDict)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|