persidict 0.25.0__tar.gz → 0.30.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- {persidict-0.25.0 → persidict-0.30.0}/PKG-INFO +4 -3
- {persidict-0.25.0 → persidict-0.30.0}/README.md +3 -2
- {persidict-0.25.0 → persidict-0.30.0}/pyproject.toml +1 -1
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/__init__.py +4 -2
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/file_dir_dict.py +6 -3
- persidict-0.30.0/src/persidict/jokers.py +51 -0
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/persi_dict.py +6 -4
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/s3_dict.py +6 -3
- persidict-0.30.0/src/persidict/write_once_dict.py +180 -0
- persidict-0.25.0/src/persidict/nochange_const.py +0 -28
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/.DS_Store +0 -0
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/safe_chars.py +0 -0
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/safe_str_tuple.py +0 -0
- {persidict-0.25.0 → persidict-0.30.0}/src/persidict/safe_str_tuple_signing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.25.0
|
|
3
|
+
Version: 0.30.0
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -161,8 +161,9 @@ that simultaneously work with the same instance of a dictionary.
|
|
|
161
161
|
`oldest_keys()`, `newest_values()`, `oldest_values()`,
|
|
162
162
|
`get_params()`, `get_metaparams()`, and `get_default_metaparams()`,
|
|
163
163
|
which are not available in native Python dicts.
|
|
164
|
-
* You can use NO_CHANGE constant as a fake new value
|
|
165
|
-
to avoid actually setting/updating a value.
|
|
164
|
+
* You can use KEEP_CURRENT constant as a fake new value
|
|
165
|
+
to avoid actually setting/updating a value. Or DELETE_CURRENT as
|
|
166
|
+
a fake new value to delete the previous value from a dictionary.
|
|
166
167
|
|
|
167
168
|
## Fine Tuning
|
|
168
169
|
|
|
@@ -128,8 +128,9 @@ that simultaneously work with the same instance of a dictionary.
|
|
|
128
128
|
`oldest_keys()`, `newest_values()`, `oldest_values()`,
|
|
129
129
|
`get_params()`, `get_metaparams()`, and `get_default_metaparams()`,
|
|
130
130
|
which are not available in native Python dicts.
|
|
131
|
-
* You can use NO_CHANGE constant as a fake new value
|
|
132
|
-
to avoid actually setting/updating a value.
|
|
131
|
+
* You can use KEEP_CURRENT constant as a fake new value
|
|
132
|
+
to avoid actually setting/updating a value. Or DELETE_CURRENT as
|
|
133
|
+
a fake new value to delete the previous value from a dictionary.
|
|
133
134
|
|
|
134
135
|
## Fine Tuning
|
|
135
136
|
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "persidict"
|
|
7
|
-
version = "0.25.0"
|
|
7
|
+
version = "0.30.0"
|
|
8
8
|
description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -24,7 +24,9 @@ and replace_unsafe_chars(), which replaces forbidden characters in a string.
|
|
|
24
24
|
"""
|
|
25
25
|
from .safe_chars import get_safe_chars, replace_unsafe_chars
|
|
26
26
|
from .safe_str_tuple import SafeStrTuple
|
|
27
|
-
from .persi_dict import PersiDict
|
|
27
|
+
from .persi_dict import PersiDict, PersiDictKey
|
|
28
28
|
from .file_dir_dict import FileDirDict
|
|
29
29
|
from .s3_dict import S3Dict
|
|
30
|
-
from .
|
|
30
|
+
from .write_once_dict import WriteOnceDict
|
|
31
|
+
from .jokers import Joker, KeepCurrentFlag, DeleteCurrentFlag
|
|
32
|
+
from .jokers import KEEP_CURRENT, DELETE_CURRENT
|
|
@@ -12,7 +12,6 @@ from __future__ import annotations
|
|
|
12
12
|
import os
|
|
13
13
|
import random
|
|
14
14
|
import time
|
|
15
|
-
from abc import abstractmethod
|
|
16
15
|
from typing import Any, Optional
|
|
17
16
|
|
|
18
17
|
import joblib
|
|
@@ -21,7 +20,7 @@ import jsonpickle.ext.numpy as jsonpickle_numpy
|
|
|
21
20
|
import jsonpickle.ext.pandas as jsonpickle_pandas
|
|
22
21
|
import parameterizable
|
|
23
22
|
|
|
24
|
-
from .nochange_const import NO_CHANGE
|
|
23
|
+
from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
|
|
25
24
|
from .safe_chars import replace_unsafe_chars
|
|
26
25
|
from .safe_str_tuple import SafeStrTuple
|
|
27
26
|
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
@@ -317,7 +316,11 @@ class FileDirDict(PersiDict):
|
|
|
317
316
|
def __setitem__(self, key:PersiDictKey, value:Any):
|
|
318
317
|
"""Set self[key] to value."""
|
|
319
318
|
|
|
320
|
-
if value is NO_CHANGE:
|
|
319
|
+
if value is KEEP_CURRENT:
|
|
320
|
+
return
|
|
321
|
+
|
|
322
|
+
if value is DELETE_CURRENT:
|
|
323
|
+
self.delete_if_exists(key)
|
|
321
324
|
return
|
|
322
325
|
|
|
323
326
|
if isinstance(value, PersiDict):
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""A singleton constant to indicate no change in a value.
|
|
2
|
+
|
|
3
|
+
When updating a value in a persistent dictionary,
|
|
4
|
+
use KEEP_CURRENT as the new value to indicate that
|
|
5
|
+
the existing value should remain unchanged.
|
|
6
|
+
"""
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from parameterizable import (
|
|
10
|
+
ParameterizableClass
|
|
11
|
+
, register_parameterizable_class)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Joker(ParameterizableClass):
|
|
15
|
+
_instances = {}
|
|
16
|
+
|
|
17
|
+
def get_params(self) -> dict[str, Any]:
|
|
18
|
+
return {}
|
|
19
|
+
|
|
20
|
+
def __new__(cls):
|
|
21
|
+
if cls not in Joker._instances:
|
|
22
|
+
Joker._instances[cls] = super().__new__(cls)
|
|
23
|
+
return Joker._instances[cls]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class KeepCurrentFlag(Joker):
|
|
27
|
+
"""A singleton constant to indicate no change in a value.
|
|
28
|
+
|
|
29
|
+
When updating a value in a persistent dictionary,
|
|
30
|
+
use KeepCurrent as the new value to indicate that
|
|
31
|
+
the existing value (if any) should remain unchanged.
|
|
32
|
+
"""
|
|
33
|
+
pass
|
|
34
|
+
|
|
35
|
+
class DeleteCurrentFlag(Joker):
|
|
36
|
+
"""A singleton constant to indicate that the current value should be deleted.
|
|
37
|
+
|
|
38
|
+
When updating a value in a persistent dictionary,
|
|
39
|
+
use DeleteCurrentFlag as the new value to indicate that
|
|
40
|
+
the existing value (if any) should be removed from the dictionary.
|
|
41
|
+
"""
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
register_parameterizable_class(KeepCurrentFlag)
|
|
45
|
+
register_parameterizable_class(DeleteCurrentFlag)
|
|
46
|
+
|
|
47
|
+
KeepCurrent = KeepCurrentFlag()
|
|
48
|
+
KEEP_CURRENT = KeepCurrentFlag()
|
|
49
|
+
|
|
50
|
+
DeleteCurrent = DeleteCurrentFlag()
|
|
51
|
+
DELETE_CURRENT = DeleteCurrentFlag()
|
|
@@ -26,7 +26,7 @@ from parameterizable import ParameterizableClass
|
|
|
26
26
|
from typing import Any, Sequence, Optional
|
|
27
27
|
from collections.abc import MutableMapping
|
|
28
28
|
|
|
29
|
-
from .nochange_const import NO_CHANGE
|
|
29
|
+
from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
|
|
30
30
|
from .safe_str_tuple import SafeStrTuple
|
|
31
31
|
|
|
32
32
|
PersiDictKey = SafeStrTuple | Sequence[str] | str
|
|
@@ -161,9 +161,11 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
161
161
|
|
|
162
162
|
def __setitem__(self, key:PersiDictKey, value:Any):
|
|
163
163
|
"""Set self[key] to value."""
|
|
164
|
-
if value is NO_CHANGE:
|
|
164
|
+
if value is KEEP_CURRENT:
|
|
165
165
|
return
|
|
166
|
-
|
|
166
|
+
elif value is DELETE_CURRENT:
|
|
167
|
+
self.delete_if_exists(key)
|
|
168
|
+
elif self.immutable_items:
|
|
167
169
|
if key in self:
|
|
168
170
|
raise KeyError("Can't modify an immutable key-value pair")
|
|
169
171
|
raise NotImplementedError
|
|
@@ -216,7 +218,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
216
218
|
"""
|
|
217
219
|
# TODO: check edge cases to ensure the same semantics as standard dicts
|
|
218
220
|
key = SafeStrTuple(key)
|
|
219
|
-
assert not default
|
|
221
|
+
assert not isinstance(default, Joker)
|
|
220
222
|
if key in self:
|
|
221
223
|
return self[key]
|
|
222
224
|
else:
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
-
from abc import abstractmethod
|
|
5
4
|
from typing import Any, Optional
|
|
6
5
|
|
|
7
6
|
import boto3
|
|
@@ -10,7 +9,7 @@ import parameterizable
|
|
|
10
9
|
from .safe_str_tuple import SafeStrTuple
|
|
11
10
|
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
12
11
|
from .persi_dict import PersiDict
|
|
13
|
-
from .nochange_const import NO_CHANGE
|
|
12
|
+
from .jokers import KEEP_CURRENT, DELETE_CURRENT
|
|
14
13
|
from .file_dir_dict import FileDirDict, PersiDictKey
|
|
15
14
|
|
|
16
15
|
S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
|
|
@@ -191,7 +190,11 @@ class S3Dict(PersiDict):
|
|
|
191
190
|
def __setitem__(self, key:PersiDictKey, value:Any):
|
|
192
191
|
"""Set self[key] to value. """
|
|
193
192
|
|
|
194
|
-
if value is NO_CHANGE:
|
|
193
|
+
if value is KEEP_CURRENT:
|
|
194
|
+
return
|
|
195
|
+
|
|
196
|
+
if value is DELETE_CURRENT:
|
|
197
|
+
self.delete_if_exists(key)
|
|
195
198
|
return
|
|
196
199
|
|
|
197
200
|
if isinstance(value, PersiDict):
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
from deepdiff import DeepDiff
|
|
6
|
+
from parameterizable import register_parameterizable_class
|
|
7
|
+
from .jokers import KEEP_CURRENT, KeepCurrentFlag
|
|
8
|
+
from .persi_dict import PersiDict
|
|
9
|
+
from .file_dir_dict import FileDirDict
|
|
10
|
+
import random
|
|
11
|
+
import sys
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import joblib.hashing
|
|
15
|
+
from .persi_dict import PersiDictKey
|
|
16
|
+
|
|
17
|
+
def _get_md5_signature(x:Any) -> str:
|
|
18
|
+
"""Return base16 MD5 hash signature of an object.
|
|
19
|
+
|
|
20
|
+
Uses joblib's Hasher (or NumpyHasher). It uses Pickle for serialization,
|
|
21
|
+
except for NumPy arrays, which use optimized custom routines.
|
|
22
|
+
"""
|
|
23
|
+
if 'numpy' in sys.modules:
|
|
24
|
+
hasher = joblib.hashing.NumpyHasher(hash_name='md5')
|
|
25
|
+
else:
|
|
26
|
+
hasher = joblib.hashing.Hasher(hash_name='md5')
|
|
27
|
+
hash_signature = hasher.hash(x)
|
|
28
|
+
return str(hash_signature)
|
|
29
|
+
|
|
30
|
+
class WriteOnceDict(PersiDict):
|
|
31
|
+
""" A dictionary that always keeps the first value assigned to a key.
|
|
32
|
+
|
|
33
|
+
If a key is already set, it randomly checks the value against the value
|
|
34
|
+
that was first set. If the new value is different, it raises a
|
|
35
|
+
ValueError exception. Once can control the frequency of these checks
|
|
36
|
+
or even completely disable them by setting `p_consistency_checks` to 0.
|
|
37
|
+
|
|
38
|
+
"""
|
|
39
|
+
_wrapped_dict: PersiDict
|
|
40
|
+
_p_consistency_checks: float | None
|
|
41
|
+
_consistency_checks_attempted: int
|
|
42
|
+
_consistency_checks_passed: int
|
|
43
|
+
|
|
44
|
+
def __init__(self
|
|
45
|
+
, wrapped_dict:PersiDict | None = None
|
|
46
|
+
, p_consistency_checks: float | None=None):
|
|
47
|
+
if wrapped_dict is None:
|
|
48
|
+
wrapped_dict = FileDirDict(immutable_items = True)
|
|
49
|
+
assert isinstance(wrapped_dict, PersiDict)
|
|
50
|
+
assert wrapped_dict.immutable_items == True
|
|
51
|
+
self.p_consistency_checks = p_consistency_checks
|
|
52
|
+
PersiDict.__init__(self
|
|
53
|
+
, base_class_for_values=wrapped_dict.base_class_for_values
|
|
54
|
+
, immutable_items=True
|
|
55
|
+
, digest_len=wrapped_dict.digest_len)
|
|
56
|
+
self._wrapped_dict = wrapped_dict
|
|
57
|
+
self._consistency_checks_passed = 0
|
|
58
|
+
self._consistency_checks_attempted = 0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def p_consistency_checks(self) -> float:
|
|
63
|
+
""" Probability of checking the value against the first value set. """
|
|
64
|
+
return self._p_consistency_checks
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@p_consistency_checks.setter
|
|
68
|
+
def p_consistency_checks(self, value: float|None|KeepCurrentFlag) -> None:
|
|
69
|
+
if value is KEEP_CURRENT:
|
|
70
|
+
if hasattr(self, '_p_consistency_checks'):
|
|
71
|
+
return
|
|
72
|
+
else:
|
|
73
|
+
raise ValueError(
|
|
74
|
+
f"KEEP_CURRENT can't be used to initialize p_consistency_checks.")
|
|
75
|
+
if value is None:
|
|
76
|
+
value = 0.0
|
|
77
|
+
if not (0 <= value <= 1):
|
|
78
|
+
raise ValueError(
|
|
79
|
+
f"p_consistency_checks must be in [0, 1], "
|
|
80
|
+
f"got {value}.")
|
|
81
|
+
self._p_consistency_checks = value
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
def consistency_checks_failed(self) -> int:
|
|
86
|
+
""" Returns the number of failed consistency checks. """
|
|
87
|
+
return self._consistency_checks_attempted - self._consistency_checks_passed
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@property
|
|
91
|
+
def consistency_checks_attempted(self) -> int:
|
|
92
|
+
""" Returns the number of attempted consistency checks. """
|
|
93
|
+
return self._consistency_checks_attempted
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@property
|
|
97
|
+
def consistency_checks_passed(self) -> int:
|
|
98
|
+
""" Returns the number of successful consistency checks. """
|
|
99
|
+
return self._consistency_checks_passed
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_params(self):
|
|
103
|
+
params = dict(
|
|
104
|
+
wrapped_dict = self._wrapped_dict,
|
|
105
|
+
p_consistency_checks = self.p_consistency_checks)
|
|
106
|
+
sorted_params = dict(sorted(params.items()))
|
|
107
|
+
return sorted_params
|
|
108
|
+
|
|
109
|
+
def __setitem__(self, key, value):
|
|
110
|
+
""" Set the value of a key if it is not already set.
|
|
111
|
+
|
|
112
|
+
If the key is already set, it checks the value
|
|
113
|
+
against the value that was first set.
|
|
114
|
+
"""
|
|
115
|
+
check_needed = False
|
|
116
|
+
|
|
117
|
+
try: # extra protections to better handle concurrent writes
|
|
118
|
+
if key in self._wrapped_dict:
|
|
119
|
+
check_needed = True
|
|
120
|
+
else:
|
|
121
|
+
self._wrapped_dict[key] = value
|
|
122
|
+
except:
|
|
123
|
+
time.sleep(random.random()/random.randint(1,5))
|
|
124
|
+
if key in self._wrapped_dict:
|
|
125
|
+
check_needed = True
|
|
126
|
+
else:
|
|
127
|
+
self._wrapped_dict[key] = value
|
|
128
|
+
|
|
129
|
+
if not key in self._wrapped_dict:
|
|
130
|
+
raise KeyError(
|
|
131
|
+
f"Key {key} was not set in the wrapped dict "
|
|
132
|
+
+ f"{self._wrapped_dict}. This should not happen.")
|
|
133
|
+
|
|
134
|
+
if check_needed and self.p_consistency_checks > 0:
|
|
135
|
+
if random.random() < self.p_consistency_checks:
|
|
136
|
+
self._consistency_checks_attempted += 1
|
|
137
|
+
signature_old = _get_md5_signature(self._wrapped_dict[key])
|
|
138
|
+
signature_new = _get_md5_signature(value)
|
|
139
|
+
if signature_old != signature_new:
|
|
140
|
+
diff_dict = DeepDiff(self._wrapped_dict[key], value)
|
|
141
|
+
raise ValueError(
|
|
142
|
+
f"Key {key} is already set "
|
|
143
|
+
+ f"to {self._wrapped_dict[key]} "
|
|
144
|
+
+ f"and the new value {value} is different, "
|
|
145
|
+
+ f"which is not allowed. Details here: {diff_dict} ")
|
|
146
|
+
self._consistency_checks_passed += 1
|
|
147
|
+
|
|
148
|
+
def __contains__(self, item):
|
|
149
|
+
return item in self._wrapped_dict
|
|
150
|
+
|
|
151
|
+
def __getitem__(self, key):
|
|
152
|
+
return self._wrapped_dict[key]
|
|
153
|
+
|
|
154
|
+
def __len__(self):
|
|
155
|
+
return len(self._wrapped_dict)
|
|
156
|
+
|
|
157
|
+
def _generic_iter(self, iter_type: str):
|
|
158
|
+
return self._wrapped_dict._generic_iter(iter_type)
|
|
159
|
+
|
|
160
|
+
def timestamp(self, key:PersiDictKey) -> float:
|
|
161
|
+
return self._wrapped_dict.timestamp(key)
|
|
162
|
+
|
|
163
|
+
def __getattr__(self, name):
|
|
164
|
+
# Forward attribute access to the wrapped object
|
|
165
|
+
return getattr(self._wrapped_dict, name)
|
|
166
|
+
|
|
167
|
+
@property
|
|
168
|
+
def base_dir(self):
|
|
169
|
+
return self._wrapped_dict.base_dir
|
|
170
|
+
|
|
171
|
+
@property
|
|
172
|
+
def base_url(self):
|
|
173
|
+
return self._wrapped_dict.base_url
|
|
174
|
+
|
|
175
|
+
def get_subdict(self, prefix_key:PersiDictKey) -> WriteOnceDict:
|
|
176
|
+
subdict = self._wrapped_dict.get_subdict(prefix_key)
|
|
177
|
+
result = WriteOnceDict(subdict, self.p_consistency_checks)
|
|
178
|
+
return result
|
|
179
|
+
|
|
180
|
+
register_parameterizable_class(WriteOnceDict)
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
"""A singleton constant to indicate no change in a value.
|
|
2
|
-
|
|
3
|
-
When updating a value in a persistent dictionary,
|
|
4
|
-
use NO_CHANGE as the new value to indicate that
|
|
5
|
-
the existing value should remain unchanged.
|
|
6
|
-
"""
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
from parameterizable import (
|
|
10
|
-
ParameterizableClass
|
|
11
|
-
, register_parameterizable_class)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class NoChangeFlag(ParameterizableClass):
|
|
15
|
-
_instance = None
|
|
16
|
-
|
|
17
|
-
def __new__(cls):
|
|
18
|
-
if cls._instance is None:
|
|
19
|
-
cls._instance = super().__new__(cls)
|
|
20
|
-
return cls._instance
|
|
21
|
-
|
|
22
|
-
def get_params(self) -> dict[str, Any]:
|
|
23
|
-
return {}
|
|
24
|
-
|
|
25
|
-
register_parameterizable_class(NoChangeFlag)
|
|
26
|
-
|
|
27
|
-
NoChange = NoChangeFlag()
|
|
28
|
-
NO_CHANGE = NoChangeFlag()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|