persidict 0.26.0__tar.gz → 0.31.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.26.0
3
+ Version: 0.31.0
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -4,7 +4,7 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "persidict"
7
- version = "0.26.0"
7
+ version = "0.31.0"
8
8
  description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -24,8 +24,10 @@ and replace_unsafe_chars(), which replaces forbidden characters in a string.
24
24
  """
25
25
  from .safe_chars import get_safe_chars, replace_unsafe_chars
26
26
  from .safe_str_tuple import SafeStrTuple
27
- from .persi_dict import PersiDict
27
+ from .persi_dict import PersiDict, PersiDictKey
28
28
  from .file_dir_dict import FileDirDict
29
29
  from .s3_dict import S3Dict
30
+ from .write_once_dict import WriteOnceDict
30
31
  from .jokers import Joker, KeepCurrentFlag, DeleteCurrentFlag
31
- from .jokers import KEEP_CURRENT, DELETE_CURRENT
32
+ from .jokers import KEEP_CURRENT, DELETE_CURRENT
33
+ from .overlapping_multi_dict import OverlappingMultiDict
@@ -0,0 +1,57 @@
1
+ from .persi_dict import PersiDict
2
+
3
class OverlappingMultiDict:
    """A container for several PersiDict objects with different file_type-s.

    Every inner dict is created from the same ``dict_type`` and receives the
    same ``shared_subdicts_params`` (for example, a common directory or
    S3 bucket), so the inner dicts differ only in their ``file_type`` and in
    whatever is passed for them individually. Each inner dict is exposed as
    an attribute of the OverlappingMultiDict object; the attribute name is
    the same as the ``file_type`` of that inner dict.

    This makes it possible to manage several types of data that live side
    by side in a single file directory or in an S3 bucket.

    The container itself is not a mapping: item access, assignment and
    deletion by key raise TypeError, and so does pickling.
    """

    def __init__(self,
                 dict_type: type,
                 shared_subdicts_params: dict,
                 **individual_subdicts_params):
        """Create one inner dict per keyword argument.

        Args:
            dict_type: a PersiDict subclass used to build every inner dict.
            shared_subdicts_params: constructor keyword arguments passed to
                every inner dict.
            **individual_subdicts_params: maps an attribute name (which is
                also used as the ``file_type``) to a dict of extra
                constructor keyword arguments for that inner dict.

        Raises:
            AssertionError: if ``dict_type`` is not a PersiDict subclass or
                any of the parameter containers is not a dict.
        """
        assert issubclass(dict_type, PersiDict)
        assert isinstance(shared_subdicts_params, dict)
        self.dict_type = dict_type
        self.shared_subdicts_params = shared_subdicts_params
        self.individual_subdicts_params = individual_subdicts_params
        self.subdicts_names = list(individual_subdicts_params.keys())
        for subdict_name, own_params in individual_subdicts_params.items():
            assert isinstance(own_params, dict)
            # Precedence (lowest to highest): shared params, individual
            # params, and finally file_type, which is always forced to be
            # the attribute name.
            self.__dict__[subdict_name] = dict_type(
                **{**shared_subdicts_params,
                   **own_params,
                   "file_type": subdict_name})

    def __getstate__(self):
        """Refuse to be pickled."""
        raise TypeError("OverlappingMultiDict cannot be pickled.")

    def __setstate__(self, state):
        """Refuse to be restored from a pickle."""
        raise TypeError("OverlappingMultiDict cannot be pickled.")

    def __getitem__(self, key):
        """Always raise TypeError; use the inner dicts instead."""
        raise TypeError(
            "OverlappingMultiDict does not support item access by key. "
            "Individual items should be accessed through nested dicts, "
            f"which are available via attributes {self.subdicts_names}")

    def __setitem__(self, key, value):
        """Always raise TypeError; use the inner dicts instead."""
        raise TypeError(
            "OverlappingMultiDict does not support item assignment by key. "
            "Individual items should be accessed through nested dicts, "
            f"which are available via attributes {self.subdicts_names}")

    def __delitem__(self, key):
        """Always raise TypeError; use the inner dicts instead."""
        raise TypeError(
            "OverlappingMultiDict does not support item deletion by key. "
            "Individual items can be deleted through nested dicts, "
            f"which are available via attributes {self.subdicts_names}")
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+
5
+ from deepdiff import DeepDiff
6
+ from parameterizable import register_parameterizable_class
7
+ from .jokers import KEEP_CURRENT, KeepCurrentFlag
8
+ from .persi_dict import PersiDict
9
+ from .file_dir_dict import FileDirDict
10
+ import random
11
+ import sys
12
+ from typing import Any
13
+
14
+ import joblib.hashing
15
+ from .persi_dict import PersiDictKey
16
+
17
def _get_md5_signature(x: Any) -> str:
    """Return the MD5 hash signature of an object as a string.

    The hashing is delegated to joblib: NumpyHasher is used when NumPy has
    already been imported by the process, and the plain Hasher otherwise.
    Per joblib's design, objects are serialized with Pickle, except for
    NumPy arrays, which use optimized custom routines.
    """
    # Only touch NumpyHasher when numpy is actually loaded.
    hasher_factory = (joblib.hashing.NumpyHasher
                      if 'numpy' in sys.modules
                      else joblib.hashing.Hasher)
    return str(hasher_factory(hash_name='md5').hash(x))
29
+
30
class WriteOnceDict(PersiDict):
    """A dictionary that always keeps the first value assigned to a key.

    All data lives in a wrapped PersiDict whose items are immutable.
    If a key is already set, an assignment does not overwrite it; instead,
    with probability `p_consistency_checks`, the new value is compared
    against the value that was first set. If the two differ, a ValueError
    is raised. One can control the frequency of these checks or even
    completely disable them by setting `p_consistency_checks` to 0.
    """
    _wrapped_dict: PersiDict
    # The setter normalizes None to 0.0, so the stored value is a float.
    _p_consistency_checks: float
    _consistency_checks_attempted: int
    _consistency_checks_passed: int

    def __init__(self,
                 wrapped_dict: PersiDict | None = None,
                 p_consistency_checks: float | None = None):
        """Wrap an immutable-items PersiDict.

        Args:
            wrapped_dict: the dict that actually stores the data; it must
                have immutable_items == True. When omitted, a
                FileDirDict(immutable_items=True) is created.
            p_consistency_checks: probability in [0, 1] of verifying a
                repeated assignment; None means 0 (checks disabled).

        Raises:
            AssertionError: if wrapped_dict is not a PersiDict with
                immutable items.
            ValueError: if p_consistency_checks is out of range or is
                KEEP_CURRENT (there is no current value to keep yet).
        """
        if wrapped_dict is None:
            wrapped_dict = FileDirDict(immutable_items=True)
        assert isinstance(wrapped_dict, PersiDict)
        assert wrapped_dict.immutable_items == True
        self.p_consistency_checks = p_consistency_checks
        PersiDict.__init__(
            self,
            base_class_for_values=wrapped_dict.base_class_for_values,
            immutable_items=True,
            digest_len=wrapped_dict.digest_len)
        self._wrapped_dict = wrapped_dict
        self._consistency_checks_passed = 0
        self._consistency_checks_attempted = 0

    @property
    def p_consistency_checks(self) -> float:
        """Probability of checking the value against the first value set."""
        return self._p_consistency_checks

    @p_consistency_checks.setter
    def p_consistency_checks(self, value: float | None | KeepCurrentFlag) -> None:
        if value is KEEP_CURRENT:
            # Look at the instance dict directly: hasattr() would go through
            # __getattr__, which forwards to the wrapped dict and could
            # report an attribute that this object does not own.
            if '_p_consistency_checks' in self.__dict__:
                return
            raise ValueError(
                "KEEP_CURRENT can't be used to initialize p_consistency_checks.")
        if value is None:
            value = 0.0
        if not (0 <= value <= 1):
            raise ValueError(
                f"p_consistency_checks must be in [0, 1], "
                f"got {value}.")
        self._p_consistency_checks = value

    @property
    def consistency_checks_failed(self) -> int:
        """Number of consistency checks that were attempted but not passed."""
        return self._consistency_checks_attempted - self._consistency_checks_passed

    @property
    def consistency_checks_attempted(self) -> int:
        """Number of attempted consistency checks."""
        return self._consistency_checks_attempted

    @property
    def consistency_checks_passed(self) -> int:
        """Number of successful consistency checks."""
        return self._consistency_checks_passed

    def get_params(self):
        """Return the constructor parameters as a dict sorted by name."""
        params = dict(
            wrapped_dict=self._wrapped_dict,
            p_consistency_checks=self.p_consistency_checks)
        return dict(sorted(params.items()))

    def __setitem__(self, key, value):
        """Set the value of a key if it is not already set.

        If the key is already set, the stored value is kept and, with
        probability p_consistency_checks, compared with the new value.

        Raises:
            KeyError: if the key is still absent after the write attempt.
            ValueError: if a consistency check finds that the new value
                differs from the stored one.
        """
        check_needed = False

        try:  # extra protections to better handle concurrent writes
            if key in self._wrapped_dict:
                check_needed = True
            else:
                self._wrapped_dict[key] = value
        except Exception:
            # Presumably another writer created the key between the check
            # and the write; back off for a random moment and retry once.
            # Exception (not a bare except) keeps KeyboardInterrupt and
            # SystemExit from being swallowed and retried.
            time.sleep(random.random() / random.randint(1, 5))
            if key in self._wrapped_dict:
                check_needed = True
            else:
                self._wrapped_dict[key] = value

        if key not in self._wrapped_dict:
            raise KeyError(
                f"Key {key} was not set in the wrapped dict "
                + f"{self._wrapped_dict}. This should not happen.")

        if check_needed and self.p_consistency_checks > 0:
            if random.random() < self.p_consistency_checks:
                self._consistency_checks_attempted += 1
                # Read the stored value once and reuse it for hashing,
                # diffing and the error message.
                old_value = self._wrapped_dict[key]
                signature_old = _get_md5_signature(old_value)
                signature_new = _get_md5_signature(value)
                if signature_old != signature_new:
                    diff_dict = DeepDiff(old_value, value)
                    raise ValueError(
                        f"Key {key} is already set "
                        + f"to {old_value} "
                        + f"and the new value {value} is different, "
                        + f"which is not allowed. Details here: {diff_dict} ")
                self._consistency_checks_passed += 1

    def __contains__(self, item):
        """Report whether the wrapped dict contains the key."""
        return item in self._wrapped_dict

    def __getitem__(self, key):
        """Return the value stored in the wrapped dict for the key."""
        return self._wrapped_dict[key]

    def __len__(self):
        """Return the number of items in the wrapped dict."""
        return len(self._wrapped_dict)

    def _generic_iter(self, iter_type: str):
        """Delegate iteration to the wrapped dict."""
        return self._wrapped_dict._generic_iter(iter_type)

    def timestamp(self, key: PersiDictKey) -> float:
        """Return the timestamp the wrapped dict reports for the key."""
        return self._wrapped_dict.timestamp(key)

    def __getattr__(self, name):
        """Forward access to unknown attributes to the wrapped dict.

        Python calls this only when normal attribute lookup fails.
        """
        # Until __init__ assigns _wrapped_dict, reading self._wrapped_dict
        # would re-enter this method forever; fail with AttributeError so
        # that hasattr()/getattr() with a default behave normally.
        if name == '_wrapped_dict':
            raise AttributeError(name)
        return getattr(self._wrapped_dict, name)

    @property
    def base_dir(self):
        """The base_dir of the wrapped dict."""
        return self._wrapped_dict.base_dir

    @property
    def base_url(self):
        """The base_url of the wrapped dict."""
        return self._wrapped_dict.base_url

    def get_subdict(self, prefix_key: PersiDictKey) -> WriteOnceDict:
        """Return a WriteOnceDict over the wrapped dict's subdict.

        The result uses the same p_consistency_checks as this object; its
        check counters start from zero.
        """
        subdict = self._wrapped_dict.get_subdict(prefix_key)
        return WriteOnceDict(subdict, self.p_consistency_checks)


# Register the class with the `parameterizable` package
# (presumably so instances can be described via get_params() — confirm).
register_parameterizable_class(WriteOnceDict)
File without changes