persidict 0.25.0__tar.gz → 0.30.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.25.0
3
+ Version: 0.30.0
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -161,8 +161,9 @@ that simultaneously work with the same instance of a dictionary.
161
161
  `oldest_keys()`, `newest_values()`, `oldest_values()`,
162
162
  `get_params()`, `get_metaparams()`, and `get_default_metaparams()`,
163
163
  which are not available in native Python dicts.
164
- * You can use NO_CHANGE constant as a fake new value
165
- to avoid actually setting/updating a value.
164
+ * You can use the KEEP_CURRENT constant as a fake new value
165
+ to avoid actually setting/updating a value, or the DELETE_CURRENT constant
166
+ as a fake new value to delete the current value from a dictionary.
166
167
 
167
168
  ## Fine Tuning
168
169
 
@@ -128,8 +128,9 @@ that simultaneously work with the same instance of a dictionary.
128
128
  `oldest_keys()`, `newest_values()`, `oldest_values()`,
129
129
  `get_params()`, `get_metaparams()`, and `get_default_metaparams()`,
130
130
  which are not available in native Python dicts.
131
- * You can use NO_CHANGE constant as a fake new value
132
- to avoid actually setting/updating a value.
131
+ * You can use the KEEP_CURRENT constant as a fake new value
132
+ to avoid actually setting/updating a value, or the DELETE_CURRENT constant
133
+ as a fake new value to delete the current value from a dictionary.
133
134
 
134
135
  ## Fine Tuning
135
136
 
@@ -4,7 +4,7 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "persidict"
7
- version = "0.25.0"
7
+ version = "0.30.0"
8
8
  description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -24,7 +24,9 @@ and replace_unsafe_chars(), which replaces forbidden characters in a string.
24
24
  """
25
25
  from .safe_chars import get_safe_chars, replace_unsafe_chars
26
26
  from .safe_str_tuple import SafeStrTuple
27
- from .persi_dict import PersiDict
27
+ from .persi_dict import PersiDict, PersiDictKey
28
28
  from .file_dir_dict import FileDirDict
29
29
  from .s3_dict import S3Dict
30
- from .nochange_const import *
30
+ from .write_once_dict import WriteOnceDict
31
+ from .jokers import Joker, KeepCurrentFlag, DeleteCurrentFlag
32
+ from .jokers import KEEP_CURRENT, DELETE_CURRENT
@@ -12,7 +12,6 @@ from __future__ import annotations
12
12
  import os
13
13
  import random
14
14
  import time
15
- from abc import abstractmethod
16
15
  from typing import Any, Optional
17
16
 
18
17
  import joblib
@@ -21,7 +20,7 @@ import jsonpickle.ext.numpy as jsonpickle_numpy
21
20
  import jsonpickle.ext.pandas as jsonpickle_pandas
22
21
  import parameterizable
23
22
 
24
- from .nochange_const import NO_CHANGE
23
+ from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
25
24
  from .safe_chars import replace_unsafe_chars
26
25
  from .safe_str_tuple import SafeStrTuple
27
26
  from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
@@ -317,7 +316,11 @@ class FileDirDict(PersiDict):
317
316
  def __setitem__(self, key:PersiDictKey, value:Any):
318
317
  """Set self[key] to value."""
319
318
 
320
- if value is NO_CHANGE:
319
+ if value is KEEP_CURRENT:
320
+ return
321
+
322
+ if value is DELETE_CURRENT:
323
+ self.delete_if_exists(key)
321
324
  return
322
325
 
323
326
  if isinstance(value, PersiDict):
@@ -0,0 +1,51 @@
1
+ """A singleton constant to indicate no change in a value.
2
+
3
+ When updating a value in a persistent dictionary,
4
+ use KEEP_CURRENT as the new value to indicate that
5
+ the existing value should remain unchanged.
6
+ """
7
+ from typing import Any
8
+
9
+ from parameterizable import (
10
+ ParameterizableClass
11
+ , register_parameterizable_class)
12
+
13
+
14
+ class Joker(ParameterizableClass):
15
+ _instances = {}
16
+
17
+ def get_params(self) -> dict[str, Any]:
18
+ return {}
19
+
20
+ def __new__(cls):
21
+ if cls not in Joker._instances:
22
+ Joker._instances[cls] = super().__new__(cls)
23
+ return Joker._instances[cls]
24
+
25
+
26
+ class KeepCurrentFlag(Joker):
27
+ """A singleton constant to indicate no change in a value.
28
+
29
+ When updating a value in a persistent dictionary,
30
+ use KeepCurrent as the new value to indicate that
31
+ the existing value (if any) should remain unchanged.
32
+ """
33
+ pass
34
+
35
+ class DeleteCurrentFlag(Joker):
36
+ """A singleton constant to indicate that the current value should be deleted.
37
+
38
+ When updating a value in a persistent dictionary,
39
+ use DELETE_CURRENT as the new value to indicate that
40
+ the existing value (if any) should be removed from the dictionary.
41
+ """
42
+ pass
43
+
44
+ register_parameterizable_class(KeepCurrentFlag)
45
+ register_parameterizable_class(DeleteCurrentFlag)
46
+
47
+ KeepCurrent = KeepCurrentFlag()
48
+ KEEP_CURRENT = KeepCurrentFlag()
49
+
50
+ DeleteCurrent = DeleteCurrentFlag()
51
+ DELETE_CURRENT = DeleteCurrentFlag()
@@ -26,7 +26,7 @@ from parameterizable import ParameterizableClass
26
26
  from typing import Any, Sequence, Optional
27
27
  from collections.abc import MutableMapping
28
28
 
29
- from .nochange_const import NO_CHANGE
29
+ from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
30
30
  from .safe_str_tuple import SafeStrTuple
31
31
 
32
32
  PersiDictKey = SafeStrTuple | Sequence[str] | str
@@ -161,9 +161,11 @@ class PersiDict(MutableMapping, ParameterizableClass):
161
161
 
162
162
  def __setitem__(self, key:PersiDictKey, value:Any):
163
163
  """Set self[key] to value."""
164
- if value is NO_CHANGE:
164
+ if value is KEEP_CURRENT:
165
165
  return
166
- if self.immutable_items:
166
+ elif value is DELETE_CURRENT:
167
+ self.delete_if_exists(key)
168
+ elif self.immutable_items:
167
169
  if key in self:
168
170
  raise KeyError("Can't modify an immutable key-value pair")
169
171
  raise NotImplementedError
@@ -216,7 +218,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
216
218
  """
217
219
  # TODO: check edge cases to ensure the same semantics as standard dicts
218
220
  key = SafeStrTuple(key)
219
- assert not default is NO_CHANGE
221
+ assert not isinstance(default, Joker)
220
222
  if key in self:
221
223
  return self[key]
222
224
  else:
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
- from abc import abstractmethod
5
4
  from typing import Any, Optional
6
5
 
7
6
  import boto3
@@ -10,7 +9,7 @@ import parameterizable
10
9
  from .safe_str_tuple import SafeStrTuple
11
10
  from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
12
11
  from .persi_dict import PersiDict
13
- from .nochange_const import NO_CHANGE
12
+ from .jokers import KEEP_CURRENT, DELETE_CURRENT
14
13
  from .file_dir_dict import FileDirDict, PersiDictKey
15
14
 
16
15
  S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
@@ -191,7 +190,11 @@ class S3Dict(PersiDict):
191
190
  def __setitem__(self, key:PersiDictKey, value:Any):
192
191
  """Set self[key] to value. """
193
192
 
194
- if value is NO_CHANGE:
193
+ if value is KEEP_CURRENT:
194
+ return
195
+
196
+ if value is DELETE_CURRENT:
197
+ self.delete_if_exists(key)
195
198
  return
196
199
 
197
200
  if isinstance(value, PersiDict):
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+
5
+ from deepdiff import DeepDiff
6
+ from parameterizable import register_parameterizable_class
7
+ from .jokers import KEEP_CURRENT, KeepCurrentFlag
8
+ from .persi_dict import PersiDict
9
+ from .file_dir_dict import FileDirDict
10
+ import random
11
+ import sys
12
+ from typing import Any
13
+
14
+ import joblib.hashing
15
+ from .persi_dict import PersiDictKey
16
+
17
+ def _get_md5_signature(x:Any) -> str:
18
+ """Return base16 MD5 hash signature of an object.
19
+
20
+ Uses joblib's Hasher (or NumpyHasher). It uses Pickle for serialization,
21
+ except for NumPy arrays, which use optimized custom routines.
22
+ """
23
+ if 'numpy' in sys.modules:
24
+ hasher = joblib.hashing.NumpyHasher(hash_name='md5')
25
+ else:
26
+ hasher = joblib.hashing.Hasher(hash_name='md5')
27
+ hash_signature = hasher.hash(x)
28
+ return str(hash_signature)
29
+
30
+ class WriteOnceDict(PersiDict):
31
+ """ A dictionary that always keeps the first value assigned to a key.
32
+
33
+ If a key is already set, it randomly checks the value against the value
34
+ that was first set. If the new value is different, it raises a
35
+ ValueError exception. One can control the frequency of these checks
36
+ or even completely disable them by setting `p_consistency_checks` to 0.
37
+
38
+ """
39
+ _wrapped_dict: PersiDict
40
+ _p_consistency_checks: float | None
41
+ _consistency_checks_attempted: int
42
+ _consistency_checks_passed: int
43
+
44
+ def __init__(self
45
+ , wrapped_dict:PersiDict | None = None
46
+ , p_consistency_checks: float | None=None):
47
+ if wrapped_dict is None:
48
+ wrapped_dict = FileDirDict(immutable_items = True)
49
+ assert isinstance(wrapped_dict, PersiDict)
50
+ assert wrapped_dict.immutable_items == True
51
+ self.p_consistency_checks = p_consistency_checks
52
+ PersiDict.__init__(self
53
+ , base_class_for_values=wrapped_dict.base_class_for_values
54
+ , immutable_items=True
55
+ , digest_len=wrapped_dict.digest_len)
56
+ self._wrapped_dict = wrapped_dict
57
+ self._consistency_checks_passed = 0
58
+ self._consistency_checks_attempted = 0
59
+
60
+
61
+ @property
62
+ def p_consistency_checks(self) -> float:
63
+ """ Probability of checking the value against the first value set. """
64
+ return self._p_consistency_checks
65
+
66
+
67
+ @p_consistency_checks.setter
68
+ def p_consistency_checks(self, value: float|None|KeepCurrentFlag) -> None:
69
+ if value is KEEP_CURRENT:
70
+ if hasattr(self, '_p_consistency_checks'):
71
+ return
72
+ else:
73
+ raise ValueError(
74
+ f"KEEP_CURRENT can't be used to initialize p_consistency_checks.")
75
+ if value is None:
76
+ value = 0.0
77
+ if not (0 <= value <= 1):
78
+ raise ValueError(
79
+ f"p_consistency_checks must be in [0, 1], "
80
+ f"got {value}.")
81
+ self._p_consistency_checks = value
82
+
83
+
84
+ @property
85
+ def consistency_checks_failed(self) -> int:
86
+ """ Returns the number of failed consistency checks. """
87
+ return self._consistency_checks_attempted - self._consistency_checks_passed
88
+
89
+
90
+ @property
91
+ def consistency_checks_attempted(self) -> int:
92
+ """ Returns the number of attempted consistency checks. """
93
+ return self._consistency_checks_attempted
94
+
95
+
96
+ @property
97
+ def consistency_checks_passed(self) -> int:
98
+ """ Returns the number of successful consistency checks. """
99
+ return self._consistency_checks_passed
100
+
101
+
102
+ def get_params(self):
103
+ params = dict(
104
+ wrapped_dict = self._wrapped_dict,
105
+ p_consistency_checks = self.p_consistency_checks)
106
+ sorted_params = dict(sorted(params.items()))
107
+ return sorted_params
108
+
109
+ def __setitem__(self, key, value):
110
+ """ Set the value of a key if it is not already set.
111
+
112
+ If the key is already set, it checks the value
113
+ against the value that was first set.
114
+ """
115
+ check_needed = False
116
+
117
+ try: # extra protections to better handle concurrent writes
118
+ if key in self._wrapped_dict:
119
+ check_needed = True
120
+ else:
121
+ self._wrapped_dict[key] = value
122
+ except:
123
+ time.sleep(random.random()/random.randint(1,5))
124
+ if key in self._wrapped_dict:
125
+ check_needed = True
126
+ else:
127
+ self._wrapped_dict[key] = value
128
+
129
+ if not key in self._wrapped_dict:
130
+ raise KeyError(
131
+ f"Key {key} was not set in the wrapped dict "
132
+ + f"{self._wrapped_dict}. This should not happen.")
133
+
134
+ if check_needed and self.p_consistency_checks > 0:
135
+ if random.random() < self.p_consistency_checks:
136
+ self._consistency_checks_attempted += 1
137
+ signature_old = _get_md5_signature(self._wrapped_dict[key])
138
+ signature_new = _get_md5_signature(value)
139
+ if signature_old != signature_new:
140
+ diff_dict = DeepDiff(self._wrapped_dict[key], value)
141
+ raise ValueError(
142
+ f"Key {key} is already set "
143
+ + f"to {self._wrapped_dict[key]} "
144
+ + f"and the new value {value} is different, "
145
+ + f"which is not allowed. Details here: {diff_dict} ")
146
+ self._consistency_checks_passed += 1
147
+
148
+ def __contains__(self, item):
149
+ return item in self._wrapped_dict
150
+
151
+ def __getitem__(self, key):
152
+ return self._wrapped_dict[key]
153
+
154
+ def __len__(self):
155
+ return len(self._wrapped_dict)
156
+
157
+ def _generic_iter(self, iter_type: str):
158
+ return self._wrapped_dict._generic_iter(iter_type)
159
+
160
+ def timestamp(self, key:PersiDictKey) -> float:
161
+ return self._wrapped_dict.timestamp(key)
162
+
163
+ def __getattr__(self, name):
164
+ # Forward attribute access to the wrapped object
165
+ return getattr(self._wrapped_dict, name)
166
+
167
+ @property
168
+ def base_dir(self):
169
+ return self._wrapped_dict.base_dir
170
+
171
+ @property
172
+ def base_url(self):
173
+ return self._wrapped_dict.base_url
174
+
175
+ def get_subdict(self, prefix_key:PersiDictKey) -> WriteOnceDict:
176
+ subdict = self._wrapped_dict.get_subdict(prefix_key)
177
+ result = WriteOnceDict(subdict, self.p_consistency_checks)
178
+ return result
179
+
180
+ register_parameterizable_class(WriteOnceDict)
@@ -1,28 +0,0 @@
1
- """A singleton constant to indicate no change in a value.
2
-
3
- When updating a value in a persistent dictionary,
4
- use NO_CHANGE as the new value to indicate that
5
- the existing value should remain unchanged.
6
- """
7
- from typing import Any
8
-
9
- from parameterizable import (
10
- ParameterizableClass
11
- , register_parameterizable_class)
12
-
13
-
14
- class NoChangeFlag(ParameterizableClass):
15
- _instance = None
16
-
17
- def __new__(cls):
18
- if cls._instance is None:
19
- cls._instance = super().__new__(cls)
20
- return cls._instance
21
-
22
- def get_params(self) -> dict[str, Any]:
23
- return {}
24
-
25
- register_parameterizable_class(NoChangeFlag)
26
-
27
- NoChange = NoChangeFlag()
28
- NO_CHANGE = NoChangeFlag()