persidict 0.38.0__py3-none-any.whl → 0.103.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- persidict/__init__.py +41 -24
- persidict/basic_s3_dict.py +595 -0
- persidict/cached_appendonly_dict.py +247 -0
- persidict/cached_mutable_dict.py +248 -0
- persidict/empty_dict.py +171 -0
- persidict/file_dir_dict.py +130 -122
- persidict/local_dict.py +502 -0
- persidict/overlapping_multi_dict.py +23 -15
- persidict/persi_dict.py +281 -148
- persidict/s3_dict_file_dir_cached.py +215 -0
- persidict/{s3_dict.py → s3_dict_legacy.py} +111 -90
- persidict/safe_chars.py +13 -0
- persidict/safe_str_tuple.py +28 -6
- persidict/singletons.py +232 -0
- persidict/write_once_dict.py +47 -30
- {persidict-0.38.0.dist-info → persidict-0.103.0.dist-info}/METADATA +34 -24
- persidict-0.103.0.dist-info/RECORD +19 -0
- {persidict-0.38.0.dist-info → persidict-0.103.0.dist-info}/WHEEL +1 -1
- persidict/.DS_Store +0 -0
- persidict/jokers.py +0 -99
- persidict-0.38.0.dist-info/RECORD +0 -14
persidict/safe_chars.py
CHANGED
|
@@ -43,3 +43,16 @@ def replace_unsafe_chars(a_str: str, replace_with: str) -> str:
|
|
|
43
43
|
result_list = [(c if c in safe_chars else replace_with) for c in a_str]
|
|
44
44
|
result_str = "".join(result_list)
|
|
45
45
|
return result_str
|
|
46
|
+
|
|
47
|
+
def contains_unsafe_chars(a_str: str) -> bool:
|
|
48
|
+
"""Check if a string contains unsafe characters.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
a_str (str): Input string to check for unsafe characters.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
bool: True if the string contains any character not in the safe
|
|
55
|
+
character set, False otherwise.
|
|
56
|
+
"""
|
|
57
|
+
safe_chars = get_safe_chars()
|
|
58
|
+
return any(c not in safe_chars for c in a_str)
|
persidict/safe_str_tuple.py
CHANGED
|
@@ -42,7 +42,7 @@ def _is_sequence_not_mapping(obj: Any) -> bool:
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
class SafeStrTuple(Sequence, Hashable):
|
|
45
|
-
"""An immutable sequence of
|
|
45
|
+
"""An immutable sequence of URL/filename-safe strings.
|
|
46
46
|
|
|
47
47
|
The sequence is flat (no nested structures) and hashable, making it suitable
|
|
48
48
|
for use as a dictionary key. All strings are validated to contain only
|
|
@@ -55,7 +55,7 @@ class SafeStrTuple(Sequence, Hashable):
|
|
|
55
55
|
def __init__(self, *args, **kwargs):
|
|
56
56
|
"""Initialize from strings or nested sequences of strings.
|
|
57
57
|
|
|
58
|
-
The constructor accepts
|
|
58
|
+
The constructor accepts zero or more arguments which may be:
|
|
59
59
|
- a SafeStrTuple
|
|
60
60
|
- a single string
|
|
61
61
|
- a sequence (list/tuple/etc.) containing any of the above recursively
|
|
@@ -66,7 +66,7 @@ class SafeStrTuple(Sequence, Hashable):
|
|
|
66
66
|
SAFE_STRING_MAX_LENGTH.
|
|
67
67
|
|
|
68
68
|
Args:
|
|
69
|
-
*args:
|
|
69
|
+
*args: Zero or more inputs (strings, sequences, or SafeStrTuple) that
|
|
70
70
|
will be flattened into a tuple of safe strings.
|
|
71
71
|
**kwargs: Not supported.
|
|
72
72
|
|
|
@@ -78,8 +78,6 @@ class SafeStrTuple(Sequence, Hashable):
|
|
|
78
78
|
"""
|
|
79
79
|
if len(kwargs) != 0:
|
|
80
80
|
raise TypeError(f"Unexpected keyword arguments: {list(kwargs.keys())}")
|
|
81
|
-
if len(args) == 0:
|
|
82
|
-
raise TypeError("At least one argument is required")
|
|
83
81
|
candidate_strings = []
|
|
84
82
|
for a in args:
|
|
85
83
|
if isinstance(a, SafeStrTuple):
|
|
@@ -213,4 +211,28 @@ class SafeStrTuple(Sequence, Hashable):
|
|
|
213
211
|
Returns:
|
|
214
212
|
SafeStrTuple: A new instance with elements in reverse order.
|
|
215
213
|
"""
|
|
216
|
-
return SafeStrTuple(*reversed(self.strings))
|
|
214
|
+
return SafeStrTuple(*reversed(self.strings))
|
|
215
|
+
|
|
216
|
+
class NonEmptySafeStrTuple(SafeStrTuple):
|
|
217
|
+
"""A SafeStrTuple that must contain at least one string.
|
|
218
|
+
|
|
219
|
+
This subclass enforces that the tuple is non-empty.
|
|
220
|
+
"""
|
|
221
|
+
|
|
222
|
+
def __init__(self, *args, **kwargs):
|
|
223
|
+
"""Initialize and enforce non-empty constraint.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
*args: One or more inputs (strings, sequences, or SafeStrTuple) that
|
|
227
|
+
will be flattened into a tuple of safe strings.
|
|
228
|
+
**kwargs: Not supported.
|
|
229
|
+
|
|
230
|
+
Raises:
|
|
231
|
+
TypeError: If unexpected keyword arguments are provided, if no args
|
|
232
|
+
are provided, or if an argument has an invalid type.
|
|
233
|
+
ValueError: If a string is empty, too long, contains disallowed
|
|
234
|
+
characters, or if the resulting tuple is empty.
|
|
235
|
+
"""
|
|
236
|
+
super().__init__(*args, **kwargs)
|
|
237
|
+
if len(self.strings) == 0:
|
|
238
|
+
raise ValueError("NonEmptySafeStrTuple must contain at least one string")
|
persidict/singletons.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Special singleton markers used to modify values in PersiDict without data payload.
|
|
2
|
+
|
|
3
|
+
This module defines two singleton flags used as "joker" values when writing to
|
|
4
|
+
persistent dictionaries:
|
|
5
|
+
|
|
6
|
+
- KEEP_CURRENT: keep the current value unchanged.
|
|
7
|
+
- DELETE_CURRENT: delete the current value if it exists.
|
|
8
|
+
|
|
9
|
+
These flags are intended to be passed as the value part in dict-style
|
|
10
|
+
assignments (e.g., d[key] = KEEP_CURRENT) and are interpreted by PersiDict
|
|
11
|
+
implementations.
|
|
12
|
+
|
|
13
|
+
Examples:
|
|
14
|
+
>>> from persidict.singletons import KEEP_CURRENT, DELETE_CURRENT
|
|
15
|
+
>>> d[key] = KEEP_CURRENT # Do not alter existing value
|
|
16
|
+
>>> d[key] = DELETE_CURRENT # Remove key if present
|
|
17
|
+
"""
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from parameterizable import (
|
|
21
|
+
ParameterizableClass
|
|
22
|
+
, register_parameterizable_class)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Singleton(ParameterizableClass):
|
|
26
|
+
"""Base class for singleton classes.
|
|
27
|
+
|
|
28
|
+
This class implements a singleton pattern where each subclass maintains
|
|
29
|
+
exactly one instance that is returned on every instantiation.
|
|
30
|
+
"""
|
|
31
|
+
_instances: dict[type, "Singleton"] = {}
|
|
32
|
+
|
|
33
|
+
def get_params(self) -> dict[str, Any]:
|
|
34
|
+
"""Return parameters for parameterizable API.
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
dict[str, Any]: Always an empty dict, since singleton flags carry no parameters.
|
|
38
|
+
"""
|
|
39
|
+
return {}
|
|
40
|
+
|
|
41
|
+
def __new__(cls):
|
|
42
|
+
"""Create or return the singleton instance for the subclass.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
cls: The class for which to create or retrieve the singleton instance.
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
Singleton: The singleton instance for the specified class.
|
|
49
|
+
"""
|
|
50
|
+
if cls not in Singleton._instances:
|
|
51
|
+
Singleton._instances[cls] = super().__new__(cls)
|
|
52
|
+
return Singleton._instances[cls]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Joker(Singleton):
|
|
56
|
+
"""Base class for joker flags.
|
|
57
|
+
|
|
58
|
+
Subclasses represent value-less commands that
|
|
59
|
+
alter persistence behavior when assigned to a key.
|
|
60
|
+
"""
|
|
61
|
+
pass
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class KeepCurrentFlag(Joker):
|
|
65
|
+
"""Flag instructing PersiDict to keep the current value unchanged.
|
|
66
|
+
|
|
67
|
+
Usage:
|
|
68
|
+
Assign this flag instead of a real value to indicate that an existing
|
|
69
|
+
value should not be modified.
|
|
70
|
+
|
|
71
|
+
Examples:
|
|
72
|
+
>>> d[key] = KEEP_CURRENT
|
|
73
|
+
|
|
74
|
+
Note:
|
|
75
|
+
This is a singleton class; constructing it repeatedly returns the same
|
|
76
|
+
instance.
|
|
77
|
+
"""
|
|
78
|
+
pass
|
|
79
|
+
|
|
80
|
+
class DeleteCurrentFlag(Joker):
|
|
81
|
+
"""Flag instructing PersiDict to delete the current value for a key.
|
|
82
|
+
|
|
83
|
+
Usage:
|
|
84
|
+
Assign this flag instead of a real value to remove the key if it
|
|
85
|
+
exists. If the key is absent, implementations will typically no-op.
|
|
86
|
+
|
|
87
|
+
Examples:
|
|
88
|
+
>>> d[key] = DELETE_CURRENT
|
|
89
|
+
|
|
90
|
+
Note:
|
|
91
|
+
This is a singleton class; constructing it repeatedly returns the same
|
|
92
|
+
instance.
|
|
93
|
+
"""
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class StatusFlag(Singleton):
|
|
98
|
+
"""Base class for process status flags.
|
|
99
|
+
|
|
100
|
+
Subclasses represent status flags that can be used to control
|
|
101
|
+
processing flow in various contexts.
|
|
102
|
+
"""
|
|
103
|
+
pass
|
|
104
|
+
|
|
105
|
+
class ETagHasNotChangedFlag(StatusFlag):
|
|
106
|
+
"""Flag indicating that an ETag has not changed.
|
|
107
|
+
|
|
108
|
+
Usage:
|
|
109
|
+
This flag can be used in contexts where a notification is needed
|
|
110
|
+
to indicate that an ETag (entity tag) has not changed, typically in
|
|
111
|
+
web or caching scenarios.
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
pass
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class ContinueNormalExecutionFlag(StatusFlag):
|
|
118
|
+
"""Flag indicating to continue normal execution without special handling.
|
|
119
|
+
|
|
120
|
+
Usage:
|
|
121
|
+
This flag can be used in contexts where a notification is needed
|
|
122
|
+
to indicate that normal processing should proceed without alteration.
|
|
123
|
+
|
|
124
|
+
Note:
|
|
125
|
+
This is a singleton class; constructing it repeatedly returns the same
|
|
126
|
+
instance.
|
|
127
|
+
"""
|
|
128
|
+
pass
|
|
129
|
+
|
|
130
|
+
class ExecutionIsCompleteFlag(StatusFlag):
|
|
131
|
+
"""Flag indicating no more processing is required.
|
|
132
|
+
|
|
133
|
+
Usage:
|
|
134
|
+
This flag can be used in contexts where a notification is needed
|
|
135
|
+
to indicate that all necessary processing steps were
|
|
136
|
+
finished successfully and no further action is needed.
|
|
137
|
+
|
|
138
|
+
Note:
|
|
139
|
+
This is a singleton class; constructing it repeatedly returns the same
|
|
140
|
+
instance.
|
|
141
|
+
"""
|
|
142
|
+
pass
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
register_parameterizable_class(KeepCurrentFlag)
|
|
146
|
+
register_parameterizable_class(DeleteCurrentFlag)
|
|
147
|
+
register_parameterizable_class(ContinueNormalExecutionFlag)
|
|
148
|
+
register_parameterizable_class(ExecutionIsCompleteFlag)
|
|
149
|
+
register_parameterizable_class(ETagHasNotChangedFlag)
|
|
150
|
+
|
|
151
|
+
_KeepCurrent = KeepCurrentFlag()
|
|
152
|
+
KEEP_CURRENT = KeepCurrentFlag()
|
|
153
|
+
"""Flag indicating that the current value should be kept unchanged.
|
|
154
|
+
|
|
155
|
+
This flag can be assigned to a key in a PersiDict to indicate that any existing
|
|
156
|
+
value for that key should not be modified during an update operation.
|
|
157
|
+
|
|
158
|
+
If assigned to a key that does not exist, the operation will succeed without
|
|
159
|
+
any change.
|
|
160
|
+
|
|
161
|
+
Example:
|
|
162
|
+
>>> d = PersiDict()
|
|
163
|
+
>>> d['key'] = 'value'
|
|
164
|
+
>>> d['key'] = KEEP_CURRENT # Keeps 'value' unchanged
|
|
165
|
+
"""
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
_DeleteCurrent = DeleteCurrentFlag()
|
|
169
|
+
DELETE_CURRENT = DeleteCurrentFlag()
|
|
170
|
+
"""Flag indicating that the current value should be deleted.
|
|
171
|
+
|
|
172
|
+
This flag can be assigned to a key in a PersiDict to indicate that any existing
|
|
173
|
+
value for that key should be deleted during an update operation.
|
|
174
|
+
|
|
175
|
+
If assigned to a key that does not exist, the operation will succeed without
|
|
176
|
+
any change.
|
|
177
|
+
|
|
178
|
+
Example:
|
|
179
|
+
>>> d = PersiDict()
|
|
180
|
+
>>> d['key'] = 'value'
|
|
181
|
+
>>> d['key'] = DELETE_CURRENT # same as d.discard('key')
|
|
182
|
+
"""
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
_ContinueNormalExecution = ContinueNormalExecutionFlag()
|
|
186
|
+
CONTINUE_NORMAL_EXECUTION = ContinueNormalExecutionFlag()
|
|
187
|
+
"""Flag indicating that normal execution should continue.
|
|
188
|
+
|
|
189
|
+
This flag can be used in process flow control contexts to signal that normal
|
|
190
|
+
execution should proceed without any special handling or alterations.
|
|
191
|
+
|
|
192
|
+
When this flag is returned from a processing step, it indicates that the
|
|
193
|
+
operation completed successfully and the next step in the normal execution
|
|
194
|
+
flow should be performed.
|
|
195
|
+
|
|
196
|
+
Example:
|
|
197
|
+
>>> if pre_process_input(data) is CONTINUE_NORMAL_EXECUTION:
|
|
198
|
+
... # Continue with next step
|
|
199
|
+
... perform_next_step()
|
|
200
|
+
"""
|
|
201
|
+
|
|
202
|
+
_ExecutionIsComplete = ExecutionIsCompleteFlag()
|
|
203
|
+
EXECUTION_IS_COMPLETE = ExecutionIsCompleteFlag()
|
|
204
|
+
"""Flag indicating that execution is complete, no further processing is needed.
|
|
205
|
+
|
|
206
|
+
This flag can be used in process flow control contexts to signal that all necessary
|
|
207
|
+
processing has been completed successfully and no additional steps are required.
|
|
208
|
+
|
|
209
|
+
When this flag is returned from a processing step, it indicates that the
|
|
210
|
+
operation completed successfully and no further processing should be performed.
|
|
211
|
+
|
|
212
|
+
Example:
|
|
213
|
+
>>> if pre_process_input(data) is EXECUTION_IS_COMPLETE:
|
|
214
|
+
... # Skip remaining steps
|
|
215
|
+
... return result
|
|
216
|
+
"""
|
|
217
|
+
|
|
218
|
+
_ETagHasNotChanged = ETagHasNotChangedFlag()
|
|
219
|
+
ETAG_HAS_NOT_CHANGED = ETagHasNotChangedFlag()
|
|
220
|
+
"""Flag indicating that an ETag value has not changed.
|
|
221
|
+
|
|
222
|
+
This flag can be used in contexts where a notification is needed to indicate
|
|
223
|
+
that an ETag (entity tag) comparison shows no changes.
|
|
224
|
+
|
|
225
|
+
When this flag is returned from a processing step, it indicates that the
|
|
226
|
+
resource's ETag matches and no content updates are necessary.
|
|
227
|
+
|
|
228
|
+
Example:
|
|
229
|
+
>>> if check_resource_etag(url) is ETAG_HAS_NOT_CHANGED:
|
|
230
|
+
... # Skip resource update
|
|
231
|
+
... return cached_content
|
|
232
|
+
"""
|
persidict/write_once_dict.py
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
"""Write-once dictionary with probabilistic consistency checking.
|
|
2
|
+
|
|
3
|
+
This module provides WriteOnceDict, a wrapper around PersiDict that supports
|
|
4
|
+
an alternative behavior of append-only dictionaries.
|
|
5
|
+
It allows repeated writes to an existing key but assumes that
|
|
6
|
+
all the subsequent writes have exactly the same value as the first one,
|
|
7
|
+
so they can be safely ignored. Random consistency checks ensure that
|
|
8
|
+
repeated writes contain the same values, helping detect data consistency issues.
|
|
9
|
+
Setting the probability of random checks to 0 disables them.
|
|
10
|
+
"""
|
|
1
11
|
from __future__ import annotations
|
|
2
12
|
|
|
3
13
|
import time
|
|
@@ -5,8 +15,9 @@ import time
|
|
|
5
15
|
from deepdiff import DeepDiff
|
|
6
16
|
from parameterizable import register_parameterizable_class, sort_dict_by_keys
|
|
7
17
|
|
|
8
|
-
from .
|
|
9
|
-
from .
|
|
18
|
+
from . import SafeStrTuple
|
|
19
|
+
from .singletons import KEEP_CURRENT, KeepCurrentFlag, ETagHasNotChangedFlag
|
|
20
|
+
from .persi_dict import PersiDict, NonEmptyPersiDictKey
|
|
10
21
|
from .file_dir_dict import FileDirDict
|
|
11
22
|
import random
|
|
12
23
|
import sys
|
|
@@ -72,7 +83,7 @@ class WriteOnceDict(PersiDict):
|
|
|
72
83
|
|
|
73
84
|
Args:
|
|
74
85
|
wrapped_dict: The underlying persistent dictionary to wrap. If not
|
|
75
|
-
provided, a FileDirDict with
|
|
86
|
+
provided, a FileDirDict with append_only=True is created.
|
|
76
87
|
p_consistency_checks: Probability in [0, 1] to perform a
|
|
77
88
|
consistency check when a key already exists. ``None`` means 0.0
|
|
78
89
|
(disabled).
|
|
@@ -82,17 +93,16 @@ class WriteOnceDict(PersiDict):
|
|
|
82
93
|
ValueError: If ``wrapped_dict`` does not enforce immutable items.
|
|
83
94
|
"""
|
|
84
95
|
if wrapped_dict is None:
|
|
85
|
-
wrapped_dict = FileDirDict(
|
|
96
|
+
wrapped_dict = FileDirDict(append_only=True)
|
|
86
97
|
if not isinstance(wrapped_dict, PersiDict):
|
|
87
98
|
raise TypeError("wrapped_dict must be a PersiDict instance")
|
|
88
|
-
if wrapped_dict.
|
|
89
|
-
raise ValueError("wrapped_dict must be append-only
|
|
90
|
-
"(immutable_items==True)")
|
|
99
|
+
if wrapped_dict.append_only is not True:
|
|
100
|
+
raise ValueError("wrapped_dict must be append-only")
|
|
91
101
|
self.p_consistency_checks = p_consistency_checks
|
|
92
102
|
PersiDict.__init__(self,
|
|
93
103
|
base_class_for_values=wrapped_dict.base_class_for_values,
|
|
94
|
-
|
|
95
|
-
|
|
104
|
+
serialization_format=wrapped_dict.serialization_format,
|
|
105
|
+
append_only=True)
|
|
96
106
|
self._wrapped_dict = wrapped_dict
|
|
97
107
|
self._consistency_checks_passed = 0
|
|
98
108
|
self._consistency_checks_attempted = 0
|
|
@@ -181,7 +191,10 @@ class WriteOnceDict(PersiDict):
|
|
|
181
191
|
sorted_params = sort_dict_by_keys(params)
|
|
182
192
|
return sorted_params
|
|
183
193
|
|
|
184
|
-
def
|
|
194
|
+
def set_item_get_etag(self, key: NonEmptyPersiDictKey, value: Any) -> str|None:
|
|
195
|
+
raise NotImplementedError("Operation not supported on WriteOnceDict.")
|
|
196
|
+
|
|
197
|
+
def __setitem__(self, key:NonEmptyPersiDictKey, value):
|
|
185
198
|
"""Set a value for a key, preserving the first assignment.
|
|
186
199
|
|
|
187
200
|
If the key is new, the value is stored. If the key already exists,
|
|
@@ -233,7 +246,7 @@ class WriteOnceDict(PersiDict):
|
|
|
233
246
|
+ f"which is not allowed. Details here: {diff_dict} ")
|
|
234
247
|
self._consistency_checks_passed += 1
|
|
235
248
|
|
|
236
|
-
def __contains__(self, item:
|
|
249
|
+
def __contains__(self, item:NonEmptyPersiDictKey):
|
|
237
250
|
"""Check if a key exists in the dictionary.
|
|
238
251
|
|
|
239
252
|
Args:
|
|
@@ -244,7 +257,7 @@ class WriteOnceDict(PersiDict):
|
|
|
244
257
|
"""
|
|
245
258
|
return item in self._wrapped_dict
|
|
246
259
|
|
|
247
|
-
def __getitem__(self, key:
|
|
260
|
+
def __getitem__(self, key:NonEmptyPersiDictKey):
|
|
248
261
|
"""Retrieve a value by key.
|
|
249
262
|
|
|
250
263
|
Args:
|
|
@@ -255,6 +268,20 @@ class WriteOnceDict(PersiDict):
|
|
|
255
268
|
"""
|
|
256
269
|
return self._wrapped_dict[key]
|
|
257
270
|
|
|
271
|
+
|
|
272
|
+
def get_item_if_etag_changed(self, key: NonEmptyPersiDictKey, etag: str | None
|
|
273
|
+
) -> tuple[Any, str|None] |ETagHasNotChangedFlag:
|
|
274
|
+
"""Retrieve a value and its etag if the etag is new.
|
|
275
|
+
Args:
|
|
276
|
+
key: Key to look up.
|
|
277
|
+
etag: Previous etag to compare against.
|
|
278
|
+
Returns:
|
|
279
|
+
tuple[Any, str|None] | ETagHasNotChangedFlag: Stored value and its
|
|
280
|
+
etag if the etag is new, or ETAG_HAS_NOT_CHANGED if the
|
|
281
|
+
etag matches the current one.
|
|
282
|
+
"""
|
|
283
|
+
return self._wrapped_dict.get_item_if_etag_changed(key, etag)
|
|
284
|
+
|
|
258
285
|
def __len__(self):
|
|
259
286
|
"""Return the number of items stored.
|
|
260
287
|
|
|
@@ -274,7 +301,7 @@ class WriteOnceDict(PersiDict):
|
|
|
274
301
|
"""
|
|
275
302
|
return self._wrapped_dict._generic_iter(iter_type)
|
|
276
303
|
|
|
277
|
-
def timestamp(self, key:
|
|
304
|
+
def timestamp(self, key: NonEmptyPersiDictKey) -> float:
|
|
278
305
|
"""Return the timestamp for a given key.
|
|
279
306
|
|
|
280
307
|
Args:
|
|
@@ -297,25 +324,14 @@ class WriteOnceDict(PersiDict):
|
|
|
297
324
|
"""
|
|
298
325
|
return getattr(self._wrapped_dict, name)
|
|
299
326
|
|
|
300
|
-
@property
|
|
301
|
-
def base_dir(self) -> str|None:
|
|
302
|
-
"""Base directory of the wrapped dict (if applicable).
|
|
303
|
-
|
|
304
|
-
Returns:
|
|
305
|
-
str | None: The base directory path, or None if not applicable.
|
|
306
|
-
"""
|
|
307
|
-
return self._wrapped_dict.base_dir
|
|
308
327
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
str | None: The base URL, or None if not applicable.
|
|
315
|
-
"""
|
|
316
|
-
return self._wrapped_dict.base_url
|
|
328
|
+
def __delitem__(self, key):
|
|
329
|
+
raise TypeError(
|
|
330
|
+
f"{self.__class__.__name__} has immutable items "
|
|
331
|
+
"and does not support deletion.")
|
|
332
|
+
|
|
317
333
|
|
|
318
|
-
def get_subdict(self, prefix_key:
|
|
334
|
+
def get_subdict(self, prefix_key: NonEmptyPersiDictKey) -> WriteOnceDict:
|
|
319
335
|
"""Return a WriteOnceDict view over a sub-keyspace.
|
|
320
336
|
|
|
321
337
|
Args:
|
|
@@ -330,4 +346,5 @@ class WriteOnceDict(PersiDict):
|
|
|
330
346
|
result = WriteOnceDict(subdict, self.p_consistency_checks)
|
|
331
347
|
return result
|
|
332
348
|
|
|
349
|
+
|
|
333
350
|
register_parameterizable_class(WriteOnceDict)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.103.0
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -18,11 +18,12 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
18
18
|
Requires-Dist: parameterizable
|
|
19
19
|
Requires-Dist: lz4
|
|
20
20
|
Requires-Dist: joblib
|
|
21
|
-
Requires-Dist: numpy
|
|
22
|
-
Requires-Dist: pandas
|
|
23
21
|
Requires-Dist: jsonpickle
|
|
24
22
|
Requires-Dist: deepdiff
|
|
23
|
+
Requires-Dist: boto3
|
|
25
24
|
Requires-Dist: boto3 ; extra == 'aws'
|
|
25
|
+
Requires-Dist: numpy ; extra == 'dev'
|
|
26
|
+
Requires-Dist: pandas ; extra == 'dev'
|
|
26
27
|
Requires-Dist: boto3 ; extra == 'dev'
|
|
27
28
|
Requires-Dist: moto ; extra == 'dev'
|
|
28
29
|
Requires-Dist: pytest ; extra == 'dev'
|
|
@@ -44,7 +45,8 @@ storing each value as its own file or S3 object. Keys are limited to
|
|
|
44
45
|
text strings or sequences of strings.
|
|
45
46
|
|
|
46
47
|
In contrast to traditional persistent dictionaries (e.g., Python’s `shelve`),
|
|
47
|
-
`persidict` is designed
|
|
48
|
+
`persidict` is [designed](https://github.com/pythagoras-dev/persidict/blob/master/design_principles.md)
|
|
49
|
+
for distributed environments where multiple processes
|
|
48
50
|
on different machines concurrently work with the same store.
|
|
49
51
|
|
|
50
52
|
## 2. Why Use It?
|
|
@@ -160,8 +162,8 @@ print(f"API Key: {cloud_config['api_key']}")
|
|
|
160
162
|
You can also constrain values to a specific class.
|
|
161
163
|
* **Order**: Insertion order is not preserved.
|
|
162
164
|
* **Additional Methods**: `PersiDict` provides extra methods not in the standard
|
|
163
|
-
dict API, such as `timestamp()`, `
|
|
164
|
-
, `
|
|
165
|
+
dict API, such as `timestamp()`, `etag()`, `random_key()`, `newest_keys()`
|
|
166
|
+
, `subdicts()`, `discard()`, `get_params()` and more.
|
|
165
167
|
* **Special Values**: Use `KEEP_CURRENT` to avoid updating a value
|
|
166
168
|
and `DELETE_CURRENT` to delete a value during an assignment.
|
|
167
169
|
|
|
@@ -172,13 +174,14 @@ and `DELETE_CURRENT` to delete a value during an assignment.
|
|
|
172
174
|
* **`PersiDict`**: The abstract base class that defines the common interface
|
|
173
175
|
for all persistent dictionaries in the package. It's the foundation
|
|
174
176
|
upon which everything else is built.
|
|
175
|
-
* **`
|
|
176
|
-
as a key in any `PersiDict`. It can be a `
|
|
177
|
+
* **`NonEmptyPersiDictKey`**: A type hint that specifies what can be used
|
|
178
|
+
as a key in any `PersiDict`. It can be a `NonEmptySafeStrTuple`, a single string,
|
|
177
179
|
or a sequence of strings. When a `PersiDict` method requires a key as an input,
|
|
178
|
-
it will accept any of these types and convert them to
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
180
|
+
it will accept any of these types and convert them to
|
|
181
|
+
a `NonEmptySafeStrTuple` internally.
|
|
182
|
+
* **`NonEmptySafeStrTuple`**: The core data structure for keys.
|
|
183
|
+
It's an immutable, flat tuple of non-empty, URL/filename-safe strings,
|
|
184
|
+
ensuring that keys are consistent and safe for various storage backends.
|
|
182
185
|
When a `PersiDict` method returns a key, it will always be in this format.
|
|
183
186
|
|
|
184
187
|
### 5.2 Main Implementations
|
|
@@ -191,24 +194,25 @@ suitable for distributed environments.
|
|
|
191
194
|
|
|
192
195
|
### 5.3 Key Parameters
|
|
193
196
|
|
|
194
|
-
* **`
|
|
195
|
-
determines the serialization format
|
|
196
|
-
Common options are `"pkl"` (pickle) and `"json"`.
|
|
197
|
-
Any other value is treated as plain text for string storage.
|
|
197
|
+
* **`serialization_format`**: A key parameter for `FileDirDict` and `S3Dict` that
|
|
198
|
+
determines the serialization format used to store values.
|
|
199
|
+
Common options are `"pkl"` (pickle) and `"json"`.
|
|
200
|
+
Any other value is treated as plain text for string storage.
|
|
198
201
|
* **`base_class_for_values`**: An optional parameter for any `PersiDict`
|
|
199
202
|
that enforces type checking on all stored values, ensuring they are
|
|
200
203
|
instances of a specific class.
|
|
201
|
-
* **`
|
|
202
|
-
|
|
204
|
+
* **`append_only`**: A boolean parameter that makes items inside a `PersiDict` immutable,
|
|
205
|
+
preventing them from modification or deletion.
|
|
203
206
|
* **`digest_len`**: An integer that specifies the length of a hash suffix
|
|
204
|
-
added to key components to prevent collisions
|
|
207
|
+
added to key components in `FileDirDict` to prevent collisions
|
|
208
|
+
on case-insensitive file systems.
|
|
205
209
|
* **`base_dir`**: A string specifying the directory path where a `FileDirDict`
|
|
206
210
|
stores its files. For `S3Dict`, this directory is used to cache files locally.
|
|
207
211
|
* **`bucket_name`**: A string specifying the name of the S3 bucket where
|
|
208
212
|
an `S3Dict` stores its objects.
|
|
209
213
|
* **`region`**: An optional string specifying the AWS region for the S3 bucket.
|
|
210
214
|
|
|
211
|
-
### 5.4 Advanced Classes
|
|
215
|
+
### 5.4 Advanced and Supporting Classes
|
|
212
216
|
|
|
213
217
|
* **`WriteOnceDict`**: A wrapper that enforces write-once behavior
|
|
214
218
|
on any `PersiDict`, ignoring subsequent writes to the same key.
|
|
@@ -216,7 +220,13 @@ It also allows for random consistency checks to ensure subsequent
|
|
|
216
220
|
writes to the same key always match the original value.
|
|
217
221
|
* **`OverlappingMultiDict`**: An advanced container that holds
|
|
218
222
|
multiple `PersiDict` instances sharing the same storage
|
|
219
|
-
but with different `
|
|
223
|
+
but with different `serialization_format`s.
|
|
224
|
+
* **`LocalDict`**: An in-memory `PersiDict` backed by
|
|
225
|
+
a RAM-only hierarchical store.
|
|
226
|
+
* **`EmptyDict`**: A minimal implementation of `PersiDict` that behaves
|
|
227
|
+
like a null device in OS - accepts all writes but discards them,
|
|
228
|
+
returns nothing on reads. Always appears empty
|
|
229
|
+
regardless of operations performed on it.
|
|
220
230
|
|
|
221
231
|
### 5.5 Special "Joker" Values
|
|
222
232
|
|
|
@@ -241,7 +251,7 @@ from the dictionary when assigned to a key.
|
|
|
241
251
|
| `newest_values(max_n=None)` | `list[Any]` | Returns a list of values corresponding to the newest keys. |
|
|
242
252
|
| `get_subdict(prefix_key)` | `PersiDict` | Returns a new `PersiDict` instance that provides a view into a subset of keys sharing a common prefix. |
|
|
243
253
|
| `subdicts()` | `dict[str, PersiDict]` | Returns a dictionary mapping all first-level key prefixes to their corresponding sub-dictionary views. |
|
|
244
|
-
| `
|
|
254
|
+
| `discard(key)` | `bool` | Deletes a key-value pair if it exists and returns `True`; otherwise, returns `False`. |
|
|
245
255
|
| `get_params()` | `dict` | Returns a dictionary of the instance's configuration parameters, supporting the `parameterizable` API. |
|
|
246
256
|
|
|
247
257
|
## 7. Installation
|
|
@@ -278,14 +288,14 @@ pip install persidict[dev]
|
|
|
278
288
|
* [jsonpickle](https://jsonpickle.github.io)
|
|
279
289
|
* [joblib](https://joblib.readthedocs.io)
|
|
280
290
|
* [lz4](https://python-lz4.readthedocs.io)
|
|
281
|
-
* [pandas](https://pandas.pydata.org)
|
|
282
|
-
* [numpy](https://numpy.org)
|
|
283
291
|
* [deepdiff](https://zepworks.com/deepdiff)
|
|
284
292
|
|
|
285
293
|
For AWS S3 support (`S3Dict`), you will also need:
|
|
286
294
|
* [boto3](https://boto3.readthedocs.io)
|
|
287
295
|
|
|
288
296
|
For development and testing, the following packages are used:
|
|
297
|
+
* [pandas](https://pandas.pydata.org)
|
|
298
|
+
* [numpy](https://numpy.org)
|
|
289
299
|
* [pytest](https://pytest.org)
|
|
290
300
|
* [moto](http://getmoto.org)
|
|
291
301
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
persidict/__init__.py,sha256=8PvvqAjitbHDe9aLWglsHThQDEnc7lkdu5jKAuJUZr4,2427
|
|
2
|
+
persidict/basic_s3_dict.py,sha256=i_P2B_IvXlFXMhpvKFloK5cfbQrgpMFyk60B5ca_Ihc,23230
|
|
3
|
+
persidict/cached_appendonly_dict.py,sha256=T1rCf2PYR0wDCcMLyB8FJ6eqlBgkeFO6ChZb0E96yP8,9754
|
|
4
|
+
persidict/cached_mutable_dict.py,sha256=N-oInUol1y_VlZKyQ0TNPuwolcSbMLIyjtDoxo93A8Y,8979
|
|
5
|
+
persidict/empty_dict.py,sha256=B1RnYdbo4rmnYfqWfUJPbKh18biuClHT9kFD2DCIfWo,6033
|
|
6
|
+
persidict/file_dir_dict.py,sha256=TuqSCGLTPxK70sps4PVH_4WnfmY5RrkrqbBx13E92Mc,30782
|
|
7
|
+
persidict/local_dict.py,sha256=HOJU_InRLLag7tG6j2D3Pa2MoyRy23SjJyYOKyD6b2A,21683
|
|
8
|
+
persidict/overlapping_multi_dict.py,sha256=vh8SeZjhCN1pIZTcU0i4N6GlD9eZcSsdeZYEenk0xjc,6490
|
|
9
|
+
persidict/persi_dict.py,sha256=vUxu0mlveVLoLhppiPdZAJ_6Qfijw8QaTQsmXHKQSQc,29138
|
|
10
|
+
persidict/s3_dict_file_dir_cached.py,sha256=C6WoBO35UrPzNWRkFQcJ5fNrchtH_gwflJpa2aKPUsI,8628
|
|
11
|
+
persidict/s3_dict_legacy.py,sha256=OM01o4Zs9_dB08iXb0VA-xUkKLx8TEK_ij_2CWHpn0c,22601
|
|
12
|
+
persidict/safe_chars.py,sha256=gKYXA4RDuOVy_vhGXn8y0BFNHyuOvLsFyaJQXL013Go,2129
|
|
13
|
+
persidict/safe_str_tuple.py,sha256=BSRMNgLfZoAwS7ZkiVA3fV2sfj2i_NO1tjstVP-XGOU,8047
|
|
14
|
+
persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
|
|
15
|
+
persidict/singletons.py,sha256=eNo0SYJLtBksrb1jnIXOabhUQgi2uemEPpji5vrrIyk,7230
|
|
16
|
+
persidict/write_once_dict.py,sha256=W3ABdX-Tt4SJnULUBWIattSP4bOnd_8tHT_Rdt_83_4,12691
|
|
17
|
+
persidict-0.103.0.dist-info/WHEEL,sha256=-neZj6nU9KAMg2CnCY6T3w8J53nx1kFGw_9HfoSzM60,79
|
|
18
|
+
persidict-0.103.0.dist-info/METADATA,sha256=3iLXoS735wH5tj-Xl_3M-ycTt7UDGr4SdRY7eKSkCf4,12944
|
|
19
|
+
persidict-0.103.0.dist-info/RECORD,,
|
persidict/.DS_Store
DELETED
|
Binary file
|