persidict 0.32.3__tar.gz → 0.32.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- {persidict-0.32.3 → persidict-0.32.8}/PKG-INFO +1 -1
- {persidict-0.32.3 → persidict-0.32.8}/pyproject.toml +1 -1
- persidict-0.32.8/src/persidict/.DS_Store +0 -0
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/file_dir_dict.py +31 -39
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/persi_dict.py +64 -37
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/s3_dict.py +24 -22
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/safe_chars.py +1 -2
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/write_once_dict.py +1 -0
- {persidict-0.32.3 → persidict-0.32.8}/README.md +0 -0
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/__init__.py +0 -0
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/jokers.py +0 -0
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/overlapping_multi_dict.py +0 -0
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/safe_str_tuple.py +0 -0
- {persidict-0.32.3 → persidict-0.32.8}/src/persidict/safe_str_tuple_signing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.32.3
|
|
3
|
+
Version: 0.32.8
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "persidict"
|
|
7
|
-
version = "0.32.3"
|
|
7
|
+
version = "0.32.8"
|
|
8
8
|
description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
Binary file
|
|
@@ -103,17 +103,6 @@ class FileDirDict(PersiDict):
|
|
|
103
103
|
self._base_dir = os.path.abspath(base_dir)
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
def __repr__(self):
|
|
107
|
-
"""Return repr(self)."""
|
|
108
|
-
|
|
109
|
-
repr_str = super().__repr__()
|
|
110
|
-
repr_str = repr_str[:-1] + f", _base_dir={self._base_dir}"
|
|
111
|
-
repr_str += f", file_type={self.file_type}"
|
|
112
|
-
repr_str += " )"
|
|
113
|
-
|
|
114
|
-
return repr_str
|
|
115
|
-
|
|
116
|
-
|
|
117
106
|
def get_params(self):
|
|
118
107
|
"""Return configuration parameters of the dictionary.
|
|
119
108
|
|
|
@@ -150,23 +139,9 @@ class FileDirDict(PersiDict):
|
|
|
150
139
|
def __len__(self) -> int:
|
|
151
140
|
""" Get the number of key-value pairs in the dictionary."""
|
|
152
141
|
|
|
153
|
-
num_files = 0
|
|
154
142
|
suffix = "." + self.file_type
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
while stack:
|
|
158
|
-
path = stack.pop()
|
|
159
|
-
try:
|
|
160
|
-
with os.scandir(path) as it:
|
|
161
|
-
for entry in it:
|
|
162
|
-
if entry.is_dir(follow_symlinks=False):
|
|
163
|
-
stack.append(entry.path)
|
|
164
|
-
elif entry.is_file(follow_symlinks=False) and entry.name.endswith(suffix):
|
|
165
|
-
num_files += 1
|
|
166
|
-
except PermissionError:
|
|
167
|
-
continue
|
|
168
|
-
|
|
169
|
-
return num_files
|
|
143
|
+
return sum(1 for _, _, files in os.walk(self._base_dir)
|
|
144
|
+
for f in files if f.endswith(suffix))
|
|
170
145
|
|
|
171
146
|
|
|
172
147
|
def clear(self) -> None:
|
|
@@ -185,6 +160,7 @@ class FileDirDict(PersiDict):
|
|
|
185
160
|
len(os.listdir(subdir_name)) == 0 ):
|
|
186
161
|
os.rmdir(subdir_name)
|
|
187
162
|
|
|
163
|
+
|
|
188
164
|
def _build_full_path(self
|
|
189
165
|
, key:SafeStrTuple
|
|
190
166
|
, create_subdirs:bool=False
|
|
@@ -393,24 +369,28 @@ class FileDirDict(PersiDict):
|
|
|
393
369
|
os.remove(filename)
|
|
394
370
|
|
|
395
371
|
|
|
396
|
-
def _generic_iter(self,
|
|
372
|
+
def _generic_iter(self, result_type: set[str]):
|
|
397
373
|
"""Underlying implementation for .items()/.keys()/.values() iterators"""
|
|
398
|
-
assert
|
|
374
|
+
assert isinstance(result_type, set)
|
|
375
|
+
assert 1 <= len(result_type) <= 3
|
|
376
|
+
assert len(result_type | {"keys", "values", "timestamps"}) == 3
|
|
377
|
+
assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
|
|
378
|
+
|
|
399
379
|
walk_results = os.walk(self._base_dir)
|
|
400
380
|
ext_len = len(self.file_type) + 1
|
|
401
381
|
|
|
402
382
|
def splitter(dir_path: str):
|
|
403
383
|
"""Transform a dirname into a PersiDictKey key"""
|
|
404
|
-
|
|
384
|
+
splitted_str = []
|
|
405
385
|
if dir_path == ".":
|
|
406
|
-
return
|
|
386
|
+
return splitted_str
|
|
407
387
|
while True:
|
|
408
388
|
head, tail = os.path.split(dir_path)
|
|
409
|
-
|
|
389
|
+
splitted_str = [tail] + splitted_str
|
|
410
390
|
dir_path = head
|
|
411
391
|
if len(head) == 0:
|
|
412
392
|
break
|
|
413
|
-
return tuple(
|
|
393
|
+
return tuple(splitted_str)
|
|
414
394
|
|
|
415
395
|
def step():
|
|
416
396
|
suffix = "." + self.file_type
|
|
@@ -423,14 +403,26 @@ class FileDirDict(PersiDict):
|
|
|
423
403
|
result_key = (*splitter(prefix_key), f[:-ext_len])
|
|
424
404
|
result_key = SafeStrTuple(result_key)
|
|
425
405
|
|
|
426
|
-
|
|
427
|
-
|
|
406
|
+
to_return = []
|
|
407
|
+
|
|
408
|
+
if "keys" in result_type:
|
|
409
|
+
key_to_return= unsign_safe_str_tuple(
|
|
428
410
|
result_key, self.digest_len)
|
|
429
|
-
|
|
430
|
-
|
|
411
|
+
to_return.append(key_to_return)
|
|
412
|
+
|
|
413
|
+
if "values" in result_type:
|
|
414
|
+
full_path = os.path.join(dir_name, f)
|
|
415
|
+
value_to_return = self._read_from_file(full_path)
|
|
416
|
+
to_return.append(value_to_return)
|
|
417
|
+
|
|
418
|
+
if len(result_type) == 1:
|
|
419
|
+
yield to_return[0]
|
|
431
420
|
else:
|
|
432
|
-
|
|
433
|
-
|
|
421
|
+
if "timestamps" in result_type:
|
|
422
|
+
timestamp_to_return = os.path.getmtime(
|
|
423
|
+
os.path.join(dir_name, f))
|
|
424
|
+
to_return.append(timestamp_to_return)
|
|
425
|
+
yield tuple(to_return)
|
|
434
426
|
|
|
435
427
|
return step()
|
|
436
428
|
|
|
@@ -21,6 +21,7 @@ even after the Python process that created the dictionary has terminated.
|
|
|
21
21
|
from __future__ import annotations
|
|
22
22
|
|
|
23
23
|
from abc import abstractmethod
|
|
24
|
+
import heapq
|
|
24
25
|
import random
|
|
25
26
|
from parameterizable import ParameterizableClass, sort_dict_by_keys
|
|
26
27
|
from typing import Any, Sequence, Optional
|
|
@@ -87,7 +88,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
87
88
|
, immutable_items:bool = False
|
|
88
89
|
, digest_len:int = 8
|
|
89
90
|
, base_class_for_values:Optional[type] = None
|
|
90
|
-
, *args, **
|
|
91
|
+
, *args, **kwargs):
|
|
91
92
|
self.digest_len = int(digest_len)
|
|
92
93
|
if digest_len < 0:
|
|
93
94
|
raise ValueError("digest_len must be non-negative")
|
|
@@ -133,13 +134,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
133
134
|
|
|
134
135
|
def __repr__(self) -> str:
|
|
135
136
|
"""Return repr(self)"""
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
repr_str += f", digest_len={self.digest_len}"
|
|
140
|
-
repr_str += f", base_class_for_values={self.base_class_for_values}"
|
|
141
|
-
repr_str += ")"
|
|
142
|
-
return repr_str
|
|
137
|
+
params = self.get_params()
|
|
138
|
+
params_str = ', '.join(f'{k}={v!r}' for k, v in params.items())
|
|
139
|
+
return f'{self.__class__.__name__}({params_str})'
|
|
143
140
|
|
|
144
141
|
|
|
145
142
|
def __str__(self) -> str:
|
|
@@ -185,30 +182,48 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
185
182
|
|
|
186
183
|
|
|
187
184
|
@abstractmethod
|
|
188
|
-
def _generic_iter(self,
|
|
189
|
-
"""Underlying implementation for
|
|
190
|
-
assert
|
|
185
|
+
def _generic_iter(self, result_type: set[str]) -> Any:
|
|
186
|
+
"""Underlying implementation for items/keys/values/... iterators"""
|
|
187
|
+
assert isinstance(result_type, set)
|
|
188
|
+
assert 1 <= len(result_type) <= 3
|
|
189
|
+
assert len(result_type | {"keys", "values", "timestamps"}) == 3
|
|
190
|
+
assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
|
|
191
191
|
raise NotImplementedError
|
|
192
192
|
|
|
193
193
|
|
|
194
194
|
def __iter__(self):
|
|
195
195
|
"""Implement iter(self)."""
|
|
196
|
-
return self._generic_iter("keys")
|
|
196
|
+
return self._generic_iter({"keys"})
|
|
197
197
|
|
|
198
198
|
|
|
199
199
|
def keys(self):
|
|
200
|
-
"""
|
|
201
|
-
return
|
|
200
|
+
"""iterator object that provides access to keys"""
|
|
201
|
+
return self._generic_iter({"keys"})
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def keys_and_timestamps(self):
|
|
205
|
+
"""iterator object that provides access to keys and timestamps"""
|
|
206
|
+
return self._generic_iter({"keys", "timestamps"})
|
|
202
207
|
|
|
203
208
|
|
|
204
209
|
def values(self):
|
|
205
210
|
"""D.values() -> iterator object that provides access to D's values"""
|
|
206
|
-
return self._generic_iter("values")
|
|
211
|
+
return self._generic_iter({"values"})
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def values_and_timestamps(self):
|
|
215
|
+
"""iterator object that provides access to values and timestamps"""
|
|
216
|
+
return self._generic_iter({"values", "timestamps"})
|
|
207
217
|
|
|
208
218
|
|
|
209
219
|
def items(self):
|
|
210
220
|
"""D.items() -> iterator object that provides access to D's items"""
|
|
211
|
-
return self._generic_iter("
|
|
221
|
+
return self._generic_iter({"keys", "values"})
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def items_and_timestamps(self):
|
|
225
|
+
"""iterator object that provides access to keys, values, and timestamps"""
|
|
226
|
+
return self._generic_iter({"keys", "values", "timestamps"})
|
|
212
227
|
|
|
213
228
|
|
|
214
229
|
def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
|
|
@@ -350,39 +365,51 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
350
365
|
|
|
351
366
|
This method is absent in the original Python dict API.
|
|
352
367
|
"""
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
368
|
+
if max_n is None:
|
|
369
|
+
# If we need all keys, sort them all by timestamp
|
|
370
|
+
key_timestamp_pairs = list(self.keys_and_timestamps())
|
|
371
|
+
key_timestamp_pairs.sort(key=lambda x: x[1])
|
|
372
|
+
return [key for key,_ in key_timestamp_pairs]
|
|
373
|
+
elif max_n <= 0:
|
|
374
|
+
return []
|
|
375
|
+
else:
|
|
376
|
+
# Use heapq.nsmallest for efficient partial sorting without loading all keys into memory
|
|
377
|
+
smallest_pairs = heapq.nsmallest(max_n
|
|
378
|
+
, self.keys_and_timestamps()
|
|
379
|
+
, key=lambda x: x[1])
|
|
380
|
+
return [key for key,_ in smallest_pairs]
|
|
360
381
|
|
|
361
382
|
|
|
362
|
-
def
|
|
363
|
-
"""Return max_n the
|
|
383
|
+
def oldest_values(self, max_n=None):
|
|
384
|
+
"""Return max_n the oldest values in the dictionary.
|
|
364
385
|
|
|
365
|
-
If max_n is None, return all
|
|
386
|
+
If max_n is None, return all values.
|
|
366
387
|
|
|
367
388
|
This method is absent in the original Python dict API.
|
|
368
389
|
"""
|
|
369
|
-
|
|
370
|
-
# all_keys.sort(key=lambda k: self.timestamp(k), reverse=True)
|
|
371
|
-
all_keys.sort(key=self.timestamp, reverse=True)
|
|
372
|
-
if max_n is None or max_n > len(all_keys):
|
|
373
|
-
max_n = len(all_keys)
|
|
374
|
-
result = all_keys[:max_n]
|
|
375
|
-
return result
|
|
390
|
+
return [self[k] for k in self.oldest_keys(max_n)]
|
|
376
391
|
|
|
377
392
|
|
|
378
|
-
def
|
|
379
|
-
"""Return max_n the
|
|
393
|
+
def newest_keys(self, max_n=None):
|
|
394
|
+
"""Return max_n the newest keys in the dictionary.
|
|
380
395
|
|
|
381
|
-
If max_n is None, return all
|
|
396
|
+
If max_n is None, return all keys.
|
|
382
397
|
|
|
383
398
|
This method is absent in the original Python dict API.
|
|
384
399
|
"""
|
|
385
|
-
|
|
400
|
+
if max_n is None:
|
|
401
|
+
# If we need all keys, sort them all by timestamp in reverse order
|
|
402
|
+
key_timestamp_pairs = list(self.keys_and_timestamps())
|
|
403
|
+
key_timestamp_pairs.sort(key=lambda x:x[1], reverse=True)
|
|
404
|
+
return [key for key,_ in key_timestamp_pairs]
|
|
405
|
+
elif max_n <= 0:
|
|
406
|
+
return []
|
|
407
|
+
else:
|
|
408
|
+
# Use heapq.nlargest for efficient partial sorting without loading all keys into memory
|
|
409
|
+
largest_pairs = heapq.nlargest(max_n
|
|
410
|
+
, self.keys_and_timestamps()
|
|
411
|
+
, key=lambda item: item[1])
|
|
412
|
+
return [key for key,_ in largest_pairs]
|
|
386
413
|
|
|
387
414
|
|
|
388
415
|
def newest_values(self, max_n=None):
|
|
@@ -96,20 +96,6 @@ class S3Dict(PersiDict):
|
|
|
96
96
|
self.root_prefix += "/"
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
def __repr__(self) -> str:
|
|
100
|
-
"""Return repr(self)."""
|
|
101
|
-
|
|
102
|
-
repr_str = super().__repr__()
|
|
103
|
-
repr_str = repr_str[:-1] + f", _base_dir={self.local_cache._base_dir}"
|
|
104
|
-
repr_str += f", file_type={self.file_type}"
|
|
105
|
-
repr_str += f", region={self.region}"
|
|
106
|
-
repr_str += f", bucket_name={self.bucket_name}"
|
|
107
|
-
repr_str += f", root_prefix={self.root_prefix}"
|
|
108
|
-
repr_str += " )"
|
|
109
|
-
|
|
110
|
-
return repr_str
|
|
111
|
-
|
|
112
|
-
|
|
113
99
|
def get_params(self):
|
|
114
100
|
"""Return configuration parameters of the object as a dictionary.
|
|
115
101
|
|
|
@@ -269,9 +255,14 @@ class S3Dict(PersiDict):
|
|
|
269
255
|
return num_files
|
|
270
256
|
|
|
271
257
|
|
|
272
|
-
def _generic_iter(self,
|
|
258
|
+
def _generic_iter(self, result_type: str):
|
|
273
259
|
"""Underlying implementation for .items()/.keys()/.values() iterators"""
|
|
274
|
-
|
|
260
|
+
|
|
261
|
+
assert isinstance(result_type, set)
|
|
262
|
+
assert 1 <= len(result_type) <= 3
|
|
263
|
+
assert len(result_type | {"keys", "values", "timestamps"}) == 3
|
|
264
|
+
assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
|
|
265
|
+
|
|
275
266
|
suffix = "." + self.file_type
|
|
276
267
|
ext_len = len(self.file_type) + 1
|
|
277
268
|
prefix_len = len(self.root_prefix)
|
|
@@ -295,14 +286,25 @@ class S3Dict(PersiDict):
|
|
|
295
286
|
if not obj_name.endswith(suffix):
|
|
296
287
|
continue
|
|
297
288
|
obj_key = splitter(obj_name)
|
|
298
|
-
|
|
299
|
-
|
|
289
|
+
|
|
290
|
+
to_return = []
|
|
291
|
+
|
|
292
|
+
if "keys" in result_type:
|
|
293
|
+
key_to_return = unsign_safe_str_tuple(
|
|
300
294
|
obj_key, self.digest_len)
|
|
301
|
-
|
|
302
|
-
|
|
295
|
+
to_return.append(key_to_return)
|
|
296
|
+
|
|
297
|
+
if "values" in result_type:
|
|
298
|
+
value_to_return = self[obj_key]
|
|
299
|
+
to_return.append(value_to_return)
|
|
300
|
+
|
|
301
|
+
if len(result_type) == 1:
|
|
302
|
+
yield to_return[0]
|
|
303
303
|
else:
|
|
304
|
-
|
|
305
|
-
|
|
304
|
+
if "timestamps" in result_type:
|
|
305
|
+
timestamp_to_return = key["LastModified"].timestamp()
|
|
306
|
+
to_return.append(timestamp_to_return)
|
|
307
|
+
yield tuple(to_return)
|
|
306
308
|
|
|
307
309
|
return step()
|
|
308
310
|
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import string
|
|
2
|
-
from copy import deepcopy
|
|
3
2
|
|
|
4
3
|
SAFE_CHARS_SET = set(string.ascii_letters + string.digits + "()_-~.=")
|
|
5
4
|
SAFE_STRING_MAX_LENGTH = 254
|
|
6
5
|
|
|
7
6
|
def get_safe_chars() -> set[str]:
|
|
8
7
|
"""Return a set of allowed characters."""
|
|
9
|
-
return deepcopy(SAFE_CHARS_SET)
|
|
8
|
+
return SAFE_CHARS_SET.copy()
|
|
10
9
|
|
|
11
10
|
def replace_unsafe_chars(a_str:str, replace_with:str) -> str :
|
|
12
11
|
""" Replace unsafe (special) characters with allowed (safe) ones."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|