persidict 0.37.0__py3-none-any.whl → 0.37.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- persidict/file_dir_dict.py +201 -78
- persidict/persi_dict.py +27 -3
- persidict/s3_dict.py +7 -9
- persidict/safe_str_tuple.py +0 -2
- persidict/write_once_dict.py +0 -1
- {persidict-0.37.0.dist-info → persidict-0.37.2.dist-info}/METADATA +1 -1
- persidict-0.37.2.dist-info/RECORD +14 -0
- persidict-0.37.0.dist-info/RECORD +0 -14
- {persidict-0.37.0.dist-info → persidict-0.37.2.dist-info}/WHEEL +0 -0
persidict/file_dir_dict.py
CHANGED
|
@@ -23,13 +23,70 @@ import jsonpickle.ext.pandas as jsonpickle_pandas
|
|
|
23
23
|
import parameterizable
|
|
24
24
|
from parameterizable import sort_dict_by_keys
|
|
25
25
|
|
|
26
|
-
from .jokers import
|
|
26
|
+
from .jokers import Joker
|
|
27
27
|
from .safe_chars import replace_unsafe_chars
|
|
28
28
|
from .safe_str_tuple import SafeStrTuple
|
|
29
29
|
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
30
|
-
from .persi_dict import PersiDict, PersiDictKey
|
|
30
|
+
from .persi_dict import PersiDict, PersiDictKey, non_empty_persidict_key
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
if os.name == 'nt':
|
|
34
|
+
import msvcrt
|
|
35
|
+
import ctypes
|
|
36
|
+
from ctypes import wintypes
|
|
37
|
+
|
|
38
|
+
GENERIC_READ = 0x80000000
|
|
39
|
+
FILE_SHARE_READ = 0x00000001
|
|
40
|
+
FILE_SHARE_WRITE = 0x00000002
|
|
41
|
+
FILE_SHARE_DELETE = 0x00000004
|
|
42
|
+
OPEN_EXISTING = 3
|
|
43
|
+
INVALID_HANDLE_VALUE = -1
|
|
44
|
+
|
|
45
|
+
CreateFileW = ctypes.windll.kernel32.CreateFileW
|
|
46
|
+
CreateFileW.argtypes = [wintypes.LPWSTR, wintypes.DWORD, wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD, wintypes.DWORD, wintypes.HANDLE]
|
|
47
|
+
CreateFileW.restype = wintypes.HANDLE
|
|
48
|
+
|
|
49
|
+
CloseHandle = ctypes.windll.kernel32.CloseHandle
|
|
50
|
+
CloseHandle.argtypes = [wintypes.HANDLE]
|
|
51
|
+
CloseHandle.restype = wintypes.BOOL
|
|
52
|
+
|
|
53
|
+
def add_long_path_prefix(path: str) -> str:
|
|
54
|
+
"""Add the '\\\\?\\' prefix to a path on Windows to support long paths.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
path (str): The original file or directory path.
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
str: The modified path with the '\\\\?\\' prefix if on Windows
|
|
61
|
+
and not already present; otherwise, the original path.
|
|
62
|
+
"""
|
|
63
|
+
if not path.startswith('\\\\?\\'):
|
|
64
|
+
return f'\\\\?\\{path}'
|
|
65
|
+
else:
|
|
66
|
+
return path
|
|
67
|
+
|
|
68
|
+
def drop_long_path_prefix(path: str) -> str:
|
|
69
|
+
"""Remove the '\\\\?\\' prefix from a path on Windows if present.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
path (str): The file or directory path, possibly with the '\\\\?\\' prefix.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
str: The path without the '\\\\?\\' prefix if it was present; otherwise,
|
|
76
|
+
the original path.
|
|
77
|
+
"""
|
|
78
|
+
if path.startswith('\\\\?\\'):
|
|
79
|
+
return path[4:]
|
|
80
|
+
else:
|
|
81
|
+
return path
|
|
82
|
+
|
|
83
|
+
else:
|
|
84
|
+
def add_long_path_prefix(path: str) -> str:
|
|
85
|
+
return path
|
|
86
|
+
|
|
87
|
+
def drop_long_path_prefix(path: str) -> str:
|
|
88
|
+
return path
|
|
89
|
+
|
|
33
90
|
jsonpickle_numpy.register_handlers()
|
|
34
91
|
jsonpickle_pandas.register_handlers()
|
|
35
92
|
|
|
@@ -95,17 +152,16 @@ class FileDirDict(PersiDict):
|
|
|
95
152
|
raise ValueError("For non-string values file_type must be either 'pkl' or 'json'.")
|
|
96
153
|
|
|
97
154
|
base_dir = str(base_dir)
|
|
155
|
+
self._base_dir = os.path.abspath(base_dir)
|
|
156
|
+
self._base_dir = add_long_path_prefix(self._base_dir)
|
|
98
157
|
|
|
99
|
-
if os.path.isfile(
|
|
158
|
+
if os.path.isfile(self._base_dir):
|
|
100
159
|
raise ValueError(f"{base_dir} is a file, not a directory.")
|
|
101
160
|
|
|
102
|
-
os.makedirs(
|
|
103
|
-
if not os.path.isdir(
|
|
161
|
+
os.makedirs(self._base_dir, exist_ok=True)
|
|
162
|
+
if not os.path.isdir(self._base_dir):
|
|
104
163
|
raise RuntimeError(f"Failed to create or access directory: {base_dir}")
|
|
105
164
|
|
|
106
|
-
# self.base_dir_param = _base_dir
|
|
107
|
-
self._base_dir = os.path.abspath(base_dir)
|
|
108
|
-
|
|
109
165
|
|
|
110
166
|
def get_params(self):
|
|
111
167
|
"""Return configuration parameters of the dictionary.
|
|
@@ -135,8 +191,7 @@ class FileDirDict(PersiDict):
|
|
|
135
191
|
Returns:
|
|
136
192
|
str: URL of the underlying storage in the form "file://<abs_path>".
|
|
137
193
|
"""
|
|
138
|
-
return pathlib.Path(self._base_dir).as_uri()
|
|
139
|
-
|
|
194
|
+
return pathlib.Path(self._base_dir).resolve().as_uri()
|
|
140
195
|
|
|
141
196
|
|
|
142
197
|
@property
|
|
@@ -148,7 +203,7 @@ class FileDirDict(PersiDict):
|
|
|
148
203
|
Returns:
|
|
149
204
|
str: Absolute path to the base directory used by this dictionary.
|
|
150
205
|
"""
|
|
151
|
-
return self._base_dir
|
|
206
|
+
return drop_long_path_prefix(self._base_dir)
|
|
152
207
|
|
|
153
208
|
|
|
154
209
|
def __len__(self) -> int:
|
|
@@ -221,69 +276,77 @@ class FileDirDict(PersiDict):
|
|
|
221
276
|
the key prefix.
|
|
222
277
|
|
|
223
278
|
Returns:
|
|
224
|
-
str: An absolute path within base_dir corresponding to the key.
|
|
279
|
+
str: An absolute path within base_dir corresponding to the key. On
|
|
280
|
+
Windows, this path is prefixed with '\\\\?\\' to support paths
|
|
281
|
+
longer than 260 characters.
|
|
225
282
|
"""
|
|
226
283
|
|
|
227
284
|
key = sign_safe_str_tuple(key, self.digest_len)
|
|
228
|
-
|
|
229
|
-
dir_names =
|
|
285
|
+
key_components = [self._base_dir] + list(key.strings)
|
|
286
|
+
dir_names = key_components[:-1] if is_file_path else key_components
|
|
287
|
+
|
|
288
|
+
dir_path = str(os.path.join(*dir_names))
|
|
230
289
|
|
|
231
290
|
if create_subdirs:
|
|
232
|
-
|
|
233
|
-
|
|
291
|
+
path_for_makedirs = dir_path
|
|
292
|
+
path_for_makedirs = add_long_path_prefix(path_for_makedirs)
|
|
293
|
+
os.makedirs(path_for_makedirs, exist_ok=True)
|
|
234
294
|
|
|
235
295
|
if is_file_path:
|
|
236
|
-
file_name =
|
|
237
|
-
|
|
296
|
+
file_name = key_components[-1] + "." + self.file_type
|
|
297
|
+
final_path = os.path.join(dir_path, file_name)
|
|
238
298
|
else:
|
|
239
|
-
|
|
299
|
+
final_path = dir_path
|
|
240
300
|
|
|
301
|
+
return add_long_path_prefix(final_path)
|
|
241
302
|
|
|
242
|
-
def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
|
|
243
|
-
"""Convert an absolute filesystem path back into a SafeStrTuple key.
|
|
244
303
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
components according to digest_len.
|
|
304
|
+
def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
|
|
305
|
+
"""Convert an absolute filesystem path back into a SafeStrTuple key.
|
|
248
306
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
307
|
+
This function reverses _build_full_path, stripping base_dir, removing the
|
|
308
|
+
file_type extension if the path points to a file, and unsigning the key
|
|
309
|
+
components according to digest_len.
|
|
252
310
|
|
|
253
|
-
|
|
254
|
-
|
|
311
|
+
Args:
|
|
312
|
+
full_path (str): Absolute path within the dictionary's base
|
|
313
|
+
directory.
|
|
255
314
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
"""
|
|
315
|
+
Returns:
|
|
316
|
+
SafeStrTuple: The reconstructed (unsigned) key.
|
|
259
317
|
|
|
260
|
-
|
|
261
|
-
|
|
318
|
+
Raises:
|
|
319
|
+
ValueError: If full_path is not located under base_dir.
|
|
320
|
+
"""
|
|
262
321
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
322
|
+
# Remove the base directory from the path
|
|
323
|
+
if not full_path.startswith(self._base_dir):
|
|
324
|
+
raise ValueError(f"Path {full_path} is not within base directory {self._base_dir}")
|
|
266
325
|
|
|
267
|
-
|
|
268
|
-
|
|
326
|
+
# Get the relative path
|
|
327
|
+
rel_path = os.path.relpath(
|
|
328
|
+
drop_long_path_prefix(full_path),
|
|
329
|
+
drop_long_path_prefix(self._base_dir))
|
|
330
|
+
rel_path = os.path.normpath(rel_path)
|
|
269
331
|
|
|
270
|
-
|
|
271
|
-
|
|
332
|
+
if not rel_path or rel_path == ".":
|
|
333
|
+
return SafeStrTuple()
|
|
272
334
|
|
|
273
|
-
|
|
274
|
-
|
|
335
|
+
# Split the path into components
|
|
336
|
+
path_components = rel_path.split(os.sep)
|
|
275
337
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
338
|
+
# If it's a file path, remove the file extension from the last component
|
|
339
|
+
suffix = "." + self.file_type
|
|
340
|
+
if path_components[-1].endswith(suffix):
|
|
341
|
+
path_components[-1] = path_components[-1][:-len(suffix)]
|
|
279
342
|
|
|
280
|
-
|
|
281
|
-
|
|
343
|
+
# Create a SafeStrTuple from the path components
|
|
344
|
+
key = SafeStrTuple(*path_components)
|
|
282
345
|
|
|
283
|
-
|
|
284
|
-
|
|
346
|
+
# Unsign the key
|
|
347
|
+
key = unsign_safe_str_tuple(key, self.digest_len)
|
|
285
348
|
|
|
286
|
-
|
|
349
|
+
return key
|
|
287
350
|
|
|
288
351
|
|
|
289
352
|
def get_subdict(self, key:PersiDictKey) -> FileDirDict:
|
|
@@ -302,7 +365,9 @@ class FileDirDict(PersiDict):
|
|
|
302
365
|
"""
|
|
303
366
|
key = SafeStrTuple(key)
|
|
304
367
|
full_dir_path = self._build_full_path(
|
|
305
|
-
key,
|
|
368
|
+
key,
|
|
369
|
+
create_subdirs = True,
|
|
370
|
+
is_file_path = False)
|
|
306
371
|
return FileDirDict(
|
|
307
372
|
base_dir= full_dir_path
|
|
308
373
|
, file_type=self.file_type
|
|
@@ -320,17 +385,50 @@ class FileDirDict(PersiDict):
|
|
|
320
385
|
Returns:
|
|
321
386
|
Any: The deserialized value according to file_type.
|
|
322
387
|
"""
|
|
388
|
+
file_open_mode = 'rb' if self.file_type == "pkl" else 'r'
|
|
389
|
+
if os.name == 'nt':
|
|
390
|
+
handle = CreateFileW(file_name, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE | FILE_SHARE_WRITE, None, OPEN_EXISTING, 0, None)
|
|
391
|
+
if int(handle) == INVALID_HANDLE_VALUE:
|
|
392
|
+
error_code = ctypes.GetLastError()
|
|
393
|
+
raise ctypes.WinError(error_code)
|
|
394
|
+
|
|
395
|
+
fd = None
|
|
396
|
+
try:
|
|
397
|
+
if self.file_type == "pkl":
|
|
398
|
+
fd_open_mode = os.O_RDONLY | os.O_BINARY
|
|
399
|
+
else:
|
|
400
|
+
fd_open_mode = os.O_RDONLY
|
|
401
|
+
fd = msvcrt.open_osfhandle(int(handle),fd_open_mode)
|
|
402
|
+
except Exception:
|
|
403
|
+
CloseHandle(handle)
|
|
404
|
+
raise
|
|
323
405
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
406
|
+
try:
|
|
407
|
+
f = os.fdopen(fd, file_open_mode)
|
|
408
|
+
fd = None
|
|
409
|
+
except Exception:
|
|
410
|
+
if fd is not None:
|
|
411
|
+
os.close(fd)
|
|
412
|
+
raise
|
|
413
|
+
|
|
414
|
+
with f:
|
|
415
|
+
if self.file_type == "pkl":
|
|
416
|
+
result = joblib.load(f)
|
|
417
|
+
elif self.file_type == "json":
|
|
418
|
+
result = jsonpickle.loads(f.read())
|
|
419
|
+
else:
|
|
420
|
+
result = f.read()
|
|
421
|
+
|
|
422
|
+
return result
|
|
330
423
|
else:
|
|
331
|
-
with open(file_name,
|
|
332
|
-
|
|
333
|
-
|
|
424
|
+
with open(file_name, file_open_mode) as f:
|
|
425
|
+
if self.file_type == "pkl":
|
|
426
|
+
result = joblib.load(f)
|
|
427
|
+
elif self.file_type == "json":
|
|
428
|
+
result = jsonpickle.loads(f.read())
|
|
429
|
+
else:
|
|
430
|
+
result = f.read()
|
|
431
|
+
return result
|
|
334
432
|
|
|
335
433
|
|
|
336
434
|
def _read_from_file(self,file_name:str) -> Any:
|
|
@@ -356,14 +454,14 @@ class FileDirDict(PersiDict):
|
|
|
356
454
|
raise ValueError("When base_class_for_values is not str,"
|
|
357
455
|
+ " file_type must be pkl or json.")
|
|
358
456
|
|
|
359
|
-
n_retries =
|
|
457
|
+
n_retries = 12
|
|
360
458
|
# extra protections to better handle concurrent writes
|
|
361
459
|
for i in range(n_retries):
|
|
362
460
|
try:
|
|
363
461
|
return self._read_from_file_impl(file_name)
|
|
364
462
|
except Exception as e:
|
|
365
463
|
if i < n_retries - 1:
|
|
366
|
-
time.sleep(random.uniform(0.01, 0.
|
|
464
|
+
time.sleep(random.uniform(0.01, 0.2) * (1.5 ** i))
|
|
367
465
|
else:
|
|
368
466
|
raise e
|
|
369
467
|
|
|
@@ -407,12 +505,36 @@ class FileDirDict(PersiDict):
|
|
|
407
505
|
os.fsync(dir_fd)
|
|
408
506
|
finally:
|
|
409
507
|
os.close(dir_fd)
|
|
508
|
+
elif os.name == 'nt':
|
|
509
|
+
# On Windows, try to flush directory metadata
|
|
510
|
+
# This is less reliable than on POSIX systems
|
|
511
|
+
try:
|
|
512
|
+
handle = CreateFileW(
|
|
513
|
+
dir_name,
|
|
514
|
+
GENERIC_READ,
|
|
515
|
+
FILE_SHARE_READ | FILE_SHARE_WRITE,
|
|
516
|
+
None,
|
|
517
|
+
OPEN_EXISTING,
|
|
518
|
+
0x02000000, # FILE_FLAG_BACKUP_SEMANTICS (needed for directories)
|
|
519
|
+
None
|
|
520
|
+
)
|
|
521
|
+
if int(handle) != INVALID_HANDLE_VALUE:
|
|
522
|
+
try:
|
|
523
|
+
kernel32 = ctypes.windll.kernel32
|
|
524
|
+
kernel32.FlushFileBuffers(handle)
|
|
525
|
+
finally:
|
|
526
|
+
CloseHandle(handle)
|
|
527
|
+
except:
|
|
528
|
+
pass
|
|
529
|
+
|
|
410
530
|
except OSError:
|
|
411
531
|
pass
|
|
412
532
|
|
|
413
533
|
except:
|
|
414
|
-
|
|
415
|
-
|
|
534
|
+
try:
|
|
535
|
+
os.remove(temp_path)
|
|
536
|
+
finally:
|
|
537
|
+
raise
|
|
416
538
|
|
|
417
539
|
def _save_to_file(self, file_name:str, value:Any) -> None:
|
|
418
540
|
"""Save a value to a file with retry/backoff.
|
|
@@ -435,7 +557,7 @@ class FileDirDict(PersiDict):
|
|
|
435
557
|
raise ValueError("When base_class_for_values is not str,"
|
|
436
558
|
+ " file_type must be pkl or json.")
|
|
437
559
|
|
|
438
|
-
n_retries =
|
|
560
|
+
n_retries = 12
|
|
439
561
|
# extra protections to better handle concurrent writes
|
|
440
562
|
for i in range(n_retries):
|
|
441
563
|
try:
|
|
@@ -443,7 +565,7 @@ class FileDirDict(PersiDict):
|
|
|
443
565
|
return
|
|
444
566
|
except Exception as e:
|
|
445
567
|
if i < n_retries - 1:
|
|
446
|
-
time.sleep(random.uniform(0.01, 0.
|
|
568
|
+
time.sleep(random.uniform(0.01, 0.2) * (1.5 ** i))
|
|
447
569
|
else:
|
|
448
570
|
raise e
|
|
449
571
|
|
|
@@ -457,7 +579,7 @@ class FileDirDict(PersiDict):
|
|
|
457
579
|
Returns:
|
|
458
580
|
bool: True if a file for the key exists; False otherwise.
|
|
459
581
|
"""
|
|
460
|
-
key =
|
|
582
|
+
key = non_empty_persidict_key(key)
|
|
461
583
|
filename = self._build_full_path(key)
|
|
462
584
|
return os.path.isfile(filename)
|
|
463
585
|
|
|
@@ -479,7 +601,7 @@ class FileDirDict(PersiDict):
|
|
|
479
601
|
TypeError: If the deserialized value does not match base_class_for_values
|
|
480
602
|
when it is set.
|
|
481
603
|
"""
|
|
482
|
-
key =
|
|
604
|
+
key = non_empty_persidict_key(key)
|
|
483
605
|
filename = self._build_full_path(key)
|
|
484
606
|
if not os.path.isfile(filename):
|
|
485
607
|
raise KeyError(f"File {filename} does not exist")
|
|
@@ -510,7 +632,7 @@ class FileDirDict(PersiDict):
|
|
|
510
632
|
base_class_for_values when it is set.
|
|
511
633
|
"""
|
|
512
634
|
|
|
513
|
-
key =
|
|
635
|
+
key = non_empty_persidict_key(key)
|
|
514
636
|
PersiDict.__setitem__(self, key, value)
|
|
515
637
|
if isinstance(value, Joker):
|
|
516
638
|
# processed by base class
|
|
@@ -529,8 +651,7 @@ class FileDirDict(PersiDict):
|
|
|
529
651
|
Raises:
|
|
530
652
|
KeyError: If immutable_items is True or if the key does not exist.
|
|
531
653
|
"""
|
|
532
|
-
key =
|
|
533
|
-
PersiDict.__delitem__(self, key)
|
|
654
|
+
key = non_empty_persidict_key(key)
|
|
534
655
|
filename = self._build_full_path(key)
|
|
535
656
|
if not os.path.isfile(filename):
|
|
536
657
|
raise KeyError(f"File {filename} does not exist")
|
|
@@ -584,7 +705,8 @@ class FileDirDict(PersiDict):
|
|
|
584
705
|
for f in files:
|
|
585
706
|
if f.endswith(suffix):
|
|
586
707
|
prefix_key = os.path.relpath(
|
|
587
|
-
dir_name,
|
|
708
|
+
drop_long_path_prefix(dir_name),
|
|
709
|
+
start=drop_long_path_prefix(self._base_dir))
|
|
588
710
|
|
|
589
711
|
result_key = (*splitter(prefix_key), f[:-ext_len])
|
|
590
712
|
result_key = SafeStrTuple(result_key)
|
|
@@ -627,7 +749,7 @@ class FileDirDict(PersiDict):
|
|
|
627
749
|
Raises:
|
|
628
750
|
FileNotFoundError: If the key does not exist.
|
|
629
751
|
"""
|
|
630
|
-
key =
|
|
752
|
+
key = non_empty_persidict_key(key)
|
|
631
753
|
filename = self._build_full_path(key)
|
|
632
754
|
return os.path.getmtime(filename)
|
|
633
755
|
|
|
@@ -645,7 +767,7 @@ class FileDirDict(PersiDict):
|
|
|
645
767
|
# canonicalise extension once
|
|
646
768
|
ext = None
|
|
647
769
|
if self.file_type:
|
|
648
|
-
ext = self.file_type
|
|
770
|
+
ext = self.file_type
|
|
649
771
|
if not ext.startswith("."):
|
|
650
772
|
ext = "." + ext
|
|
651
773
|
|
|
@@ -663,7 +785,7 @@ class FileDirDict(PersiDict):
|
|
|
663
785
|
continue
|
|
664
786
|
|
|
665
787
|
# cheap name test before stat()
|
|
666
|
-
if ext and not ent.name.
|
|
788
|
+
if ext and not ent.name.endswith(ext):
|
|
667
789
|
continue
|
|
668
790
|
|
|
669
791
|
if ent.is_file(follow_symlinks=False):
|
|
@@ -676,7 +798,8 @@ class FileDirDict(PersiDict):
|
|
|
676
798
|
if winner is None:
|
|
677
799
|
return None
|
|
678
800
|
else:
|
|
679
|
-
|
|
680
|
-
|
|
801
|
+
winner = os.path.abspath(winner)
|
|
802
|
+
winner = add_long_path_prefix(winner)
|
|
803
|
+
return self._build_key_from_full_path(winner)
|
|
681
804
|
|
|
682
805
|
parameterizable.register_parameterizable_class(FileDirDict)
|
persidict/persi_dict.py
CHANGED
|
@@ -36,6 +36,24 @@ If a string (or a sequence of strings) is passed to a PersiDict as a key,
|
|
|
36
36
|
it will be automatically converted into SafeStrTuple.
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
|
+
|
|
40
|
+
def non_empty_persidict_key(*args) -> SafeStrTuple:
|
|
41
|
+
"""Create a non-empty SafeStrTuple from the given arguments.
|
|
42
|
+
This is a convenience function that ensures the resulting SafeStrTuple is
|
|
43
|
+
not empty, raising a KeyError if it is.
|
|
44
|
+
Args:
|
|
45
|
+
*args: Arguments to pass to SafeStrTuple constructor.
|
|
46
|
+
Returns:
|
|
47
|
+
SafeStrTuple: A non-empty SafeStrTuple instance.
|
|
48
|
+
Raises:
|
|
49
|
+
KeyError: If the resulting SafeStrTuple is empty.
|
|
50
|
+
"""
|
|
51
|
+
result = SafeStrTuple(*args)
|
|
52
|
+
if len(result) == 0:
|
|
53
|
+
raise KeyError("Key cannot be empty")
|
|
54
|
+
return result
|
|
55
|
+
|
|
56
|
+
|
|
39
57
|
class PersiDict(MutableMapping, ParameterizableClass):
|
|
40
58
|
"""Abstract dict-like interface for durable key-value stores.
|
|
41
59
|
|
|
@@ -209,8 +227,12 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
209
227
|
elif self.immutable_items:
|
|
210
228
|
if key in self:
|
|
211
229
|
raise KeyError("Can't modify an immutable key-value pair")
|
|
212
|
-
|
|
230
|
+
|
|
231
|
+
key = non_empty_persidict_key(key)
|
|
232
|
+
|
|
233
|
+
if value is DELETE_CURRENT:
|
|
213
234
|
self.delete_if_exists(key)
|
|
235
|
+
return
|
|
214
236
|
|
|
215
237
|
if self.base_class_for_values is not None:
|
|
216
238
|
if not isinstance(value, self.base_class_for_values):
|
|
@@ -237,7 +259,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
237
259
|
if type(self) is PersiDict:
|
|
238
260
|
raise NotImplementedError("PersiDict is an abstract base class"
|
|
239
261
|
" and cannot delete items directly")
|
|
240
|
-
|
|
262
|
+
|
|
263
|
+
key = non_empty_persidict_key(key)
|
|
264
|
+
|
|
241
265
|
if key not in self:
|
|
242
266
|
raise KeyError(f"Key {key} not found")
|
|
243
267
|
|
|
@@ -457,7 +481,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
457
481
|
if self.immutable_items:
|
|
458
482
|
raise KeyError("Can't delete an immutable key-value pair")
|
|
459
483
|
|
|
460
|
-
key =
|
|
484
|
+
key = non_empty_persidict_key(key)
|
|
461
485
|
|
|
462
486
|
if key in self:
|
|
463
487
|
try:
|
persidict/s3_dict.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import os
|
|
4
|
-
import tempfile
|
|
5
3
|
from typing import Any, Optional
|
|
6
4
|
|
|
7
5
|
import boto3
|
|
@@ -16,7 +14,7 @@ from .safe_str_tuple import SafeStrTuple
|
|
|
16
14
|
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
17
15
|
from .persi_dict import PersiDict
|
|
18
16
|
from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
|
|
19
|
-
from .file_dir_dict import FileDirDict, PersiDictKey
|
|
17
|
+
from .file_dir_dict import FileDirDict, PersiDictKey, non_empty_persidict_key
|
|
20
18
|
from .overlapping_multi_dict import OverlappingMultiDict
|
|
21
19
|
|
|
22
20
|
S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
|
|
@@ -199,7 +197,7 @@ class S3Dict(PersiDict):
|
|
|
199
197
|
str: The complete S3 object key including root_prefix and file_type
|
|
200
198
|
extension, with digest-based collision prevention applied if enabled.
|
|
201
199
|
"""
|
|
202
|
-
key =
|
|
200
|
+
key = non_empty_persidict_key(key)
|
|
203
201
|
key = sign_safe_str_tuple(key, self.digest_len)
|
|
204
202
|
objectname = self.root_prefix + "/".join(key)+ "." + self.file_type
|
|
205
203
|
return objectname
|
|
@@ -218,7 +216,7 @@ class S3Dict(PersiDict):
|
|
|
218
216
|
bool: True if the key exists in S3 (or local cache for immutable
|
|
219
217
|
items), False otherwise.
|
|
220
218
|
"""
|
|
221
|
-
key =
|
|
219
|
+
key = non_empty_persidict_key(key)
|
|
222
220
|
if self.immutable_items and key in self.main_cache:
|
|
223
221
|
return True
|
|
224
222
|
try:
|
|
@@ -251,7 +249,7 @@ class S3Dict(PersiDict):
|
|
|
251
249
|
KeyError: If the key does not exist in S3.
|
|
252
250
|
"""
|
|
253
251
|
|
|
254
|
-
key =
|
|
252
|
+
key = non_empty_persidict_key(key)
|
|
255
253
|
|
|
256
254
|
if self.immutable_items and key in self.main_cache:
|
|
257
255
|
return self.main_cache[key]
|
|
@@ -317,7 +315,7 @@ class S3Dict(PersiDict):
|
|
|
317
315
|
the required base_class_for_values when specified.
|
|
318
316
|
"""
|
|
319
317
|
|
|
320
|
-
key =
|
|
318
|
+
key = non_empty_persidict_key(key)
|
|
321
319
|
PersiDict.__setitem__(self, key, value)
|
|
322
320
|
if isinstance(value, Joker):
|
|
323
321
|
# Joker values (KEEP_CURRENT, DELETE_CURRENT) are handled by base class
|
|
@@ -351,7 +349,7 @@ class S3Dict(PersiDict):
|
|
|
351
349
|
Raises:
|
|
352
350
|
KeyError: If immutable_items is True, or if the key does not exist.
|
|
353
351
|
"""
|
|
354
|
-
key =
|
|
352
|
+
key = non_empty_persidict_key(key)
|
|
355
353
|
PersiDict.__delitem__(self, key)
|
|
356
354
|
obj_name = self._build_full_objectname(key)
|
|
357
355
|
self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
|
|
@@ -537,7 +535,7 @@ class S3Dict(PersiDict):
|
|
|
537
535
|
Raises:
|
|
538
536
|
KeyError: If the key does not exist in S3.
|
|
539
537
|
"""
|
|
540
|
-
key =
|
|
538
|
+
key = non_empty_persidict_key(key)
|
|
541
539
|
obj_name = self._build_full_objectname(key)
|
|
542
540
|
response = self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
|
|
543
541
|
return response["LastModified"].timestamp()
|
persidict/safe_str_tuple.py
CHANGED
|
@@ -98,8 +98,6 @@ class SafeStrTuple(Sequence, Hashable):
|
|
|
98
98
|
candidate_strings.extend(SafeStrTuple(*a).strings)
|
|
99
99
|
else:
|
|
100
100
|
raise TypeError(f"Invalid argument type: {type(a)}")
|
|
101
|
-
if len(candidate_strings) == 0:
|
|
102
|
-
raise ValueError("At least one non-empty valid string is required")
|
|
103
101
|
self.strings = tuple(candidate_strings)
|
|
104
102
|
|
|
105
103
|
@property
|
persidict/write_once_dict.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.37.
|
|
3
|
+
Version: 0.37.2
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
|
|
2
|
+
persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
|
|
3
|
+
persidict/file_dir_dict.py,sha256=Wll343YPkYGyg3mLaOLwBakYLBPNieqKdWpxORIIwwA,29548
|
|
4
|
+
persidict/jokers.py,sha256=gTu7g2l2MIgBc3-hjvUrcwcgWs6tcbLyxB0u57M3bfU,3012
|
|
5
|
+
persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
|
|
6
|
+
persidict/persi_dict.py,sha256=CKVHy8YELLRVgLWgo0Akbd8RznCVxqt8JHszIjqA_sI,23176
|
|
7
|
+
persidict/s3_dict.py,sha256=44jJPinE0bNHiCw2apFRzNZ_4IxIWOirIsLJqrObnuI,21428
|
|
8
|
+
persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
|
|
9
|
+
persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
|
|
10
|
+
persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
|
|
11
|
+
persidict/write_once_dict.py,sha256=Fthmpltm2yK3FmpbNGV7KQVLhsroQenxwavG8Z95Jts,11649
|
|
12
|
+
persidict-0.37.2.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
|
|
13
|
+
persidict-0.37.2.dist-info/METADATA,sha256=nJavGC2ccuT1wyD4egOVyAfYb2K6zbrAwvCJKfq33Cw,12387
|
|
14
|
+
persidict-0.37.2.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
|
|
2
|
-
persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
|
|
3
|
-
persidict/file_dir_dict.py,sha256=_ZGEQXmU5Sg4-PJOO4bYKhL0z6yYryVmce9lpML5OxQ,24766
|
|
4
|
-
persidict/jokers.py,sha256=gTu7g2l2MIgBc3-hjvUrcwcgWs6tcbLyxB0u57M3bfU,3012
|
|
5
|
-
persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
|
|
6
|
-
persidict/persi_dict.py,sha256=q0Xvq5PO5Lmx3Nwe-fbU3Klgyx39T8PMKcXYR7xduzg,22506
|
|
7
|
-
persidict/s3_dict.py,sha256=dYUTvGNqxIk3PpArn9uYbSv-4zzlRiPPYinYpTcJzSc,21363
|
|
8
|
-
persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
|
|
9
|
-
persidict/safe_str_tuple.py,sha256=oibohVs0xah3mSVl5aN0pQWiQeaz4jjWtEdoBSn-jac,7322
|
|
10
|
-
persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
|
|
11
|
-
persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
|
|
12
|
-
persidict-0.37.0.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
|
|
13
|
-
persidict-0.37.0.dist-info/METADATA,sha256=vCPprij19SxfnU6qWI9MNz78n6iT9bTUpMZsUT901mY,12387
|
|
14
|
-
persidict-0.37.0.dist-info/RECORD,,
|
|
File without changes
|