persidict 0.32.7__tar.gz → 0.34.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- {persidict-0.32.7 → persidict-0.34.1}/PKG-INFO +1 -2
- {persidict-0.32.7 → persidict-0.34.1}/pyproject.toml +1 -2
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/file_dir_dict.py +69 -68
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/s3_dict.py +106 -31
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/safe_chars.py +1 -2
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/safe_str_tuple.py +1 -1
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/write_once_dict.py +14 -11
- {persidict-0.32.7 → persidict-0.34.1}/README.md +0 -0
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/.DS_Store +0 -0
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/__init__.py +0 -0
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/jokers.py +0 -0
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/overlapping_multi_dict.py +0 -0
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/persi_dict.py +0 -0
- {persidict-0.32.7 → persidict-0.34.1}/src/persidict/safe_str_tuple_signing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.32.7
|
|
3
|
+
Version: 0.34.1
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -21,7 +21,6 @@ Requires-Dist: joblib
|
|
|
21
21
|
Requires-Dist: numpy
|
|
22
22
|
Requires-Dist: pandas
|
|
23
23
|
Requires-Dist: jsonpickle
|
|
24
|
-
Requires-Dist: joblib
|
|
25
24
|
Requires-Dist: deepdiff
|
|
26
25
|
Requires-Dist: boto3 ; extra == 'aws'
|
|
27
26
|
Requires-Dist: boto3 ; extra == 'dev'
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "persidict"
|
|
7
|
-
version = "0.32.7"
|
|
7
|
+
version = "0.34.1"
|
|
8
8
|
description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -31,7 +31,6 @@ dependencies = [
|
|
|
31
31
|
"numpy",
|
|
32
32
|
"pandas",
|
|
33
33
|
"jsonpickle",
|
|
34
|
-
"joblib",
|
|
35
34
|
"deepdiff"
|
|
36
35
|
]
|
|
37
36
|
|
|
@@ -11,6 +11,7 @@ from __future__ import annotations
|
|
|
11
11
|
|
|
12
12
|
import os
|
|
13
13
|
import random
|
|
14
|
+
import tempfile
|
|
14
15
|
import time
|
|
15
16
|
from typing import Any, Optional
|
|
16
17
|
|
|
@@ -79,6 +80,10 @@ class FileDirDict(PersiDict):
|
|
|
79
80
|
|
|
80
81
|
assert file_type == replace_unsafe_chars(file_type, "")
|
|
81
82
|
self.file_type = file_type
|
|
83
|
+
if self.file_type == "__etag__":
|
|
84
|
+
raise ValueError(
|
|
85
|
+
"file_type cannot be 'etag' as it is a reserved"
|
|
86
|
+
" extension for S3 caching.")
|
|
82
87
|
|
|
83
88
|
if (base_class_for_values is None or
|
|
84
89
|
not issubclass(base_class_for_values,str)):
|
|
@@ -90,13 +95,7 @@ class FileDirDict(PersiDict):
|
|
|
90
95
|
if os.path.isfile(base_dir):
|
|
91
96
|
raise ValueError(f"{base_dir} is a file, not a directory.")
|
|
92
97
|
|
|
93
|
-
|
|
94
|
-
if not os.path.isdir(base_dir):
|
|
95
|
-
os.mkdir(base_dir)
|
|
96
|
-
except:
|
|
97
|
-
time.sleep(random.random()/random.randint(1, 3))
|
|
98
|
-
if not os.path.isdir(base_dir):
|
|
99
|
-
os.mkdir(base_dir)
|
|
98
|
+
os.makedirs(base_dir, exist_ok=True)
|
|
100
99
|
assert os.path.isdir(base_dir)
|
|
101
100
|
|
|
102
101
|
# self.base_dir_param = _base_dir
|
|
@@ -137,25 +136,16 @@ class FileDirDict(PersiDict):
|
|
|
137
136
|
|
|
138
137
|
|
|
139
138
|
def __len__(self) -> int:
|
|
140
|
-
""" Get the number of key-value pairs in the dictionary.
|
|
141
|
-
|
|
142
|
-
num_files = 0
|
|
143
|
-
suffix = "." + self.file_type
|
|
144
|
-
stack = [self._base_dir]
|
|
139
|
+
""" Get the number of key-value pairs in the dictionary.
|
|
145
140
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
for entry in it:
|
|
151
|
-
if entry.is_dir(follow_symlinks=False):
|
|
152
|
-
stack.append(entry.path)
|
|
153
|
-
elif entry.is_file(follow_symlinks=False) and entry.name.endswith(suffix):
|
|
154
|
-
num_files += 1
|
|
155
|
-
except PermissionError:
|
|
156
|
-
continue
|
|
141
|
+
WARNING: This operation can be slow on large dictionaries as it
|
|
142
|
+
needs to recursively walk the entire base directory.
|
|
143
|
+
Avoid using it in performance-sensitive code.
|
|
144
|
+
"""
|
|
157
145
|
|
|
158
|
-
|
|
146
|
+
suffix = "." + self.file_type
|
|
147
|
+
return sum(1 for _, _, files in os.walk(self._base_dir)
|
|
148
|
+
for f in files if f.endswith(suffix))
|
|
159
149
|
|
|
160
150
|
|
|
161
151
|
def clear(self) -> None:
|
|
@@ -164,6 +154,9 @@ class FileDirDict(PersiDict):
|
|
|
164
154
|
if self.immutable_items:
|
|
165
155
|
raise KeyError("Can't clear a dict that contains immutable items")
|
|
166
156
|
|
|
157
|
+
# we can't use shutil.rmtree() because
|
|
158
|
+
# there may be overlapping dictionaries
|
|
159
|
+
# with different file_type-s
|
|
167
160
|
for subdir_info in os.walk(self._base_dir, topdown=False):
|
|
168
161
|
(subdir_name, _, files) = subdir_info
|
|
169
162
|
suffix = "." + self.file_type
|
|
@@ -174,6 +167,7 @@ class FileDirDict(PersiDict):
|
|
|
174
167
|
len(os.listdir(subdir_name)) == 0 ):
|
|
175
168
|
os.rmdir(subdir_name)
|
|
176
169
|
|
|
170
|
+
|
|
177
171
|
def _build_full_path(self
|
|
178
172
|
, key:SafeStrTuple
|
|
179
173
|
, create_subdirs:bool=False
|
|
@@ -185,17 +179,8 @@ class FileDirDict(PersiDict):
|
|
|
185
179
|
dir_names = key[:-1] if is_file_path else key
|
|
186
180
|
|
|
187
181
|
if create_subdirs:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
new_dir = os.path.join(current_dir, dir_name)
|
|
191
|
-
try: # extra protection to better handle concurrent access
|
|
192
|
-
if not os.path.isdir(new_dir):
|
|
193
|
-
os.mkdir(new_dir)
|
|
194
|
-
except:
|
|
195
|
-
time.sleep(random.random()/random.randint(1, 3))
|
|
196
|
-
if not os.path.isdir(new_dir):
|
|
197
|
-
os.mkdir(new_dir)
|
|
198
|
-
current_dir = new_dir
|
|
182
|
+
dir_path = os.path.join(*dir_names)
|
|
183
|
+
os.makedirs(dir_path, exist_ok=True)
|
|
199
184
|
|
|
200
185
|
if is_file_path:
|
|
201
186
|
file_name = key[-1] + "." + self.file_type
|
|
@@ -282,25 +267,50 @@ class FileDirDict(PersiDict):
|
|
|
282
267
|
for i in range(n_retries):
|
|
283
268
|
try:
|
|
284
269
|
return self._read_from_file_impl(file_name)
|
|
285
|
-
except:
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
270
|
+
except Exception as e:
|
|
271
|
+
if i < n_retries - 1:
|
|
272
|
+
time.sleep(random.uniform(0.01, 0.1) * (2 ** i))
|
|
273
|
+
else:
|
|
274
|
+
raise e
|
|
289
275
|
|
|
290
276
|
|
|
291
277
|
def _save_to_file_impl(self, file_name:str, value:Any) -> None:
|
|
292
278
|
"""Save a value to a file. """
|
|
293
279
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
280
|
+
dir_name = os.path.dirname(file_name)
|
|
281
|
+
# Use a temporary file and atomic rename to prevent data corruption
|
|
282
|
+
fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".__tmp__")
|
|
283
|
+
|
|
284
|
+
try:
|
|
285
|
+
if self.file_type == "pkl":
|
|
286
|
+
with open(fd, 'wb') as f:
|
|
287
|
+
joblib.dump(value, f, compress='lz4')
|
|
288
|
+
f.flush()
|
|
289
|
+
os.fsync(f.fileno())
|
|
290
|
+
elif self.file_type == "json":
|
|
291
|
+
with open(fd, 'w') as f:
|
|
292
|
+
f.write(jsonpickle.dumps(value, indent=4))
|
|
293
|
+
f.flush()
|
|
294
|
+
os.fsync(f.fileno())
|
|
295
|
+
else:
|
|
296
|
+
with open(fd, 'w') as f:
|
|
297
|
+
f.write(value)
|
|
298
|
+
f.flush()
|
|
299
|
+
os.fsync(f.fileno())
|
|
300
|
+
os.replace(temp_path, file_name)
|
|
301
|
+
try:
|
|
302
|
+
if os.name == 'posix':
|
|
303
|
+
dir_fd = os.open(dir_name, os.O_RDONLY)
|
|
304
|
+
try:
|
|
305
|
+
os.fsync(dir_fd)
|
|
306
|
+
finally:
|
|
307
|
+
os.close(dir_fd)
|
|
308
|
+
except OSError:
|
|
309
|
+
pass
|
|
303
310
|
|
|
311
|
+
except:
|
|
312
|
+
os.remove(temp_path)
|
|
313
|
+
raise
|
|
304
314
|
|
|
305
315
|
def _save_to_file(self, file_name:str, value:Any) -> None:
|
|
306
316
|
"""Save a value to a file. """
|
|
@@ -310,16 +320,17 @@ class FileDirDict(PersiDict):
|
|
|
310
320
|
raise ValueError("When base_class_for_values is not str,"
|
|
311
321
|
+ " file_type must be pkl or json.")
|
|
312
322
|
|
|
313
|
-
n_retries =
|
|
323
|
+
n_retries = 8
|
|
314
324
|
# extra protections to better handle concurrent writes
|
|
315
325
|
for i in range(n_retries):
|
|
316
|
-
try:
|
|
326
|
+
try:
|
|
317
327
|
self._save_to_file_impl(file_name, value)
|
|
318
328
|
return
|
|
319
|
-
except:
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
329
|
+
except Exception as e:
|
|
330
|
+
if i < n_retries - 1:
|
|
331
|
+
time.sleep(random.uniform(0.01, 0.1) * (2 ** i))
|
|
332
|
+
else:
|
|
333
|
+
raise e
|
|
323
334
|
|
|
324
335
|
|
|
325
336
|
def __contains__(self, key:PersiDictKey) -> bool:
|
|
@@ -394,16 +405,9 @@ class FileDirDict(PersiDict):
|
|
|
394
405
|
|
|
395
406
|
def splitter(dir_path: str):
|
|
396
407
|
"""Transform a dirname into a PersiDictKey key"""
|
|
397
|
-
splitted_str = []
|
|
398
408
|
if dir_path == ".":
|
|
399
|
-
return
|
|
400
|
-
|
|
401
|
-
head, tail = os.path.split(dir_path)
|
|
402
|
-
splitted_str = [tail] + splitted_str
|
|
403
|
-
dir_path = head
|
|
404
|
-
if len(head) == 0:
|
|
405
|
-
break
|
|
406
|
-
return tuple(splitted_str)
|
|
409
|
+
return []
|
|
410
|
+
return dir_path.split(os.sep)
|
|
407
411
|
|
|
408
412
|
def step():
|
|
409
413
|
suffix = "." + self.file_type
|
|
@@ -424,7 +428,8 @@ class FileDirDict(PersiDict):
|
|
|
424
428
|
to_return.append(key_to_return)
|
|
425
429
|
|
|
426
430
|
if "values" in result_type:
|
|
427
|
-
|
|
431
|
+
full_path = os.path.join(dir_name, f)
|
|
432
|
+
value_to_return = self._read_from_file(full_path)
|
|
428
433
|
to_return.append(value_to_return)
|
|
429
434
|
|
|
430
435
|
if len(result_type) == 1:
|
|
@@ -451,7 +456,6 @@ class FileDirDict(PersiDict):
|
|
|
451
456
|
|
|
452
457
|
def random_key(self) -> PersiDictKey | None:
|
|
453
458
|
# canonicalise extension once
|
|
454
|
-
early_exit_cap = 10_000
|
|
455
459
|
ext = None
|
|
456
460
|
if self.file_type:
|
|
457
461
|
ext = self.file_type.lower()
|
|
@@ -479,9 +483,6 @@ class FileDirDict(PersiDict):
|
|
|
479
483
|
seen += 1
|
|
480
484
|
if random.random() < 1 / seen: # reservoir k=1
|
|
481
485
|
winner = ent.path
|
|
482
|
-
# early‑exit when cap reached
|
|
483
|
-
if early_exit_cap and seen >= early_exit_cap:
|
|
484
|
-
return self._build_key_from_full_path(os.path.abspath(winner))
|
|
485
486
|
except PermissionError:
|
|
486
487
|
continue
|
|
487
488
|
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import tempfile
|
|
4
5
|
from typing import Any, Optional
|
|
5
6
|
|
|
6
7
|
import boto3
|
|
8
|
+
from botocore.exceptions import ClientError
|
|
9
|
+
|
|
7
10
|
import parameterizable
|
|
8
11
|
from parameterizable.dict_sorter import sort_dict_by_keys
|
|
9
12
|
|
|
@@ -60,16 +63,19 @@ class S3Dict(PersiDict):
|
|
|
60
63
|
check types of values in the dictionary. If not specified,
|
|
61
64
|
no type checking will be performed and all types will be allowed.
|
|
62
65
|
|
|
63
|
-
file_type is extension, which will be used for all files in the dictionary.
|
|
66
|
+
file_type is an extension, which will be used for all files in the dictionary.
|
|
64
67
|
If file_type has one of two values: "lz4" or "json", it defines
|
|
65
68
|
which file format will be used by FileDirDict to store values.
|
|
66
69
|
For all other values of file_type, the file format will always be plain
|
|
67
|
-
text. "lz4" or "json" allow
|
|
70
|
+
text. "lz4" or "json" allow storing arbitrary Python objects,
|
|
68
71
|
while all other file_type-s only work with str objects.
|
|
69
72
|
"""
|
|
70
73
|
|
|
71
74
|
super().__init__(immutable_items = immutable_items, digest_len = 0)
|
|
72
75
|
self.file_type = file_type
|
|
76
|
+
if self.file_type == "__etag__":
|
|
77
|
+
raise ValueError(
|
|
78
|
+
"file_type cannot be 'etag' as it is a reserved extension for caching.")
|
|
73
79
|
|
|
74
80
|
self.local_cache = FileDirDict(
|
|
75
81
|
base_dir= base_dir
|
|
@@ -152,26 +158,89 @@ class S3Dict(PersiDict):
|
|
|
152
158
|
return False
|
|
153
159
|
|
|
154
160
|
|
|
161
|
+
def _write_etag_file(self, file_name: str, etag: str):
|
|
162
|
+
"""Atomically write the ETag to its cache file."""
|
|
163
|
+
if not etag:
|
|
164
|
+
return
|
|
165
|
+
etag_file_name = file_name + ".__etag__"
|
|
166
|
+
dir_name = os.path.dirname(etag_file_name)
|
|
167
|
+
# Write to a temporary file and then rename for atomicity
|
|
168
|
+
fd, temp_path = tempfile.mkstemp(dir=dir_name)
|
|
169
|
+
try:
|
|
170
|
+
with os.fdopen(fd, "w") as f:
|
|
171
|
+
f.write(etag)
|
|
172
|
+
f.flush()
|
|
173
|
+
os.fsync(f.fileno())
|
|
174
|
+
os.replace(temp_path, etag_file_name)
|
|
175
|
+
try:
|
|
176
|
+
if os.name == 'posix':
|
|
177
|
+
dir_fd = os.open(dir_name, os.O_RDONLY)
|
|
178
|
+
try:
|
|
179
|
+
os.fsync(dir_fd)
|
|
180
|
+
finally:
|
|
181
|
+
os.close(dir_fd)
|
|
182
|
+
except OSError:
|
|
183
|
+
pass
|
|
184
|
+
except:
|
|
185
|
+
os.remove(temp_path)
|
|
186
|
+
raise
|
|
187
|
+
|
|
188
|
+
|
|
155
189
|
def __getitem__(self, key:PersiDictKey) -> Any:
|
|
156
190
|
"""X.__getitem__(y) is an equivalent to X[y]. """
|
|
157
191
|
|
|
158
192
|
key = SafeStrTuple(key)
|
|
159
193
|
file_name = self.local_cache._build_full_path(key, create_subdirs=True)
|
|
160
194
|
|
|
161
|
-
if self.immutable_items:
|
|
195
|
+
if self.immutable_items and os.path.exists(file_name):
|
|
196
|
+
return self.local_cache._read_from_file(file_name)
|
|
197
|
+
|
|
198
|
+
obj_name = self._build_full_objectname(key)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
head = self.s3_client.head_object(
|
|
203
|
+
Bucket=self.bucket_name, Key=obj_name)
|
|
204
|
+
s3_etag = head.get("ETag")
|
|
205
|
+
except ClientError as e:
|
|
206
|
+
if e.response['Error']['Code'] == '404':
|
|
207
|
+
raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
|
|
208
|
+
else:
|
|
209
|
+
# Re-raise other client errors (e.g., permissions, throttling)
|
|
210
|
+
raise
|
|
211
|
+
|
|
212
|
+
etag_file_name = file_name + ".__etag__"
|
|
213
|
+
if not self.immutable_items and os.path.exists(file_name) and os.path.exists(etag_file_name):
|
|
214
|
+
with open(etag_file_name, "r") as f:
|
|
215
|
+
cached_etag = f.read()
|
|
216
|
+
if cached_etag == s3_etag:
|
|
217
|
+
return self.local_cache._read_from_file(file_name)
|
|
218
|
+
|
|
219
|
+
dir_name = os.path.dirname(file_name)
|
|
220
|
+
fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".__tmp__")
|
|
221
|
+
|
|
222
|
+
try:
|
|
223
|
+
with os.fdopen(fd, 'wb') as f:
|
|
224
|
+
self.s3_client.download_fileobj(self.bucket_name, obj_name, f)
|
|
225
|
+
f.flush()
|
|
226
|
+
os.fsync(f.fileno())
|
|
227
|
+
os.replace(temp_path, file_name)
|
|
162
228
|
try:
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
229
|
+
if os.name == 'posix':
|
|
230
|
+
dir_fd = os.open(dir_name, os.O_RDONLY)
|
|
231
|
+
try:
|
|
232
|
+
os.fsync(dir_fd)
|
|
233
|
+
finally:
|
|
234
|
+
os.close(dir_fd)
|
|
235
|
+
except OSError:
|
|
166
236
|
pass
|
|
237
|
+
except:
|
|
238
|
+
os.remove(temp_path) # Clean up temp file on failure
|
|
239
|
+
raise
|
|
167
240
|
|
|
168
|
-
|
|
169
|
-
self.s3_client.download_file(self.bucket_name, obj_name, file_name)
|
|
170
|
-
result = self.local_cache._read_from_file(file_name)
|
|
171
|
-
if not self.immutable_items:
|
|
172
|
-
os.remove(file_name)
|
|
241
|
+
self._write_etag_file(file_name, s3_etag)
|
|
173
242
|
|
|
174
|
-
return
|
|
243
|
+
return self.local_cache._read_from_file(file_name)
|
|
175
244
|
|
|
176
245
|
|
|
177
246
|
def __setitem__(self, key:PersiDictKey, value:Any):
|
|
@@ -196,28 +265,27 @@ class S3Dict(PersiDict):
|
|
|
196
265
|
+ f"but it is {type(value)} instead." )
|
|
197
266
|
|
|
198
267
|
key = SafeStrTuple(key)
|
|
199
|
-
file_name = self.local_cache._build_full_path(key, create_subdirs=True)
|
|
200
|
-
obj_name = self._build_full_objectname(key)
|
|
201
268
|
|
|
202
|
-
if self.immutable_items:
|
|
203
|
-
|
|
204
|
-
if os.path.exists(file_name):
|
|
205
|
-
key_is_present = True
|
|
206
|
-
else:
|
|
207
|
-
try:
|
|
208
|
-
self.s3_client.head_object(
|
|
209
|
-
Bucket=self.bucket_name, Key=obj_name)
|
|
210
|
-
key_is_present = True
|
|
211
|
-
except:
|
|
212
|
-
key_is_present = False
|
|
269
|
+
if self.immutable_items and key in self:
|
|
270
|
+
raise KeyError("Can't modify an immutable item")
|
|
213
271
|
|
|
214
|
-
|
|
215
|
-
|
|
272
|
+
file_name = self.local_cache._build_full_path(key, create_subdirs=True)
|
|
273
|
+
obj_name = self._build_full_objectname(key)
|
|
216
274
|
|
|
217
275
|
self.local_cache._save_to_file(file_name, value)
|
|
218
276
|
self.s3_client.upload_file(file_name, self.bucket_name, obj_name)
|
|
219
|
-
|
|
220
|
-
|
|
277
|
+
|
|
278
|
+
try:
|
|
279
|
+
head = self.s3_client.head_object(
|
|
280
|
+
Bucket=self.bucket_name, Key=obj_name)
|
|
281
|
+
s3_etag = head.get("ETag")
|
|
282
|
+
self._write_etag_file(file_name, s3_etag)
|
|
283
|
+
except ClientError:
|
|
284
|
+
# If we can't get ETag, we should remove any existing etag file
|
|
285
|
+
# to force a re-download on the next __getitem__ call.
|
|
286
|
+
etag_file_name = file_name + ".__etag__"
|
|
287
|
+
if os.path.exists(etag_file_name):
|
|
288
|
+
os.remove(etag_file_name)
|
|
221
289
|
|
|
222
290
|
|
|
223
291
|
def __delitem__(self, key:PersiDictKey):
|
|
@@ -231,10 +299,17 @@ class S3Dict(PersiDict):
|
|
|
231
299
|
file_name = self.local_cache._build_full_path(key)
|
|
232
300
|
if os.path.isfile(file_name):
|
|
233
301
|
os.remove(file_name)
|
|
234
|
-
|
|
302
|
+
etag_file_name = file_name + ".__etag__"
|
|
303
|
+
if os.path.isfile(etag_file_name):
|
|
304
|
+
os.remove(etag_file_name)
|
|
235
305
|
|
|
236
306
|
def __len__(self) -> int:
|
|
237
|
-
"""Return len(self).
|
|
307
|
+
"""Return len(self).
|
|
308
|
+
|
|
309
|
+
WARNING: This operation can be very slow and costly on large S3 buckets
|
|
310
|
+
as it needs to iterate over all objects in the dictionary's prefix.
|
|
311
|
+
Avoid using it in performance-sensitive code.
|
|
312
|
+
"""
|
|
238
313
|
|
|
239
314
|
num_files = 0
|
|
240
315
|
suffix = "." + self.file_type
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import string
|
|
2
|
-
from copy import deepcopy
|
|
3
2
|
|
|
4
3
|
SAFE_CHARS_SET = set(string.ascii_letters + string.digits + "()_-~.=")
|
|
5
4
|
SAFE_STRING_MAX_LENGTH = 254
|
|
6
5
|
|
|
7
6
|
def get_safe_chars() -> set[str]:
|
|
8
7
|
"""Return a set of allowed characters."""
|
|
9
|
-
return
|
|
8
|
+
return SAFE_CHARS_SET.copy()
|
|
10
9
|
|
|
11
10
|
def replace_unsafe_chars(a_str:str, replace_with:str) -> str :
|
|
12
11
|
""" Replace unsafe (special) characters with allowed (safe) ones."""
|
|
@@ -43,7 +43,7 @@ class SafeStrTuple(Sequence, Hashable):
|
|
|
43
43
|
elif isinstance(a, str):
|
|
44
44
|
assert len(a) > 0
|
|
45
45
|
assert len(a) < SAFE_STRING_MAX_LENGTH
|
|
46
|
-
assert
|
|
46
|
+
assert all(c in SAFE_CHARS_SET for c in a)
|
|
47
47
|
candidate_strings.append(a)
|
|
48
48
|
elif _is_sequence_not_mapping(a):
|
|
49
49
|
if len(a) > 0:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import time
|
|
4
|
+
from functools import cache
|
|
4
5
|
|
|
5
6
|
from deepdiff import DeepDiff
|
|
6
7
|
from parameterizable import register_parameterizable_class, sort_dict_by_keys
|
|
@@ -115,17 +116,19 @@ class WriteOnceDict(PersiDict):
|
|
|
115
116
|
"""
|
|
116
117
|
check_needed = False
|
|
117
118
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
119
|
+
n_retries = 8
|
|
120
|
+
for i in range(n_retries):
|
|
121
|
+
try: # extra protections to better handle concurrent writes
|
|
122
|
+
if key in self._wrapped_dict:
|
|
123
|
+
check_needed = True
|
|
124
|
+
else:
|
|
125
|
+
self._wrapped_dict[key] = value
|
|
126
|
+
break
|
|
127
|
+
except Exception as e:
|
|
128
|
+
if i < n_retries - 1:
|
|
129
|
+
time.sleep(random.uniform(0.01, 0.1) * (2 ** i))
|
|
130
|
+
else:
|
|
131
|
+
raise e
|
|
129
132
|
|
|
130
133
|
if not key in self._wrapped_dict:
|
|
131
134
|
raise KeyError(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|