persidict 0.31.1__tar.gz → 0.32.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {persidict-0.31.1 → persidict-0.32.1}/PKG-INFO +5 -3
- {persidict-0.31.1 → persidict-0.32.1}/README.md +1 -1
- {persidict-0.31.1 → persidict-0.32.1}/pyproject.toml +5 -3
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/file_dir_dict.py +90 -7
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/persi_dict.py +25 -7
- persidict-0.31.1/src/persidict/.DS_Store +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/__init__.py +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/jokers.py +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/overlapping_multi_dict.py +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/s3_dict.py +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/safe_chars.py +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/safe_str_tuple.py +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/safe_str_tuple_signing.py +0 -0
- {persidict-0.31.1 → persidict-0.32.1}/src/persidict/write_once_dict.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.31.1
|
|
3
|
+
Version: 0.32.1
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -15,12 +15,14 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
16
|
Classifier: Topic :: Software Development :: Libraries
|
|
17
17
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Dist: parameterizable
|
|
18
19
|
Requires-Dist: lz4
|
|
19
20
|
Requires-Dist: joblib
|
|
20
21
|
Requires-Dist: numpy
|
|
21
22
|
Requires-Dist: pandas
|
|
22
23
|
Requires-Dist: jsonpickle
|
|
23
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: joblib
|
|
25
|
+
Requires-Dist: deepdiff
|
|
24
26
|
Requires-Dist: boto3 ; extra == 'aws'
|
|
25
27
|
Requires-Dist: boto3 ; extra == 'dev'
|
|
26
28
|
Requires-Dist: moto ; extra == 'dev'
|
|
@@ -37,7 +39,7 @@ Simple persistent dictionaries for Python.
|
|
|
37
39
|
|
|
38
40
|
## What Is It?
|
|
39
41
|
|
|
40
|
-
`persidict` offers a simple persistent key-value store for Python.
|
|
42
|
+
`persidict` offers a very simple persistent key-value store for Python.
|
|
41
43
|
It saves the content of the dictionary in a folder on a disk
|
|
42
44
|
or in an S3 bucket on AWS. Each value is stored as a separate file / S3 object.
|
|
43
45
|
Only text strings or sequences of strings are allowed as keys.
|
|
@@ -4,7 +4,7 @@ Simple persistent dictionaries for Python.
|
|
|
4
4
|
|
|
5
5
|
## What Is It?
|
|
6
6
|
|
|
7
|
-
`persidict` offers a simple persistent key-value store for Python.
|
|
7
|
+
`persidict` offers a very simple persistent key-value store for Python.
|
|
8
8
|
It saves the content of the dictionary in a folder on a disk
|
|
9
9
|
or in an S3 bucket on AWS. Each value is stored as a separate file / S3 object.
|
|
10
10
|
Only text strings or sequences of strings are allowed as keys.
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "persidict"
|
|
7
|
-
version = "0.31.1"
|
|
7
|
+
version = "0.32.1"
|
|
8
8
|
description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -25,12 +25,14 @@ classifiers = [
|
|
|
25
25
|
"Topic :: Software Development :: Libraries :: Python Modules"
|
|
26
26
|
]
|
|
27
27
|
dependencies = [
|
|
28
|
+
"parameterizable",
|
|
28
29
|
"lz4",
|
|
29
30
|
"joblib",
|
|
30
31
|
"numpy",
|
|
31
32
|
"pandas",
|
|
32
33
|
"jsonpickle",
|
|
33
|
-
"
|
|
34
|
+
"joblib",
|
|
35
|
+
"deepdiff"
|
|
34
36
|
]
|
|
35
37
|
|
|
36
38
|
[project.urls]
|
|
@@ -45,4 +47,4 @@ dev = [
|
|
|
45
47
|
|
|
46
48
|
aws = [
|
|
47
49
|
"boto3"
|
|
48
|
-
]
|
|
50
|
+
]
|
|
@@ -152,11 +152,20 @@ class FileDirDict(PersiDict):
|
|
|
152
152
|
|
|
153
153
|
num_files = 0
|
|
154
154
|
suffix = "." + self.file_type
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
155
|
+
stack = [self._base_dir]
|
|
156
|
+
|
|
157
|
+
while stack:
|
|
158
|
+
path = stack.pop()
|
|
159
|
+
try:
|
|
160
|
+
with os.scandir(path) as it:
|
|
161
|
+
for entry in it:
|
|
162
|
+
if entry.is_dir(follow_symlinks=False):
|
|
163
|
+
stack.append(entry.path)
|
|
164
|
+
elif entry.is_file(follow_symlinks=False) and entry.name.endswith(suffix):
|
|
165
|
+
num_files += 1
|
|
166
|
+
except PermissionError:
|
|
167
|
+
continue
|
|
168
|
+
|
|
160
169
|
return num_files
|
|
161
170
|
|
|
162
171
|
|
|
@@ -203,7 +212,39 @@ class FileDirDict(PersiDict):
|
|
|
203
212
|
file_name = key[-1] + "." + self.file_type
|
|
204
213
|
return os.path.join(*dir_names, file_name)
|
|
205
214
|
else:
|
|
206
|
-
return os.path.join(*dir_names)
|
|
215
|
+
return str(os.path.join(*dir_names))
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
    """Convert a filesystem path back into a key.

    Args:
        full_path: Path of a file or directory inside the base directory.
            It is made absolute with os.path.abspath before use.

    Returns:
        The key built from the path components relative to the base
        directory. If the path is an existing file whose name ends with
        "." + self.file_type, that suffix is dropped from the last
        component. The components are then passed through
        unsign_safe_str_tuple with self.digest_len. An empty SafeStrTuple
        is returned when full_path is the base directory itself.

    Raises:
        ValueError: If full_path is neither the base directory nor a
            path located below it.
    """
    full_path = os.path.abspath(full_path)
    base_dir = os.path.abspath(self._base_dir)

    # A bare startswith() check would also accept sibling directories that
    # merely share the prefix (e.g. "/data/store_old" for "/data/store"),
    # so require a path separator right after the base directory.
    base_prefix = base_dir.rstrip(os.sep) + os.sep
    if full_path != base_dir and not full_path.startswith(base_prefix):
        raise ValueError(f"Path {full_path} is not within base directory {self._base_dir}")

    rel_path = full_path[len(base_dir):].lstrip(os.sep)
    if not rel_path:
        return SafeStrTuple()

    path_components = rel_path.split(os.sep)

    # Only real files carry the file-type suffix; directory names are kept
    # as they are. The cheap string test runs before the stat() call.
    suffix = "." + self.file_type
    if path_components[-1].endswith(suffix) and os.path.isfile(full_path):
        path_components[-1] = path_components[-1][:-len(suffix)]

    key = SafeStrTuple(*path_components)
    return unsign_safe_str_tuple(key, self.digest_len)
|
|
207
248
|
|
|
208
249
|
|
|
209
250
|
def get_subdict(self, key:PersiDictKey) -> FileDirDict:
|
|
@@ -404,4 +445,46 @@ class FileDirDict(PersiDict):
|
|
|
404
445
|
return os.path.getmtime(filename)
|
|
405
446
|
|
|
406
447
|
|
|
407
|
-
|
|
448
|
+
def random_key(self) -> PersiDictKey | None:
    """Return a randomly chosen key, or None if no matching file exists.

    Walks the tree under the base directory with an explicit stack and
    applies reservoir sampling (k=1) to the regular files whose name ends
    with the dictionary's file-type suffix. The suffix comparison is
    case-sensitive. Symbolic links are neither followed nor counted, and
    directories that raise PermissionError are skipped.

    The scan stops once 10,000 matching files have been seen and returns
    the pick held at that point, so in larger stores only the files
    visited before the cap can be selected.

    Returns:
        The key produced by _build_key_from_full_path for the selected
        file, or None when no matching file was found.
    """
    early_exit_cap = 10_000

    # Build the suffix once; an empty file_type disables the name filter.
    suffix = None
    if self.file_type:
        suffix = self.file_type
        if not suffix.startswith("."):
            suffix = "." + suffix

    stack = [self._base_dir]
    winner: str | None = None
    seen = 0

    while stack and seen < early_exit_cap:
        path = stack.pop()
        try:
            with os.scandir(path) as it:
                for ent in it:
                    if ent.is_dir(follow_symlinks=False):
                        stack.append(ent.path)
                        continue

                    # Cheap name test before is_file(), which may stat().
                    if suffix and not ent.name.endswith(suffix):
                        continue

                    if ent.is_file(follow_symlinks=False):
                        seen += 1
                        # Reservoir sampling, k=1: the n-th candidate
                        # replaces the current pick with probability 1/n.
                        if random.random() < 1 / seen:
                            winner = ent.path
                        if seen >= early_exit_cap:
                            break
        except PermissionError:
            continue

    if winner is None:
        return None
    return self._build_key_from_full_path(os.path.abspath(winner))
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
parameterizable.register_parameterizable_class(FileDirDict)
|
|
@@ -302,15 +302,33 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
302
302
|
return result_subdicts
|
|
303
303
|
|
|
304
304
|
|
|
305
|
-
def
|
|
306
|
-
"""Return a
|
|
305
|
+
def random_key(self) -> PersiDictKey | None:
    """Return a random key from the dictionary.

    Returns a single random key if the dictionary is not empty.
    Returns None if the dictionary is empty.

    This method is absent in the original Python dict API.

    Implementation uses reservoir sampling (k=1) to select a uniformly
    random key in a single pass over self.keys(), without loading all
    keys into memory or using len().
    """
    result = None
    for position, key in enumerate(self.keys(), start=1):
        # The first key is always taken; the n-th key then replaces the
        # current pick with probability 1/n, which leaves every key
        # equally likely once the iteration ends.
        if position == 1 or random.random() < 1 / position:
            result = key
    return result
|
|
315
333
|
|
|
316
334
|
|
|
@@ -372,4 +390,4 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
372
390
|
|
|
373
391
|
This method is absent in the original Python dict API.
|
|
374
392
|
"""
|
|
375
|
-
return [self[k] for k in self.newest_keys(max_n)]
|
|
393
|
+
return [self[k] for k in self.newest_keys(max_n)]
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|