persidict 0.31.0__tar.gz → 0.32.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- {persidict-0.31.0 → persidict-0.32.0}/PKG-INFO +4 -3
- {persidict-0.31.0 → persidict-0.32.0}/README.md +1 -1
- {persidict-0.31.0 → persidict-0.32.0}/pyproject.toml +4 -3
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/file_dir_dict.py +92 -8
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/persi_dict.py +27 -9
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/s3_dict.py +2 -1
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/write_once_dict.py +3 -2
- persidict-0.31.0/src/persidict/.DS_Store +0 -0
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/__init__.py +0 -0
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/jokers.py +0 -0
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/overlapping_multi_dict.py +0 -0
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/safe_chars.py +0 -0
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/safe_str_tuple.py +0 -0
- {persidict-0.31.0 → persidict-0.32.0}/src/persidict/safe_str_tuple_signing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.31.0
|
|
3
|
+
Version: 0.32.0
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -20,7 +20,8 @@ Requires-Dist: joblib
|
|
|
20
20
|
Requires-Dist: numpy
|
|
21
21
|
Requires-Dist: pandas
|
|
22
22
|
Requires-Dist: jsonpickle
|
|
23
|
-
Requires-Dist:
|
|
23
|
+
Requires-Dist: joblib
|
|
24
|
+
Requires-Dist: deepdiff
|
|
24
25
|
Requires-Dist: boto3 ; extra == 'aws'
|
|
25
26
|
Requires-Dist: boto3 ; extra == 'dev'
|
|
26
27
|
Requires-Dist: moto ; extra == 'dev'
|
|
@@ -37,7 +38,7 @@ Simple persistent dictionaries for Python.
|
|
|
37
38
|
|
|
38
39
|
## What Is It?
|
|
39
40
|
|
|
40
|
-
`persidict` offers a simple persistent key-value store for Python.
|
|
41
|
+
`persidict` offers a very simple persistent key-value store for Python.
|
|
41
42
|
It saves the content of the dictionary in a folder on a disk
|
|
42
43
|
or in an S3 bucket on AWS. Each value is stored as a separate file / S3 object.
|
|
43
44
|
Only text strings or sequences of strings are allowed as keys.
|
|
@@ -4,7 +4,7 @@ Simple persistent dictionaries for Python.
|
|
|
4
4
|
|
|
5
5
|
## What Is It?
|
|
6
6
|
|
|
7
|
-
`persidict` offers a simple persistent key-value store for Python.
|
|
7
|
+
`persidict` offers a very simple persistent key-value store for Python.
|
|
8
8
|
It saves the content of the dictionary in a folder on a disk
|
|
9
9
|
or in an S3 bucket on AWS. Each value is stored as a separate file / S3 object.
|
|
10
10
|
Only text strings or sequences of strings are allowed as keys.
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "persidict"
|
|
7
|
-
version = "0.31.0"
|
|
7
|
+
version = "0.32.0"
|
|
8
8
|
description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -30,7 +30,8 @@ dependencies = [
|
|
|
30
30
|
"numpy",
|
|
31
31
|
"pandas",
|
|
32
32
|
"jsonpickle",
|
|
33
|
-
"
|
|
33
|
+
"joblib",
|
|
34
|
+
"deepdiff"
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
[project.urls]
|
|
@@ -45,4 +46,4 @@ dev = [
|
|
|
45
46
|
|
|
46
47
|
aws = [
|
|
47
48
|
"boto3"
|
|
48
|
-
]
|
|
49
|
+
]
|
|
@@ -19,6 +19,7 @@ import jsonpickle
|
|
|
19
19
|
import jsonpickle.ext.numpy as jsonpickle_numpy
|
|
20
20
|
import jsonpickle.ext.pandas as jsonpickle_pandas
|
|
21
21
|
import parameterizable
|
|
22
|
+
from parameterizable import sort_dict_by_keys
|
|
22
23
|
|
|
23
24
|
from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
|
|
24
25
|
from .safe_chars import replace_unsafe_chars
|
|
@@ -124,7 +125,7 @@ class FileDirDict(PersiDict):
|
|
|
124
125
|
base_dir=self.base_dir
|
|
125
126
|
, file_type=self.file_type)
|
|
126
127
|
params.update(additional_params)
|
|
127
|
-
sorted_params =
|
|
128
|
+
sorted_params = sort_dict_by_keys(params)
|
|
128
129
|
return sorted_params
|
|
129
130
|
|
|
130
131
|
|
|
@@ -151,11 +152,20 @@ class FileDirDict(PersiDict):
|
|
|
151
152
|
|
|
152
153
|
num_files = 0
|
|
153
154
|
suffix = "." + self.file_type
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
155
|
+
stack = [self._base_dir]
|
|
156
|
+
|
|
157
|
+
while stack:
|
|
158
|
+
path = stack.pop()
|
|
159
|
+
try:
|
|
160
|
+
with os.scandir(path) as it:
|
|
161
|
+
for entry in it:
|
|
162
|
+
if entry.is_dir(follow_symlinks=False):
|
|
163
|
+
stack.append(entry.path)
|
|
164
|
+
elif entry.is_file(follow_symlinks=False) and entry.name.endswith(suffix):
|
|
165
|
+
num_files += 1
|
|
166
|
+
except PermissionError:
|
|
167
|
+
continue
|
|
168
|
+
|
|
159
169
|
return num_files
|
|
160
170
|
|
|
161
171
|
|
|
@@ -202,7 +212,39 @@ class FileDirDict(PersiDict):
|
|
|
202
212
|
file_name = key[-1] + "." + self.file_type
|
|
203
213
|
return os.path.join(*dir_names, file_name)
|
|
204
214
|
else:
|
|
205
|
-
return os.path.join(*dir_names)
|
|
215
|
+
return str(os.path.join(*dir_names))
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
|
|
219
|
+
"""Convert a filesystem path back into a key."""
|
|
220
|
+
|
|
221
|
+
# Ensure we're working with absolute paths
|
|
222
|
+
full_path = os.path.abspath(full_path)
|
|
223
|
+
|
|
224
|
+
# Remove the base directory from the path
|
|
225
|
+
if not full_path.startswith(self._base_dir):
|
|
226
|
+
raise ValueError(f"Path {full_path} is not within base directory {self._base_dir}")
|
|
227
|
+
|
|
228
|
+
# Get the relative path
|
|
229
|
+
rel_path = full_path[len(self._base_dir):].lstrip(os.sep)
|
|
230
|
+
|
|
231
|
+
if not rel_path:
|
|
232
|
+
return SafeStrTuple()
|
|
233
|
+
|
|
234
|
+
# Split the path into components
|
|
235
|
+
path_components = rel_path.split(os.sep)
|
|
236
|
+
|
|
237
|
+
# If it's a file path, remove the file extension from the last component
|
|
238
|
+
if os.path.isfile(full_path) and path_components[-1].endswith("." + self.file_type):
|
|
239
|
+
path_components[-1] = path_components[-1][:-len("." + self.file_type)]
|
|
240
|
+
|
|
241
|
+
# Create a SafeStrTuple from the path components
|
|
242
|
+
key = SafeStrTuple(*path_components)
|
|
243
|
+
|
|
244
|
+
# Unsign the key
|
|
245
|
+
key = unsign_safe_str_tuple(key, self.digest_len)
|
|
246
|
+
|
|
247
|
+
return key
|
|
206
248
|
|
|
207
249
|
|
|
208
250
|
def get_subdict(self, key:PersiDictKey) -> FileDirDict:
|
|
@@ -403,4 +445,46 @@ class FileDirDict(PersiDict):
|
|
|
403
445
|
return os.path.getmtime(filename)
|
|
404
446
|
|
|
405
447
|
|
|
406
|
-
|
|
448
|
+
def random_key(self) -> PersiDictKey | None:
|
|
449
|
+
# canonicalise extension once
|
|
450
|
+
early_exit_cap = 10_000
|
|
451
|
+
ext = None
|
|
452
|
+
if self.file_type:
|
|
453
|
+
ext = self.file_type.lower()
|
|
454
|
+
if not ext.startswith("."):
|
|
455
|
+
ext = "." + ext
|
|
456
|
+
|
|
457
|
+
stack = [self._base_dir]
|
|
458
|
+
winner: Optional[str] = None
|
|
459
|
+
seen = 0
|
|
460
|
+
|
|
461
|
+
while stack:
|
|
462
|
+
path = stack.pop()
|
|
463
|
+
try:
|
|
464
|
+
with os.scandir(path) as it:
|
|
465
|
+
for ent in it:
|
|
466
|
+
if ent.is_dir(follow_symlinks=False):
|
|
467
|
+
stack.append(ent.path)
|
|
468
|
+
continue
|
|
469
|
+
|
|
470
|
+
# cheap name test before stat()
|
|
471
|
+
if ext and not ent.name.lower().endswith(ext):
|
|
472
|
+
continue
|
|
473
|
+
|
|
474
|
+
if ent.is_file(follow_symlinks=False):
|
|
475
|
+
seen += 1
|
|
476
|
+
if random.random() < 1 / seen: # reservoir k=1
|
|
477
|
+
winner = ent.path
|
|
478
|
+
# early‑exit when cap reached
|
|
479
|
+
if early_exit_cap and seen >= early_exit_cap:
|
|
480
|
+
return self._build_key_from_full_path(os.path.abspath(winner))
|
|
481
|
+
except PermissionError:
|
|
482
|
+
continue
|
|
483
|
+
|
|
484
|
+
if winner is None:
|
|
485
|
+
return None
|
|
486
|
+
else:
|
|
487
|
+
return self._build_key_from_full_path(os.path.abspath(winner))
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
parameterizable.register_parameterizable_class(FileDirDict)
|
|
@@ -22,7 +22,7 @@ from __future__ import annotations
|
|
|
22
22
|
|
|
23
23
|
from abc import abstractmethod
|
|
24
24
|
import random
|
|
25
|
-
from parameterizable import ParameterizableClass
|
|
25
|
+
from parameterizable import ParameterizableClass, sort_dict_by_keys
|
|
26
26
|
from typing import Any, Sequence, Optional
|
|
27
27
|
from collections.abc import MutableMapping
|
|
28
28
|
|
|
@@ -107,7 +107,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
107
107
|
, digest_len=self.digest_len
|
|
108
108
|
, base_class_for_values=self.base_class_for_values
|
|
109
109
|
)
|
|
110
|
-
sorted_params =
|
|
110
|
+
sorted_params = sort_dict_by_keys(params)
|
|
111
111
|
return sorted_params
|
|
112
112
|
|
|
113
113
|
|
|
@@ -302,15 +302,33 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
302
302
|
return result_subdicts
|
|
303
303
|
|
|
304
304
|
|
|
305
|
-
def
|
|
306
|
-
"""Return a
|
|
305
|
+
def random_key(self) -> PersiDictKey | None:
|
|
306
|
+
"""Return a random key from the dictionary.
|
|
307
|
+
|
|
308
|
+
Returns a single random key if the dictionary is not empty.
|
|
309
|
+
Returns None if the dictionary is empty.
|
|
307
310
|
|
|
308
311
|
This method is absent in the original Python dict API.
|
|
312
|
+
|
|
313
|
+
Implementation uses reservoir sampling to select a uniformly random key
|
|
314
|
+
in streaming time, without loading all keys into memory or using len().
|
|
309
315
|
"""
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
316
|
+
iterator = iter(self.keys())
|
|
317
|
+
try:
|
|
318
|
+
# Get the first key
|
|
319
|
+
result = next(iterator)
|
|
320
|
+
except StopIteration:
|
|
321
|
+
# Dictionary is empty
|
|
322
|
+
return None
|
|
323
|
+
|
|
324
|
+
# Reservoir sampling algorithm
|
|
325
|
+
i = 2
|
|
326
|
+
for key in iterator:
|
|
327
|
+
# Select current key with probability 1/i
|
|
328
|
+
if random.random() < 1/i:
|
|
329
|
+
result = key
|
|
330
|
+
i += 1
|
|
331
|
+
|
|
314
332
|
return result
|
|
315
333
|
|
|
316
334
|
|
|
@@ -372,4 +390,4 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
372
390
|
|
|
373
391
|
This method is absent in the original Python dict API.
|
|
374
392
|
"""
|
|
375
|
-
return [self[k] for k in self.newest_keys(max_n)]
|
|
393
|
+
return [self[k] for k in self.newest_keys(max_n)]
|
|
@@ -5,6 +5,7 @@ from typing import Any, Optional
|
|
|
5
5
|
|
|
6
6
|
import boto3
|
|
7
7
|
import parameterizable
|
|
8
|
+
from parameterizable.dict_sorter import sort_dict_by_keys
|
|
8
9
|
|
|
9
10
|
from .safe_str_tuple import SafeStrTuple
|
|
10
11
|
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
@@ -119,7 +120,7 @@ class S3Dict(PersiDict):
|
|
|
119
120
|
params["region"] = self.region
|
|
120
121
|
params["bucket_name"] = self.bucket_name
|
|
121
122
|
params["root_prefix"] = self.root_prefix
|
|
122
|
-
sorted_params =
|
|
123
|
+
sorted_params = sort_dict_by_keys(params)
|
|
123
124
|
return sorted_params
|
|
124
125
|
|
|
125
126
|
|
|
@@ -3,7 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
import time
|
|
4
4
|
|
|
5
5
|
from deepdiff import DeepDiff
|
|
6
|
-
from parameterizable import register_parameterizable_class
|
|
6
|
+
from parameterizable import register_parameterizable_class, sort_dict_by_keys
|
|
7
|
+
|
|
7
8
|
from .jokers import KEEP_CURRENT, KeepCurrentFlag
|
|
8
9
|
from .persi_dict import PersiDict
|
|
9
10
|
from .file_dir_dict import FileDirDict
|
|
@@ -103,7 +104,7 @@ class WriteOnceDict(PersiDict):
|
|
|
103
104
|
params = dict(
|
|
104
105
|
wrapped_dict = self._wrapped_dict,
|
|
105
106
|
p_consistency_checks = self.p_consistency_checks)
|
|
106
|
-
sorted_params =
|
|
107
|
+
sorted_params = sort_dict_by_keys(params)
|
|
107
108
|
return sorted_params
|
|
108
109
|
|
|
109
110
|
def __setitem__(self, key, value):
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|