persidict 0.32.3__py3-none-any.whl → 0.32.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- persidict/.DS_Store +0 -0
- persidict/file_dir_dict.py +27 -23
- persidict/persi_dict.py +64 -37
- persidict/s3_dict.py +24 -22
- {persidict-0.32.3.dist-info → persidict-0.32.7.dist-info}/METADATA +1 -1
- {persidict-0.32.3.dist-info → persidict-0.32.7.dist-info}/RECORD +7 -6
- {persidict-0.32.3.dist-info → persidict-0.32.7.dist-info}/WHEEL +1 -1
persidict/.DS_Store
ADDED
|
Binary file
|
persidict/file_dir_dict.py
CHANGED
|
@@ -103,17 +103,6 @@ class FileDirDict(PersiDict):
|
|
|
103
103
|
self._base_dir = os.path.abspath(base_dir)
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
def __repr__(self):
|
|
107
|
-
"""Return repr(self)."""
|
|
108
|
-
|
|
109
|
-
repr_str = super().__repr__()
|
|
110
|
-
repr_str = repr_str[:-1] + f", _base_dir={self._base_dir}"
|
|
111
|
-
repr_str += f", file_type={self.file_type}"
|
|
112
|
-
repr_str += " )"
|
|
113
|
-
|
|
114
|
-
return repr_str
|
|
115
|
-
|
|
116
|
-
|
|
117
106
|
def get_params(self):
|
|
118
107
|
"""Return configuration parameters of the dictionary.
|
|
119
108
|
|
|
@@ -393,24 +382,28 @@ class FileDirDict(PersiDict):
|
|
|
393
382
|
os.remove(filename)
|
|
394
383
|
|
|
395
384
|
|
|
396
|
-
def _generic_iter(self,
|
|
385
|
+
def _generic_iter(self, result_type: set[str]):
|
|
397
386
|
"""Underlying implementation for .items()/.keys()/.values() iterators"""
|
|
398
|
-
assert
|
|
387
|
+
assert isinstance(result_type, set)
|
|
388
|
+
assert 1 <= len(result_type) <= 3
|
|
389
|
+
assert len(result_type | {"keys", "values", "timestamps"}) == 3
|
|
390
|
+
assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
|
|
391
|
+
|
|
399
392
|
walk_results = os.walk(self._base_dir)
|
|
400
393
|
ext_len = len(self.file_type) + 1
|
|
401
394
|
|
|
402
395
|
def splitter(dir_path: str):
|
|
403
396
|
"""Transform a dirname into a PersiDictKey key"""
|
|
404
|
-
|
|
397
|
+
splitted_str = []
|
|
405
398
|
if dir_path == ".":
|
|
406
|
-
return
|
|
399
|
+
return splitted_str
|
|
407
400
|
while True:
|
|
408
401
|
head, tail = os.path.split(dir_path)
|
|
409
|
-
|
|
402
|
+
splitted_str = [tail] + splitted_str
|
|
410
403
|
dir_path = head
|
|
411
404
|
if len(head) == 0:
|
|
412
405
|
break
|
|
413
|
-
return tuple(
|
|
406
|
+
return tuple(splitted_str)
|
|
414
407
|
|
|
415
408
|
def step():
|
|
416
409
|
suffix = "." + self.file_type
|
|
@@ -423,14 +416,25 @@ class FileDirDict(PersiDict):
|
|
|
423
416
|
result_key = (*splitter(prefix_key), f[:-ext_len])
|
|
424
417
|
result_key = SafeStrTuple(result_key)
|
|
425
418
|
|
|
426
|
-
|
|
427
|
-
|
|
419
|
+
to_return = []
|
|
420
|
+
|
|
421
|
+
if "keys" in result_type:
|
|
422
|
+
key_to_return= unsign_safe_str_tuple(
|
|
428
423
|
result_key, self.digest_len)
|
|
429
|
-
|
|
430
|
-
|
|
424
|
+
to_return.append(key_to_return)
|
|
425
|
+
|
|
426
|
+
if "values" in result_type:
|
|
427
|
+
value_to_return = self[result_key]
|
|
428
|
+
to_return.append(value_to_return)
|
|
429
|
+
|
|
430
|
+
if len(result_type) == 1:
|
|
431
|
+
yield to_return[0]
|
|
431
432
|
else:
|
|
432
|
-
|
|
433
|
-
|
|
433
|
+
if "timestamps" in result_type:
|
|
434
|
+
timestamp_to_return = os.path.getmtime(
|
|
435
|
+
os.path.join(dir_name, f))
|
|
436
|
+
to_return.append(timestamp_to_return)
|
|
437
|
+
yield tuple(to_return)
|
|
434
438
|
|
|
435
439
|
return step()
|
|
436
440
|
|
persidict/persi_dict.py
CHANGED
|
@@ -21,6 +21,7 @@ even after the Python process that created the dictionary has terminated.
|
|
|
21
21
|
from __future__ import annotations
|
|
22
22
|
|
|
23
23
|
from abc import abstractmethod
|
|
24
|
+
import heapq
|
|
24
25
|
import random
|
|
25
26
|
from parameterizable import ParameterizableClass, sort_dict_by_keys
|
|
26
27
|
from typing import Any, Sequence, Optional
|
|
@@ -87,7 +88,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
87
88
|
, immutable_items:bool = False
|
|
88
89
|
, digest_len:int = 8
|
|
89
90
|
, base_class_for_values:Optional[type] = None
|
|
90
|
-
, *args, **
|
|
91
|
+
, *args, **kwargs):
|
|
91
92
|
self.digest_len = int(digest_len)
|
|
92
93
|
if digest_len < 0:
|
|
93
94
|
raise ValueError("digest_len must be non-negative")
|
|
@@ -133,13 +134,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
133
134
|
|
|
134
135
|
def __repr__(self) -> str:
|
|
135
136
|
"""Return repr(self)"""
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
repr_str += f", digest_len={self.digest_len}"
|
|
140
|
-
repr_str += f", base_class_for_values={self.base_class_for_values}"
|
|
141
|
-
repr_str += ")"
|
|
142
|
-
return repr_str
|
|
137
|
+
params = self.get_params()
|
|
138
|
+
params_str = ', '.join(f'{k}={v!r}' for k, v in params.items())
|
|
139
|
+
return f'{self.__class__.__name__}({params_str})'
|
|
143
140
|
|
|
144
141
|
|
|
145
142
|
def __str__(self) -> str:
|
|
@@ -185,30 +182,48 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
185
182
|
|
|
186
183
|
|
|
187
184
|
@abstractmethod
|
|
188
|
-
def _generic_iter(self,
|
|
189
|
-
"""Underlying implementation for
|
|
190
|
-
assert
|
|
185
|
+
def _generic_iter(self, result_type: set[str]) -> Any:
|
|
186
|
+
"""Underlying implementation for items/keys/values/... iterators"""
|
|
187
|
+
assert isinstance(result_type, set)
|
|
188
|
+
assert 1 <= len(result_type) <= 3
|
|
189
|
+
assert len(result_type | {"keys", "values", "timestamps"}) == 3
|
|
190
|
+
assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
|
|
191
191
|
raise NotImplementedError
|
|
192
192
|
|
|
193
193
|
|
|
194
194
|
def __iter__(self):
|
|
195
195
|
"""Implement iter(self)."""
|
|
196
|
-
return self._generic_iter("keys")
|
|
196
|
+
return self._generic_iter({"keys"})
|
|
197
197
|
|
|
198
198
|
|
|
199
199
|
def keys(self):
|
|
200
|
-
"""
|
|
201
|
-
return
|
|
200
|
+
"""iterator object that provides access to keys"""
|
|
201
|
+
return self._generic_iter({"keys"})
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def keys_and_timestamps(self):
|
|
205
|
+
"""iterator object that provides access to keys and timestamps"""
|
|
206
|
+
return self._generic_iter({"keys", "timestamps"})
|
|
202
207
|
|
|
203
208
|
|
|
204
209
|
def values(self):
|
|
205
210
|
"""D.values() -> iterator object that provides access to D's values"""
|
|
206
|
-
return self._generic_iter("values")
|
|
211
|
+
return self._generic_iter({"values"})
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def values_and_timestamps(self):
|
|
215
|
+
"""iterator object that provides access to values and timestamps"""
|
|
216
|
+
return self._generic_iter({"values", "timestamps"})
|
|
207
217
|
|
|
208
218
|
|
|
209
219
|
def items(self):
|
|
210
220
|
"""D.items() -> iterator object that provides access to D's items"""
|
|
211
|
-
return self._generic_iter("
|
|
221
|
+
return self._generic_iter({"keys", "values"})
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def items_and_timestamps(self):
|
|
225
|
+
"""iterator object that provides access to keys, values, and timestamps"""
|
|
226
|
+
return self._generic_iter({"keys", "values", "timestamps"})
|
|
212
227
|
|
|
213
228
|
|
|
214
229
|
def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
|
|
@@ -350,39 +365,51 @@ class PersiDict(MutableMapping, ParameterizableClass):
|
|
|
350
365
|
|
|
351
366
|
This method is absent in the original Python dict API.
|
|
352
367
|
"""
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
368
|
+
if max_n is None:
|
|
369
|
+
# If we need all keys, sort them all by timestamp
|
|
370
|
+
key_timestamp_pairs = list(self.keys_and_timestamps())
|
|
371
|
+
key_timestamp_pairs.sort(key=lambda x: x[1])
|
|
372
|
+
return [key for key,_ in key_timestamp_pairs]
|
|
373
|
+
elif max_n <= 0:
|
|
374
|
+
return []
|
|
375
|
+
else:
|
|
376
|
+
# Use heapq.nsmallest for efficient partial sorting without loading all keys into memory
|
|
377
|
+
smallest_pairs = heapq.nsmallest(max_n
|
|
378
|
+
, self.keys_and_timestamps()
|
|
379
|
+
, key=lambda x: x[1])
|
|
380
|
+
return [key for key,_ in smallest_pairs]
|
|
360
381
|
|
|
361
382
|
|
|
362
|
-
def
|
|
363
|
-
"""Return max_n the
|
|
383
|
+
def oldest_values(self, max_n=None):
|
|
384
|
+
"""Return max_n the oldest values in the dictionary.
|
|
364
385
|
|
|
365
|
-
If max_n is None, return all
|
|
386
|
+
If max_n is None, return all values.
|
|
366
387
|
|
|
367
388
|
This method is absent in the original Python dict API.
|
|
368
389
|
"""
|
|
369
|
-
|
|
370
|
-
# all_keys.sort(key=lambda k: self.timestamp(k), reverse=True)
|
|
371
|
-
all_keys.sort(key=self.timestamp, reverse=True)
|
|
372
|
-
if max_n is None or max_n > len(all_keys):
|
|
373
|
-
max_n = len(all_keys)
|
|
374
|
-
result = all_keys[:max_n]
|
|
375
|
-
return result
|
|
390
|
+
return [self[k] for k in self.oldest_keys(max_n)]
|
|
376
391
|
|
|
377
392
|
|
|
378
|
-
def
|
|
379
|
-
"""Return max_n the
|
|
393
|
+
def newest_keys(self, max_n=None):
|
|
394
|
+
"""Return max_n the newest keys in the dictionary.
|
|
380
395
|
|
|
381
|
-
If max_n is None, return all
|
|
396
|
+
If max_n is None, return all keys.
|
|
382
397
|
|
|
383
398
|
This method is absent in the original Python dict API.
|
|
384
399
|
"""
|
|
385
|
-
|
|
400
|
+
if max_n is None:
|
|
401
|
+
# If we need all keys, sort them all by timestamp in reverse order
|
|
402
|
+
key_timestamp_pairs = list(self.keys_and_timestamps())
|
|
403
|
+
key_timestamp_pairs.sort(key=lambda x:x[1], reverse=True)
|
|
404
|
+
return [key for key,_ in key_timestamp_pairs]
|
|
405
|
+
elif max_n <= 0:
|
|
406
|
+
return []
|
|
407
|
+
else:
|
|
408
|
+
# Use heapq.nlargest for efficient partial sorting without loading all keys into memory
|
|
409
|
+
largest_pairs = heapq.nlargest(max_n
|
|
410
|
+
, self.keys_and_timestamps()
|
|
411
|
+
, key=lambda item: item[1])
|
|
412
|
+
return [key for key,_ in largest_pairs]
|
|
386
413
|
|
|
387
414
|
|
|
388
415
|
def newest_values(self, max_n=None):
|
persidict/s3_dict.py
CHANGED
|
@@ -96,20 +96,6 @@ class S3Dict(PersiDict):
|
|
|
96
96
|
self.root_prefix += "/"
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
def __repr__(self) -> str:
|
|
100
|
-
"""Return repr(self)."""
|
|
101
|
-
|
|
102
|
-
repr_str = super().__repr__()
|
|
103
|
-
repr_str = repr_str[:-1] + f", _base_dir={self.local_cache._base_dir}"
|
|
104
|
-
repr_str += f", file_type={self.file_type}"
|
|
105
|
-
repr_str += f", region={self.region}"
|
|
106
|
-
repr_str += f", bucket_name={self.bucket_name}"
|
|
107
|
-
repr_str += f", root_prefix={self.root_prefix}"
|
|
108
|
-
repr_str += " )"
|
|
109
|
-
|
|
110
|
-
return repr_str
|
|
111
|
-
|
|
112
|
-
|
|
113
99
|
def get_params(self):
|
|
114
100
|
"""Return configuration parameters of the object as a dictionary.
|
|
115
101
|
|
|
@@ -269,9 +255,14 @@ class S3Dict(PersiDict):
|
|
|
269
255
|
return num_files
|
|
270
256
|
|
|
271
257
|
|
|
272
|
-
def _generic_iter(self,
|
|
258
|
+
def _generic_iter(self, result_type: str):
|
|
273
259
|
"""Underlying implementation for .items()/.keys()/.values() iterators"""
|
|
274
|
-
|
|
260
|
+
|
|
261
|
+
assert isinstance(result_type, set)
|
|
262
|
+
assert 1 <= len(result_type) <= 3
|
|
263
|
+
assert len(result_type | {"keys", "values", "timestamps"}) == 3
|
|
264
|
+
assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
|
|
265
|
+
|
|
275
266
|
suffix = "." + self.file_type
|
|
276
267
|
ext_len = len(self.file_type) + 1
|
|
277
268
|
prefix_len = len(self.root_prefix)
|
|
@@ -295,14 +286,25 @@ class S3Dict(PersiDict):
|
|
|
295
286
|
if not obj_name.endswith(suffix):
|
|
296
287
|
continue
|
|
297
288
|
obj_key = splitter(obj_name)
|
|
298
|
-
|
|
299
|
-
|
|
289
|
+
|
|
290
|
+
to_return = []
|
|
291
|
+
|
|
292
|
+
if "keys" in result_type:
|
|
293
|
+
key_to_return = unsign_safe_str_tuple(
|
|
300
294
|
obj_key, self.digest_len)
|
|
301
|
-
|
|
302
|
-
|
|
295
|
+
to_return.append(key_to_return)
|
|
296
|
+
|
|
297
|
+
if "values" in result_type:
|
|
298
|
+
value_to_return = self[obj_key]
|
|
299
|
+
to_return.append(value_to_return)
|
|
300
|
+
|
|
301
|
+
if len(result_type) == 1:
|
|
302
|
+
yield to_return[0]
|
|
303
303
|
else:
|
|
304
|
-
|
|
305
|
-
|
|
304
|
+
if "timestamps" in result_type:
|
|
305
|
+
timestamp_to_return = key["LastModified"].timestamp()
|
|
306
|
+
to_return.append(timestamp_to_return)
|
|
307
|
+
yield tuple(to_return)
|
|
306
308
|
|
|
307
309
|
return step()
|
|
308
310
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.32.
|
|
3
|
+
Version: 0.32.7
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Keywords: persistence,dicts,distributed,parallel
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -1,13 +1,14 @@
|
|
|
1
|
+
persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
|
|
1
2
|
persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
|
|
2
|
-
persidict/file_dir_dict.py,sha256=
|
|
3
|
+
persidict/file_dir_dict.py,sha256=3uoAua47pIJwpCPkXtF2dIskHm73kZogy15yknLfLUk,17939
|
|
3
4
|
persidict/jokers.py,sha256=kX4bE-jKWTM2ki7JOmm_2uJS8zm8u6InZ_V12xo2ImI,1436
|
|
4
5
|
persidict/overlapping_multi_dict.py,sha256=a-lUbmY15_HrDq6jSIt8F8tJboqbeYiuRQeW4elf_oU,2663
|
|
5
|
-
persidict/persi_dict.py,sha256=
|
|
6
|
-
persidict/s3_dict.py,sha256=
|
|
6
|
+
persidict/persi_dict.py,sha256=SF6aWs6kCeeW-bZ9HJwx0sPX7Xav_aURqeSZ-j5quv0,14266
|
|
7
|
+
persidict/s3_dict.py,sha256=awEIu7ehh4FgKircC10lcHp86xvUctXA_6jxUmC3Hy8,12480
|
|
7
8
|
persidict/safe_chars.py,sha256=WaIOliBdLlZ12CepEa1C3b1VPxvX4s2hvnZanCxM6BQ,565
|
|
8
9
|
persidict/safe_str_tuple.py,sha256=cTk5BL3r-yE62EKf7VngTaUpZAdsATJPIjiCGqQzkyU,3717
|
|
9
10
|
persidict/safe_str_tuple_signing.py,sha256=5uCjAVZRqOou-KpDZw-Exboc3-3vuayJMqrrt8aZ0ck,3742
|
|
10
11
|
persidict/write_once_dict.py,sha256=dqzA3amVDK9D8ygU8NFT5B87smAHYWdEn78Fssj6s_g,6396
|
|
11
|
-
persidict-0.32.
|
|
12
|
-
persidict-0.32.
|
|
13
|
-
persidict-0.32.
|
|
12
|
+
persidict-0.32.7.dist-info/WHEEL,sha256=Jb20R3Ili4n9P1fcwuLup21eQ5r9WXhs4_qy7VTrgPI,79
|
|
13
|
+
persidict-0.32.7.dist-info/METADATA,sha256=MG-CqgwJx0yz3aounX3ZT-dvD-9s2HdTzTDo9mQXN14,9334
|
|
14
|
+
persidict-0.32.7.dist-info/RECORD,,
|