persidict 0.32.3__tar.gz → 0.32.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.32.3
3
+ Version: 0.32.7
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -4,7 +4,7 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "persidict"
7
- version = "0.32.3"
7
+ version = "0.32.7"
8
8
  description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
Binary file
@@ -103,17 +103,6 @@ class FileDirDict(PersiDict):
103
103
  self._base_dir = os.path.abspath(base_dir)
104
104
 
105
105
 
106
- def __repr__(self):
107
- """Return repr(self)."""
108
-
109
- repr_str = super().__repr__()
110
- repr_str = repr_str[:-1] + f", _base_dir={self._base_dir}"
111
- repr_str += f", file_type={self.file_type}"
112
- repr_str += " )"
113
-
114
- return repr_str
115
-
116
-
117
106
  def get_params(self):
118
107
  """Return configuration parameters of the dictionary.
119
108
 
@@ -393,24 +382,28 @@ class FileDirDict(PersiDict):
393
382
  os.remove(filename)
394
383
 
395
384
 
396
- def _generic_iter(self, iter_type: str):
385
+ def _generic_iter(self, result_type: set[str]):
397
386
  """Underlying implementation for .items()/.keys()/.values() iterators"""
398
- assert iter_type in {"keys", "values", "items"}
387
+ assert isinstance(result_type, set)
388
+ assert 1 <= len(result_type) <= 3
389
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
390
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
391
+
399
392
  walk_results = os.walk(self._base_dir)
400
393
  ext_len = len(self.file_type) + 1
401
394
 
402
395
  def splitter(dir_path: str):
403
396
  """Transform a dirname into a PersiDictKey key"""
404
- result = []
397
+ splitted_str = []
405
398
  if dir_path == ".":
406
- return result
399
+ return splitted_str
407
400
  while True:
408
401
  head, tail = os.path.split(dir_path)
409
- result = [tail] + result
402
+ splitted_str = [tail] + splitted_str
410
403
  dir_path = head
411
404
  if len(head) == 0:
412
405
  break
413
- return tuple(result)
406
+ return tuple(splitted_str)
414
407
 
415
408
  def step():
416
409
  suffix = "." + self.file_type
@@ -423,14 +416,25 @@ class FileDirDict(PersiDict):
423
416
  result_key = (*splitter(prefix_key), f[:-ext_len])
424
417
  result_key = SafeStrTuple(result_key)
425
418
 
426
- if iter_type == "keys":
427
- yield unsign_safe_str_tuple(
419
+ to_return = []
420
+
421
+ if "keys" in result_type:
422
+ key_to_return= unsign_safe_str_tuple(
428
423
  result_key, self.digest_len)
429
- elif iter_type == "values":
430
- yield self[result_key]
424
+ to_return.append(key_to_return)
425
+
426
+ if "values" in result_type:
427
+ value_to_return = self[result_key]
428
+ to_return.append(value_to_return)
429
+
430
+ if len(result_type) == 1:
431
+ yield to_return[0]
431
432
  else:
432
- yield (unsign_safe_str_tuple(
433
- result_key, self.digest_len), self[result_key])
433
+ if "timestamps" in result_type:
434
+ timestamp_to_return = os.path.getmtime(
435
+ os.path.join(dir_name, f))
436
+ to_return.append(timestamp_to_return)
437
+ yield tuple(to_return)
434
438
 
435
439
  return step()
436
440
 
@@ -21,6 +21,7 @@ even after the Python process that created the dictionary has terminated.
21
21
  from __future__ import annotations
22
22
 
23
23
  from abc import abstractmethod
24
+ import heapq
24
25
  import random
25
26
  from parameterizable import ParameterizableClass, sort_dict_by_keys
26
27
  from typing import Any, Sequence, Optional
@@ -87,7 +88,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
87
88
  , immutable_items:bool = False
88
89
  , digest_len:int = 8
89
90
  , base_class_for_values:Optional[type] = None
90
- , *args, **kwargas):
91
+ , *args, **kwargs):
91
92
  self.digest_len = int(digest_len)
92
93
  if digest_len < 0:
93
94
  raise ValueError("digest_len must be non-negative")
@@ -133,13 +134,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
133
134
 
134
135
  def __repr__(self) -> str:
135
136
  """Return repr(self)"""
136
- repr_str = self.__class__.__name__ + "("
137
- repr_str += repr(dict(self.items()))
138
- repr_str += f", immutable_items={self.immutable_items}"
139
- repr_str += f", digest_len={self.digest_len}"
140
- repr_str += f", base_class_for_values={self.base_class_for_values}"
141
- repr_str += ")"
142
- return repr_str
137
+ params = self.get_params()
138
+ params_str = ', '.join(f'{k}={v!r}' for k, v in params.items())
139
+ return f'{self.__class__.__name__}({params_str})'
143
140
 
144
141
 
145
142
  def __str__(self) -> str:
@@ -185,30 +182,48 @@ class PersiDict(MutableMapping, ParameterizableClass):
185
182
 
186
183
 
187
184
  @abstractmethod
188
- def _generic_iter(self, iter_type: str):
189
- """Underlying implementation for .items()/.keys()/.values() iterators"""
190
- assert iter_type in {"keys", "values", "items"}
185
+ def _generic_iter(self, result_type: set[str]) -> Any:
186
+ """Underlying implementation for items/keys/values/... iterators"""
187
+ assert isinstance(result_type, set)
188
+ assert 1 <= len(result_type) <= 3
189
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
190
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
191
191
  raise NotImplementedError
192
192
 
193
193
 
194
194
  def __iter__(self):
195
195
  """Implement iter(self)."""
196
- return self._generic_iter("keys")
196
+ return self._generic_iter({"keys"})
197
197
 
198
198
 
199
199
  def keys(self):
200
- """D.keys() -> iterator object that provides access to D's keys"""
201
- return self._generic_iter("keys")
200
+ """iterator object that provides access to keys"""
201
+ return self._generic_iter({"keys"})
202
+
203
+
204
+ def keys_and_timestamps(self):
205
+ """iterator object that provides access to keys and timestamps"""
206
+ return self._generic_iter({"keys", "timestamps"})
202
207
 
203
208
 
204
209
  def values(self):
205
210
  """D.values() -> iterator object that provides access to D's values"""
206
- return self._generic_iter("values")
211
+ return self._generic_iter({"values"})
212
+
213
+
214
+ def values_and_timestamps(self):
215
+ """iterator object that provides access to values and timestamps"""
216
+ return self._generic_iter({"values", "timestamps"})
207
217
 
208
218
 
209
219
  def items(self):
210
220
  """D.items() -> iterator object that provides access to D's items"""
211
- return self._generic_iter("items")
221
+ return self._generic_iter({"keys", "values"})
222
+
223
+
224
+ def items_and_timestamps(self):
225
+ """iterator object that provides access to keys, values, and timestamps"""
226
+ return self._generic_iter({"keys", "values", "timestamps"})
212
227
 
213
228
 
214
229
  def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
@@ -350,39 +365,51 @@ class PersiDict(MutableMapping, ParameterizableClass):
350
365
 
351
366
  This method is absent in the original Python dict API.
352
367
  """
353
- all_keys = list(self.keys())
354
- # all_keys.sort(key=lambda k: self.timestamp(k))
355
- all_keys.sort(key=self.timestamp)
356
- if max_n is None or max_n > len(all_keys):
357
- max_n = len(all_keys)
358
- result = all_keys[:max_n]
359
- return result
368
+ if max_n is None:
369
+ # If we need all keys, sort them all by timestamp
370
+ key_timestamp_pairs = list(self.keys_and_timestamps())
371
+ key_timestamp_pairs.sort(key=lambda x: x[1])
372
+ return [key for key,_ in key_timestamp_pairs]
373
+ elif max_n <= 0:
374
+ return []
375
+ else:
376
+ # Use heapq.nsmallest for efficient partial sorting without loading all keys into memory
377
+ smallest_pairs = heapq.nsmallest(max_n
378
+ , self.keys_and_timestamps()
379
+ , key=lambda x: x[1])
380
+ return [key for key,_ in smallest_pairs]
360
381
 
361
382
 
362
- def newest_keys(self, max_n=None):
363
- """Return max_n the newest keys in the dictionary.
383
+ def oldest_values(self, max_n=None):
384
+ """Return max_n the oldest values in the dictionary.
364
385
 
365
- If max_n is None, return all keys.
386
+ If max_n is None, return all values.
366
387
 
367
388
  This method is absent in the original Python dict API.
368
389
  """
369
- all_keys = list(self.keys())
370
- # all_keys.sort(key=lambda k: self.timestamp(k), reverse=True)
371
- all_keys.sort(key=self.timestamp, reverse=True)
372
- if max_n is None or max_n > len(all_keys):
373
- max_n = len(all_keys)
374
- result = all_keys[:max_n]
375
- return result
390
+ return [self[k] for k in self.oldest_keys(max_n)]
376
391
 
377
392
 
378
- def oldest_values(self, max_n=None):
379
- """Return max_n the oldest values in the dictionary.
393
+ def newest_keys(self, max_n=None):
394
+ """Return max_n the newest keys in the dictionary.
380
395
 
381
- If max_n is None, return all values.
396
+ If max_n is None, return all keys.
382
397
 
383
398
  This method is absent in the original Python dict API.
384
399
  """
385
- return [self[k] for k in self.oldest_keys(max_n)]
400
+ if max_n is None:
401
+ # If we need all keys, sort them all by timestamp in reverse order
402
+ key_timestamp_pairs = list(self.keys_and_timestamps())
403
+ key_timestamp_pairs.sort(key=lambda x:x[1], reverse=True)
404
+ return [key for key,_ in key_timestamp_pairs]
405
+ elif max_n <= 0:
406
+ return []
407
+ else:
408
+ # Use heapq.nlargest for efficient partial sorting without loading all keys into memory
409
+ largest_pairs = heapq.nlargest(max_n
410
+ , self.keys_and_timestamps()
411
+ , key=lambda item: item[1])
412
+ return [key for key,_ in largest_pairs]
386
413
 
387
414
 
388
415
  def newest_values(self, max_n=None):
@@ -96,20 +96,6 @@ class S3Dict(PersiDict):
96
96
  self.root_prefix += "/"
97
97
 
98
98
 
99
- def __repr__(self) -> str:
100
- """Return repr(self)."""
101
-
102
- repr_str = super().__repr__()
103
- repr_str = repr_str[:-1] + f", _base_dir={self.local_cache._base_dir}"
104
- repr_str += f", file_type={self.file_type}"
105
- repr_str += f", region={self.region}"
106
- repr_str += f", bucket_name={self.bucket_name}"
107
- repr_str += f", root_prefix={self.root_prefix}"
108
- repr_str += " )"
109
-
110
- return repr_str
111
-
112
-
113
99
  def get_params(self):
114
100
  """Return configuration parameters of the object as a dictionary.
115
101
 
@@ -269,9 +255,14 @@ class S3Dict(PersiDict):
269
255
  return num_files
270
256
 
271
257
 
272
- def _generic_iter(self, iter_type: str):
258
+ def _generic_iter(self, result_type: str):
273
259
  """Underlying implementation for .items()/.keys()/.values() iterators"""
274
- assert iter_type in {"keys", "values", "items"}
260
+
261
+ assert isinstance(result_type, set)
262
+ assert 1 <= len(result_type) <= 3
263
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
264
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
265
+
275
266
  suffix = "." + self.file_type
276
267
  ext_len = len(self.file_type) + 1
277
268
  prefix_len = len(self.root_prefix)
@@ -295,14 +286,25 @@ class S3Dict(PersiDict):
295
286
  if not obj_name.endswith(suffix):
296
287
  continue
297
288
  obj_key = splitter(obj_name)
298
- if iter_type == "keys":
299
- yield unsign_safe_str_tuple(
289
+
290
+ to_return = []
291
+
292
+ if "keys" in result_type:
293
+ key_to_return = unsign_safe_str_tuple(
300
294
  obj_key, self.digest_len)
301
- elif iter_type == "values":
302
- yield self[obj_key]
295
+ to_return.append(key_to_return)
296
+
297
+ if "values" in result_type:
298
+ value_to_return = self[obj_key]
299
+ to_return.append(value_to_return)
300
+
301
+ if len(result_type) == 1:
302
+ yield to_return[0]
303
303
  else:
304
- yield (unsign_safe_str_tuple(
305
- obj_key, self.digest_len), self[obj_key])
304
+ if "timestamps" in result_type:
305
+ timestamp_to_return = key["LastModified"].timestamp()
306
+ to_return.append(timestamp_to_return)
307
+ yield tuple(to_return)
306
308
 
307
309
  return step()
308
310
 
File without changes