persidict 0.32.2__py3-none-any.whl → 0.32.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

persidict/.DS_Store ADDED
Binary file
persidict/__init__.py CHANGED
File without changes
persidict/file_dir_dict.py CHANGED
@@ -103,17 +103,6 @@ class FileDirDict(PersiDict):
103
103
  self._base_dir = os.path.abspath(base_dir)
104
104
 
105
105
 
106
- def __repr__(self):
107
- """Return repr(self)."""
108
-
109
- repr_str = super().__repr__()
110
- repr_str = repr_str[:-1] + f", _base_dir={self._base_dir}"
111
- repr_str += f", file_type={self.file_type}"
112
- repr_str += " )"
113
-
114
- return repr_str
115
-
116
-
117
106
  def get_params(self):
118
107
  """Return configuration parameters of the dictionary.
119
108
 
@@ -393,24 +382,28 @@ class FileDirDict(PersiDict):
393
382
  os.remove(filename)
394
383
 
395
384
 
396
- def _generic_iter(self, iter_type: str):
385
+ def _generic_iter(self, result_type: set[str]):
397
386
  """Underlying implementation for .items()/.keys()/.values() iterators"""
398
- assert iter_type in {"keys", "values", "items"}
387
+ assert isinstance(result_type, set)
388
+ assert 1 <= len(result_type) <= 3
389
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
390
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
391
+
399
392
  walk_results = os.walk(self._base_dir)
400
393
  ext_len = len(self.file_type) + 1
401
394
 
402
395
  def splitter(dir_path: str):
403
396
  """Transform a dirname into a PersiDictKey key"""
404
- result = []
397
+ splitted_str = []
405
398
  if dir_path == ".":
406
- return result
399
+ return splitted_str
407
400
  while True:
408
401
  head, tail = os.path.split(dir_path)
409
- result = [tail] + result
402
+ splitted_str = [tail] + splitted_str
410
403
  dir_path = head
411
404
  if len(head) == 0:
412
405
  break
413
- return tuple(result)
406
+ return tuple(splitted_str)
414
407
 
415
408
  def step():
416
409
  suffix = "." + self.file_type
@@ -423,14 +416,25 @@ class FileDirDict(PersiDict):
423
416
  result_key = (*splitter(prefix_key), f[:-ext_len])
424
417
  result_key = SafeStrTuple(result_key)
425
418
 
426
- if iter_type == "keys":
427
- yield unsign_safe_str_tuple(
419
+ to_return = []
420
+
421
+ if "keys" in result_type:
422
+ key_to_return= unsign_safe_str_tuple(
428
423
  result_key, self.digest_len)
429
- elif iter_type == "values":
430
- yield self[result_key]
424
+ to_return.append(key_to_return)
425
+
426
+ if "values" in result_type:
427
+ value_to_return = self[result_key]
428
+ to_return.append(value_to_return)
429
+
430
+ if len(result_type) == 1:
431
+ yield to_return[0]
431
432
  else:
432
- yield (unsign_safe_str_tuple(
433
- result_key, self.digest_len), self[result_key])
433
+ if "timestamps" in result_type:
434
+ timestamp_to_return = os.path.getmtime(
435
+ os.path.join(dir_name, f))
436
+ to_return.append(timestamp_to_return)
437
+ yield tuple(to_return)
434
438
 
435
439
  return step()
436
440
 
persidict/jokers.py CHANGED
File without changes
File without changes
persidict/persi_dict.py CHANGED
@@ -21,6 +21,7 @@ even after the Python process that created the dictionary has terminated.
21
21
  from __future__ import annotations
22
22
 
23
23
  from abc import abstractmethod
24
+ import heapq
24
25
  import random
25
26
  from parameterizable import ParameterizableClass, sort_dict_by_keys
26
27
  from typing import Any, Sequence, Optional
@@ -87,7 +88,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
87
88
  , immutable_items:bool = False
88
89
  , digest_len:int = 8
89
90
  , base_class_for_values:Optional[type] = None
90
- , *args, **kwargas):
91
+ , *args, **kwargs):
91
92
  self.digest_len = int(digest_len)
92
93
  if digest_len < 0:
93
94
  raise ValueError("digest_len must be non-negative")
@@ -133,13 +134,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
133
134
 
134
135
  def __repr__(self) -> str:
135
136
  """Return repr(self)"""
136
- repr_str = self.__class__.__name__ + "("
137
- repr_str += repr(dict(self.items()))
138
- repr_str += f", immutable_items={self.immutable_items}"
139
- repr_str += f", digest_len={self.digest_len}"
140
- repr_str += f", base_class_for_values={self.base_class_for_values}"
141
- repr_str += ")"
142
- return repr_str
137
+ params = self.get_params()
138
+ params_str = ', '.join(f'{k}={v!r}' for k, v in params.items())
139
+ return f'{self.__class__.__name__}({params_str})'
143
140
 
144
141
 
145
142
  def __str__(self) -> str:
@@ -185,30 +182,48 @@ class PersiDict(MutableMapping, ParameterizableClass):
185
182
 
186
183
 
187
184
  @abstractmethod
188
- def _generic_iter(self, iter_type: str):
189
- """Underlying implementation for .items()/.keys()/.values() iterators"""
190
- assert iter_type in {"keys", "values", "items"}
185
+ def _generic_iter(self, result_type: set[str]) -> Any:
186
+ """Underlying implementation for items/keys/values/... iterators"""
187
+ assert isinstance(result_type, set)
188
+ assert 1 <= len(result_type) <= 3
189
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
190
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
191
191
  raise NotImplementedError
192
192
 
193
193
 
194
194
  def __iter__(self):
195
195
  """Implement iter(self)."""
196
- return self._generic_iter("keys")
196
+ return self._generic_iter({"keys"})
197
197
 
198
198
 
199
199
  def keys(self):
200
- """D.keys() -> iterator object that provides access to D's keys"""
201
- return self._generic_iter("keys")
200
+ """iterator object that provides access to keys"""
201
+ return self._generic_iter({"keys"})
202
+
203
+
204
+ def keys_and_timestamps(self):
205
+ """iterator object that provides access to keys and timestamps"""
206
+ return self._generic_iter({"keys", "timestamps"})
202
207
 
203
208
 
204
209
  def values(self):
205
210
  """D.values() -> iterator object that provides access to D's values"""
206
- return self._generic_iter("values")
211
+ return self._generic_iter({"values"})
212
+
213
+
214
+ def values_and_timestamps(self):
215
+ """iterator object that provides access to values and timestamps"""
216
+ return self._generic_iter({"values", "timestamps"})
207
217
 
208
218
 
209
219
  def items(self):
210
220
  """D.items() -> iterator object that provides access to D's items"""
211
- return self._generic_iter("items")
221
+ return self._generic_iter({"keys", "values"})
222
+
223
+
224
+ def items_and_timestamps(self):
225
+ """iterator object that provides access to keys, values, and timestamps"""
226
+ return self._generic_iter({"keys", "values", "timestamps"})
212
227
 
213
228
 
214
229
  def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
@@ -350,39 +365,51 @@ class PersiDict(MutableMapping, ParameterizableClass):
350
365
 
351
366
  This method is absent in the original Python dict API.
352
367
  """
353
- all_keys = list(self.keys())
354
- # all_keys.sort(key=lambda k: self.timestamp(k))
355
- all_keys.sort(key=self.timestamp)
356
- if max_n is None or max_n > len(all_keys):
357
- max_n = len(all_keys)
358
- result = all_keys[:max_n]
359
- return result
368
+ if max_n is None:
369
+ # If we need all keys, sort them all by timestamp
370
+ key_timestamp_pairs = list(self.keys_and_timestamps())
371
+ key_timestamp_pairs.sort(key=lambda x: x[1])
372
+ return [key for key,_ in key_timestamp_pairs]
373
+ elif max_n <= 0:
374
+ return []
375
+ else:
376
+ # Use heapq.nsmallest for efficient partial sorting without loading all keys into memory
377
+ smallest_pairs = heapq.nsmallest(max_n
378
+ , self.keys_and_timestamps()
379
+ , key=lambda x: x[1])
380
+ return [key for key,_ in smallest_pairs]
360
381
 
361
382
 
362
- def newest_keys(self, max_n=None):
363
- """Return max_n the newest keys in the dictionary.
383
+ def oldest_values(self, max_n=None):
384
+ """Return max_n the oldest values in the dictionary.
364
385
 
365
- If max_n is None, return all keys.
386
+ If max_n is None, return all values.
366
387
 
367
388
  This method is absent in the original Python dict API.
368
389
  """
369
- all_keys = list(self.keys())
370
- # all_keys.sort(key=lambda k: self.timestamp(k), reverse=True)
371
- all_keys.sort(key=self.timestamp, reverse=True)
372
- if max_n is None or max_n > len(all_keys):
373
- max_n = len(all_keys)
374
- result = all_keys[:max_n]
375
- return result
390
+ return [self[k] for k in self.oldest_keys(max_n)]
376
391
 
377
392
 
378
- def oldest_values(self, max_n=None):
379
- """Return max_n the oldest values in the dictionary.
393
+ def newest_keys(self, max_n=None):
394
+ """Return max_n the newest keys in the dictionary.
380
395
 
381
- If max_n is None, return all values.
396
+ If max_n is None, return all keys.
382
397
 
383
398
  This method is absent in the original Python dict API.
384
399
  """
385
- return [self[k] for k in self.oldest_keys(max_n)]
400
+ if max_n is None:
401
+ # If we need all keys, sort them all by timestamp in reverse order
402
+ key_timestamp_pairs = list(self.keys_and_timestamps())
403
+ key_timestamp_pairs.sort(key=lambda x:x[1], reverse=True)
404
+ return [key for key,_ in key_timestamp_pairs]
405
+ elif max_n <= 0:
406
+ return []
407
+ else:
408
+ # Use heapq.nlargest for efficient partial sorting without loading all keys into memory
409
+ largest_pairs = heapq.nlargest(max_n
410
+ , self.keys_and_timestamps()
411
+ , key=lambda item: item[1])
412
+ return [key for key,_ in largest_pairs]
386
413
 
387
414
 
388
415
  def newest_values(self, max_n=None):
persidict/s3_dict.py CHANGED
@@ -96,20 +96,6 @@ class S3Dict(PersiDict):
96
96
  self.root_prefix += "/"
97
97
 
98
98
 
99
- def __repr__(self) -> str:
100
- """Return repr(self)."""
101
-
102
- repr_str = super().__repr__()
103
- repr_str = repr_str[:-1] + f", _base_dir={self.local_cache._base_dir}"
104
- repr_str += f", file_type={self.file_type}"
105
- repr_str += f", region={self.region}"
106
- repr_str += f", bucket_name={self.bucket_name}"
107
- repr_str += f", root_prefix={self.root_prefix}"
108
- repr_str += " )"
109
-
110
- return repr_str
111
-
112
-
113
99
  def get_params(self):
114
100
  """Return configuration parameters of the object as a dictionary.
115
101
 
@@ -253,23 +239,30 @@ class S3Dict(PersiDict):
253
239
  num_files = 0
254
240
  suffix = "." + self.file_type
255
241
 
256
- paginator = self.s3_client.get_paginator("list_objects")
242
+ paginator = self.s3_client.get_paginator("list_objects_v2")
257
243
  page_iterator = paginator.paginate(
258
244
  Bucket=self.bucket_name, Prefix = self.root_prefix)
259
245
 
260
246
  for page in page_iterator:
261
- if "Contents" in page:
262
- for key in page["Contents"]:
263
- obj_name = key["Key"]
264
- if obj_name.endswith(suffix):
265
- num_files += 1
247
+ contents = page.get("Contents")
248
+ if not contents:
249
+ continue
250
+ for key in contents:
251
+ obj_name = key["Key"]
252
+ if obj_name.endswith(suffix):
253
+ num_files += 1
266
254
 
267
255
  return num_files
268
256
 
269
257
 
270
- def _generic_iter(self, iter_type: str):
258
+ def _generic_iter(self, result_type: str):
271
259
  """Underlying implementation for .items()/.keys()/.values() iterators"""
272
- assert iter_type in {"keys", "values", "items"}
260
+
261
+ assert isinstance(result_type, set)
262
+ assert 1 <= len(result_type) <= 3
263
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
264
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
265
+
273
266
  suffix = "." + self.file_type
274
267
  ext_len = len(self.file_type) + 1
275
268
  prefix_len = len(self.root_prefix)
@@ -280,25 +273,38 @@ class S3Dict(PersiDict):
280
273
  return SafeStrTuple(result)
281
274
 
282
275
  def step():
283
- paginator = self.s3_client.get_paginator("list_objects")
276
+ paginator = self.s3_client.get_paginator("list_objects_v2")
284
277
  page_iterator = paginator.paginate(
285
278
  Bucket=self.bucket_name, Prefix = self.root_prefix)
286
279
 
287
280
  for page in page_iterator:
288
- if "Contents" in page:
289
- for key in page["Contents"]:
290
- obj_name = key["Key"]
291
- if not obj_name.endswith(suffix):
292
- continue
293
- obj_key = splitter(obj_name)
294
- if iter_type == "keys":
295
- yield unsign_safe_str_tuple(
296
- obj_key, self.digest_len)
297
- elif iter_type == "values":
298
- yield self[obj_key]
299
- else:
300
- yield (unsign_safe_str_tuple(
301
- obj_key, self.digest_len), self[obj_key])
281
+ contents = page.get("Contents")
282
+ if not contents:
283
+ continue
284
+ for key in contents:
285
+ obj_name = key["Key"]
286
+ if not obj_name.endswith(suffix):
287
+ continue
288
+ obj_key = splitter(obj_name)
289
+
290
+ to_return = []
291
+
292
+ if "keys" in result_type:
293
+ key_to_return = unsign_safe_str_tuple(
294
+ obj_key, self.digest_len)
295
+ to_return.append(key_to_return)
296
+
297
+ if "values" in result_type:
298
+ value_to_return = self[obj_key]
299
+ to_return.append(value_to_return)
300
+
301
+ if len(result_type) == 1:
302
+ yield to_return[0]
303
+ else:
304
+ if "timestamps" in result_type:
305
+ timestamp_to_return = key["LastModified"].timestamp()
306
+ to_return.append(timestamp_to_return)
307
+ yield tuple(to_return)
302
308
 
303
309
  return step()
304
310
 
persidict/safe_chars.py CHANGED
File without changes
File without changes
File without changes
File without changes
persidict-0.32.2.dist-info/METADATA → persidict-0.32.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.32.2
3
+ Version: 0.32.7
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
persidict-0.32.7.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
1
+ persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
+ persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
+ persidict/file_dir_dict.py,sha256=3uoAua47pIJwpCPkXtF2dIskHm73kZogy15yknLfLUk,17939
4
+ persidict/jokers.py,sha256=kX4bE-jKWTM2ki7JOmm_2uJS8zm8u6InZ_V12xo2ImI,1436
5
+ persidict/overlapping_multi_dict.py,sha256=a-lUbmY15_HrDq6jSIt8F8tJboqbeYiuRQeW4elf_oU,2663
6
+ persidict/persi_dict.py,sha256=SF6aWs6kCeeW-bZ9HJwx0sPX7Xav_aURqeSZ-j5quv0,14266
7
+ persidict/s3_dict.py,sha256=awEIu7ehh4FgKircC10lcHp86xvUctXA_6jxUmC3Hy8,12480
8
+ persidict/safe_chars.py,sha256=WaIOliBdLlZ12CepEa1C3b1VPxvX4s2hvnZanCxM6BQ,565
9
+ persidict/safe_str_tuple.py,sha256=cTk5BL3r-yE62EKf7VngTaUpZAdsATJPIjiCGqQzkyU,3717
10
+ persidict/safe_str_tuple_signing.py,sha256=5uCjAVZRqOou-KpDZw-Exboc3-3vuayJMqrrt8aZ0ck,3742
11
+ persidict/write_once_dict.py,sha256=dqzA3amVDK9D8ygU8NFT5B87smAHYWdEn78Fssj6s_g,6396
12
+ persidict-0.32.7.dist-info/WHEEL,sha256=Jb20R3Ili4n9P1fcwuLup21eQ5r9WXhs4_qy7VTrgPI,79
13
+ persidict-0.32.7.dist-info/METADATA,sha256=MG-CqgwJx0yz3aounX3ZT-dvD-9s2HdTzTDo9mQXN14,9334
14
+ persidict-0.32.7.dist-info/RECORD,,
persidict-0.32.2.dist-info/WHEEL → persidict-0.32.7.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: uv 0.7.19
2
+ Generator: uv 0.8.15
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
persidict-0.32.2.dist-info/RECORD REMOVED
@@ -1,13 +0,0 @@
1
- persidict/__init__.py,sha256=0833922468029f2453906513cda7a0dc2aacc70c74fb410589e3ad95d5f0025b,1380
2
- persidict/file_dir_dict.py,sha256=0c650ba2d5c17ab65ea5dd5247544f68c77e3027b022876b18ea19ade51d78a6,17527
3
- persidict/jokers.py,sha256=917e1b13e8ca593336922ec93a69bfdae252f339bcbba22767f575db1a362262,1436
4
- persidict/overlapping_multi_dict.py,sha256=6be9546e6635e7f1eb0eaea3488b7c17cb496e8a9b7988ae450796e1e95ffe85,2663
5
- persidict/persi_dict.py,sha256=e4f962437df50e0422d58057ab23446802a916d46afd63e69c9fe9196f12f7e8,12900
6
- persidict/s3_dict.py,sha256=0d8942037cee409d8bda8f63f923a8385f4f9f1fac03e765ec5f5fc675334ae2,12292
7
- persidict/safe_chars.py,sha256=59a20e96205d2e5675d827a911ad42ddbd553f1bd7e2cda1be765a9c2c4ce814,565
8
- persidict/safe_str_tuple.py,sha256=71393904bdebfb213ad8429fed59e04da52964076c01324f2238821aa4339325,3717
9
- persidict/safe_str_tuple_signing.py,sha256=e6e0a3015651a8ea2ef8aa43670f84c5ba1cdfedefb9ac8932aaebb7c699d1c9,3742
10
- persidict/write_once_dict.py,sha256=76acc0dda9950caf43f32814f0d153e41f3bb260076167449fbf05b2c8fab3f8,6396
11
- persidict-0.32.2.dist-info/WHEEL,sha256=607c46fee47e440c91332c738096ff0f5e54ca3b0818ee85462dd5172a38e793,79
12
- persidict-0.32.2.dist-info/METADATA,sha256=54072df5d3f92135b1ff868e96bf0de7a406185f243090b3725c0b5d46acf736,9334
13
- persidict-0.32.2.dist-info/RECORD,,