persidict 0.32.3__py3-none-any.whl → 0.32.8__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

persidict/.DS_Store ADDED
Binary file
@@ -103,17 +103,6 @@ class FileDirDict(PersiDict):
103
103
  self._base_dir = os.path.abspath(base_dir)
104
104
 
105
105
 
106
- def __repr__(self):
107
- """Return repr(self)."""
108
-
109
- repr_str = super().__repr__()
110
- repr_str = repr_str[:-1] + f", _base_dir={self._base_dir}"
111
- repr_str += f", file_type={self.file_type}"
112
- repr_str += " )"
113
-
114
- return repr_str
115
-
116
-
117
106
  def get_params(self):
118
107
  """Return configuration parameters of the dictionary.
119
108
 
@@ -150,23 +139,9 @@ class FileDirDict(PersiDict):
150
139
  def __len__(self) -> int:
151
140
  """ Get the number of key-value pairs in the dictionary."""
152
141
 
153
- num_files = 0
154
142
  suffix = "." + self.file_type
155
- stack = [self._base_dir]
156
-
157
- while stack:
158
- path = stack.pop()
159
- try:
160
- with os.scandir(path) as it:
161
- for entry in it:
162
- if entry.is_dir(follow_symlinks=False):
163
- stack.append(entry.path)
164
- elif entry.is_file(follow_symlinks=False) and entry.name.endswith(suffix):
165
- num_files += 1
166
- except PermissionError:
167
- continue
168
-
169
- return num_files
143
+ return sum(1 for _, _, files in os.walk(self._base_dir)
144
+ for f in files if f.endswith(suffix))
170
145
 
171
146
 
172
147
  def clear(self) -> None:
@@ -185,6 +160,7 @@ class FileDirDict(PersiDict):
185
160
  len(os.listdir(subdir_name)) == 0 ):
186
161
  os.rmdir(subdir_name)
187
162
 
163
+
188
164
  def _build_full_path(self
189
165
  , key:SafeStrTuple
190
166
  , create_subdirs:bool=False
@@ -393,24 +369,28 @@ class FileDirDict(PersiDict):
393
369
  os.remove(filename)
394
370
 
395
371
 
396
- def _generic_iter(self, iter_type: str):
372
+ def _generic_iter(self, result_type: set[str]):
397
373
  """Underlying implementation for .items()/.keys()/.values() iterators"""
398
- assert iter_type in {"keys", "values", "items"}
374
+ assert isinstance(result_type, set)
375
+ assert 1 <= len(result_type) <= 3
376
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
377
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
378
+
399
379
  walk_results = os.walk(self._base_dir)
400
380
  ext_len = len(self.file_type) + 1
401
381
 
402
382
  def splitter(dir_path: str):
403
383
  """Transform a dirname into a PersiDictKey key"""
404
- result = []
384
+ splitted_str = []
405
385
  if dir_path == ".":
406
- return result
386
+ return splitted_str
407
387
  while True:
408
388
  head, tail = os.path.split(dir_path)
409
- result = [tail] + result
389
+ splitted_str = [tail] + splitted_str
410
390
  dir_path = head
411
391
  if len(head) == 0:
412
392
  break
413
- return tuple(result)
393
+ return tuple(splitted_str)
414
394
 
415
395
  def step():
416
396
  suffix = "." + self.file_type
@@ -423,14 +403,26 @@ class FileDirDict(PersiDict):
423
403
  result_key = (*splitter(prefix_key), f[:-ext_len])
424
404
  result_key = SafeStrTuple(result_key)
425
405
 
426
- if iter_type == "keys":
427
- yield unsign_safe_str_tuple(
406
+ to_return = []
407
+
408
+ if "keys" in result_type:
409
+ key_to_return= unsign_safe_str_tuple(
428
410
  result_key, self.digest_len)
429
- elif iter_type == "values":
430
- yield self[result_key]
411
+ to_return.append(key_to_return)
412
+
413
+ if "values" in result_type:
414
+ full_path = os.path.join(dir_name, f)
415
+ value_to_return = self._read_from_file(full_path)
416
+ to_return.append(value_to_return)
417
+
418
+ if len(result_type) == 1:
419
+ yield to_return[0]
431
420
  else:
432
- yield (unsign_safe_str_tuple(
433
- result_key, self.digest_len), self[result_key])
421
+ if "timestamps" in result_type:
422
+ timestamp_to_return = os.path.getmtime(
423
+ os.path.join(dir_name, f))
424
+ to_return.append(timestamp_to_return)
425
+ yield tuple(to_return)
434
426
 
435
427
  return step()
436
428
 
persidict/persi_dict.py CHANGED
@@ -21,6 +21,7 @@ even after the Python process that created the dictionary has terminated.
21
21
  from __future__ import annotations
22
22
 
23
23
  from abc import abstractmethod
24
+ import heapq
24
25
  import random
25
26
  from parameterizable import ParameterizableClass, sort_dict_by_keys
26
27
  from typing import Any, Sequence, Optional
@@ -87,7 +88,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
87
88
  , immutable_items:bool = False
88
89
  , digest_len:int = 8
89
90
  , base_class_for_values:Optional[type] = None
90
- , *args, **kwargas):
91
+ , *args, **kwargs):
91
92
  self.digest_len = int(digest_len)
92
93
  if digest_len < 0:
93
94
  raise ValueError("digest_len must be non-negative")
@@ -133,13 +134,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
133
134
 
134
135
  def __repr__(self) -> str:
135
136
  """Return repr(self)"""
136
- repr_str = self.__class__.__name__ + "("
137
- repr_str += repr(dict(self.items()))
138
- repr_str += f", immutable_items={self.immutable_items}"
139
- repr_str += f", digest_len={self.digest_len}"
140
- repr_str += f", base_class_for_values={self.base_class_for_values}"
141
- repr_str += ")"
142
- return repr_str
137
+ params = self.get_params()
138
+ params_str = ', '.join(f'{k}={v!r}' for k, v in params.items())
139
+ return f'{self.__class__.__name__}({params_str})'
143
140
 
144
141
 
145
142
  def __str__(self) -> str:
@@ -185,30 +182,48 @@ class PersiDict(MutableMapping, ParameterizableClass):
185
182
 
186
183
 
187
184
  @abstractmethod
188
- def _generic_iter(self, iter_type: str):
189
- """Underlying implementation for .items()/.keys()/.values() iterators"""
190
- assert iter_type in {"keys", "values", "items"}
185
+ def _generic_iter(self, result_type: set[str]) -> Any:
186
+ """Underlying implementation for items/keys/values/... iterators"""
187
+ assert isinstance(result_type, set)
188
+ assert 1 <= len(result_type) <= 3
189
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
190
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
191
191
  raise NotImplementedError
192
192
 
193
193
 
194
194
  def __iter__(self):
195
195
  """Implement iter(self)."""
196
- return self._generic_iter("keys")
196
+ return self._generic_iter({"keys"})
197
197
 
198
198
 
199
199
  def keys(self):
200
- """D.keys() -> iterator object that provides access to D's keys"""
201
- return self._generic_iter("keys")
200
+ """iterator object that provides access to keys"""
201
+ return self._generic_iter({"keys"})
202
+
203
+
204
+ def keys_and_timestamps(self):
205
+ """iterator object that provides access to keys and timestamps"""
206
+ return self._generic_iter({"keys", "timestamps"})
202
207
 
203
208
 
204
209
  def values(self):
205
210
  """D.values() -> iterator object that provides access to D's values"""
206
- return self._generic_iter("values")
211
+ return self._generic_iter({"values"})
212
+
213
+
214
+ def values_and_timestamps(self):
215
+ """iterator object that provides access to values and timestamps"""
216
+ return self._generic_iter({"values", "timestamps"})
207
217
 
208
218
 
209
219
  def items(self):
210
220
  """D.items() -> iterator object that provides access to D's items"""
211
- return self._generic_iter("items")
221
+ return self._generic_iter({"keys", "values"})
222
+
223
+
224
+ def items_and_timestamps(self):
225
+ """iterator object that provides access to keys, values, and timestamps"""
226
+ return self._generic_iter({"keys", "values", "timestamps"})
212
227
 
213
228
 
214
229
  def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
@@ -350,39 +365,51 @@ class PersiDict(MutableMapping, ParameterizableClass):
350
365
 
351
366
  This method is absent in the original Python dict API.
352
367
  """
353
- all_keys = list(self.keys())
354
- # all_keys.sort(key=lambda k: self.timestamp(k))
355
- all_keys.sort(key=self.timestamp)
356
- if max_n is None or max_n > len(all_keys):
357
- max_n = len(all_keys)
358
- result = all_keys[:max_n]
359
- return result
368
+ if max_n is None:
369
+ # If we need all keys, sort them all by timestamp
370
+ key_timestamp_pairs = list(self.keys_and_timestamps())
371
+ key_timestamp_pairs.sort(key=lambda x: x[1])
372
+ return [key for key,_ in key_timestamp_pairs]
373
+ elif max_n <= 0:
374
+ return []
375
+ else:
376
+ # Use heapq.nsmallest for efficient partial sorting without loading all keys into memory
377
+ smallest_pairs = heapq.nsmallest(max_n
378
+ , self.keys_and_timestamps()
379
+ , key=lambda x: x[1])
380
+ return [key for key,_ in smallest_pairs]
360
381
 
361
382
 
362
- def newest_keys(self, max_n=None):
363
- """Return max_n the newest keys in the dictionary.
383
+ def oldest_values(self, max_n=None):
384
+ """Return max_n the oldest values in the dictionary.
364
385
 
365
- If max_n is None, return all keys.
386
+ If max_n is None, return all values.
366
387
 
367
388
  This method is absent in the original Python dict API.
368
389
  """
369
- all_keys = list(self.keys())
370
- # all_keys.sort(key=lambda k: self.timestamp(k), reverse=True)
371
- all_keys.sort(key=self.timestamp, reverse=True)
372
- if max_n is None or max_n > len(all_keys):
373
- max_n = len(all_keys)
374
- result = all_keys[:max_n]
375
- return result
390
+ return [self[k] for k in self.oldest_keys(max_n)]
376
391
 
377
392
 
378
- def oldest_values(self, max_n=None):
379
- """Return max_n the oldest values in the dictionary.
393
+ def newest_keys(self, max_n=None):
394
+ """Return max_n the newest keys in the dictionary.
380
395
 
381
- If max_n is None, return all values.
396
+ If max_n is None, return all keys.
382
397
 
383
398
  This method is absent in the original Python dict API.
384
399
  """
385
- return [self[k] for k in self.oldest_keys(max_n)]
400
+ if max_n is None:
401
+ # If we need all keys, sort them all by timestamp in reverse order
402
+ key_timestamp_pairs = list(self.keys_and_timestamps())
403
+ key_timestamp_pairs.sort(key=lambda x:x[1], reverse=True)
404
+ return [key for key,_ in key_timestamp_pairs]
405
+ elif max_n <= 0:
406
+ return []
407
+ else:
408
+ # Use heapq.nlargest for efficient partial sorting without loading all keys into memory
409
+ largest_pairs = heapq.nlargest(max_n
410
+ , self.keys_and_timestamps()
411
+ , key=lambda item: item[1])
412
+ return [key for key,_ in largest_pairs]
386
413
 
387
414
 
388
415
  def newest_values(self, max_n=None):
persidict/s3_dict.py CHANGED
@@ -96,20 +96,6 @@ class S3Dict(PersiDict):
96
96
  self.root_prefix += "/"
97
97
 
98
98
 
99
- def __repr__(self) -> str:
100
- """Return repr(self)."""
101
-
102
- repr_str = super().__repr__()
103
- repr_str = repr_str[:-1] + f", _base_dir={self.local_cache._base_dir}"
104
- repr_str += f", file_type={self.file_type}"
105
- repr_str += f", region={self.region}"
106
- repr_str += f", bucket_name={self.bucket_name}"
107
- repr_str += f", root_prefix={self.root_prefix}"
108
- repr_str += " )"
109
-
110
- return repr_str
111
-
112
-
113
99
  def get_params(self):
114
100
  """Return configuration parameters of the object as a dictionary.
115
101
 
@@ -269,9 +255,14 @@ class S3Dict(PersiDict):
269
255
  return num_files
270
256
 
271
257
 
272
- def _generic_iter(self, iter_type: str):
258
+ def _generic_iter(self, result_type: str):
273
259
  """Underlying implementation for .items()/.keys()/.values() iterators"""
274
- assert iter_type in {"keys", "values", "items"}
260
+
261
+ assert isinstance(result_type, set)
262
+ assert 1 <= len(result_type) <= 3
263
+ assert len(result_type | {"keys", "values", "timestamps"}) == 3
264
+ assert 1 <= len(result_type & {"keys", "values", "timestamps"}) <= 3
265
+
275
266
  suffix = "." + self.file_type
276
267
  ext_len = len(self.file_type) + 1
277
268
  prefix_len = len(self.root_prefix)
@@ -295,14 +286,25 @@ class S3Dict(PersiDict):
295
286
  if not obj_name.endswith(suffix):
296
287
  continue
297
288
  obj_key = splitter(obj_name)
298
- if iter_type == "keys":
299
- yield unsign_safe_str_tuple(
289
+
290
+ to_return = []
291
+
292
+ if "keys" in result_type:
293
+ key_to_return = unsign_safe_str_tuple(
300
294
  obj_key, self.digest_len)
301
- elif iter_type == "values":
302
- yield self[obj_key]
295
+ to_return.append(key_to_return)
296
+
297
+ if "values" in result_type:
298
+ value_to_return = self[obj_key]
299
+ to_return.append(value_to_return)
300
+
301
+ if len(result_type) == 1:
302
+ yield to_return[0]
303
303
  else:
304
- yield (unsign_safe_str_tuple(
305
- obj_key, self.digest_len), self[obj_key])
304
+ if "timestamps" in result_type:
305
+ timestamp_to_return = key["LastModified"].timestamp()
306
+ to_return.append(timestamp_to_return)
307
+ yield tuple(to_return)
306
308
 
307
309
  return step()
308
310
 
persidict/safe_chars.py CHANGED
@@ -1,12 +1,11 @@
1
1
  import string
2
- from copy import deepcopy
3
2
 
4
3
  SAFE_CHARS_SET = set(string.ascii_letters + string.digits + "()_-~.=")
5
4
  SAFE_STRING_MAX_LENGTH = 254
6
5
 
7
6
  def get_safe_chars() -> set[str]:
8
7
  """Return a set of allowed characters."""
9
- return deepcopy(SAFE_CHARS_SET)
8
+ return SAFE_CHARS_SET.copy()
10
9
 
11
10
  def replace_unsafe_chars(a_str:str, replace_with:str) -> str :
12
11
  """ Replace unsafe (special) characters with allowed (safe) ones."""
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import time
4
+ from functools import cache
4
5
 
5
6
  from deepdiff import DeepDiff
6
7
  from parameterizable import register_parameterizable_class, sort_dict_by_keys
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.32.3
3
+ Version: 0.32.8
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -0,0 +1,14 @@
1
+ persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
+ persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
+ persidict/file_dir_dict.py,sha256=SijMlIbte9kNbEyryjK4-cXakQl35gCAo1HaO1WcaU4,17589
4
+ persidict/jokers.py,sha256=kX4bE-jKWTM2ki7JOmm_2uJS8zm8u6InZ_V12xo2ImI,1436
5
+ persidict/overlapping_multi_dict.py,sha256=a-lUbmY15_HrDq6jSIt8F8tJboqbeYiuRQeW4elf_oU,2663
6
+ persidict/persi_dict.py,sha256=SF6aWs6kCeeW-bZ9HJwx0sPX7Xav_aURqeSZ-j5quv0,14266
7
+ persidict/s3_dict.py,sha256=awEIu7ehh4FgKircC10lcHp86xvUctXA_6jxUmC3Hy8,12480
8
+ persidict/safe_chars.py,sha256=HjK1MwROYy_U9ui-rhg1i3nGkj52K4OFWD-wCCcnJ7Y,536
9
+ persidict/safe_str_tuple.py,sha256=cTk5BL3r-yE62EKf7VngTaUpZAdsATJPIjiCGqQzkyU,3717
10
+ persidict/safe_str_tuple_signing.py,sha256=5uCjAVZRqOou-KpDZw-Exboc3-3vuayJMqrrt8aZ0ck,3742
11
+ persidict/write_once_dict.py,sha256=NBzaw38zxWVbvCj8OR4T-7w6K41qNrr0gpyr23CRcNQ,6424
12
+ persidict-0.32.8.dist-info/WHEEL,sha256=Jb20R3Ili4n9P1fcwuLup21eQ5r9WXhs4_qy7VTrgPI,79
13
+ persidict-0.32.8.dist-info/METADATA,sha256=Bp5vJrqVsfXCNle3grOIArHx-l931DmhJ9NUCSs-uMY,9334
14
+ persidict-0.32.8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: uv 0.8.13
2
+ Generator: uv 0.8.15
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,13 +0,0 @@
1
- persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
2
- persidict/file_dir_dict.py,sha256=DGULotXBerZepd1SR1RPaMd-MCewIodrGOoZreUdeKY,17527
3
- persidict/jokers.py,sha256=kX4bE-jKWTM2ki7JOmm_2uJS8zm8u6InZ_V12xo2ImI,1436
4
- persidict/overlapping_multi_dict.py,sha256=a-lUbmY15_HrDq6jSIt8F8tJboqbeYiuRQeW4elf_oU,2663
5
- persidict/persi_dict.py,sha256=5PliQ331DgQi1YBXqyNEaAKpFtRq_WPmnJ_pGW8S9-g,12900
6
- persidict/s3_dict.py,sha256=OCCVawtmCJM79K4xTvmbn6U2oZt49JZ-1MBoMx7o9m0,12348
7
- persidict/safe_chars.py,sha256=WaIOliBdLlZ12CepEa1C3b1VPxvX4s2hvnZanCxM6BQ,565
8
- persidict/safe_str_tuple.py,sha256=cTk5BL3r-yE62EKf7VngTaUpZAdsATJPIjiCGqQzkyU,3717
9
- persidict/safe_str_tuple_signing.py,sha256=5uCjAVZRqOou-KpDZw-Exboc3-3vuayJMqrrt8aZ0ck,3742
10
- persidict/write_once_dict.py,sha256=dqzA3amVDK9D8ygU8NFT5B87smAHYWdEn78Fssj6s_g,6396
11
- persidict-0.32.3.dist-info/WHEEL,sha256=4n27za1eEkOnA7dNjN6C5-O2rUiw6iapszm14Uj-Qmk,79
12
- persidict-0.32.3.dist-info/METADATA,sha256=5JSZ2ZGCbIndTyyIh0265V9A6S0hEMpm0Yn5erhzHuk,9334
13
- persidict-0.32.3.dist-info/RECORD,,