persidict-0.31.1-py3-none-any.whl → persidict-0.32.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -152,11 +152,20 @@ class FileDirDict(PersiDict):
152
152
 
153
153
  num_files = 0
154
154
  suffix = "." + self.file_type
155
- for subdir_info in os.walk(self._base_dir):
156
- files = subdir_info[2]
157
- files = [f_name for f_name in files
158
- if f_name.endswith(suffix)]
159
- num_files += len(files)
155
+ stack = [self._base_dir]
156
+
157
+ while stack:
158
+ path = stack.pop()
159
+ try:
160
+ with os.scandir(path) as it:
161
+ for entry in it:
162
+ if entry.is_dir(follow_symlinks=False):
163
+ stack.append(entry.path)
164
+ elif entry.is_file(follow_symlinks=False) and entry.name.endswith(suffix):
165
+ num_files += 1
166
+ except PermissionError:
167
+ continue
168
+
160
169
  return num_files
161
170
 
162
171
 
@@ -203,7 +212,39 @@ class FileDirDict(PersiDict):
203
212
  file_name = key[-1] + "." + self.file_type
204
213
  return os.path.join(*dir_names, file_name)
205
214
  else:
206
- return os.path.join(*dir_names)
215
+ return str(os.path.join(*dir_names))
216
+
217
+
218
+ def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
219
+ """Convert a filesystem path back into a key."""
220
+
221
+ # Ensure we're working with absolute paths
222
+ full_path = os.path.abspath(full_path)
223
+
224
+ # Remove the base directory from the path
225
+ if not full_path.startswith(self._base_dir):
226
+ raise ValueError(f"Path {full_path} is not within base directory {self._base_dir}")
227
+
228
+ # Get the relative path
229
+ rel_path = full_path[len(self._base_dir):].lstrip(os.sep)
230
+
231
+ if not rel_path:
232
+ return SafeStrTuple()
233
+
234
+ # Split the path into components
235
+ path_components = rel_path.split(os.sep)
236
+
237
+ # If it's a file path, remove the file extension from the last component
238
+ if os.path.isfile(full_path) and path_components[-1].endswith("." + self.file_type):
239
+ path_components[-1] = path_components[-1][:-len("." + self.file_type)]
240
+
241
+ # Create a SafeStrTuple from the path components
242
+ key = SafeStrTuple(*path_components)
243
+
244
+ # Unsign the key
245
+ key = unsign_safe_str_tuple(key, self.digest_len)
246
+
247
+ return key
207
248
 
208
249
 
209
250
  def get_subdict(self, key:PersiDictKey) -> FileDirDict:
@@ -404,4 +445,46 @@ class FileDirDict(PersiDict):
404
445
  return os.path.getmtime(filename)
405
446
 
406
447
 
407
- parameterizable.register_parameterizable_class(FileDirDict)
448
+ def random_key(self) -> PersiDictKey | None:
449
+ # canonicalise extension once
450
+ early_exit_cap = 10_000
451
+ ext = None
452
+ if self.file_type:
453
+ ext = self.file_type.lower()
454
+ if not ext.startswith("."):
455
+ ext = "." + ext
456
+
457
+ stack = [self._base_dir]
458
+ winner: Optional[str] = None
459
+ seen = 0
460
+
461
+ while stack:
462
+ path = stack.pop()
463
+ try:
464
+ with os.scandir(path) as it:
465
+ for ent in it:
466
+ if ent.is_dir(follow_symlinks=False):
467
+ stack.append(ent.path)
468
+ continue
469
+
470
+ # cheap name test before stat()
471
+ if ext and not ent.name.lower().endswith(ext):
472
+ continue
473
+
474
+ if ent.is_file(follow_symlinks=False):
475
+ seen += 1
476
+ if random.random() < 1 / seen: # reservoir k=1
477
+ winner = ent.path
478
+ # early‑exit when cap reached
479
+ if early_exit_cap and seen >= early_exit_cap:
480
+ return self._build_key_from_full_path(os.path.abspath(winner))
481
+ except PermissionError:
482
+ continue
483
+
484
+ if winner is None:
485
+ return None
486
+ else:
487
+ return self._build_key_from_full_path(os.path.abspath(winner))
488
+
489
+
490
+ parameterizable.register_parameterizable_class(FileDirDict)
persidict/persi_dict.py CHANGED
@@ -302,15 +302,33 @@ class PersiDict(MutableMapping, ParameterizableClass):
302
302
  return result_subdicts
303
303
 
304
304
 
305
- def random_keys(self, max_n:int):
306
- """Return a list of random keys from the dictionary.
305
+ def random_key(self) -> PersiDictKey | None:
306
+ """Return a random key from the dictionary.
307
+
308
+ Returns a single random key if the dictionary is not empty.
309
+ Returns None if the dictionary is empty.
307
310
 
308
311
  This method is absent in the original Python dict API.
312
+
313
+ Implementation uses reservoir sampling to select a uniformly random key
314
+ in streaming time, without loading all keys into memory or using len().
309
315
  """
310
- all_keys = list(self.keys())
311
- if max_n > len(all_keys):
312
- max_n = len(all_keys)
313
- result = random.sample(all_keys, max_n)
316
+ iterator = iter(self.keys())
317
+ try:
318
+ # Get the first key
319
+ result = next(iterator)
320
+ except StopIteration:
321
+ # Dictionary is empty
322
+ return None
323
+
324
+ # Reservoir sampling algorithm
325
+ i = 2
326
+ for key in iterator:
327
+ # Select current key with probability 1/i
328
+ if random.random() < 1/i:
329
+ result = key
330
+ i += 1
331
+
314
332
  return result
315
333
 
316
334
 
@@ -372,4 +390,4 @@ class PersiDict(MutableMapping, ParameterizableClass):
372
390
 
373
391
  This method is absent in the original Python dict API.
374
392
  """
375
- return [self[k] for k in self.newest_keys(max_n)]
393
+ return [self[k] for k in self.newest_keys(max_n)]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.31.1
3
+ Version: 0.32.1
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -15,12 +15,14 @@ Classifier: License :: OSI Approved :: MIT License
15
15
  Classifier: Operating System :: OS Independent
16
16
  Classifier: Topic :: Software Development :: Libraries
17
17
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Dist: parameterizable
18
19
  Requires-Dist: lz4
19
20
  Requires-Dist: joblib
20
21
  Requires-Dist: numpy
21
22
  Requires-Dist: pandas
22
23
  Requires-Dist: jsonpickle
23
- Requires-Dist: parameterizable
24
+ Requires-Dist: joblib
25
+ Requires-Dist: deepdiff
24
26
  Requires-Dist: boto3 ; extra == 'aws'
25
27
  Requires-Dist: boto3 ; extra == 'dev'
26
28
  Requires-Dist: moto ; extra == 'dev'
@@ -37,7 +39,7 @@ Simple persistent dictionaries for Python.
37
39
 
38
40
  ## What Is It?
39
41
 
40
- `persidict` offers a simple persistent key-value store for Python.
42
+ `persidict` offers a very simple persistent key-value store for Python.
41
43
  It saves the content of the dictionary in a folder on a disk
42
44
  or in an S3 bucket on AWS. Each value is stored as a separate file / S3 object.
43
45
  Only text strings or sequences of strings are allowed as keys.
@@ -1,14 +1,13 @@
1
- persidict/.DS_Store,sha256=d65165279105ca6773180500688df4bdc69a2c7b771752f0a46ef120b7fd8ec3,6148
2
1
  persidict/__init__.py,sha256=0833922468029f2453906513cda7a0dc2aacc70c74fb410589e3ad95d5f0025b,1380
3
- persidict/file_dir_dict.py,sha256=9587105acda4d148e25562510703573f21cff343a4757b6983f41ed4a5e9f0fb,14600
2
+ persidict/file_dir_dict.py,sha256=0c650ba2d5c17ab65ea5dd5247544f68c77e3027b022876b18ea19ade51d78a6,17527
4
3
  persidict/jokers.py,sha256=917e1b13e8ca593336922ec93a69bfdae252f339bcbba22767f575db1a362262,1436
5
4
  persidict/overlapping_multi_dict.py,sha256=6be9546e6635e7f1eb0eaea3488b7c17cb496e8a9b7988ae450796e1e95ffe85,2663
6
- persidict/persi_dict.py,sha256=ea04f5ae68544a18071b9ac95a1a9bb71cf7b04dcfa25d61c0a92d4c2375d4d4,12223
5
+ persidict/persi_dict.py,sha256=f9c290b7574312f2b536848f9d0fb6bd960c5fbea7f7615407ac37590441c5d5,12782
7
6
  persidict/s3_dict.py,sha256=0d8942037cee409d8bda8f63f923a8385f4f9f1fac03e765ec5f5fc675334ae2,12292
8
7
  persidict/safe_chars.py,sha256=59a20e96205d2e5675d827a911ad42ddbd553f1bd7e2cda1be765a9c2c4ce814,565
9
8
  persidict/safe_str_tuple.py,sha256=71393904bdebfb213ad8429fed59e04da52964076c01324f2238821aa4339325,3717
10
9
  persidict/safe_str_tuple_signing.py,sha256=e6e0a3015651a8ea2ef8aa43670f84c5ba1cdfedefb9ac8932aaebb7c699d1c9,3742
11
10
  persidict/write_once_dict.py,sha256=76acc0dda9950caf43f32814f0d153e41f3bb260076167449fbf05b2c8fab3f8,6396
12
- persidict-0.31.1.dist-info/WHEEL,sha256=c133ef911c90b05f7e14d8679ba99146f9154fcd271b7398cf8f672283b94e05,79
13
- persidict-0.31.1.dist-info/METADATA,sha256=ff478f3098ee316b0efd01678494992d84acc354fe7e5cf081686dd33c1b6a55,9283
14
- persidict-0.31.1.dist-info/RECORD,,
11
+ persidict-0.32.1.dist-info/WHEEL,sha256=7de84e261f5edc1201bd668234a371ec3eb94db037d1f75c0139430c1319ab31,79
12
+ persidict-0.32.1.dist-info/METADATA,sha256=8ba566bc5722b8e061a45d62344f2b612c9b4507d7a087351f966c8d782695dc,9334
13
+ persidict-0.32.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: uv 0.7.13
2
+ Generator: uv 0.7.16
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
persidict/.DS_Store DELETED
Binary file