persidict 0.34.1__tar.gz → 0.34.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.34.1
3
+ Version: 0.34.3
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -158,10 +158,9 @@ that simultaneously work with the same instance of a dictionary.
158
158
  * Insertion order is not preserved.
159
159
  * You cannot assign initial key-value pairs to a dictionary in its constructor.
160
160
  * `PersiDict` API has additional methods `delete_if_exists()`, `timestamp()`,
161
- `get_subdict()`, `subdicts()`, `random_keys()`, `newest_keys()`,
162
- `oldest_keys()`, `newest_values()`, `oldest_values()`,
163
- `get_params()`, `get_metaparams()`, and `get_default_metaparams()`,
164
- which are not available in native Python dicts.
161
+ `get_subdict()`, `subdicts()`, `random_key()`, `newest_keys()`,
162
+ `oldest_keys()`, `newest_values()`, `oldest_values()`, and
163
+ `get_params()`, which are not available in native Python dicts.
165
164
  * You can use KEEP_CURRENT constant as a fake new value
166
165
  to avoid actually setting/updating a value. Or DELETE_CURRENT as
167
166
  a fake new value to delete the previous value from a dictionary.
@@ -124,10 +124,9 @@ that simultaneously work with the same instance of a dictionary.
124
124
  * Insertion order is not preserved.
125
125
  * You cannot assign initial key-value pairs to a dictionary in its constructor.
126
126
  * `PersiDict` API has additional methods `delete_if_exists()`, `timestamp()`,
127
- `get_subdict()`, `subdicts()`, `random_keys()`, `newest_keys()`,
128
- `oldest_keys()`, `newest_values()`, `oldest_values()`,
129
- `get_params()`, `get_metaparams()`, and `get_default_metaparams()`,
130
- which are not available in native Python dicts.
127
+ `get_subdict()`, `subdicts()`, `random_key()`, `newest_keys()`,
128
+ `oldest_keys()`, `newest_values()`, `oldest_values()`, and
129
+ `get_params()`, which are not available in native Python dicts.
131
130
  * You can use KEEP_CURRENT constant as a fake new value
132
131
  to avoid actually setting/updating a value. Or DELETE_CURRENT as
133
132
  a fake new value to delete the previous value from a dictionary.
@@ -4,7 +4,7 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "persidict"
7
- version = "0.34.1"
7
+ version = "0.34.3"
8
8
  description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,11 +1,11 @@
1
- """ Persistent dictionaries that store key-value pairs on local disks.
2
-
3
- This functionality is implemented by the class FileDirDict
4
- (inherited from PersiDict): a dictionary that
5
- stores key-value pairs as files on a local hard-drive.
6
- A key is used to compose a filename, while a value is stored in the file
7
- as a binary, or as a json object, or as a plain text
8
- (depends on configuration parameters).
1
+ """Persistent dictionary implementation backed by local files.
2
+
3
+ FileDirDict stores each key-value pair in a separate file under a base
4
+ directory. Keys determine directory structure and filename; values are
5
+ serialized depending on ``file_type``.
6
+
7
+ - file_type="pkl" or "json": arbitrary Python objects via pickle/jsonpickle.
8
+ - any other value: strings are stored as plain text.
9
9
  """
10
10
  from __future__ import annotations
11
11
 
@@ -56,22 +56,27 @@ class FileDirDict(PersiDict):
56
56
  , immutable_items:bool = False
57
57
  , digest_len:int = 8
58
58
  , base_class_for_values: Optional[type] = None):
59
- """A constructor defines location of the store and file format to use.
60
-
61
- _base_dir is a directory that will contain all the files in
62
- the FileDirDict. If the directory does not exist, it will be created.
63
-
64
- base_class_for_values constraints the type of values that can be
65
- stored in the dictionary. If specified, it will be used to
66
- check types of values in the dictionary. If not specified,
67
- no type checking will be performed and all types will be allowed.
68
-
69
- file_type is extension, which will be used for all files in the dictionary.
70
- If file_type has one of two values: "pkl" or "json", it defines
71
- which file format will be used by FileDirDict to store values.
72
- For all other values of file_type, the file format will always be plain
73
- text. "pkl" and "json" allow to store arbitrary Python objects,
74
- while all other file_type-s only work with str objects.
59
+ """Initialize a filesystem-backed persistent dictionary.
60
+
61
+ Args:
62
+ base_dir (str): Base directory where all files are stored. Created
63
+ if it does not exist.
64
+ file_type (str): File extension/format to use for stored values.
65
+ - "pkl" or "json": arbitrary Python objects are supported.
66
+ - any other value: only strings are supported and stored as text.
67
+ immutable_items (bool): If True, existing items cannot be modified
68
+ or deleted.
69
+ digest_len (int): Length of a hash suffix appended to each key path
70
+ element to avoid name collisions on case-insensitive filesystems. Use 0 to disable.
71
+ base_class_for_values (Optional[type]): Optional base class that all
72
+ stored values must be instances of. If provided and not ``str``,
73
+ then file_type must be either "pkl" or "json".
74
+
75
+ Raises:
76
+ ValueError: If base_dir points to a file; if file_type is "__etag__";
77
+ or if configuration is inconsistent (e.g., non-str values with
78
+ unsupported file_type).
79
+ AssertionError: If file_type contains unsafe characters.
75
80
  """
76
81
 
77
82
  super().__init__(immutable_items = immutable_items
@@ -105,8 +110,12 @@ class FileDirDict(PersiDict):
105
110
  def get_params(self):
106
111
  """Return configuration parameters of the dictionary.
107
112
 
108
- This method is needed to support Parameterizable API.
109
- The method is absent in the original dict API.
113
+ This method is needed to support the Parameterizable API and is absent
114
+ in the standard dict API.
115
+
116
+ Returns:
117
+ dict: A mapping of parameter names to values including base_dir and
118
+ file_type merged with the base PersiDict parameters.
110
119
  """
111
120
  params = PersiDict.get_params(self)
112
121
  additional_params = dict(
@@ -122,6 +131,9 @@ class FileDirDict(PersiDict):
122
131
  """Return dictionary's URL.
123
132
 
124
133
  This property is absent in the original dict API.
134
+
135
+ Returns:
136
+ str: URL of the underlying storage in the form "file://<abs_path>".
125
137
  """
126
138
  return f"file://{self._base_dir}"
127
139
 
@@ -131,16 +143,25 @@ class FileDirDict(PersiDict):
131
143
  """Return dictionary's base directory.
132
144
 
133
145
  This property is absent in the original dict API.
146
+
147
+ Returns:
148
+ str: Absolute path to the base directory used by this dictionary.
134
149
  """
135
150
  return self._base_dir
136
151
 
137
152
 
138
153
  def __len__(self) -> int:
139
- """ Get the number of key-value pairs in the dictionary.
154
+ """Return the number of key-value pairs in the dictionary.
155
+
156
+ This performs a recursive traversal of the base directory.
140
157
 
141
- WARNING: This operation can be slow on large dictionaries as it
142
- needs to recursively walk the entire base directory.
143
- Avoid using it in performance-sensitive code.
158
+ Returns:
159
+ int: Count of stored items.
160
+
161
+ Note:
162
+ This operation can be slow on large dictionaries as it walks the
163
+ entire directory tree. Avoid using it in performance-sensitive
164
+ code paths.
144
165
  """
145
166
 
146
167
  suffix = "." + self.file_type
@@ -149,7 +170,11 @@ class FileDirDict(PersiDict):
149
170
 
150
171
 
151
172
  def clear(self) -> None:
152
- """ Remove all elements from the dictionary."""
173
+ """Remove all elements from the dictionary.
174
+
175
+ Raises:
176
+ KeyError: If immutable_items is True.
177
+ """
153
178
 
154
179
  if self.immutable_items:
155
180
  raise KeyError("Can't clear a dict that contains immutable items")
@@ -172,7 +197,26 @@ class FileDirDict(PersiDict):
172
197
  , key:SafeStrTuple
173
198
  , create_subdirs:bool=False
174
199
  , is_file_path:bool=True) -> str:
175
- """Convert a key into a filesystem path."""
200
+ """Convert a key into an absolute filesystem path.
201
+
202
+ Transforms a SafeStrTuple into either a directory path or a file path
203
+ inside this dictionary's base directory. When is_file_path is True, the
204
+ final component is treated as a filename with the configured file_type
205
+ extension. When create_subdirs is True, missing intermediate directories
206
+ are created.
207
+
208
+ Args:
209
+ key (SafeStrTuple): The key to convert. It will be temporarily
210
+ signed according to digest_len to produce collision-safe names.
211
+ create_subdirs (bool): If True, create any missing intermediate
212
+ directories.
213
+ is_file_path (bool): If True, return a file path ending with
214
+ ".{file_type}"; otherwise return just the directory path for
215
+ the key prefix.
216
+
217
+ Returns:
218
+ str: An absolute path within base_dir corresponding to the key.
219
+ """
176
220
 
177
221
  key = sign_safe_str_tuple(key, self.digest_len)
178
222
  key = [self._base_dir] + list(key.strings)
@@ -190,7 +234,22 @@ class FileDirDict(PersiDict):
190
234
 
191
235
 
192
236
  def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
193
- """Convert a filesystem path back into a key."""
237
+ """Convert an absolute filesystem path back into a SafeStrTuple key.
238
+
239
+ This function reverses _build_full_path, stripping base_dir, removing the
240
+ file_type extension if the path points to a file, and unsigning the key
241
+ components according to digest_len.
242
+
243
+ Args:
244
+ full_path (str): Absolute path within the dictionary's base
245
+ directory.
246
+
247
+ Returns:
248
+ SafeStrTuple: The reconstructed (unsigned) key.
249
+
250
+ Raises:
251
+ ValueError: If full_path is not located under base_dir.
252
+ """
194
253
 
195
254
  # Ensure we're working with absolute paths
196
255
  full_path = os.path.abspath(full_path)
@@ -225,8 +284,15 @@ class FileDirDict(PersiDict):
225
284
  """Get a subdictionary containing items with the same prefix key.
226
285
 
227
286
  For non-existing prefix key, an empty sub-dictionary is returned.
228
-
229
287
  This method is absent in the original dict API.
288
+
289
+ Args:
290
+ key (PersiDictKey): Prefix key (string or sequence of strings) that
291
+ identifies the subdirectory.
292
+
293
+ Returns:
294
+ FileDirDict: A new FileDirDict instance rooted at the specified
295
+ subdirectory, sharing the same parameters as this dictionary.
230
296
  """
231
297
  key = SafeStrTuple(key)
232
298
  full_dir_path = self._build_full_path(
@@ -240,7 +306,14 @@ class FileDirDict(PersiDict):
240
306
 
241
307
 
242
308
  def _read_from_file_impl(self, file_name:str) -> Any:
243
- """Read a value from a file. """
309
+ """Read a value from a single file without retries.
310
+
311
+ Args:
312
+ file_name (str): Absolute path to the file to read.
313
+
314
+ Returns:
315
+ Any: The deserialized value according to file_type.
316
+ """
244
317
 
245
318
  if self.file_type == "pkl":
246
319
  with open(file_name, 'rb') as f:
@@ -255,7 +328,22 @@ class FileDirDict(PersiDict):
255
328
 
256
329
 
257
330
  def _read_from_file(self,file_name:str) -> Any:
258
- """Read a value from a file. """
331
+ """Read a value from a file with retry/backoff for concurrency.
332
+
333
+ Validates that the configured file_type is compatible with the allowed
334
+ value types, then attempts to read the file using an exponential backoff
335
+ to better tolerate concurrent writers.
336
+
337
+ Args:
338
+ file_name (str): Absolute path of the file to read.
339
+
340
+ Returns:
341
+ Any: The deserialized value according to file_type.
342
+
343
+ Raises:
344
+ ValueError: If file_type is incompatible with non-string values.
345
+ Exception: Propagates the last exception if all retries fail.
346
+ """
259
347
 
260
348
  if not (self.file_type in {"pkl", "json"} or issubclass(
261
349
  self.base_class_for_values, str)):
@@ -275,7 +363,15 @@ class FileDirDict(PersiDict):
275
363
 
276
364
 
277
365
  def _save_to_file_impl(self, file_name:str, value:Any) -> None:
278
- """Save a value to a file. """
366
+ """Write a single value to a file atomically (no retries).
367
+
368
+ Uses a temporary file and atomic rename to avoid partial writes and to
369
+ reduce the chance of readers observing corrupted data.
370
+
371
+ Args:
372
+ file_name (str): Absolute destination file path.
373
+ value (Any): Value to serialize and save.
374
+ """
279
375
 
280
376
  dir_name = os.path.dirname(file_name)
281
377
  # Use a temporary file and atomic rename to prevent data corruption
@@ -313,7 +409,20 @@ class FileDirDict(PersiDict):
313
409
  raise
314
410
 
315
411
  def _save_to_file(self, file_name:str, value:Any) -> None:
316
- """Save a value to a file. """
412
+ """Save a value to a file with retry/backoff.
413
+
414
+ Ensures the configured file_type is compatible with value types and then
415
+ writes the value using an exponential backoff to better tolerate
416
+ concurrent readers/writers.
417
+
418
+ Args:
419
+ file_name (str): Absolute destination file path.
420
+ value (Any): Value to serialize and save.
421
+
422
+ Raises:
423
+ ValueError: If file_type is incompatible with non-string values.
424
+ Exception: Propagates the last exception if all retries fail.
425
+ """
317
426
 
318
427
  if not (self.file_type in {"pkl", "json"} or issubclass(
319
428
  self.base_class_for_values, str)):
@@ -334,14 +443,36 @@ class FileDirDict(PersiDict):
334
443
 
335
444
 
336
445
  def __contains__(self, key:PersiDictKey) -> bool:
337
- """True if the dictionary has the specified key, else False. """
446
+ """Check whether a key exists in the dictionary.
447
+
448
+ Args:
449
+ key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
450
+
451
+ Returns:
452
+ bool: True if a file for the key exists; False otherwise.
453
+ """
338
454
  key = SafeStrTuple(key)
339
455
  filename = self._build_full_path(key)
340
456
  return os.path.isfile(filename)
341
457
 
342
458
 
343
459
  def __getitem__(self, key:PersiDictKey) -> Any:
344
- """ Implementation for x[y] syntax. """
460
+ """Retrieve the value stored for a key.
461
+
462
+ Equivalent to obj[key]. Reads the corresponding file from the disk and
463
+ deserializes according to file_type.
464
+
465
+ Args:
466
+ key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
467
+
468
+ Returns:
469
+ Any: The stored value.
470
+
471
+ Raises:
472
+ KeyError: If the file for the key does not exist.
473
+ TypeError: If the deserialized value does not match base_class_for_values
474
+ when it is set.
475
+ """
345
476
  key = SafeStrTuple(key)
346
477
  filename = self._build_full_path(key)
347
478
  if not os.path.isfile(filename):
@@ -356,7 +487,22 @@ class FileDirDict(PersiDict):
356
487
 
357
488
 
358
489
  def __setitem__(self, key:PersiDictKey, value:Any):
359
- """Set self[key] to value."""
490
+ """Store a value for a key on the disk.
491
+
492
+ Interprets joker values KEEP_CURRENT and DELETE_CURRENT accordingly.
493
+ Validates value type if base_class_for_values is set, then serializes
494
+ and writes to a file determined by the key and file_type.
495
+
496
+ Args:
497
+ key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
498
+ value (Any): Value to store, or a joker command.
499
+
500
+ Raises:
501
+ KeyError: If attempting to modify an existing item when
502
+ immutable_items is True.
503
+ TypeError: If the value is a PersiDict or does not match
504
+ base_class_for_values when it is set.
505
+ """
360
506
 
361
507
  if value is KEEP_CURRENT:
362
508
  return
@@ -384,7 +530,14 @@ class FileDirDict(PersiDict):
384
530
 
385
531
 
386
532
  def __delitem__(self, key:PersiDictKey) -> None:
387
- """Delete self[key]."""
533
+ """Delete the stored value for a key.
534
+
535
+ Args:
536
+ key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
537
+
538
+ Raises:
539
+ KeyError: If the key does not exist. AssertionError: If immutable_items is True (checked with an assert statement).
540
+ """
388
541
  key = SafeStrTuple(key)
389
542
  assert not self.immutable_items, "Can't delete immutable items"
390
543
  filename = self._build_full_path(key)
@@ -394,7 +547,23 @@ class FileDirDict(PersiDict):
394
547
 
395
548
 
396
549
  def _generic_iter(self, result_type: set[str]):
397
- """Underlying implementation for .items()/.keys()/.values() iterators"""
550
+ """Underlying implementation for .items()/.keys()/.values() iterators.
551
+
552
+ Produces generators over keys, values, and/or timestamps by traversing
553
+ the directory tree under base_dir. Keys are converted back from paths by
554
+ removing the file extension and unsigning according to digest_len.
555
+
556
+ Args:
557
+ result_type (set[str]): Any non-empty subset of {"keys", "values",
558
+ "timestamps"} specifying which fields to yield.
559
+
560
+ Returns:
561
+ Iterator: A generator yielding:
562
+ - SafeStrTuple if result_type == {"keys"}
563
+ - Any if result_type == {"values"}
564
+ - tuple[SafeStrTuple, Any] if result_type == {"keys", "values"}
565
+ - tuple[..., float] including POSIX timestamp if "timestamps" is requested.
566
+ """
398
567
  assert isinstance(result_type, set)
399
568
  assert 1 <= len(result_type) <= 3
400
569
  assert len(result_type | {"keys", "values", "timestamps"}) == 3
@@ -404,12 +573,20 @@ class FileDirDict(PersiDict):
404
573
  ext_len = len(self.file_type) + 1
405
574
 
406
575
  def splitter(dir_path: str):
407
- """Transform a dirname into a PersiDictKey key"""
576
+ """Transform a relative dirname into SafeStrTuple components.
577
+
578
+ Args:
579
+ dir_path (str): Relative path under base_dir (e.g., "a/b").
580
+
581
+ Returns:
582
+ list[str]: List of safe string components (may be empty).
583
+ """
408
584
  if dir_path == ".":
409
585
  return []
410
586
  return dir_path.split(os.sep)
411
587
 
412
588
  def step():
589
+ """Generator that yields entries based on result_type."""
413
590
  suffix = "." + self.file_type
414
591
  for dir_name, _, files in walk_results:
415
592
  for f in files:
@@ -448,6 +625,15 @@ class FileDirDict(PersiDict):
448
625
  """Get last modification time (in seconds, Unix epoch time).
449
626
 
450
627
  This method is absent in the original dict API.
628
+
629
+ Args:
630
+ key (PersiDictKey): Key whose timestamp to return.
631
+
632
+ Returns:
633
+ float: POSIX timestamp of the underlying file.
634
+
635
+ Raises:
636
+ FileNotFoundError: If the key does not exist.
451
637
  """
452
638
  key = SafeStrTuple(key)
453
639
  filename = self._build_full_path(key)
@@ -455,6 +641,15 @@ class FileDirDict(PersiDict):
455
641
 
456
642
 
457
643
  def random_key(self) -> PersiDictKey | None:
644
+ """Return a uniformly random key from the dictionary, or None if empty.
645
+
646
+ Performs a full directory traversal using reservoir sampling
647
+ (k=1) to select a random file matching the configured file_type without
648
+ loading all keys into memory.
649
+
650
+ Returns:
651
+ PersiDictKey | None: A random key if any items exist; otherwise None.
652
+ """
458
653
  # canonicalise extension once
459
654
  ext = None
460
655
  if self.file_type:
@@ -0,0 +1,91 @@
1
+ """Special singleton markers used to modify values in PersiDict without data payload.
2
+
3
+ This module defines two singleton flags used as "joker" values when writing to
4
+ persistent dictionaries:
5
+
6
+ - KEEP_CURRENT: keep the current value unchanged.
7
+ - DELETE_CURRENT: delete the current value if it exists.
8
+
9
+ These flags are intended to be passed as the value part in dict-style
10
+ assignments (e.g., d[key] = KEEP_CURRENT) and are interpreted by PersiDict
11
+ implementations.
12
+
13
+ Examples:
14
+ >>> from persidict.jokers import KEEP_CURRENT, DELETE_CURRENT
15
+ >>> d[key] = KEEP_CURRENT # Do not alter existing value
16
+ >>> d[key] = DELETE_CURRENT # Remove key if present
17
+ """
18
+ from typing import Any
19
+
20
+ from parameterizable import (
21
+ ParameterizableClass
22
+ , register_parameterizable_class)
23
+
24
+
25
+ class Joker(ParameterizableClass):
26
+ """Base class for singleton joker flags.
27
+
28
+ Implements a per-subclass singleton pattern and integrates with the
29
+ parameterizable framework. Subclasses represent value-less commands that
30
+ alter persistence behavior when assigned to a key.
31
+
32
+ Returns:
33
+ Joker: The singleton instance for the subclass when instantiated.
34
+ """
35
+ _instances = {}
36
+
37
+ def get_params(self) -> dict[str, Any]:
38
+ """Return parameters for parameterizable API.
39
+
40
+ Returns:
41
+ dict[str, Any]: Always an empty dict for joker flags.
42
+ """
43
+ return {}
44
+
45
+ def __new__(cls):
46
+ """Create or return the singleton instance for the subclass."""
47
+ if cls not in Joker._instances:
48
+ Joker._instances[cls] = super().__new__(cls)
49
+ return Joker._instances[cls]
50
+
51
+
52
+ class KeepCurrentFlag(Joker):
53
+ """Flag instructing PersiDict to keep the current value unchanged.
54
+
55
+ Usage:
56
+ Assign this flag instead of a real value to indicate that an existing
57
+ value should not be modified.
58
+
59
+ Examples:
60
+ >>> d[key] = KEEP_CURRENT
61
+
62
+ Note:
63
+ This is a singleton class; constructing it repeatedly returns the same
64
+ instance.
65
+ """
66
+ pass
67
+
68
+ class DeleteCurrentFlag(Joker):
69
+ """Flag instructing PersiDict to delete the current value for a key.
70
+
71
+ Usage:
72
+ Assign this flag instead of a real value to remove the key if it
73
+ exists. If the key is absent, implementations will typically no-op.
74
+
75
+ Examples:
76
+ >>> d[key] = DELETE_CURRENT
77
+
78
+ Note:
79
+ This is a singleton class; constructing it repeatedly returns the same
80
+ instance.
81
+ """
82
+ pass
83
+
84
+ register_parameterizable_class(KeepCurrentFlag)
85
+ register_parameterizable_class(DeleteCurrentFlag)
86
+
87
+ KeepCurrent = KeepCurrentFlag()
88
+ KEEP_CURRENT = KeepCurrentFlag()
89
+
90
+ DeleteCurrent = DeleteCurrentFlag()
91
+ DELETE_CURRENT = DeleteCurrentFlag()