persidict 0.36.11__py3-none-any.whl → 0.37.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of persidict might be problematic. Click here for more details.

@@ -69,6 +69,7 @@ class FileDirDict(PersiDict):
69
69
  or deleted.
70
70
  digest_len (int): Length of a hash suffix appended to each key path
71
71
  element to avoid case-insensitive collisions. Use 0 to disable.
72
+ If you decide to enable it (not 0), we recommend at least 4.
72
73
  base_class_for_values (Optional[type]): Optional base class that all
73
74
  stored values must be instances of. If provided and not ``str``,
74
75
  then file_type must be either "pkl" or "json".
@@ -509,28 +510,13 @@ class FileDirDict(PersiDict):
509
510
  base_class_for_values when it is set.
510
511
  """
511
512
 
512
- if value is KEEP_CURRENT:
513
- return
514
-
515
- if value is DELETE_CURRENT:
516
- self.delete_if_exists(key)
513
+ key = SafeStrTuple(key)
514
+ PersiDict.__setitem__(self, key, value)
515
+ if isinstance(value, Joker):
516
+ # processed by base class
517
517
  return
518
518
 
519
- if isinstance(value, PersiDict):
520
- raise TypeError(
521
- f"You are not allowed to store a PersiDict "
522
- + f"inside another PersiDict.")
523
-
524
- if self.base_class_for_values is not None:
525
- if not isinstance(value, self.base_class_for_values):
526
- raise TypeError(
527
- f"Value must be of type {self.base_class_for_values},"
528
- + f"but it is {type(value)} instead.")
529
-
530
- key = SafeStrTuple(key)
531
519
  filename = self._build_full_path(key, create_subdirs=True)
532
- if self.immutable_items and os.path.exists(filename):
533
- raise KeyError("Can't modify an immutable item")
534
520
  self._save_to_file(filename, value)
535
521
 
536
522
 
@@ -544,8 +530,7 @@ class FileDirDict(PersiDict):
544
530
  KeyError: If immutable_items is True or if the key does not exist.
545
531
  """
546
532
  key = SafeStrTuple(key)
547
- if self.immutable_items:
548
- raise KeyError("Can't delete immutable items")
533
+ PersiDict.__delitem__(self, key)
549
534
  filename = self._build_full_path(key)
550
535
  if not os.path.isfile(filename):
551
536
  raise KeyError(f"File {filename} does not exist")
@@ -574,15 +559,8 @@ class FileDirDict(PersiDict):
574
559
  TypeError: If result_type is not a set.
575
560
  ValueError: If result_type is empty or contains unsupported labels.
576
561
  """
577
- if not isinstance(result_type, set):
578
- raise TypeError("result_type must be a set")
579
- if not (1 <= len(result_type) <= 3):
580
- raise ValueError("result_type must be a non-empty subset of {'keys','values','timestamps'}")
581
- allowed = {"keys", "values", "timestamps"}
582
- invalid = result_type - allowed
583
- if invalid:
584
- raise ValueError(f"Unsupported result_type entries: {sorted(invalid)}; allowed: {sorted(allowed)}")
585
562
 
563
+ PersiDict._generic_iter(self, result_type)
586
564
  walk_results = os.walk(self._base_dir)
587
565
  ext_len = len(self.file_type) + 1
588
566
 
persidict/jokers.py CHANGED
@@ -92,8 +92,8 @@ class DeleteCurrentFlag(Joker):
92
92
  register_parameterizable_class(KeepCurrentFlag)
93
93
  register_parameterizable_class(DeleteCurrentFlag)
94
94
 
95
- KeepCurrent = KeepCurrentFlag()
95
+ _KeepCurrent = KeepCurrentFlag()
96
96
  KEEP_CURRENT = KeepCurrentFlag()
97
97
 
98
- DeleteCurrent = DeleteCurrentFlag()
98
+ _DeleteCurrent = DeleteCurrentFlag()
99
99
  DELETE_CURRENT = DeleteCurrentFlag()
persidict/persi_dict.py CHANGED
@@ -4,10 +4,11 @@ PersiDict defines a unified interface for persistent dictionaries. The API is
4
4
  similar to Python's built-in dict with some differences (e.g., insertion order
5
5
  is not guaranteed) and several additional convenience methods.
6
6
 
7
- Keys are sequences of URL/filename-safe strings represented by SafeStrTuple.
8
- Plain strings or sequences of strings are accepted and automatically coerced to
9
- SafeStrTuple. Values can be arbitrary Python objects unless an implementation
10
- restricts them via ``base_class_for_values``.
7
+ Keys are non-empty sequences of URL/filename-safe strings
8
+ represented by SafeStrTuple. Plain strings or sequences of strings are accepted
9
+ and automatically coerced to SafeStrTuple. Values can be
10
+ arbitrary Python objects unless an implementation restricts them
11
+ via `base_class_for_values`.
11
12
 
12
13
  Persistence means items are stored durably (e.g., in local files or cloud
13
14
  objects) and remain accessible across process lifetimes.
@@ -100,10 +101,10 @@ class PersiDict(MutableMapping, ParameterizableClass):
100
101
  This supports the Parameterizable API and is absent in the
101
102
  built-in dict.
102
103
  """
103
- params = dict(
104
- immutable_items=self.immutable_items
105
- , digest_len=self.digest_len
106
- , base_class_for_values=self.base_class_for_values
104
+ params = dict(
105
+ immutable_items=self.immutable_items,
106
+ digest_len=self.digest_len,
107
+ base_class_for_values=self.base_class_for_values
107
108
  )
108
109
  sorted_params = sort_dict_by_keys(params)
109
110
  return sorted_params
@@ -168,7 +169,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
168
169
  Returns:
169
170
  bool: True if key exists, False otherwise.
170
171
  """
171
- raise NotImplementedError
172
+ if type(self) is PersiDict:
173
+ raise NotImplementedError("PersiDict is an abstract base class"
174
+ " and cannot check items directly")
172
175
 
173
176
 
174
177
  @abstractmethod
@@ -181,7 +184,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
181
184
  Returns:
182
185
  Any: The stored value.
183
186
  """
184
- raise NotImplementedError
187
+ if type(self) is PersiDict:
188
+ raise NotImplementedError("PersiDict is an abstract base class"
189
+ " and cannot retrieve items directly")
185
190
 
186
191
 
187
192
  def __setitem__(self, key:PersiDictKey, value:Any):
@@ -201,12 +206,20 @@ class PersiDict(MutableMapping, ParameterizableClass):
201
206
  """
202
207
  if value is KEEP_CURRENT:
203
208
  return
204
- elif value is DELETE_CURRENT:
205
- self.delete_if_exists(key)
206
209
  elif self.immutable_items:
207
210
  if key in self:
208
211
  raise KeyError("Can't modify an immutable key-value pair")
209
- raise NotImplementedError
212
+ elif value is DELETE_CURRENT:
213
+ self.delete_if_exists(key)
214
+
215
+ if self.base_class_for_values is not None:
216
+ if not isinstance(value, self.base_class_for_values):
217
+ raise TypeError(f"Value must be an instance of"
218
+ f" {self.base_class_for_values.__name__}")
219
+
220
+ if type(self) is PersiDict:
221
+ raise NotImplementedError("PersiDict is an abstract base class"
222
+ " and cannot store items directly")
210
223
 
211
224
 
212
225
  def __delitem__(self, key:PersiDictKey):
@@ -221,7 +234,12 @@ class PersiDict(MutableMapping, ParameterizableClass):
221
234
  """
222
235
  if self.immutable_items:
223
236
  raise KeyError("Can't delete an immutable key-value pair")
224
- raise NotImplementedError
237
+ if type(self) is PersiDict:
238
+ raise NotImplementedError("PersiDict is an abstract base class"
239
+ " and cannot delete items directly")
240
+ key = SafeStrTuple(key)
241
+ if key not in self:
242
+ raise KeyError(f"Key {key} not found")
225
243
 
226
244
 
227
245
  @abstractmethod
@@ -231,7 +249,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
231
249
  Returns:
232
250
  int: Number of key-value pairs.
233
251
  """
234
- raise NotImplementedError
252
+ if type(self) is PersiDict:
253
+ raise NotImplementedError("PersiDict is an abstract base class"
254
+ " and cannot count items directly")
235
255
 
236
256
 
237
257
  @abstractmethod
@@ -260,7 +280,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
260
280
  raise ValueError("result_type can only contain 'keys', 'values', 'timestamps'")
261
281
  if not (1 <= len(result_type & allowed) <= 3):
262
282
  raise ValueError("result_type must include at least one of 'keys', 'values', 'timestamps'")
263
- raise NotImplementedError
283
+ if type(self) is PersiDict:
284
+ raise NotImplementedError("PersiDict is an abstract base class"
285
+ " and cannot iterate items directly")
264
286
 
265
287
 
266
288
  def __iter__(self):
@@ -278,7 +300,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
278
300
  Returns:
279
301
  Iterator[SafeStrTuple]: Keys iterator.
280
302
  """
281
- return self._generic_iter({"keys"})
303
+ return self._generic_iter({"keys"})
282
304
 
283
305
 
284
306
  def keys_and_timestamps(self):
@@ -385,7 +407,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
385
407
  Raises:
386
408
  TypeError: Always raised; PersiDict instances are not pickleable.
387
409
  """
388
- raise TypeError("PersiDict is not picklable.")
410
+ if type(self) is PersiDict:
411
+ raise NotImplementedError("PersiDict is an abstract base class"
412
+ " and cannot be pickled directly")
389
413
 
390
414
 
391
415
  def __setstate__(self, state):
@@ -394,7 +418,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
394
418
  Raises:
395
419
  TypeError: Always raised; PersiDict instances are not pickleable.
396
420
  """
397
- raise TypeError("PersiDict is not picklable.")
421
+ if type(self) is PersiDict:
422
+ raise TypeError("PersiDict is an abstract base class"
423
+ " and cannot be unpickled directly")
398
424
 
399
425
 
400
426
  def clear(self) -> None:
@@ -409,7 +435,7 @@ class PersiDict(MutableMapping, ParameterizableClass):
409
435
  for k in self.keys():
410
436
  try:
411
437
  del self[k]
412
- except:
438
+ except KeyError:
413
439
  pass
414
440
 
415
441
 
@@ -464,7 +490,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
464
490
  NotImplementedError: Must be implemented by subclasses that support
465
491
  hierarchical key spaces.
466
492
  """
467
- raise NotImplementedError
493
+ if type(self) is PersiDict:
494
+ raise NotImplementedError("PersiDict is an abstract base class"
495
+ " and cannot create sub-dictionaries directly")
468
496
 
469
497
 
470
498
  def subdicts(self) -> dict[str, PersiDict]:
@@ -528,7 +556,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
528
556
  Raises:
529
557
  NotImplementedError: Must be implemented by subclasses.
530
558
  """
531
- raise NotImplementedError
559
+ if type(self) is PersiDict:
560
+ raise NotImplementedError("PersiDict is an abstract base class"
561
+ " and cannot provide timestamps directly")
532
562
 
533
563
 
534
564
  def oldest_keys(self, max_n=None):
@@ -553,9 +583,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
553
583
  return []
554
584
  else:
555
585
  # Use heapq.nsmallest for efficient partial sorting without loading all keys into memory
556
- smallest_pairs = heapq.nsmallest(max_n
557
- , self.keys_and_timestamps()
558
- , key=lambda x: x[1])
586
+ smallest_pairs = heapq.nsmallest(max_n,
587
+ self.keys_and_timestamps(),
588
+ key=lambda x: x[1])
559
589
  return [key for key,_ in smallest_pairs]
560
590
 
561
591
 
@@ -591,15 +621,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
591
621
  if max_n is None:
592
622
  # If we need all keys, sort them all by timestamp in reverse order
593
623
  key_timestamp_pairs = list(self.keys_and_timestamps())
594
- key_timestamp_pairs.sort(key=lambda x:x[1], reverse=True)
624
+ key_timestamp_pairs.sort(key=lambda x: x[1], reverse=True)
595
625
  return [key for key,_ in key_timestamp_pairs]
596
626
  elif max_n <= 0:
597
627
  return []
598
628
  else:
599
629
  # Use heapq.nlargest for efficient partial sorting without loading all keys into memory
600
- largest_pairs = heapq.nlargest(max_n
601
- , self.keys_and_timestamps()
602
- , key=lambda item: item[1])
630
+ largest_pairs = heapq.nlargest(max_n,
631
+ self.keys_and_timestamps(),
632
+ key=lambda item: item[1])
603
633
  return [key for key,_ in largest_pairs]
604
634
 
605
635
 
persidict/s3_dict.py CHANGED
@@ -15,26 +15,30 @@ from parameterizable.dict_sorter import sort_dict_by_keys
15
15
  from .safe_str_tuple import SafeStrTuple
16
16
  from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
17
17
  from .persi_dict import PersiDict
18
- from .jokers import KEEP_CURRENT, DELETE_CURRENT
18
+ from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
19
19
  from .file_dir_dict import FileDirDict, PersiDictKey
20
20
  from .overlapping_multi_dict import OverlappingMultiDict
21
21
 
22
22
  S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
23
23
 
24
24
  class S3Dict(PersiDict):
25
- """ A persistent dictionary that stores key-value pairs as S3 objects.
26
-
27
- A new object is created for each key-value pair.
28
-
29
- A key is either an objectname (a 'filename' without an extension),
30
- or a sequence of folder names (object name prefixes) that ends
31
- with an objectname. A value can be an instance of any Python type,
32
- and will be stored as an S3-object.
33
-
34
- S3Dict can store objects in binary objects (as pickles)
35
- or in human-readable texts objects (using jsonpickles).
36
-
37
- Unlike in native Python dictionaries, insertion order is not preserved.
25
+ """A persistent dictionary that stores key-value pairs as S3 objects.
26
+
27
+ Each key-value pair is stored as a separate S3 object in the specified bucket.
28
+
29
+ A key can be either a string (object name without file extension) or a sequence
30
+ of strings representing a hierarchical path (folder structure ending with an
31
+ object name). Values can be instances of any Python type and are serialized
32
+ to S3 objects.
33
+
34
+ S3Dict supports multiple serialization formats:
35
+ - Binary storage using pickle ('pkl' format)
36
+ - Human-readable text using jsonpickle ('json' format)
37
+ - Plain text for string values (other formats)
38
+
39
+ Note:
40
+ Unlike native Python dictionaries, insertion order is not preserved.
41
+ Operations may incur S3 API costs and network latency.
38
42
  """
39
43
  region: str
40
44
  bucket_name: str
@@ -42,40 +46,47 @@ class S3Dict(PersiDict):
42
46
  file_type: str
43
47
  _base_dir: str
44
48
 
45
- def __init__(self, bucket_name:str = "my_bucket"
46
- , region:str = None
47
- , root_prefix:str = ""
48
- , base_dir:str = S3DICT_DEFAULT_BASE_DIR
49
- , file_type:str = "pkl"
50
- , immutable_items:bool = False
51
- , digest_len:int = 8
52
- , base_class_for_values:Optional[type] = None
53
- ,*args ,**kwargs):
49
+ def __init__(self, bucket_name: str = "my_bucket",
50
+ region: str = None,
51
+ root_prefix: str = "",
52
+ base_dir: str = S3DICT_DEFAULT_BASE_DIR,
53
+ file_type: str = "pkl",
54
+ immutable_items: bool = False,
55
+ digest_len: int = 8,
56
+ base_class_for_values: Optional[type] = None,
57
+ *args, **kwargs):
54
58
  """Initialize an S3-backed persistent dictionary.
55
59
 
56
60
  Args:
57
- bucket_name (str): Name of the S3 bucket to use. The bucket will be
58
- created if it does not already exist.
59
- region (str | None): AWS region of the bucket. If None, the default
60
- client region is used.
61
- root_prefix (str): Common S3 key prefix under which all objects are
62
- stored. A trailing slash is added if missing.
63
- base_dir (str): Local directory used for temporary files and a
64
- small on-disk cache.
65
- file_type (str): Extension/format for stored values. "pkl" or
66
- "json" store arbitrary Python objects; other values imply plain
67
- text and only allow str values.
68
- immutable_items (bool): If True, disallow changing existing items.
69
- digest_len (int): Number of base32 MD5 characters appended to key
70
- elements to avoid case-insensitive collisions. Use 0 to disable.
71
- base_class_for_values (type | None): Optional base class that all
72
- values must inherit from. If provided and not str, file_type
73
- must be "pkl" or "json".
74
- *args: Ignored; reserved for compatibility.
75
- **kwargs: Ignored; reserved for compatibility.
61
+ bucket_name: Name of the S3 bucket to use. The bucket will be
62
+ created automatically if it does not exist and permissions allow.
63
+ region: AWS region for the bucket. If None, uses the default
64
+ client region from AWS configuration.
65
+ root_prefix: Common S3 key prefix under which all objects are
66
+ stored. A trailing slash is automatically added if missing.
67
+ base_dir: Local directory path used for temporary files and
68
+ local caching of S3 objects.
69
+ file_type: File extension/format for stored values. Supported formats:
70
+ 'pkl' (pickle), 'json' (jsonpickle), or custom text formats.
71
+ immutable_items: If True, prevents modification of existing items
72
+ after they are initially stored.
73
+ digest_len: Number of base32 MD5 hash characters appended to key
74
+ elements to prevent case-insensitive filename collisions.
75
+ Set to 0 to disable collision prevention.
76
+ base_class_for_values: Optional base class that all stored values
77
+ must inherit from. When specified (and not str), file_type
78
+ must be 'pkl' or 'json' for proper serialization.
79
+ *args: Additional positional arguments (ignored, reserved for compatibility).
80
+ **kwargs: Additional keyword arguments (ignored, reserved for compatibility).
81
+
82
+ Note:
83
+ The S3 bucket will be created if it doesn't exist and AWS permissions
84
+ allow. Network connectivity and valid AWS credentials are required.
76
85
  """
77
86
 
78
- super().__init__(immutable_items = immutable_items, digest_len = digest_len)
87
+ super().__init__(immutable_items = immutable_items
88
+ , digest_len = digest_len
89
+ , base_class_for_values=base_class_for_values)
79
90
  self.file_type = file_type
80
91
  self.etag_file_type = f"{file_type}_etag"
81
92
 
@@ -107,24 +118,25 @@ class S3Dict(PersiDict):
107
118
  except ClientError as e:
108
119
  error_code = e.response['Error']['Code']
109
120
  if error_code == '404' or error_code == 'NotFound':
110
- # The bucket does not exist, so attempt to create it.
121
+ # Bucket does not exist, attempt to create it
111
122
  try:
112
123
  self.s3_client.create_bucket(Bucket=bucket_name)
113
124
  except ClientError as create_e:
114
125
  create_error_code = create_e.response['Error']['Code']
115
- # Handles the race condition and the bucket-is-taken error
126
+ # Handle race condition where bucket was created by another process
127
+ # or the bucket name is already taken by another AWS account
116
128
  if ( create_error_code == 'BucketAlreadyOwnedByYou'
117
129
  or create_error_code == 'BucketAlreadyExists'):
118
130
  pass
119
131
  else:
120
- raise create_e # Re-raise other unexpected creation errors.
132
+ raise create_e # Re-raise other unexpected creation errors
121
133
  elif error_code == '403' or error_code == 'Forbidden':
122
- # The bucket exists, but access is forbidden.
123
- # This is likely a cross-account bucket with a policy that grants
124
- # access to you. Subsequent calls will fail if permissions are not granted.
134
+ # Bucket exists but access is forbidden - likely a cross-account
135
+ # bucket with policy granting limited access. Operations may still
136
+ # work if the policy allows the required S3 permissions.
125
137
  pass
126
138
  else:
127
- raise e # Re-raise other unexpected ClientErrors on head_bucket.
139
+ raise e # Re-raise other unexpected head_bucket errors
128
140
 
129
141
  self.bucket_name = bucket_name
130
142
 
@@ -134,15 +146,15 @@ class S3Dict(PersiDict):
134
146
 
135
147
 
136
148
  def get_params(self):
137
- """Return configuration parameters of the object as a dictionary.
149
+ """Return configuration parameters as a dictionary.
138
150
 
139
- This method is needed to support Parameterizable API.
140
- The method is absent in the original dict API.
151
+ This method supports the Parameterizable API and is not part of
152
+ the standard Python dictionary interface.
141
153
 
142
154
  Returns:
143
155
  dict: A mapping of parameter names to their configured values,
144
- including region, bucket_name, and root_prefix combined with
145
- parameters from the local cache.
156
+ including S3-specific parameters (region, bucket_name, root_prefix)
157
+ combined with parameters from the local cache, sorted by key names.
146
158
  """
147
159
  params = self.main_cache.get_params()
148
160
  params["region"] = self.region
@@ -156,34 +168,36 @@ class S3Dict(PersiDict):
156
168
  def base_url(self):
157
169
  """Return the S3 URL prefix of this dictionary.
158
170
 
159
- This property is absent in the original dict API.
171
+ This property is not part of the standard Python dictionary interface.
160
172
 
161
173
  Returns:
162
- str: The base S3 URL in the form "s3://<bucket>/<root_prefix>".
174
+ str: The base S3 URL in the format "s3://<bucket>/<root_prefix>".
163
175
  """
164
176
  return f"s3://{self.bucket_name}/{self.root_prefix}"
165
177
 
166
178
 
167
179
  @property
168
180
  def base_dir(self) -> str:
169
- """Return dictionary's base directory in the local filesystem.
181
+ """Return the dictionary's base directory in the local filesystem.
170
182
 
171
- This property is absent in the original dict API.
183
+ This property is not part of the standard Python dictionary interface.
172
184
 
173
185
  Returns:
174
- str: Path to the local on-disk cache directory used by S3Dict.
186
+ str: Path to the local cache directory used for temporary files
187
+ and caching S3 objects.
175
188
  """
176
189
  return self.main_cache.base_dir
177
190
 
178
191
 
179
- def _build_full_objectname(self, key:PersiDictKey) -> str:
180
- """Convert a key into a full S3 object key (object name).
192
+ def _build_full_objectname(self, key: PersiDictKey) -> str:
193
+ """Convert a key into a full S3 object key.
181
194
 
182
195
  Args:
183
- key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
196
+ key: Dictionary key (string or sequence of strings) or SafeStrTuple.
184
197
 
185
198
  Returns:
186
- str: The full S3 key under root_prefix with file_type suffix applied.
199
+ str: The complete S3 object key including root_prefix and file_type
200
+ extension, with digest-based collision prevention applied if enabled.
187
201
  """
188
202
  key = SafeStrTuple(key)
189
203
  key = sign_safe_str_tuple(key, self.digest_len)
@@ -191,14 +205,18 @@ class S3Dict(PersiDict):
191
205
  return objectname
192
206
 
193
207
 
194
- def __contains__(self, key:PersiDictKey) -> bool:
195
- """Return True if the specified key exists in S3.
208
+ def __contains__(self, key: PersiDictKey) -> bool:
209
+ """Check if the specified key exists in the dictionary.
210
+
211
+ For immutable dictionaries, checks the local cache first. Otherwise,
212
+ performs a HEAD request to S3 to verify object existence.
196
213
 
197
214
  Args:
198
- key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
215
+ key: Dictionary key (string or sequence of strings) or SafeStrTuple.
199
216
 
200
217
  Returns:
201
- bool: True if the object exists (or is cached when immutable), else False.
218
+ bool: True if the key exists in S3 (or local cache for immutable
219
+ items), False otherwise.
202
220
  """
203
221
  key = SafeStrTuple(key)
204
222
  if self.immutable_items and key in self.main_cache:
@@ -216,18 +234,21 @@ class S3Dict(PersiDict):
216
234
  raise
217
235
 
218
236
 
219
- def __getitem__(self, key:PersiDictKey) -> Any:
220
- """Retrieve the value stored for a key from S3 or local cache.
237
+ def __getitem__(self, key: PersiDictKey) -> Any:
238
+ """Retrieve the value stored for a key.
221
239
 
222
- If immutable_items is True and a local cached file exists, that cache is
223
- returned. Otherwise, the object is fetched from S3, with conditional
224
- requests used when possible.
240
+ For immutable dictionaries with cached values, returns the cached copy.
241
+ Otherwise, fetches from S3 using conditional requests (ETags) when
242
+ available to minimize unnecessary downloads.
225
243
 
226
244
  Args:
227
- key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
245
+ key: Dictionary key (string or sequence of strings) or SafeStrTuple.
228
246
 
229
247
  Returns:
230
- Any: The stored value.
248
+ Any: The deserialized value stored for the key.
249
+
250
+ Raises:
251
+ KeyError: If the key does not exist in S3.
231
252
  """
232
253
 
233
254
  key = SafeStrTuple(key)
@@ -252,8 +273,7 @@ class S3Dict(PersiDict):
252
273
  s3_etag = response.get("ETag")
253
274
  body = response['Body']
254
275
 
255
- # Read all data into memory and store in cache
256
-
276
+ # Deserialize and cache the S3 object content
257
277
  if self.file_type == 'json':
258
278
  deserialized_value = jsonpickle.loads(body.read().decode('utf-8'))
259
279
  elif self.file_type == 'pkl':
@@ -266,110 +286,86 @@ class S3Dict(PersiDict):
266
286
 
267
287
  except ClientError as e:
268
288
  if e.response['ResponseMetadata']['HTTPStatusCode'] == 304:
269
- # 304 Not Modified: our cached version is up-to-date.
270
- # The value will be read from cache at the end of the function.
289
+ # HTTP 304 Not Modified: cached version is current, no download needed
271
290
  pass
272
291
  elif e.response.get("Error", {}).get("Code") == 'NoSuchKey':
273
292
  raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
274
293
  else:
275
- # Re-raise other client errors (e.g., permissions, throttling)
294
+ # Re-raise other client errors (permissions, throttling, etc.)
276
295
  raise
277
296
 
278
297
  return self.main_cache[key]
279
298
 
280
299
 
281
- def __setitem__(self, key:PersiDictKey, value:Any):
282
- """Store a value for a key in S3 and update the local cache.
300
+ def __setitem__(self, key: PersiDictKey, value: Any):
301
+ """Store a value for a key in both S3 and local cache.
283
302
 
284
- Interprets special joker values: KEEP_CURRENT (no-op) and DELETE_CURRENT
285
- (deletes the key). Validates value type if base_class_for_values is set,
286
- then writes to the local cache and uploads to S3. If possible, caches the
287
- S3 ETag locally to enable conditional GETs later.
303
+ Handles special joker values (KEEP_CURRENT, DELETE_CURRENT) for
304
+ conditional operations. Validates value types against base_class_for_values
305
+ if specified, then stores locally and uploads to S3. Attempts to cache
306
+ the S3 ETag for efficient future retrievals.
288
307
 
289
308
  Args:
290
- key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
291
- value (Any): Value to store, or a joker command (KEEP_CURRENT or
309
+ key: Dictionary key (string or sequence of strings) or SafeStrTuple.
310
+ value: Value to store, or a joker command (KEEP_CURRENT or
292
311
  DELETE_CURRENT from the jokers module).
293
312
 
294
313
  Raises:
295
314
  KeyError: If attempting to modify an existing item when
296
315
  immutable_items is True.
297
- TypeError: If value is a PersiDict or does not match
298
- base_class_for_values when it is set.
316
+ TypeError: If value is a PersiDict instance or does not match
317
+ the required base_class_for_values when specified.
299
318
  """
300
319
 
301
- if value is KEEP_CURRENT:
302
- return
303
-
304
- if value is DELETE_CURRENT:
305
- self.delete_if_exists(key)
306
- return
307
-
308
- if isinstance(value, PersiDict):
309
- raise TypeError(
310
- f"You are not allowed to store a PersiDict "
311
- + f"inside another PersiDict.")
312
-
313
- if self.base_class_for_values is not None:
314
- if not isinstance(value, self.base_class_for_values):
315
- raise TypeError(
316
- f"Value must be of type {self.base_class_for_values},"
317
- + f"but it is {type(value)} instead." )
318
-
319
320
  key = SafeStrTuple(key)
320
-
321
- if self.immutable_items and key in self:
322
- raise KeyError("Can't modify an immutable item")
321
+ PersiDict.__setitem__(self, key, value)
322
+ if isinstance(value, Joker):
323
+ # Joker values (KEEP_CURRENT, DELETE_CURRENT) are handled by base class
324
+ return
323
325
 
324
326
  obj_name = self._build_full_objectname(key)
325
327
 
326
328
  # Store in local cache first
327
329
  self.main_cache[key] = value
328
330
 
329
- # Get the file path from the cache to upload to S3
331
+ # Upload the serialized file from local cache to S3
330
332
  file_path = self.main_cache._build_full_path(key)
331
333
  self.s3_client.upload_file(file_path, self.bucket_name, obj_name)
332
334
 
333
335
  try:
336
+ # Cache the S3 ETag for efficient conditional requests on future reads
334
337
  head = self.s3_client.head_object(
335
338
  Bucket=self.bucket_name, Key=obj_name)
336
339
  self.etag_cache[key] = head.get("ETag")
337
340
  except ClientError:
338
- # If we can't get ETag, we should remove any existing etag
339
- # to force a re-download on the next __getitem__ call.
341
+ # Remove stale ETag on failure to force fresh downloads later
340
342
  self.etag_cache.delete_if_exists(key)
341
343
 
342
344
 
343
- def __delitem__(self, key:PersiDictKey):
344
- """Delete the stored value for a key from S3 and local cache.
345
+ def __delitem__(self, key: PersiDictKey):
346
+ """Delete the stored value for a key from both S3 and local cache.
345
347
 
346
348
  Args:
347
- key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
349
+ key: Dictionary key (string or sequence of strings) or SafeStrTuple.
348
350
 
349
351
  Raises:
350
- KeyError: If immutable_items is True, or if the key does not exist in S3.
352
+ KeyError: If immutable_items is True, or if the key does not exist.
351
353
  """
352
-
353
354
  key = SafeStrTuple(key)
354
- if self.immutable_items:
355
- raise KeyError("Can't delete an immutable item")
356
-
357
- if key not in self:
358
- raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
359
-
355
+ PersiDict.__delitem__(self, key)
360
356
  obj_name = self._build_full_objectname(key)
361
-
362
357
  self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
363
358
  self.etag_cache.delete_if_exists(key)
364
359
  self.main_cache.delete_if_exists(key)
365
360
 
366
361
 
367
362
  def __len__(self) -> int:
368
- """Return len(self).
363
+ """Return the number of key-value pairs in the dictionary.
369
364
 
370
- WARNING: This operation can be very slow and costly on large S3 buckets
371
- as it needs to iterate over all objects in the dictionary's prefix.
372
- Avoid using it in performance-sensitive code.
365
+ Warning:
366
+ This operation can be very slow and expensive on large S3 buckets
367
+ as it must paginate through all objects under the dictionary's prefix.
368
+ Avoid using in performance-critical code.
373
369
 
374
370
  Returns:
375
371
  int: Number of stored items under this dictionary's root_prefix.
@@ -395,56 +391,47 @@ class S3Dict(PersiDict):
395
391
 
396
392
 
397
393
  def _generic_iter(self, result_type: set[str]):
398
- """Underlying implementation for .items()/.keys()/.values() iterators.
394
+ """Underlying implementation for items(), keys(), and values() iterators.
399
395
 
400
- Iterates over S3 objects under the configured root_prefix and yields
396
+ Paginates through S3 objects under the configured root_prefix and yields
401
397
  keys, values, and/or timestamps according to the requested result_type.
402
- Keys are mapped to SafeStrTuple by removing the file extension and
403
- unsigning based on digest_len.
398
+ S3 object keys are converted to SafeStrTuple instances by removing the
399
+ file extension and reversing digest-based signing if enabled.
404
400
 
405
401
  Args:
406
- result_type (set[str]): Any non-empty subset of {"keys", "values",
407
- "timestamps"} specifying which fields to yield.
402
+ result_type: Non-empty subset of {"keys", "values", "timestamps"}
403
+ specifying which fields to yield from each dictionary entry.
408
404
 
409
405
  Returns:
410
- Iterator: A generator yielding:
406
+ Iterator: A generator that yields:
411
407
  - SafeStrTuple if result_type == {"keys"}
412
- - Any if result_type == {"values"}
408
+ - Any if result_type == {"values"}
413
409
  - tuple[SafeStrTuple, Any] if result_type == {"keys", "values"}
414
- - tuple[..., float] including POSIX timestamp if "timestamps" is requested.
410
+ - tuple[..., float] including a POSIX timestamp if "timestamps" is requested
415
411
 
416
412
  Raises:
417
- ValueError: If result_type is not a set or contains entries other than
418
- "keys", "values", and/or "timestamps", or if it is empty.
413
+ ValueError: If result_type is invalid (empty, not a set, or contains
414
+ unsupported field names).
419
415
  """
420
416
 
421
- if not isinstance(result_type, set):
422
- raise ValueError(
423
- "result_type must be a set containing one to three of: 'keys', 'values', 'timestamps'"
424
- )
425
- if not (1 <= len(result_type) <= 3):
426
- raise ValueError("result_type must be a non-empty set with at most three elements")
427
- allowed = {"keys", "values", "timestamps"}
428
- if not result_type.issubset(allowed):
429
- invalid = ", ".join(sorted(result_type - allowed))
430
- raise ValueError(f"result_type contains invalid entries: {invalid}. Allowed: {sorted(allowed)}")
431
- # Intersections/length checks are implied by the above conditions.
417
+ PersiDict._generic_iter(self, result_type)
432
418
 
433
419
  suffix = "." + self.file_type
434
420
  ext_len = len(self.file_type) + 1
435
421
  prefix_len = len(self.root_prefix)
436
422
 
437
423
  def splitter(full_name: str) -> SafeStrTuple:
438
- """Convert an S3 object key into a SafeStrTuple without the suffix.
424
+ """Convert an S3 object key into a SafeStrTuple without the file extension.
439
425
 
440
426
  Args:
441
- full_name (str): Full S3 object key (including root_prefix).
427
+ full_name: Complete S3 object key including root_prefix and extension.
442
428
 
443
429
  Returns:
444
- SafeStrTuple: The parsed key parts, still signed.
430
+ SafeStrTuple: The parsed key components with digest signatures intact.
445
431
 
446
432
  Raises:
447
- ValueError: If the provided key does not start with this dictionary's root_prefix.
433
+ ValueError: If the object key does not start with this dictionary's
434
+ root_prefix (indicating it's outside the dictionary's scope).
448
435
  """
449
436
  if not full_name.startswith(self.root_prefix):
450
437
  raise ValueError(
@@ -454,7 +441,11 @@ class S3Dict(PersiDict):
454
441
  return SafeStrTuple(result)
455
442
 
456
443
  def step():
457
- """Generator that pages through S3 and yields entries based on result_type."""
444
+ """Generator that paginates through S3 objects and yields requested data.
445
+
446
+ Yields dictionary entries (keys, values, timestamps) according to the
447
+ result_type specification from the parent _generic_iter method.
448
+ """
458
449
  paginator = self.s3_client.get_paginator("list_objects_v2")
459
450
  page_iterator = paginator.paginate(
460
451
  Bucket=self.bucket_name, Prefix = self.root_prefix)
@@ -491,19 +482,20 @@ class S3Dict(PersiDict):
491
482
  return step()
492
483
 
493
484
 
494
- def get_subdict(self, key:PersiDictKey) -> S3Dict:
495
- """Get a subdictionary containing items with the same prefix key.
485
+ def get_subdict(self, key: PersiDictKey) -> S3Dict:
486
+ """Create a subdictionary scoped to items with the specified prefix.
496
487
 
497
- For a non-existing prefix key, an empty sub-dictionary is returned.
498
- This method is absent in the original dict API.
488
+ Returns an empty subdictionary if no items exist under the prefix.
489
+ This method is not part of the standard Python dictionary interface.
499
490
 
500
491
  Args:
501
492
  key (PersiDictKey): A common prefix (string or sequence of strings)
502
493
  used to scope items stored under this dictionary.
503
494
 
504
495
  Returns:
505
- S3Dict: A new S3Dict instance rooted at the given prefix, sharing
506
- the same bucket, region, serialization, and immutability settings.
496
+ S3Dict: A new S3Dict instance with root_prefix extended by the given
497
+ key, sharing the parent's bucket, region, file_type, and other
498
+ configuration settings.
507
499
  """
508
500
 
509
501
  key = SafeStrTuple(key)
@@ -529,18 +521,18 @@ class S3Dict(PersiDict):
529
521
  return new_dict
530
522
 
531
523
 
532
- def timestamp(self,key:PersiDictKey) -> float:
533
- """Get last modification time (Unix epoch seconds) for a key.
524
+ def timestamp(self, key: PersiDictKey) -> float:
525
+ """Get the last modification timestamp for a key.
534
526
 
535
- This method is absent in the original dict API.
527
+ This method is not part of the standard Python dictionary interface.
536
528
 
537
529
  Args:
538
- key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
530
+ key: Dictionary key (string or sequence of strings) or SafeStrTuple.
539
531
 
540
532
  Returns:
541
- float: POSIX timestamp (seconds since the Unix epoch) of the last
542
- modification time as reported by S3 for the object. The timestamp
543
- is timezone-aware and converted to UTC.
533
+ float: POSIX timestamp (seconds since Unix epoch) of the last
534
+ modification time as reported by S3. That time is timezone-aware
535
+ and is converted to UTC.
544
536
 
545
537
  Raises:
546
538
  KeyError: If the key does not exist in S3.
@@ -98,6 +98,8 @@ class SafeStrTuple(Sequence, Hashable):
98
98
  candidate_strings.extend(SafeStrTuple(*a).strings)
99
99
  else:
100
100
  raise TypeError(f"Invalid argument type: {type(a)}")
101
+ if len(candidate_strings) == 0:
102
+ raise ValueError("At least one non-empty valid string is required")
101
103
  self.strings = tuple(candidate_strings)
102
104
 
103
105
  @property
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.36.11
3
+ Version: 0.37.0
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -0,0 +1,14 @@
1
+ persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
+ persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
+ persidict/file_dir_dict.py,sha256=_ZGEQXmU5Sg4-PJOO4bYKhL0z6yYryVmce9lpML5OxQ,24766
4
+ persidict/jokers.py,sha256=gTu7g2l2MIgBc3-hjvUrcwcgWs6tcbLyxB0u57M3bfU,3012
5
+ persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
6
+ persidict/persi_dict.py,sha256=q0Xvq5PO5Lmx3Nwe-fbU3Klgyx39T8PMKcXYR7xduzg,22506
7
+ persidict/s3_dict.py,sha256=dYUTvGNqxIk3PpArn9uYbSv-4zzlRiPPYinYpTcJzSc,21363
8
+ persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
9
+ persidict/safe_str_tuple.py,sha256=oibohVs0xah3mSVl5aN0pQWiQeaz4jjWtEdoBSn-jac,7322
10
+ persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
11
+ persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
12
+ persidict-0.37.0.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
+ persidict-0.37.0.dist-info/METADATA,sha256=vCPprij19SxfnU6qWI9MNz78n6iT9bTUpMZsUT901mY,12387
14
+ persidict-0.37.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
- persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
- persidict/file_dir_dict.py,sha256=mWMgLN_uj-IlqIbyVdbABLVxiB-t2wh2rbv98sUWyrY,25758
4
- persidict/jokers.py,sha256=7ibh0ccfkEm3EvKIOhH9ShfZ0_MBKYMvKa1rwqHg1hk,3010
5
- persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
6
- persidict/persi_dict.py,sha256=Q7fGs9LFPxSLtC0jJwDOP1AVD9_t01SnwdN4RVBMZtg,20660
7
- persidict/s3_dict.py,sha256=0o2RslAkdE75N9zDuqQMyiSbO0uzdQmiNmZSfHZzfxw,21137
8
- persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
9
- persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
10
- persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
11
- persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
12
- persidict-0.36.11.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
- persidict-0.36.11.dist-info/METADATA,sha256=T3NKQWLF_OlBZTx4wg3x16RUiTlcYignxP9UVrNrVPc,12388
14
- persidict-0.36.11.dist-info/RECORD,,