persidict 0.36.8__py3-none-any.whl → 0.36.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

persidict/file_dir_dict.py CHANGED
@@ -45,7 +45,7 @@ class FileDirDict(PersiDict):
45
45
  Insertion order is not preserved.
46
46
 
47
47
  FileDirDict can store objects in binary files or in human-readable
48
- text files (either in jason format or as a plain text).
48
+ text files (either in JSON format or as plain text).
49
49
  """
50
50
 
51
51
  _base_dir:str
@@ -74,9 +74,9 @@ class FileDirDict(PersiDict):
74
74
  then file_type must be either "pkl" or "json".
75
75
 
76
76
  Raises:
77
- ValueError: If base_dir points to a file; if file_type is "__etag__";
78
- if file_type contains unsafe characters; or if configuration is
79
- inconsistent (e.g., non-str values with unsupported file_type).
77
+ ValueError: If file_type contains unsafe characters; or
78
+ if configuration is inconsistent (e.g., non-str values
79
+ with unsupported file_type).
80
80
  RuntimeError: If base_dir cannot be created or is not a directory.
81
81
  """
82
82
 
@@ -87,10 +87,6 @@ class FileDirDict(PersiDict):
87
87
  if file_type != replace_unsafe_chars(file_type, ""):
88
88
  raise ValueError("file_type contains unsafe characters")
89
89
  self.file_type = file_type
90
- if self.file_type == "__etag__":
91
- raise ValueError(
92
- "file_type cannot be 'etag' as it is a reserved"
93
- " extension for S3 caching.")
94
90
 
95
91
  if (base_class_for_values is None or
96
92
  not issubclass(base_class_for_values,str)):
persidict/jokers.py CHANGED
@@ -29,8 +29,9 @@ class Joker(ParameterizableClass):
29
29
  parameterizable framework. Subclasses represent value-less commands that
30
30
  alter persistence behavior when assigned to a key.
31
31
 
32
- Returns:
33
- Joker: The singleton instance for the subclass when instantiated.
32
+ Note:
33
+ This class uses a singleton pattern where each subclass maintains
34
+ exactly one instance that is returned on every instantiation.
34
35
  """
35
36
  _instances: dict[type, "Joker"] = {}
36
37
 
@@ -43,7 +44,14 @@ class Joker(ParameterizableClass):
43
44
  return {}
44
45
 
45
46
  def __new__(cls):
46
- """Create or return the singleton instance for the subclass."""
47
+ """Create or return the singleton instance for the subclass.
48
+
49
+ Args:
50
+ cls: The class for which to create or retrieve the singleton instance.
51
+
52
+ Returns:
53
+ Joker: The singleton instance for the specified class.
54
+ """
47
55
  if cls not in Joker._instances:
48
56
  Joker._instances[cls] = super().__new__(cls)
49
57
  return Joker._instances[cls]
persidict/overlapping_multi_dict.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Dict, Type
3
+ from typing import Any, Dict, List, Type
4
4
 
5
5
  from .persi_dict import PersiDict
6
6
 
@@ -14,17 +14,17 @@ class OverlappingMultiDict:
14
14
  bucket and differ only in how items are materialized by file type.
15
15
 
16
16
  Attributes:
17
- dict_type (type):
18
- A subclass of PersiDict used to create each sub-dictionary.
19
- shared_subdicts_params (dict):
20
- Parameters applied to every created sub-dictionary (e.g., base_dir,
21
- bucket, immutable_items, digest_len).
22
- individual_subdicts_params (dict):
23
- Mapping from file_type (attribute name) to a dict of parameters that
24
- are specific to that sub-dictionary. These override or extend
25
- shared_subdicts_params for the given file_type.
26
- subdicts_names (list[str]):
27
- The list of file_type names (i.e., attribute names) created.
17
+ dict_type (Type[PersiDict]): A subclass of PersiDict used to create each
18
+ sub-dictionary.
19
+ shared_subdicts_params (Dict[str, Any]): Parameters applied to every
20
+ created sub-dictionary (e.g., base_dir, bucket, immutable_items,
21
+ digest_len).
22
+ individual_subdicts_params (Dict[str, Dict[str, Any]]): Mapping from
23
+ file_type (attribute name) to a dict of parameters that are specific
24
+ to that sub-dictionary. These override or extend shared_subdicts_params
25
+ for the given file_type.
26
+ subdicts_names (List[str]): The list of file_type names (i.e., attribute
27
+ names) created.
28
28
 
29
29
  Raises:
30
30
  TypeError: If pickling is attempted or item access is used on the
@@ -37,17 +37,16 @@ class OverlappingMultiDict:
37
37
  """Initialize the container and create sub-dictionaries.
38
38
 
39
39
  Args:
40
- dict_type (type):
41
- A subclass of PersiDict that will be instantiated for each
42
- file_type provided via individual_subdicts_params.
43
- shared_subdicts_params (dict):
44
- Parameters shared by all sub-dicts (e.g., base_dir, bucket).
45
- **individual_subdicts_params: Dict[str, dict]
46
- Keyword arguments where each key is a file_type (also the
47
- attribute name to be created) and each value is a dict of
48
- parameters specific to that sub-dict. These are merged with
49
- shared_subdicts_params when constructing the sub-dict. The
50
- resulting dict also receives file_type=<key>.
40
+ dict_type (Type[PersiDict]): A subclass of PersiDict that will be
41
+ instantiated for each file_type provided via individual_subdicts_params.
42
+ shared_subdicts_params (Dict[str, Any]): Parameters shared by all
43
+ sub-dicts (e.g., base_dir, bucket).
44
+ **individual_subdicts_params (Dict[str, Dict[str, Any]]): Keyword
45
+ arguments where each key is a file_type (also the attribute name
46
+ to be created) and each value is a dict of parameters specific to
47
+ that sub-dict. These are merged with shared_subdicts_params when
48
+ constructing the sub-dict. The resulting dict also receives
49
+ file_type=<key>.
51
50
 
52
51
  Raises:
53
52
  TypeError: If dict_type is not a PersiDict subclass, or if
@@ -67,9 +66,9 @@ class OverlappingMultiDict:
67
66
  raise TypeError(
68
67
  f"Params for subdict {subdict_name!r} must be a dict")
69
68
  self.__dict__[subdict_name] = dict_type(
70
- **{**shared_subdicts_params
71
- ,**individual_subdicts_params[subdict_name]
72
- ,"file_type":subdict_name})
69
+ **{**shared_subdicts_params,
70
+ **individual_subdicts_params[subdict_name],
71
+ "file_type": subdict_name})
73
72
 
74
73
  def __getstate__(self):
75
74
  """Prevent pickling.
@@ -82,6 +81,9 @@ class OverlappingMultiDict:
82
81
  def __setstate__(self, state):
83
82
  """Prevent unpickling.
84
83
 
84
+ Args:
85
+ state: The state dictionary that would be used for unpickling (ignored).
86
+
85
87
  Raises:
86
88
  TypeError: Always raised; this object is not pickleable.
87
89
  """
@@ -93,6 +95,9 @@ class OverlappingMultiDict:
93
95
  Suggest accessing items through the sub-dictionaries exposed as
94
96
  attributes (e.g., obj.json[key]).
95
97
 
98
+ Args:
99
+ key: The key that would be accessed (ignored).
100
+
96
101
  Raises:
97
102
  TypeError: Always raised to indicate unsupported operation.
98
103
  """
@@ -104,6 +109,10 @@ class OverlappingMultiDict:
104
109
  def __setitem__(self, key, value):
105
110
  """Disallow item assignment on the container itself.
106
111
 
112
+ Args:
113
+ key: The key that would be assigned (ignored).
114
+ value: The value that would be assigned (ignored).
115
+
107
116
  Raises:
108
117
  TypeError: Always raised to indicate unsupported operation.
109
118
  """
@@ -115,10 +124,13 @@ class OverlappingMultiDict:
115
124
  def __delitem__(self, key):
116
125
  """Disallow item deletion on the container itself.
117
126
 
127
+ Args:
128
+ key: The key that would be deleted (ignored).
129
+
118
130
  Raises:
119
131
  TypeError: Always raised to indicate unsupported operation.
120
132
  """
121
133
  raise TypeError(
122
134
  "OverlappingMultiDict does not support item deletion by key. "
123
- "Individual items can be deletedthrough nested dicts, "
135
+ "Individual items can be deleted through nested dicts, "
124
136
  f"which are available via attributes {self.subdicts_names}")
persidict/persi_dict.py CHANGED
@@ -26,9 +26,9 @@ from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
26
26
  from .safe_str_tuple import SafeStrTuple
27
27
 
28
28
  PersiDictKey = SafeStrTuple | Sequence[str] | str
29
- """ A value which can be used as a key for PersiDict.
29
+ """A value which can be used as a key for PersiDict.
30
30
 
31
- PersiDict-s accept keys on a form of SafeStrTuple,
31
+ PersiDict instances accept keys in the form of SafeStrTuple,
32
32
  or a string, or a sequence of strings.
33
33
  The characters within strings must be URL/filename-safe.
34
34
  If a string (or a sequence of strings) is passed to a PersiDict as a key,
@@ -60,21 +60,26 @@ class PersiDict(MutableMapping, ParameterizableClass):
60
60
  immutable_items:bool
61
61
  base_class_for_values:Optional[type]
62
62
 
63
- def __init__(self
64
- , immutable_items:bool = False
65
- , digest_len:int = 8
66
- , base_class_for_values:Optional[type] = None
67
- , *args, **kwargs):
68
- """Initialize base parameters shared by all persistent dicts.
63
+ def __init__(self,
64
+ immutable_items: bool = False,
65
+ digest_len: int = 8,
66
+ base_class_for_values: Optional[type] = None,
67
+ *args, **kwargs):
68
+ """Initialize base parameters shared by all persistent dictionaries.
69
69
 
70
70
  Args:
71
- immutable_items: If True, items cannot be modified or deleted.
72
- digest_len: Number of hash characters to append to key components to
73
- avoid case-insensitive collisions. Must be non-negative.
74
- base_class_for_values: Optional base class that values must inherit
75
- from; if None, values are not type-restricted.
76
- *args: Ignored in the base class (reserved for subclasses).
77
- **kwargs: Ignored in the base class (reserved for subclasses).
71
+ immutable_items (bool): If True, items cannot be modified or deleted.
72
+ Defaults to False.
73
+ digest_len (int): Number of hash characters to append to key components
74
+ to avoid case-insensitive collisions. Must be non-negative.
75
+ Defaults to 8.
76
+ base_class_for_values (Optional[type]): Optional base class that values
77
+ must inherit from. If None, values are not type-restricted.
78
+ Defaults to None.
79
+ *args: Additional positional arguments (ignored in base class, reserved
80
+ for subclasses).
81
+ **kwargs: Additional keyword arguments (ignored in base class, reserved
82
+ for subclasses).
78
83
 
79
84
  Raises:
80
85
  ValueError: If digest_len is negative.
@@ -91,9 +96,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
91
96
  """Return configuration parameters of this dictionary.
92
97
 
93
98
  Returns:
94
- dict: A sorted dict of parameters used to reconstruct the instance.
99
+ dict: A sorted dictionary of parameters used to reconstruct the instance.
95
100
  This supports the Parameterizable API and is absent in the
96
- builtin dict.
101
+ built-in dict.
97
102
  """
98
103
  params = dict(
99
104
  immutable_items=self.immutable_items
@@ -321,20 +326,23 @@ class PersiDict(MutableMapping, ParameterizableClass):
321
326
  return self._generic_iter({"keys", "values", "timestamps"})
322
327
 
323
328
 
324
- def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
325
- """Insert key with default if absent; return the value.
329
+ def setdefault(self, key: PersiDictKey, default: Any = None) -> Any:
330
+ """Insert key with default value if absent; return the current value.
331
+
332
+ Behaves like the built-in dict.setdefault() method: if the key exists,
333
+ return its current value; otherwise, set the key to the default value
334
+ and return that default.
326
335
 
327
336
  Args:
328
- key: Key (string or sequence of strings) or SafeStrTuple.
329
- default: Value to insert if the key is not present.
337
+ key (PersiDictKey): Key (string, sequence of strings, or SafeStrTuple).
338
+ default (Any): Value to insert if the key is not present. Defaults to None.
330
339
 
331
340
  Returns:
332
- Any: Existing value if present; otherwise the provided default.
341
+ Any: Existing value if key is present; otherwise the provided default value.
333
342
 
334
343
  Raises:
335
344
  TypeError: If default is a Joker command (KEEP_CURRENT/DELETE_CURRENT).
336
345
  """
337
- # TODO: check edge cases to ensure the same semantics as standard dicts
338
346
  key = SafeStrTuple(key)
339
347
  if isinstance(default, Joker):
340
348
  raise TypeError("default must be a regular value, not a Joker command")
@@ -345,19 +353,20 @@ class PersiDict(MutableMapping, ParameterizableClass):
345
353
  return default
346
354
 
347
355
 
348
- def __eq__(self, other:PersiDict) -> bool:
356
+ def __eq__(self, other: PersiDict) -> bool:
349
357
  """Compare dictionaries for equality.
350
358
 
351
- If other is a PersiDict, compare portable params. Otherwise, attempt to
352
- compare as mapping by keys and values.
359
+ If other is a PersiDict instance, compares portable parameters for equality.
360
+ Otherwise, attempts to compare as a mapping by comparing all keys and values.
353
361
 
354
362
  Args:
355
- other: Another dictionary-like object.
363
+ other (PersiDict): Another dictionary-like object to compare against.
356
364
 
357
365
  Returns:
358
- bool: True if considered equal, False otherwise.
366
+ bool: True if the dictionaries are considered equal, False otherwise.
359
367
  """
360
368
  if isinstance(other, PersiDict):
369
+ # TODO: decide whether to keep these semantics
361
370
  return self.get_portable_params() == other.get_portable_params()
362
371
  try:
363
372
  if len(self) != len(other):
@@ -525,15 +534,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
525
534
  def oldest_keys(self, max_n=None):
526
535
  """Return up to max_n oldest keys in the dictionary.
527
536
 
537
+ This method is absent in the original Python dict API.
538
+
528
539
  Args:
529
540
  max_n (int | None): Maximum number of keys to return. If None,
530
541
  return all keys sorted by age (oldest first). Values <= 0
531
- yield an empty list.
542
+ yield an empty list. Defaults to None.
532
543
 
533
544
  Returns:
534
545
  list[SafeStrTuple]: The oldest keys, oldest first.
535
-
536
- This method is absent in the original Python dict API.
537
546
  """
538
547
  if max_n is None:
539
548
  # If we need all keys, sort them all by timestamp
@@ -553,6 +562,8 @@ class PersiDict(MutableMapping, ParameterizableClass):
553
562
  def oldest_values(self, max_n=None):
554
563
  """Return up to max_n oldest values in the dictionary.
555
564
 
565
+ This method is absent in the original Python dict API.
566
+
556
567
  Args:
557
568
  max_n (int | None): Maximum number of values to return. If None,
558
569
  return values for all keys sorted by age (oldest first). Values
@@ -560,8 +571,6 @@ class PersiDict(MutableMapping, ParameterizableClass):
560
571
 
561
572
  Returns:
562
573
  list[Any]: Values corresponding to the oldest keys.
563
-
564
- This method is absent in the original Python dict API.
565
574
  """
566
575
  return [self[k] for k in self.oldest_keys(max_n)]
567
576
 
@@ -569,15 +578,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
569
578
  def newest_keys(self, max_n=None):
570
579
  """Return up to max_n newest keys in the dictionary.
571
580
 
581
+ This method is absent in the original Python dict API.
582
+
572
583
  Args:
573
584
  max_n (int | None): Maximum number of keys to return. If None,
574
585
  return all keys sorted by age (newest first). Values <= 0
575
- yield an empty list.
586
+ yield an empty list. Defaults to None.
576
587
 
577
588
  Returns:
578
589
  list[SafeStrTuple]: The newest keys, newest first.
579
-
580
- This method is absent in the original Python dict API.
581
590
  """
582
591
  if max_n is None:
583
592
  # If we need all keys, sort them all by timestamp in reverse order
@@ -597,6 +606,8 @@ class PersiDict(MutableMapping, ParameterizableClass):
597
606
  def newest_values(self, max_n=None):
598
607
  """Return up to max_n newest values in the dictionary.
599
608
 
609
+ This method is absent in the original Python dict API.
610
+
600
611
  Args:
601
612
  max_n (int | None): Maximum number of values to return. If None,
602
613
  return values for all keys sorted by age (newest first). Values
@@ -604,7 +615,5 @@ class PersiDict(MutableMapping, ParameterizableClass):
604
615
 
605
616
  Returns:
606
617
  list[Any]: Values corresponding to the newest keys.
607
-
608
- This method is absent in the original Python dict API.
609
618
  """
610
619
  return [self[k] for k in self.newest_keys(max_n)]
persidict/s3_dict.py CHANGED
@@ -5,6 +5,8 @@ import tempfile
5
5
  from typing import Any, Optional
6
6
 
7
7
  import boto3
8
+ import joblib
9
+ import jsonpickle
8
10
  from botocore.exceptions import ClientError
9
11
 
10
12
  import parameterizable
@@ -15,6 +17,7 @@ from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
15
17
  from .persi_dict import PersiDict
16
18
  from .jokers import KEEP_CURRENT, DELETE_CURRENT
17
19
  from .file_dir_dict import FileDirDict, PersiDictKey
20
+ from .overlapping_multi_dict import OverlappingMultiDict
18
21
 
19
22
  S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
20
23
 
@@ -70,24 +73,28 @@ class S3Dict(PersiDict):
70
73
  must be "pkl" or "json".
71
74
  *args: Ignored; reserved for compatibility.
72
75
  **kwargs: Ignored; reserved for compatibility.
73
-
74
- Raises:
75
- ValueError: If file_type is "__etag__" (reserved) or configuration
76
- is inconsistent with base_class_for_values.
77
76
  """
78
77
 
79
- super().__init__(immutable_items = immutable_items, digest_len = 0)
78
+ super().__init__(immutable_items = immutable_items, digest_len = digest_len)
80
79
  self.file_type = file_type
81
- if self.file_type == "__etag__":
82
- raise ValueError(
83
- "file_type cannot be 'etag' as it is a reserved extension for caching.")
84
-
85
- self.local_cache = FileDirDict(
86
- base_dir= base_dir
87
- , file_type = file_type
88
- , immutable_items = immutable_items
89
- , base_class_for_values=base_class_for_values
90
- , digest_len = digest_len)
80
+ self.etag_file_type = f"{file_type}_etag"
81
+
82
+ self.local_cache = OverlappingMultiDict(
83
+ dict_type=FileDirDict,
84
+ shared_subdicts_params={
85
+ "base_dir": base_dir,
86
+ "immutable_items": immutable_items,
87
+ "base_class_for_values": base_class_for_values,
88
+ "digest_len": digest_len
89
+ },
90
+ **{
91
+ self.file_type: {},
92
+ self.etag_file_type: {"base_class_for_values": str}
93
+ }
94
+ )
95
+
96
+ self.main_cache = getattr(self.local_cache, self.file_type)
97
+ self.etag_cache = getattr(self.local_cache, self.etag_file_type)
91
98
 
92
99
  self.region = region
93
100
  if region is None:
@@ -118,7 +125,7 @@ class S3Dict(PersiDict):
118
125
  including region, bucket_name, and root_prefix combined with
119
126
  parameters from the local cache.
120
127
  """
121
- params = self.local_cache.get_params()
128
+ params = self.main_cache.get_params()
122
129
  params["region"] = self.region
123
130
  params["bucket_name"] = self.bucket_name
124
131
  params["root_prefix"] = self.root_prefix
@@ -147,7 +154,7 @@ class S3Dict(PersiDict):
147
154
  Returns:
148
155
  str: Path to the local on-disk cache directory used by S3Dict.
149
156
  """
150
- return self.local_cache.base_dir
157
+ return self.main_cache.base_dir
151
158
 
152
159
 
153
160
  def _build_full_objectname(self, key:PersiDictKey) -> str:
@@ -175,50 +182,19 @@ class S3Dict(PersiDict):
175
182
  bool: True if the object exists (or is cached when immutable), else False.
176
183
  """
177
184
  key = SafeStrTuple(key)
178
- if self.immutable_items:
179
- file_name = self.local_cache._build_full_path(
180
- key, create_subdirs=True)
181
- if os.path.exists(file_name):
185
+ if self.immutable_items and key in self.main_cache:
182
186
  return True
183
187
  try:
184
188
  obj_name = self._build_full_objectname(key)
185
189
  self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
186
190
  return True
187
- except:
188
- return False
189
-
190
-
191
- def _write_etag_file(self, file_name: str, etag: str):
192
- """Atomically write the ETag to its cache file.
193
-
194
- Args:
195
- file_name (str): Path to the cached data file (without the ETag suffix).
196
- etag (str): The S3 ETag value to persist alongside the cached file.
197
- """
198
- if not etag:
199
- return
200
- etag_file_name = file_name + ".__etag__"
201
- dir_name = os.path.dirname(etag_file_name)
202
- # Write to a temporary file and then rename for atomicity
203
- fd, temp_path = tempfile.mkstemp(dir=dir_name)
204
- try:
205
- with os.fdopen(fd, "w") as f:
206
- f.write(etag)
207
- f.flush()
208
- os.fsync(f.fileno())
209
- os.replace(temp_path, etag_file_name)
210
- try:
211
- if os.name == 'posix':
212
- dir_fd = os.open(dir_name, os.O_RDONLY)
213
- try:
214
- os.fsync(dir_fd)
215
- finally:
216
- os.close(dir_fd)
217
- except OSError:
218
- pass
219
- except:
220
- os.remove(temp_path)
221
- raise
191
+ except ClientError as e:
192
+ if e.response['ResponseMetadata']['HTTPStatusCode'] == 404:
193
+ self.main_cache.delete_if_exists(key)
194
+ self.etag_cache.delete_if_exists(key)
195
+ return False
196
+ else:
197
+ raise
222
198
 
223
199
 
224
200
  def __getitem__(self, key:PersiDictKey) -> Any:
@@ -236,19 +212,15 @@ class S3Dict(PersiDict):
236
212
  """
237
213
 
238
214
  key = SafeStrTuple(key)
239
- file_name = self.local_cache._build_full_path(key, create_subdirs=True)
240
215
 
241
- if self.immutable_items and os.path.exists(file_name):
242
- return self.local_cache._read_from_file(file_name)
216
+ if self.immutable_items and key in self.main_cache:
217
+ return self.main_cache[key]
243
218
 
244
219
  obj_name = self._build_full_objectname(key)
245
220
 
246
221
  cached_etag = None
247
- etag_file_name = file_name + ".__etag__"
248
- if not self.immutable_items and os.path.exists(file_name) and os.path.exists(
249
- etag_file_name):
250
- with open(etag_file_name, "r") as f:
251
- cached_etag = f.read()
222
+ if not self.immutable_items and key in self.main_cache and key in self.etag_cache:
223
+ cached_etag = self.etag_cache[key]
252
224
 
253
225
  try:
254
226
  get_kwargs = {'Bucket': self.bucket_name, 'Key': obj_name}
@@ -261,37 +233,23 @@ class S3Dict(PersiDict):
261
233
  s3_etag = response.get("ETag")
262
234
  body = response['Body']
263
235
 
264
- dir_name = os.path.dirname(file_name)
265
- fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".__tmp__")
266
-
267
- try:
268
- with os.fdopen(fd, 'wb') as f:
269
- # Stream body to file to avoid loading all in memory
270
- for chunk in body.iter_chunks():
271
- f.write(chunk)
272
- f.flush()
273
- os.fsync(f.fileno())
274
- os.replace(temp_path, file_name)
275
- try:
276
- if os.name == 'posix':
277
- dir_fd = os.open(dir_name, os.O_RDONLY)
278
- try:
279
- os.fsync(dir_fd)
280
- finally:
281
- os.close(dir_fd)
282
- except OSError:
283
- pass
284
- except:
285
- os.remove(temp_path) # Clean up temp file on failure
286
- raise
236
+ # Read all data into memory and store in cache
287
237
 
288
- self._write_etag_file(file_name, s3_etag)
238
+ if self.file_type == 'json':
239
+ deserialized_value = jsonpickle.loads(body.read().decode('utf-8'))
240
+ elif self.file_type == 'pkl':
241
+ deserialized_value = joblib.load(body)
242
+ else:
243
+ deserialized_value = body.read().decode('utf-8')
244
+
245
+ self.main_cache[key] = deserialized_value
246
+ self.etag_cache[key] = s3_etag
289
247
 
290
248
  except ClientError as e:
291
249
  error_code = e.response.get("Error", {}).get("Code")
292
250
  if e.response['ResponseMetadata']['HTTPStatusCode'] == 304:
293
251
  # 304 Not Modified: our cached version is up-to-date.
294
- # The file will be read from cache at the end of the function.
252
+ # The value will be read from cache at the end of the function.
295
253
  pass
296
254
  elif e.response.get("Error", {}).get("Code") == 'NoSuchKey':
297
255
  raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
@@ -299,20 +257,21 @@ class S3Dict(PersiDict):
299
257
  # Re-raise other client errors (e.g., permissions, throttling)
300
258
  raise
301
259
 
302
- return self.local_cache._read_from_file(file_name)
260
+ return self.main_cache[key]
303
261
 
304
262
 
305
263
  def __setitem__(self, key:PersiDictKey, value:Any):
306
264
  """Store a value for a key in S3 and update the local cache.
307
265
 
308
- Interprets joker values KEEP_CURRENT and DELETE_CURRENT accordingly.
309
- Validates a value type if base_class_for_values is set, then writes to the
310
- local cache and uploads to S3. If possible, caches the S3 ETag locally to
311
- enable conditional GETs later.
266
+ Interprets special joker values: KEEP_CURRENT (no-op) and DELETE_CURRENT
267
+ (deletes the key). Validates value type if base_class_for_values is set,
268
+ then writes to the local cache and uploads to S3. If possible, caches the
269
+ S3 ETag locally to enable conditional GETs later.
312
270
 
313
271
  Args:
314
272
  key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
315
- value (Any): Value to store, or a joker command.
273
+ value (Any): Value to store, or a joker command (KEEP_CURRENT or
274
+ DELETE_CURRENT from the jokers module).
316
275
 
317
276
  Raises:
318
277
  KeyError: If attempting to modify an existing item when
@@ -344,23 +303,23 @@ class S3Dict(PersiDict):
344
303
  if self.immutable_items and key in self:
345
304
  raise KeyError("Can't modify an immutable item")
346
305
 
347
- file_name = self.local_cache._build_full_path(key, create_subdirs=True)
348
306
  obj_name = self._build_full_objectname(key)
349
307
 
350
- self.local_cache._save_to_file(file_name, value)
351
- self.s3_client.upload_file(file_name, self.bucket_name, obj_name)
308
+ # Store in local cache first
309
+ self.main_cache[key] = value
310
+
311
+ # Get the file path from the cache to upload to S3
312
+ file_path = self.main_cache._build_full_path(key)
313
+ self.s3_client.upload_file(file_path, self.bucket_name, obj_name)
352
314
 
353
315
  try:
354
316
  head = self.s3_client.head_object(
355
317
  Bucket=self.bucket_name, Key=obj_name)
356
- s3_etag = head.get("ETag")
357
- self._write_etag_file(file_name, s3_etag)
318
+ self.etag_cache[key] = head.get("ETag")
358
319
  except ClientError:
359
- # If we can't get ETag, we should remove any existing etag file
320
+ # If we can't get ETag, we should remove any existing etag
360
321
  # to force a re-download on the next __getitem__ call.
361
- etag_file_name = file_name + ".__etag__"
362
- if os.path.exists(etag_file_name):
363
- os.remove(etag_file_name)
322
+ self.etag_cache.delete_if_exists(key)
364
323
 
365
324
 
366
325
  def __delitem__(self, key:PersiDictKey):
@@ -370,20 +329,19 @@ class S3Dict(PersiDict):
370
329
  key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
371
330
 
372
331
  Raises:
373
- KeyError: If immutable_items is True.
332
+ KeyError: If immutable_items is True, or if the key does not exist in S3.
374
333
  """
375
334
 
376
335
  key = SafeStrTuple(key)
377
336
  if self.immutable_items:
378
337
  raise KeyError("Can't delete an immutable item")
338
+
379
339
  obj_name = self._build_full_objectname(key)
340
+
380
341
  self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
381
- file_name = self.local_cache._build_full_path(key)
382
- if os.path.isfile(file_name):
383
- os.remove(file_name)
384
- etag_file_name = file_name + ".__etag__"
385
- if os.path.isfile(etag_file_name):
386
- os.remove(etag_file_name)
342
+ self.etag_cache.delete_if_exists(key)
343
+ self.main_cache.delete_if_exists(key)
344
+
387
345
 
388
346
  def __len__(self) -> int:
389
347
  """Return len(self).
@@ -415,7 +373,7 @@ class S3Dict(PersiDict):
415
373
  return num_files
416
374
 
417
375
 
418
- def _generic_iter(self, result_type: str):
376
+ def _generic_iter(self, result_type: set[str]):
419
377
  """Underlying implementation for .items()/.keys()/.values() iterators.
420
378
 
421
379
  Iterates over S3 objects under the configured root_prefix and yields
@@ -529,13 +487,12 @@ class S3Dict(PersiDict):
529
487
 
530
488
  key = SafeStrTuple(key)
531
489
  if len(key):
532
- key = SafeStrTuple(key)
533
490
  key = sign_safe_str_tuple(key, self.digest_len)
534
491
  full_root_prefix = self.root_prefix + "/".join(key)
535
492
  else:
536
493
  full_root_prefix = self.root_prefix
537
494
 
538
- new_dir_path = self.local_cache._build_full_path(
495
+ new_dir_path = self.main_cache._build_full_path(
539
496
  key, create_subdirs = True, is_file_path = False)
540
497
 
541
498
  new_dict = S3Dict(
@@ -561,9 +518,12 @@ class S3Dict(PersiDict):
561
518
 
562
519
  Returns:
563
520
  float: POSIX timestamp (seconds since the Unix epoch) of the last
564
- modification time as reported by S3 for the object.
521
+ modification time as reported by S3 for the object. The timestamp
522
+ is timezone-aware and converted to UTC.
523
+
524
+ Raises:
525
+ KeyError: If the key does not exist in S3.
565
526
  """
566
- # TODO: check work with timezones
567
527
  key = SafeStrTuple(key)
568
528
  obj_name = self._build_full_objectname(key)
569
529
  response = self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
persidict/safe_chars.py CHANGED
@@ -1,6 +1,17 @@
1
+ """Safe character handling utilities for URL and filesystem compatibility.
2
+
3
+ This module defines character sets and length constraints for building strings
4
+ that are safe for use in URLs, filenames, and other contexts where character
5
+ restrictions apply.
6
+ """
1
7
  import string
2
8
 
9
+ # Set of characters considered safe for filenames and URL components.
10
+ # Includes ASCII letters (a-z, A-Z), digits (0-9), and special chars: ()_-~.=
3
11
  SAFE_CHARS_SET = set(string.ascii_letters + string.digits + "()_-~.=")
12
+
13
+ # Maximum length for safe strings to ensure compatibility with various filesystems
14
+ # and URL length limitations. Set to 254 to stay just under the 255-character filename limit of most filesystems.
4
15
  SAFE_STRING_MAX_LENGTH = 254
5
16
 
6
17
  def get_safe_chars() -> set[str]:
persidict/safe_str_tuple_signing.py CHANGED
@@ -114,7 +114,7 @@ def _add_all_suffixes_if_absent(
114
114
 
115
115
  new_seq = []
116
116
  for s in str_seq:
117
- new_seq.append(_add_signature_suffix_if_absent(s,digest_len))
117
+ new_seq.append(_add_signature_suffix_if_absent(s, digest_len))
118
118
 
119
119
  new_seq = SafeStrTuple(*new_seq)
120
120
 
persidict/write_once_dict.py CHANGED
@@ -268,7 +268,7 @@ class WriteOnceDict(PersiDict):
268
268
  """Delegate iteration to the wrapped dict.
269
269
 
270
270
  Args:
271
- iter_type: tType of iterator: 'items' and/or 'keys' and/or 'timestamps'.
271
+ iter_type: Type of iterator: 'keys' and/or 'values' and/or 'timestamps'.
272
272
 
273
273
  Returns:
274
274
  Any: Iterator from the wrapped dictionary.
@@ -299,13 +299,21 @@ class WriteOnceDict(PersiDict):
299
299
  return getattr(self._wrapped_dict, name)
300
300
 
301
301
  @property
302
- def base_dir(self):
303
- """Base directory of the wrapped dict (if applicable)."""
302
+ def base_dir(self) -> str|None:
303
+ """Base directory of the wrapped dict (if applicable).
304
+
305
+ Returns:
306
+ str | None: The base directory path, or None if not applicable.
307
+ """
304
308
  return self._wrapped_dict.base_dir
305
309
 
306
310
  @property
307
- def base_url(self):
308
- """Base URL of the wrapped dict (if applicable)."""
311
+ def base_url(self) -> str|None:
312
+ """Base URL of the wrapped dict (if applicable).
313
+
314
+ Returns:
315
+ str | None: The base URL, or None if not applicable.
316
+ """
309
317
  return self._wrapped_dict.base_url
310
318
 
311
319
  def get_subdict(self, prefix_key: PersiDictKey) -> WriteOnceDict:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.36.8
3
+ Version: 0.36.9
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -0,0 +1,14 @@
1
+ persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
+ persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
+ persidict/file_dir_dict.py,sha256=JJ5oEyaqwTm9g_tUrVfut0IYI7bd5B2lhxrLzadTohA,25541
4
+ persidict/jokers.py,sha256=7ibh0ccfkEm3EvKIOhH9ShfZ0_MBKYMvKa1rwqHg1hk,3010
5
+ persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
6
+ persidict/persi_dict.py,sha256=Q7fGs9LFPxSLtC0jJwDOP1AVD9_t01SnwdN4RVBMZtg,20660
7
+ persidict/s3_dict.py,sha256=GOFTpSwFESoGxEykS7TVjkw0VRIxRon-xXytrnwAuTY,19905
8
+ persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
9
+ persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
10
+ persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
11
+ persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
12
+ persidict-0.36.9.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
+ persidict-0.36.9.dist-info/METADATA,sha256=h4j6Waop0pzsEVTRDj-Sx2NMa-GZfs2AGnbrh7gxeC8,12387
14
+ persidict-0.36.9.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
- persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
- persidict/file_dir_dict.py,sha256=IDRb6a3YQvM7Gf0jbqKkTi4VuSPecTw6Ca6HZ947Qj8,25784
4
- persidict/jokers.py,sha256=Ow4tWOTTMGKvolJyVuEF-oEgE_u3vDZtA9UFwTdhNV4,2731
5
- persidict/overlapping_multi_dict.py,sha256=gBiHaCb5pTGNW3ZrakgaiGDid6oCfoP7Vq1rxXGnFWg,5476
6
- persidict/persi_dict.py,sha256=DIMQaY4gE8NSYTlHlk9rfOJJEYUuLV8kmQ-gc474py4,20052
7
- persidict/s3_dict.py,sha256=VKDqY9sASffeXtfbavVWk8-umrioIG5Xq57Qqg1wPH4,21522
8
- persidict/safe_chars.py,sha256=9Qy24fu2dmiJOdmCF8mKZULfQaRp7H4oxfgDXeLgogI,1160
9
- persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
10
- persidict/safe_str_tuple_signing.py,sha256=RQAj4fnpRVaOe0KpwLler1UTaeNOgXCQpU3t80ixtxg,7493
11
- persidict/write_once_dict.py,sha256=-lPQ_yuU62pczHT0BYO6SFbiZBKFq8Tj9ln3jCzNDzA,11443
12
- persidict-0.36.8.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
- persidict-0.36.8.dist-info/METADATA,sha256=816s1lWkpNdgJMfVS16sDaYDyHFzaLZRDHVVMs86Slo,12387
14
- persidict-0.36.8.dist-info/RECORD,,