persidict 0.36.8__py3-none-any.whl → 0.36.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -45,7 +45,7 @@ class FileDirDict(PersiDict):
45
45
  Insertion order is not preserved.
46
46
 
47
47
  FileDirDict can store objects in binary files or in human-readable
48
- text files (either in jason format or as a plain text).
48
+ text files (either in JSON format or as plain text).
49
49
  """
50
50
 
51
51
  _base_dir:str
@@ -74,9 +74,9 @@ class FileDirDict(PersiDict):
74
74
  then file_type must be either "pkl" or "json".
75
75
 
76
76
  Raises:
77
- ValueError: If base_dir points to a file; if file_type is "__etag__";
78
- if file_type contains unsafe characters; or if configuration is
79
- inconsistent (e.g., non-str values with unsupported file_type).
77
+ ValueError: If file_type contains unsafe characters; or
78
+ if configuration is inconsistent (e.g., non-str values
79
+ with unsupported file_type).
80
80
  RuntimeError: If base_dir cannot be created or is not a directory.
81
81
  """
82
82
 
@@ -87,10 +87,6 @@ class FileDirDict(PersiDict):
87
87
  if file_type != replace_unsafe_chars(file_type, ""):
88
88
  raise ValueError("file_type contains unsafe characters")
89
89
  self.file_type = file_type
90
- if self.file_type == "__etag__":
91
- raise ValueError(
92
- "file_type cannot be 'etag' as it is a reserved"
93
- " extension for S3 caching.")
94
90
 
95
91
  if (base_class_for_values is None or
96
92
  not issubclass(base_class_for_values,str)):
persidict/jokers.py CHANGED
@@ -29,8 +29,9 @@ class Joker(ParameterizableClass):
29
29
  parameterizable framework. Subclasses represent value-less commands that
30
30
  alter persistence behavior when assigned to a key.
31
31
 
32
- Returns:
33
- Joker: The singleton instance for the subclass when instantiated.
32
+ Note:
33
+ This class uses a singleton pattern where each subclass maintains
34
+ exactly one instance that is returned on every instantiation.
34
35
  """
35
36
  _instances: dict[type, "Joker"] = {}
36
37
 
@@ -43,7 +44,14 @@ class Joker(ParameterizableClass):
43
44
  return {}
44
45
 
45
46
  def __new__(cls):
46
- """Create or return the singleton instance for the subclass."""
47
+ """Create or return the singleton instance for the subclass.
48
+
49
+ Args:
50
+ cls: The class for which to create or retrieve the singleton instance.
51
+
52
+ Returns:
53
+ Joker: The singleton instance for the specified class.
54
+ """
47
55
  if cls not in Joker._instances:
48
56
  Joker._instances[cls] = super().__new__(cls)
49
57
  return Joker._instances[cls]
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Dict, Type
3
+ from typing import Any, Dict, List, Type
4
4
 
5
5
  from .persi_dict import PersiDict
6
6
 
@@ -14,17 +14,17 @@ class OverlappingMultiDict:
14
14
  bucket and differ only in how items are materialized by file type.
15
15
 
16
16
  Attributes:
17
- dict_type (type):
18
- A subclass of PersiDict used to create each sub-dictionary.
19
- shared_subdicts_params (dict):
20
- Parameters applied to every created sub-dictionary (e.g., base_dir,
21
- bucket, immutable_items, digest_len).
22
- individual_subdicts_params (dict):
23
- Mapping from file_type (attribute name) to a dict of parameters that
24
- are specific to that sub-dictionary. These override or extend
25
- shared_subdicts_params for the given file_type.
26
- subdicts_names (list[str]):
27
- The list of file_type names (i.e., attribute names) created.
17
+ dict_type (Type[PersiDict]): A subclass of PersiDict used to create each
18
+ sub-dictionary.
19
+ shared_subdicts_params (Dict[str, Any]): Parameters applied to every
20
+ created sub-dictionary (e.g., base_dir, bucket, immutable_items,
21
+ digest_len).
22
+ individual_subdicts_params (Dict[str, Dict[str, Any]]): Mapping from
23
+ file_type (attribute name) to a dict of parameters that are specific
24
+ to that sub-dictionary. These override or extend shared_subdicts_params
25
+ for the given file_type.
26
+ subdicts_names (List[str]): The list of file_type names (i.e., attribute
27
+ names) created.
28
28
 
29
29
  Raises:
30
30
  TypeError: If pickling is attempted or item access is used on the
@@ -37,17 +37,16 @@ class OverlappingMultiDict:
37
37
  """Initialize the container and create sub-dictionaries.
38
38
 
39
39
  Args:
40
- dict_type (type):
41
- A subclass of PersiDict that will be instantiated for each
42
- file_type provided via individual_subdicts_params.
43
- shared_subdicts_params (dict):
44
- Parameters shared by all sub-dicts (e.g., base_dir, bucket).
45
- **individual_subdicts_params: Dict[str, dict]
46
- Keyword arguments where each key is a file_type (also the
47
- attribute name to be created) and each value is a dict of
48
- parameters specific to that sub-dict. These are merged with
49
- shared_subdicts_params when constructing the sub-dict. The
50
- resulting dict also receives file_type=<key>.
40
+ dict_type (Type[PersiDict]): A subclass of PersiDict that will be
41
+ instantiated for each file_type provided via individual_subdicts_params.
42
+ shared_subdicts_params (Dict[str, Any]): Parameters shared by all
43
+ sub-dicts (e.g., base_dir, bucket).
44
+ **individual_subdicts_params (Dict[str, Dict[str, Any]]): Keyword
45
+ arguments where each key is a file_type (also the attribute name
46
+ to be created) and each value is a dict of parameters specific to
47
+ that sub-dict. These are merged with shared_subdicts_params when
48
+ constructing the sub-dict. The resulting dict also receives
49
+ file_type=<key>.
51
50
 
52
51
  Raises:
53
52
  TypeError: If dict_type is not a PersiDict subclass, or if
@@ -67,9 +66,9 @@ class OverlappingMultiDict:
67
66
  raise TypeError(
68
67
  f"Params for subdict {subdict_name!r} must be a dict")
69
68
  self.__dict__[subdict_name] = dict_type(
70
- **{**shared_subdicts_params
71
- ,**individual_subdicts_params[subdict_name]
72
- ,"file_type":subdict_name})
69
+ **{**shared_subdicts_params,
70
+ **individual_subdicts_params[subdict_name],
71
+ "file_type": subdict_name})
73
72
 
74
73
  def __getstate__(self):
75
74
  """Prevent pickling.
@@ -82,6 +81,9 @@ class OverlappingMultiDict:
82
81
  def __setstate__(self, state):
83
82
  """Prevent unpickling.
84
83
 
84
+ Args:
85
+ state: The state dictionary that would be used for unpickling (ignored).
86
+
85
87
  Raises:
86
88
  TypeError: Always raised; this object is not pickleable.
87
89
  """
@@ -93,6 +95,9 @@ class OverlappingMultiDict:
93
95
  Suggest accessing items through the sub-dictionaries exposed as
94
96
  attributes (e.g., obj.json[key]).
95
97
 
98
+ Args:
99
+ key: The key that would be accessed (ignored).
100
+
96
101
  Raises:
97
102
  TypeError: Always raised to indicate unsupported operation.
98
103
  """
@@ -104,6 +109,10 @@ class OverlappingMultiDict:
104
109
  def __setitem__(self, key, value):
105
110
  """Disallow item assignment on the container itself.
106
111
 
112
+ Args:
113
+ key: The key that would be assigned (ignored).
114
+ value: The value that would be assigned (ignored).
115
+
107
116
  Raises:
108
117
  TypeError: Always raised to indicate unsupported operation.
109
118
  """
@@ -115,10 +124,13 @@ class OverlappingMultiDict:
115
124
  def __delitem__(self, key):
116
125
  """Disallow item deletion on the container itself.
117
126
 
127
+ Args:
128
+ key: The key that would be deleted (ignored).
129
+
118
130
  Raises:
119
131
  TypeError: Always raised to indicate unsupported operation.
120
132
  """
121
133
  raise TypeError(
122
134
  "OverlappingMultiDict does not support item deletion by key. "
123
- "Individual items can be deletedthrough nested dicts, "
135
+ "Individual items can be deleted through nested dicts, "
124
136
  f"which are available via attributes {self.subdicts_names}")
persidict/persi_dict.py CHANGED
@@ -26,9 +26,9 @@ from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
26
26
  from .safe_str_tuple import SafeStrTuple
27
27
 
28
28
  PersiDictKey = SafeStrTuple | Sequence[str] | str
29
- """ A value which can be used as a key for PersiDict.
29
+ """A value which can be used as a key for PersiDict.
30
30
 
31
- PersiDict-s accept keys on a form of SafeStrTuple,
31
+ PersiDict instances accept keys in the form of SafeStrTuple,
32
32
  or a string, or a sequence of strings.
33
33
  The characters within strings must be URL/filename-safe.
34
34
  If a string (or a sequence of strings) is passed to a PersiDict as a key,
@@ -60,21 +60,26 @@ class PersiDict(MutableMapping, ParameterizableClass):
60
60
  immutable_items:bool
61
61
  base_class_for_values:Optional[type]
62
62
 
63
- def __init__(self
64
- , immutable_items:bool = False
65
- , digest_len:int = 8
66
- , base_class_for_values:Optional[type] = None
67
- , *args, **kwargs):
68
- """Initialize base parameters shared by all persistent dicts.
63
+ def __init__(self,
64
+ immutable_items: bool = False,
65
+ digest_len: int = 8,
66
+ base_class_for_values: Optional[type] = None,
67
+ *args, **kwargs):
68
+ """Initialize base parameters shared by all persistent dictionaries.
69
69
 
70
70
  Args:
71
- immutable_items: If True, items cannot be modified or deleted.
72
- digest_len: Number of hash characters to append to key components to
73
- avoid case-insensitive collisions. Must be non-negative.
74
- base_class_for_values: Optional base class that values must inherit
75
- from; if None, values are not type-restricted.
76
- *args: Ignored in the base class (reserved for subclasses).
77
- **kwargs: Ignored in the base class (reserved for subclasses).
71
+ immutable_items (bool): If True, items cannot be modified or deleted.
72
+ Defaults to False.
73
+ digest_len (int): Number of hash characters to append to key components
74
+ to avoid case-insensitive collisions. Must be non-negative.
75
+ Defaults to 8.
76
+ base_class_for_values (Optional[type]): Optional base class that values
77
+ must inherit from. If None, values are not type-restricted.
78
+ Defaults to None.
79
+ *args: Additional positional arguments (ignored in base class, reserved
80
+ for subclasses).
81
+ **kwargs: Additional keyword arguments (ignored in base class, reserved
82
+ for subclasses).
78
83
 
79
84
  Raises:
80
85
  ValueError: If digest_len is negative.
@@ -91,9 +96,9 @@ class PersiDict(MutableMapping, ParameterizableClass):
91
96
  """Return configuration parameters of this dictionary.
92
97
 
93
98
  Returns:
94
- dict: A sorted dict of parameters used to reconstruct the instance.
99
+ dict: A sorted dictionary of parameters used to reconstruct the instance.
95
100
  This supports the Parameterizable API and is absent in the
96
- builtin dict.
101
+ built-in dict.
97
102
  """
98
103
  params = dict(
99
104
  immutable_items=self.immutable_items
@@ -321,20 +326,23 @@ class PersiDict(MutableMapping, ParameterizableClass):
321
326
  return self._generic_iter({"keys", "values", "timestamps"})
322
327
 
323
328
 
324
- def setdefault(self, key:PersiDictKey, default:Any=None) -> Any:
325
- """Insert key with default if absent; return the value.
329
+ def setdefault(self, key: PersiDictKey, default: Any = None) -> Any:
330
+ """Insert key with default value if absent; return the current value.
331
+
332
+ Behaves like the built-in dict.setdefault() method: if the key exists,
333
+ return its current value; otherwise, set the key to the default value
334
+ and return that default.
326
335
 
327
336
  Args:
328
- key: Key (string or sequence of strings) or SafeStrTuple.
329
- default: Value to insert if the key is not present.
337
+ key (PersiDictKey): Key (string, sequence of strings, or SafeStrTuple).
338
+ default (Any): Value to insert if the key is not present. Defaults to None.
330
339
 
331
340
  Returns:
332
- Any: Existing value if present; otherwise the provided default.
341
+ Any: Existing value if key is present; otherwise the provided default value.
333
342
 
334
343
  Raises:
335
344
  TypeError: If default is a Joker command (KEEP_CURRENT/DELETE_CURRENT).
336
345
  """
337
- # TODO: check edge cases to ensure the same semantics as standard dicts
338
346
  key = SafeStrTuple(key)
339
347
  if isinstance(default, Joker):
340
348
  raise TypeError("default must be a regular value, not a Joker command")
@@ -345,19 +353,20 @@ class PersiDict(MutableMapping, ParameterizableClass):
345
353
  return default
346
354
 
347
355
 
348
- def __eq__(self, other:PersiDict) -> bool:
356
+ def __eq__(self, other: PersiDict) -> bool:
349
357
  """Compare dictionaries for equality.
350
358
 
351
- If other is a PersiDict, compare portable params. Otherwise, attempt to
352
- compare as mapping by keys and values.
359
+ If other is a PersiDict instance, compares portable parameters for equality.
360
+ Otherwise, attempts to compare as a mapping by comparing all keys and values.
353
361
 
354
362
  Args:
355
- other: Another dictionary-like object.
363
+ other (PersiDict): Another dictionary-like object to compare against.
356
364
 
357
365
  Returns:
358
- bool: True if considered equal, False otherwise.
366
+ bool: True if the dictionaries are considered equal, False otherwise.
359
367
  """
360
368
  if isinstance(other, PersiDict):
369
+ #TODO: decide whether to keep this semantics
361
370
  return self.get_portable_params() == other.get_portable_params()
362
371
  try:
363
372
  if len(self) != len(other):
@@ -525,15 +534,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
525
534
  def oldest_keys(self, max_n=None):
526
535
  """Return up to max_n oldest keys in the dictionary.
527
536
 
537
+ This method is absent in the original Python dict API.
538
+
528
539
  Args:
529
540
  max_n (int | None): Maximum number of keys to return. If None,
530
541
  return all keys sorted by age (oldest first). Values <= 0
531
- yield an empty list.
542
+ yield an empty list. Defaults to None.
532
543
 
533
544
  Returns:
534
545
  list[SafeStrTuple]: The oldest keys, oldest first.
535
-
536
- This method is absent in the original Python dict API.
537
546
  """
538
547
  if max_n is None:
539
548
  # If we need all keys, sort them all by timestamp
@@ -553,6 +562,8 @@ class PersiDict(MutableMapping, ParameterizableClass):
553
562
  def oldest_values(self, max_n=None):
554
563
  """Return up to max_n oldest values in the dictionary.
555
564
 
565
+ This method is absent in the original Python dict API.
566
+
556
567
  Args:
557
568
  max_n (int | None): Maximum number of values to return. If None,
558
569
  return values for all keys sorted by age (oldest first). Values
@@ -560,8 +571,6 @@ class PersiDict(MutableMapping, ParameterizableClass):
560
571
 
561
572
  Returns:
562
573
  list[Any]: Values corresponding to the oldest keys.
563
-
564
- This method is absent in the original Python dict API.
565
574
  """
566
575
  return [self[k] for k in self.oldest_keys(max_n)]
567
576
 
@@ -569,15 +578,15 @@ class PersiDict(MutableMapping, ParameterizableClass):
569
578
  def newest_keys(self, max_n=None):
570
579
  """Return up to max_n newest keys in the dictionary.
571
580
 
581
+ This method is absent in the original Python dict API.
582
+
572
583
  Args:
573
584
  max_n (int | None): Maximum number of keys to return. If None,
574
585
  return all keys sorted by age (newest first). Values <= 0
575
- yield an empty list.
586
+ yield an empty list. Defaults to None.
576
587
 
577
588
  Returns:
578
589
  list[SafeStrTuple]: The newest keys, newest first.
579
-
580
- This method is absent in the original Python dict API.
581
590
  """
582
591
  if max_n is None:
583
592
  # If we need all keys, sort them all by timestamp in reverse order
@@ -597,6 +606,8 @@ class PersiDict(MutableMapping, ParameterizableClass):
597
606
  def newest_values(self, max_n=None):
598
607
  """Return up to max_n newest values in the dictionary.
599
608
 
609
+ This method is absent in the original Python dict API.
610
+
600
611
  Args:
601
612
  max_n (int | None): Maximum number of values to return. If None,
602
613
  return values for all keys sorted by age (newest first). Values
@@ -604,7 +615,5 @@ class PersiDict(MutableMapping, ParameterizableClass):
604
615
 
605
616
  Returns:
606
617
  list[Any]: Values corresponding to the newest keys.
607
-
608
- This method is absent in the original Python dict API.
609
618
  """
610
619
  return [self[k] for k in self.newest_keys(max_n)]
persidict/s3_dict.py CHANGED
@@ -5,6 +5,8 @@ import tempfile
5
5
  from typing import Any, Optional
6
6
 
7
7
  import boto3
8
+ import joblib
9
+ import jsonpickle
8
10
  from botocore.exceptions import ClientError
9
11
 
10
12
  import parameterizable
@@ -15,6 +17,7 @@ from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
15
17
  from .persi_dict import PersiDict
16
18
  from .jokers import KEEP_CURRENT, DELETE_CURRENT
17
19
  from .file_dir_dict import FileDirDict, PersiDictKey
20
+ from .overlapping_multi_dict import OverlappingMultiDict
18
21
 
19
22
  S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
20
23
 
@@ -70,24 +73,28 @@ class S3Dict(PersiDict):
70
73
  must be "pkl" or "json".
71
74
  *args: Ignored; reserved for compatibility.
72
75
  **kwargs: Ignored; reserved for compatibility.
73
-
74
- Raises:
75
- ValueError: If file_type is "__etag__" (reserved) or configuration
76
- is inconsistent with base_class_for_values.
77
76
  """
78
77
 
79
- super().__init__(immutable_items = immutable_items, digest_len = 0)
78
+ super().__init__(immutable_items = immutable_items, digest_len = digest_len)
80
79
  self.file_type = file_type
81
- if self.file_type == "__etag__":
82
- raise ValueError(
83
- "file_type cannot be 'etag' as it is a reserved extension for caching.")
84
-
85
- self.local_cache = FileDirDict(
86
- base_dir= base_dir
87
- , file_type = file_type
88
- , immutable_items = immutable_items
89
- , base_class_for_values=base_class_for_values
90
- , digest_len = digest_len)
80
+ self.etag_file_type = f"{file_type}_etag"
81
+
82
+ self.local_cache = OverlappingMultiDict(
83
+ dict_type=FileDirDict,
84
+ shared_subdicts_params={
85
+ "base_dir": base_dir,
86
+ "immutable_items": immutable_items,
87
+ "base_class_for_values": base_class_for_values,
88
+ "digest_len": digest_len
89
+ },
90
+ **{
91
+ self.file_type: {},
92
+ self.etag_file_type: {"base_class_for_values": str}
93
+ }
94
+ )
95
+
96
+ self.main_cache = getattr(self.local_cache, self.file_type)
97
+ self.etag_cache = getattr(self.local_cache, self.etag_file_type)
91
98
 
92
99
  self.region = region
93
100
  if region is None:
@@ -96,9 +103,28 @@ class S3Dict(PersiDict):
96
103
  self.s3_client = boto3.client('s3', region_name=region)
97
104
 
98
105
  try:
99
- self.s3_client.create_bucket(Bucket=bucket_name)
100
- except:
101
- pass
106
+ self.s3_client.head_bucket(Bucket=bucket_name)
107
+ except ClientError as e:
108
+ error_code = e.response['Error']['Code']
109
+ if error_code == '404' or error_code == 'NotFound':
110
+ # The bucket does not exist, so attempt to create it.
111
+ try:
112
+ self.s3_client.create_bucket(Bucket=bucket_name)
113
+ except ClientError as create_e:
114
+ create_error_code = create_e.response['Error']['Code']
115
+ # Handles the race condition and the bucket-is-taken error
116
+ if ( create_error_code == 'BucketAlreadyOwnedByYou'
117
+ or create_error_code == 'BucketAlreadyExists'):
118
+ pass
119
+ else:
120
+ raise create_e # Re-raise other unexpected creation errors.
121
+ elif error_code == '403' or error_code == 'Forbidden':
122
+ # The bucket exists, but access is forbidden.
123
+ # This is likely a cross-account bucket with a policy that grants
124
+ # access to you. Subsequent calls will fail if permissions are not granted.
125
+ pass
126
+ else:
127
+ raise e # Re-raise other unexpected ClientErrors on head_bucket.
102
128
 
103
129
  self.bucket_name = bucket_name
104
130
 
@@ -118,7 +144,7 @@ class S3Dict(PersiDict):
118
144
  including region, bucket_name, and root_prefix combined with
119
145
  parameters from the local cache.
120
146
  """
121
- params = self.local_cache.get_params()
147
+ params = self.main_cache.get_params()
122
148
  params["region"] = self.region
123
149
  params["bucket_name"] = self.bucket_name
124
150
  params["root_prefix"] = self.root_prefix
@@ -147,7 +173,7 @@ class S3Dict(PersiDict):
147
173
  Returns:
148
174
  str: Path to the local on-disk cache directory used by S3Dict.
149
175
  """
150
- return self.local_cache.base_dir
176
+ return self.main_cache.base_dir
151
177
 
152
178
 
153
179
  def _build_full_objectname(self, key:PersiDictKey) -> str:
@@ -175,50 +201,19 @@ class S3Dict(PersiDict):
175
201
  bool: True if the object exists (or is cached when immutable), else False.
176
202
  """
177
203
  key = SafeStrTuple(key)
178
- if self.immutable_items:
179
- file_name = self.local_cache._build_full_path(
180
- key, create_subdirs=True)
181
- if os.path.exists(file_name):
204
+ if self.immutable_items and key in self.main_cache:
182
205
  return True
183
206
  try:
184
207
  obj_name = self._build_full_objectname(key)
185
208
  self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
186
209
  return True
187
- except:
188
- return False
189
-
190
-
191
- def _write_etag_file(self, file_name: str, etag: str):
192
- """Atomically write the ETag to its cache file.
193
-
194
- Args:
195
- file_name (str): Path to the cached data file (without the ETag suffix).
196
- etag (str): The S3 ETag value to persist alongside the cached file.
197
- """
198
- if not etag:
199
- return
200
- etag_file_name = file_name + ".__etag__"
201
- dir_name = os.path.dirname(etag_file_name)
202
- # Write to a temporary file and then rename for atomicity
203
- fd, temp_path = tempfile.mkstemp(dir=dir_name)
204
- try:
205
- with os.fdopen(fd, "w") as f:
206
- f.write(etag)
207
- f.flush()
208
- os.fsync(f.fileno())
209
- os.replace(temp_path, etag_file_name)
210
- try:
211
- if os.name == 'posix':
212
- dir_fd = os.open(dir_name, os.O_RDONLY)
213
- try:
214
- os.fsync(dir_fd)
215
- finally:
216
- os.close(dir_fd)
217
- except OSError:
218
- pass
219
- except:
220
- os.remove(temp_path)
221
- raise
210
+ except ClientError as e:
211
+ if e.response['ResponseMetadata']['HTTPStatusCode'] == 404:
212
+ self.main_cache.delete_if_exists(key)
213
+ self.etag_cache.delete_if_exists(key)
214
+ return False
215
+ else:
216
+ raise
222
217
 
223
218
 
224
219
  def __getitem__(self, key:PersiDictKey) -> Any:
@@ -236,19 +231,15 @@ class S3Dict(PersiDict):
236
231
  """
237
232
 
238
233
  key = SafeStrTuple(key)
239
- file_name = self.local_cache._build_full_path(key, create_subdirs=True)
240
234
 
241
- if self.immutable_items and os.path.exists(file_name):
242
- return self.local_cache._read_from_file(file_name)
235
+ if self.immutable_items and key in self.main_cache:
236
+ return self.main_cache[key]
243
237
 
244
238
  obj_name = self._build_full_objectname(key)
245
239
 
246
240
  cached_etag = None
247
- etag_file_name = file_name + ".__etag__"
248
- if not self.immutable_items and os.path.exists(file_name) and os.path.exists(
249
- etag_file_name):
250
- with open(etag_file_name, "r") as f:
251
- cached_etag = f.read()
241
+ if not self.immutable_items and key in self.main_cache and key in self.etag_cache:
242
+ cached_etag = self.etag_cache[key]
252
243
 
253
244
  try:
254
245
  get_kwargs = {'Bucket': self.bucket_name, 'Key': obj_name}
@@ -261,37 +252,22 @@ class S3Dict(PersiDict):
261
252
  s3_etag = response.get("ETag")
262
253
  body = response['Body']
263
254
 
264
- dir_name = os.path.dirname(file_name)
265
- fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".__tmp__")
266
-
267
- try:
268
- with os.fdopen(fd, 'wb') as f:
269
- # Stream body to file to avoid loading all in memory
270
- for chunk in body.iter_chunks():
271
- f.write(chunk)
272
- f.flush()
273
- os.fsync(f.fileno())
274
- os.replace(temp_path, file_name)
275
- try:
276
- if os.name == 'posix':
277
- dir_fd = os.open(dir_name, os.O_RDONLY)
278
- try:
279
- os.fsync(dir_fd)
280
- finally:
281
- os.close(dir_fd)
282
- except OSError:
283
- pass
284
- except:
285
- os.remove(temp_path) # Clean up temp file on failure
286
- raise
255
+ # Read all data into memory and store in cache
287
256
 
288
- self._write_etag_file(file_name, s3_etag)
257
+ if self.file_type == 'json':
258
+ deserialized_value = jsonpickle.loads(body.read().decode('utf-8'))
259
+ elif self.file_type == 'pkl':
260
+ deserialized_value = joblib.load(body)
261
+ else:
262
+ deserialized_value = body.read().decode('utf-8')
263
+
264
+ self.main_cache[key] = deserialized_value
265
+ self.etag_cache[key] = s3_etag
289
266
 
290
267
  except ClientError as e:
291
- error_code = e.response.get("Error", {}).get("Code")
292
268
  if e.response['ResponseMetadata']['HTTPStatusCode'] == 304:
293
269
  # 304 Not Modified: our cached version is up-to-date.
294
- # The file will be read from cache at the end of the function.
270
+ # The value will be read from cache at the end of the function.
295
271
  pass
296
272
  elif e.response.get("Error", {}).get("Code") == 'NoSuchKey':
297
273
  raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
@@ -299,20 +275,21 @@ class S3Dict(PersiDict):
299
275
  # Re-raise other client errors (e.g., permissions, throttling)
300
276
  raise
301
277
 
302
- return self.local_cache._read_from_file(file_name)
278
+ return self.main_cache[key]
303
279
 
304
280
 
305
281
  def __setitem__(self, key:PersiDictKey, value:Any):
306
282
  """Store a value for a key in S3 and update the local cache.
307
283
 
308
- Interprets joker values KEEP_CURRENT and DELETE_CURRENT accordingly.
309
- Validates a value type if base_class_for_values is set, then writes to the
310
- local cache and uploads to S3. If possible, caches the S3 ETag locally to
311
- enable conditional GETs later.
284
+ Interprets special joker values: KEEP_CURRENT (no-op) and DELETE_CURRENT
285
+ (deletes the key). Validates value type if base_class_for_values is set,
286
+ then writes to the local cache and uploads to S3. If possible, caches the
287
+ S3 ETag locally to enable conditional GETs later.
312
288
 
313
289
  Args:
314
290
  key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
315
- value (Any): Value to store, or a joker command.
291
+ value (Any): Value to store, or a joker command (KEEP_CURRENT or
292
+ DELETE_CURRENT from the jokers module).
316
293
 
317
294
  Raises:
318
295
  KeyError: If attempting to modify an existing item when
@@ -344,23 +321,23 @@ class S3Dict(PersiDict):
344
321
  if self.immutable_items and key in self:
345
322
  raise KeyError("Can't modify an immutable item")
346
323
 
347
- file_name = self.local_cache._build_full_path(key, create_subdirs=True)
348
324
  obj_name = self._build_full_objectname(key)
349
325
 
350
- self.local_cache._save_to_file(file_name, value)
351
- self.s3_client.upload_file(file_name, self.bucket_name, obj_name)
326
+ # Store in local cache first
327
+ self.main_cache[key] = value
328
+
329
+ # Get the file path from the cache to upload to S3
330
+ file_path = self.main_cache._build_full_path(key)
331
+ self.s3_client.upload_file(file_path, self.bucket_name, obj_name)
352
332
 
353
333
  try:
354
334
  head = self.s3_client.head_object(
355
335
  Bucket=self.bucket_name, Key=obj_name)
356
- s3_etag = head.get("ETag")
357
- self._write_etag_file(file_name, s3_etag)
336
+ self.etag_cache[key] = head.get("ETag")
358
337
  except ClientError:
359
- # If we can't get ETag, we should remove any existing etag file
338
+ # If we can't get ETag, we should remove any existing etag
360
339
  # to force a re-download on the next __getitem__ call.
361
- etag_file_name = file_name + ".__etag__"
362
- if os.path.exists(etag_file_name):
363
- os.remove(etag_file_name)
340
+ self.etag_cache.delete_if_exists(key)
364
341
 
365
342
 
366
343
  def __delitem__(self, key:PersiDictKey):
@@ -370,20 +347,22 @@ class S3Dict(PersiDict):
370
347
  key (PersiDictKey): Key (string or sequence of strings) or SafeStrTuple.
371
348
 
372
349
  Raises:
373
- KeyError: If immutable_items is True.
350
+ KeyError: If immutable_items is True, or if the key does not exist in S3.
374
351
  """
375
352
 
376
353
  key = SafeStrTuple(key)
377
354
  if self.immutable_items:
378
355
  raise KeyError("Can't delete an immutable item")
356
+
357
+ if key not in self:
358
+ raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
359
+
379
360
  obj_name = self._build_full_objectname(key)
361
+
380
362
  self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
381
- file_name = self.local_cache._build_full_path(key)
382
- if os.path.isfile(file_name):
383
- os.remove(file_name)
384
- etag_file_name = file_name + ".__etag__"
385
- if os.path.isfile(etag_file_name):
386
- os.remove(etag_file_name)
363
+ self.etag_cache.delete_if_exists(key)
364
+ self.main_cache.delete_if_exists(key)
365
+
387
366
 
388
367
  def __len__(self) -> int:
389
368
  """Return len(self).
@@ -415,7 +394,7 @@ class S3Dict(PersiDict):
415
394
  return num_files
416
395
 
417
396
 
418
- def _generic_iter(self, result_type: str):
397
+ def _generic_iter(self, result_type: set[str]):
419
398
  """Underlying implementation for .items()/.keys()/.values() iterators.
420
399
 
421
400
  Iterates over S3 objects under the configured root_prefix and yields
@@ -529,13 +508,12 @@ class S3Dict(PersiDict):
529
508
 
530
509
  key = SafeStrTuple(key)
531
510
  if len(key):
532
- key = SafeStrTuple(key)
533
511
  key = sign_safe_str_tuple(key, self.digest_len)
534
512
  full_root_prefix = self.root_prefix + "/".join(key)
535
513
  else:
536
514
  full_root_prefix = self.root_prefix
537
515
 
538
- new_dir_path = self.local_cache._build_full_path(
516
+ new_dir_path = self.main_cache._build_full_path(
539
517
  key, create_subdirs = True, is_file_path = False)
540
518
 
541
519
  new_dict = S3Dict(
@@ -561,9 +539,12 @@ class S3Dict(PersiDict):
561
539
 
562
540
  Returns:
563
541
  float: POSIX timestamp (seconds since the Unix epoch) of the last
564
- modification time as reported by S3 for the object.
542
+ modification time as reported by S3 for the object. The timestamp
543
+ is timezone-aware and converted to UTC.
544
+
545
+ Raises:
546
+ KeyError: If the key does not exist in S3.
565
547
  """
566
- # TODO: check work with timezones
567
548
  key = SafeStrTuple(key)
568
549
  obj_name = self._build_full_objectname(key)
569
550
  response = self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
persidict/safe_chars.py CHANGED
@@ -1,6 +1,17 @@
1
+ """Safe character handling utilities for URL and filesystem compatibility.
2
+
3
+ This module defines character sets and length constraints for building strings
4
+ that are safe for use in URLs, filenames, and other contexts where character
5
+ restrictions apply.
6
+ """
1
7
  import string
2
8
 
9
+ # Set of characters considered safe for filenames and URL components.
10
+ # Includes ASCII letters (a-z, A-Z), digits (0-9), and special chars: ()_-~.=
3
11
  SAFE_CHARS_SET = set(string.ascii_letters + string.digits + "()_-~.=")
12
+
13
+ # Maximum length for safe strings to ensure compatibility with various filesystems
14
+ # and URL length limitations. Set to 254 to stay well under most system limits.
4
15
  SAFE_STRING_MAX_LENGTH = 254
5
16
 
6
17
  def get_safe_chars() -> set[str]:
@@ -114,7 +114,7 @@ def _add_all_suffixes_if_absent(
114
114
 
115
115
  new_seq = []
116
116
  for s in str_seq:
117
- new_seq.append(_add_signature_suffix_if_absent(s,digest_len))
117
+ new_seq.append(_add_signature_suffix_if_absent(s, digest_len))
118
118
 
119
119
  new_seq = SafeStrTuple(*new_seq)
120
120
 
@@ -268,7 +268,7 @@ class WriteOnceDict(PersiDict):
268
268
  """Delegate iteration to the wrapped dict.
269
269
 
270
270
  Args:
271
- iter_type: tType of iterator: 'items' and/or 'keys' and/or 'timestamps'.
271
+ iter_type: Type of iterator: 'items' and/or 'keys' and/or 'timestamps'.
272
272
 
273
273
  Returns:
274
274
  Any: Iterator from the wrapped dictionary.
@@ -299,13 +299,21 @@ class WriteOnceDict(PersiDict):
299
299
  return getattr(self._wrapped_dict, name)
300
300
 
301
301
  @property
302
- def base_dir(self):
303
- """Base directory of the wrapped dict (if applicable)."""
302
+ def base_dir(self) -> str|None:
303
+ """Base directory of the wrapped dict (if applicable).
304
+
305
+ Returns:
306
+ str | None: The base directory path, or None if not applicable.
307
+ """
304
308
  return self._wrapped_dict.base_dir
305
309
 
306
310
  @property
307
- def base_url(self):
308
- """Base URL of the wrapped dict (if applicable)."""
311
+ def base_url(self) -> str|None:
312
+ """Base URL of the wrapped dict (if applicable).
313
+
314
+ Returns:
315
+ str | None: The base URL, or None if not applicable.
316
+ """
309
317
  return self._wrapped_dict.base_url
310
318
 
311
319
  def get_subdict(self, prefix_key: PersiDictKey) -> WriteOnceDict:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.36.8
3
+ Version: 0.36.10
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -0,0 +1,14 @@
1
+ persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
+ persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
+ persidict/file_dir_dict.py,sha256=JJ5oEyaqwTm9g_tUrVfut0IYI7bd5B2lhxrLzadTohA,25541
4
+ persidict/jokers.py,sha256=7ibh0ccfkEm3EvKIOhH9ShfZ0_MBKYMvKa1rwqHg1hk,3010
5
+ persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
6
+ persidict/persi_dict.py,sha256=Q7fGs9LFPxSLtC0jJwDOP1AVD9_t01SnwdN4RVBMZtg,20660
7
+ persidict/s3_dict.py,sha256=0o2RslAkdE75N9zDuqQMyiSbO0uzdQmiNmZSfHZzfxw,21137
8
+ persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
9
+ persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
10
+ persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
11
+ persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
12
+ persidict-0.36.10.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
+ persidict-0.36.10.dist-info/METADATA,sha256=iJ9RdvGe3i_xO3yTM8J8iaZ6k2lwd_ezDl8kzqioywc,12388
14
+ persidict-0.36.10.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
- persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
- persidict/file_dir_dict.py,sha256=IDRb6a3YQvM7Gf0jbqKkTi4VuSPecTw6Ca6HZ947Qj8,25784
4
- persidict/jokers.py,sha256=Ow4tWOTTMGKvolJyVuEF-oEgE_u3vDZtA9UFwTdhNV4,2731
5
- persidict/overlapping_multi_dict.py,sha256=gBiHaCb5pTGNW3ZrakgaiGDid6oCfoP7Vq1rxXGnFWg,5476
6
- persidict/persi_dict.py,sha256=DIMQaY4gE8NSYTlHlk9rfOJJEYUuLV8kmQ-gc474py4,20052
7
- persidict/s3_dict.py,sha256=VKDqY9sASffeXtfbavVWk8-umrioIG5Xq57Qqg1wPH4,21522
8
- persidict/safe_chars.py,sha256=9Qy24fu2dmiJOdmCF8mKZULfQaRp7H4oxfgDXeLgogI,1160
9
- persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
10
- persidict/safe_str_tuple_signing.py,sha256=RQAj4fnpRVaOe0KpwLler1UTaeNOgXCQpU3t80ixtxg,7493
11
- persidict/write_once_dict.py,sha256=-lPQ_yuU62pczHT0BYO6SFbiZBKFq8Tj9ln3jCzNDzA,11443
12
- persidict-0.36.8.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
- persidict-0.36.8.dist-info/METADATA,sha256=816s1lWkpNdgJMfVS16sDaYDyHFzaLZRDHVVMs86Slo,12387
14
- persidict-0.36.8.dist-info/RECORD,,