persidict 0.38.0__py3-none-any.whl → 0.103.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -0,0 +1,215 @@
1
+ """S3Dict_FileDirCached implementation that mimics S3Dict_Legacy but uses BasicS3Dict, FileDirDict, and cached classes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Optional
6
+
7
+ import parameterizable
8
+ from parameterizable import sort_dict_by_keys
9
+
10
+ from .basic_s3_dict import BasicS3Dict
11
+ from .file_dir_dict import FileDirDict, FILEDIRDICT_DEFAULT_BASE_DIR
12
+ from .cached_appendonly_dict import AppendOnlyDictCached
13
+ from .cached_mutable_dict import MutableDictCached
14
+ from .persi_dict import PersiDict, NonEmptyPersiDictKey, PersiDictKey
15
+ from .safe_str_tuple import NonEmptySafeStrTuple
16
+ from .overlapping_multi_dict import OverlappingMultiDict
17
+
18
+
# Default base directory for the S3Dict_FileDirCached local cache.
# Used as the default value of the `base_dir` constructor argument.
S3DICT_NEW_DEFAULT_BASE_DIR = "__s3_dict__"
21
+
22
+
class S3Dict_FileDirCached(PersiDict):
    """S3-backed persistent dictionary using BasicS3Dict with local caching.

    This class mimics the interface and behavior of S3Dict_Legacy but
    internally uses BasicS3Dict for S3 operations combined with
    FileDirDict-based local caching via the cached wrapper classes
    (AppendOnlyDictCached / MutableDictCached).

    The architecture layers caching on top of BasicS3Dict to provide:
    - Fast local access for frequently accessed items
    - Efficient batch operations
    - ETag-based change detection for mutable dictionaries
    - Optimized append-only performance when append_only=True

    Attributes set by ``__init__``:
        _main_dict: The BasicS3Dict that talks to S3.
        local_cache: OverlappingMultiDict of FileDirDict instances rooted at
            ``base_dir``; one sub-dict for values and, in mutable mode only,
            one for ETags.
        _data_cache: The value sub-dict of ``local_cache``.
        _etag_cache: The ETag sub-dict of ``local_cache`` (mutable mode only).
        etag_serialization_format: ``"<serialization_format>_etag"``
            (mutable mode only).
        _cached_dict: AppendOnlyDictCached or MutableDictCached wrapper that
            all item operations are delegated to.
    """

    def __init__(self, bucket_name: str = "my_bucket",
                 region: Optional[str] = None,
                 root_prefix: str = "",
                 base_dir: str = S3DICT_NEW_DEFAULT_BASE_DIR,
                 serialization_format: str = "pkl",
                 digest_len: int = 8,
                 append_only: bool = False,
                 base_class_for_values: Optional[type] = None,
                 *args, **kwargs):
        """Initialize an S3-backed persistent dictionary with local caching.

        Args:
            bucket_name: Name of the S3 bucket to use.
            region: AWS region for the bucket; passed through to BasicS3Dict
                unchanged (``None`` is allowed).
            root_prefix: Common S3 key prefix under which all objects are
                stored.
            base_dir: Local directory path for caching.
            serialization_format: File extension/format for stored values.
            digest_len: Number of base32 MD5 hash characters for collision
                prevention. In this constructor it is forwarded only to the
                FileDirDict-based local cache.
            append_only: If True, prevents modification/deletion of existing
                items and selects the append-only cached wrapper.
            base_class_for_values: Optional base class that all stored values
                must inherit from.
            *args: Additional positional arguments. Accepted but not used by
                this constructor.
            **kwargs: Additional keyword arguments. Accepted but not used by
                this constructor.
        """
        super().__init__(append_only=append_only,
                         base_class_for_values=base_class_for_values,
                         serialization_format=serialization_format)

        # Create the main S3 storage using BasicS3Dict.
        self._main_dict = BasicS3Dict(
            bucket_name=bucket_name,
            region=region,
            root_prefix=root_prefix,
            serialization_format=serialization_format,
            append_only=append_only,
            base_class_for_values=base_class_for_values,
        )

        # Per-sub-dict parameters for the local cache: one sub-dict for the
        # values themselves (keyed by the serialization format name)...
        individual_subdicts_params = {self.serialization_format: {}}

        if not append_only:
            # ...and, for mutable dictionaries only, a second sub-dict that
            # stores ETags as plain strings.
            self.etag_serialization_format = (
                f"{self.serialization_format}_etag")
            individual_subdicts_params[self.etag_serialization_format] = {
                "base_class_for_values": str}

        # Create local cache using OverlappingMultiDict with FileDirDict.
        self.local_cache = OverlappingMultiDict(
            dict_type=FileDirDict,
            shared_subdicts_params={
                "base_dir": base_dir,
                "append_only": append_only,
                "base_class_for_values": base_class_for_values,
                "digest_len": digest_len,
            },
            **individual_subdicts_params)

        # The data cache is exposed as an attribute named after the format.
        self._data_cache = getattr(self.local_cache,
                                   self.serialization_format)

        # Create the appropriate cached wrapper.
        if append_only:
            # Append-only mode needs no ETag cache.
            self._cached_dict = AppendOnlyDictCached(
                main_dict=self._main_dict,
                data_cache=self._data_cache,
            )
        else:
            # Mutable mode pairs the data cache with an ETag cache.
            self._etag_cache = getattr(self.local_cache,
                                       self.etag_serialization_format)
            self._cached_dict = MutableDictCached(
                main_dict=self._main_dict,
                data_cache=self._data_cache,
                etag_cache=self._etag_cache,
            )

    @property
    def digest_len(self) -> int:
        """Get the digest length used for collision prevention.

        The value is read from the local data cache.
        """
        return self._data_cache.digest_len

    def get_params(self):
        """Return configuration parameters as a dictionary.

        Starts from the main (S3) dict's parameters, adds ``base_dir`` and
        ``digest_len`` taken from the local data cache, and returns the
        result of ``sort_dict_by_keys`` applied to the combined mapping.
        """
        params = self._main_dict.get_params()
        cache_params = self._data_cache.get_params()

        # Add cache-specific params.
        params["base_dir"] = cache_params["base_dir"]
        params["digest_len"] = cache_params["digest_len"]

        return sort_dict_by_keys(params)

    @property
    def base_url(self) -> str:
        """Get the base S3 URL (as reported by the main dict)."""
        return self._main_dict.base_url

    @property
    def base_dir(self) -> str:
        """Get the base directory for local cache."""
        return self._data_cache.base_dir

    def __contains__(self, key: NonEmptyPersiDictKey) -> bool:
        """Check if key exists in the dictionary (via the cached wrapper)."""
        return key in self._cached_dict

    def __getitem__(self, key: NonEmptyPersiDictKey) -> Any:
        """Get item from dictionary (via the cached wrapper)."""
        return self._cached_dict[key]

    def __setitem__(self, key: NonEmptyPersiDictKey, value: Any) -> None:
        """Set item in dictionary (via the cached wrapper)."""
        self._cached_dict[key] = value

    def __delitem__(self, key: NonEmptyPersiDictKey) -> None:
        """Delete item from dictionary (via the cached wrapper)."""
        del self._cached_dict[key]

    def __len__(self) -> int:
        """Get number of items in dictionary (via the cached wrapper)."""
        return len(self._cached_dict)

    def _generic_iter(self, result_type: set[str]):
        """Generic iteration over dictionary items.

        Args:
            result_type: Passed unchanged to the cached wrapper's
                ``_generic_iter``.
        """
        return self._cached_dict._generic_iter(result_type)

    def get_subdict(self, key: PersiDictKey):
        """Get a subdictionary for the given key prefix.

        The call is delegated to the main (S3) dict.
        """
        # NOTE(review): this returns whatever BasicS3Dict.get_subdict returns,
        # not an S3Dict_FileDirCached, so the result presumably bypasses the
        # local cache -- confirm this is intended.
        return self._main_dict.get_subdict(key)

    def timestamp(self, key: NonEmptyPersiDictKey):
        """Get the timestamp of when the item was last modified."""
        return self._cached_dict.timestamp(key)

    # Additional methods that might be needed for ETag support.
    def get_item_if_etag_changed(self, key: NonEmptyPersiDictKey,
                                 etag: Optional[str]):
        """Get item only if ETag has changed (for mutable dicts).

        If the cached wrapper does not provide ``get_item_if_etag_changed``,
        the ``etag`` argument is ignored and the item is simply fetched.
        """
        if hasattr(self._cached_dict, 'get_item_if_etag_changed'):
            return self._cached_dict.get_item_if_etag_changed(key, etag)
        # For append-only dicts, just get the item.
        return self._cached_dict[key]

    def set_item_get_etag(self, key: NonEmptyPersiDictKey, value: Any):
        """Set item and return ETag (for mutable dicts).

        If the cached wrapper does not provide ``set_item_get_etag``, the
        item is stored with a plain assignment and ``None`` is returned.
        """
        if hasattr(self._cached_dict, 'set_item_get_etag'):
            return self._cached_dict.set_item_get_etag(key, value)
        # For append-only dicts, just set the item.
        self._cached_dict[key] = value
        return None

    def discard(self, key: NonEmptyPersiDictKey) -> bool:
        """Delete an item without raising an exception if it doesn't exist.

        This method fixes the issue where cached dictionaries return multiple
        success counts for a single key deletion.

        Args:
            key: Key to delete.

        Returns:
            bool: True if the item existed and was deleted; False otherwise
            (i.e. whenever the deletion raised KeyError).
        """
        key = NonEmptySafeStrTuple(key)

        try:
            del self[key]
        except KeyError:
            return False
        return True
210
+
211
+
# Alias for backward compatibility: the name `S3Dict` refers to the
# FileDirDict-cached implementation.
S3Dict = S3Dict_FileDirCached


# Register the class with the `parameterizable` package.
parameterizable.register_parameterizable_class(S3Dict_FileDirCached)
@@ -10,47 +10,55 @@ from botocore.exceptions import ClientError
10
10
  import parameterizable
11
11
  from parameterizable.dict_sorter import sort_dict_by_keys
12
12
 
13
- from .safe_str_tuple import SafeStrTuple
13
+ from .safe_str_tuple import SafeStrTuple, NonEmptySafeStrTuple
14
14
  from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
15
- from .persi_dict import PersiDict
16
- from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
17
- from .file_dir_dict import FileDirDict, PersiDictKey, non_empty_persidict_key
15
+ from .persi_dict import PersiDict, NonEmptyPersiDictKey
16
+ from .singletons import Joker, EXECUTION_IS_COMPLETE, ETagHasNotChangedFlag
17
+ from .file_dir_dict import FileDirDict, PersiDictKey
18
18
  from .overlapping_multi_dict import OverlappingMultiDict
19
19
 
20
20
  S3DICT_DEFAULT_BASE_DIR = "__s3_dict__"
21
21
 
22
- class S3Dict(PersiDict):
23
- """A persistent dictionary that stores key-value pairs as S3 objects.
22
+ class S3Dict_Legacy(PersiDict):
23
+ """A persistent dictionary that stores key-value pairs as S3 objects with local caching.
24
24
 
25
25
  Each key-value pair is stored as a separate S3 object in the specified bucket.
26
+ S3Dict_Legacy provides intelligent local caching to minimize S3 API calls and improve
27
+ performance by using conditional requests with ETags to detect changes.
26
28
 
27
29
  A key can be either a string (object name without file extension) or a sequence
28
30
  of strings representing a hierarchical path (folder structure ending with an
29
31
  object name). Values can be instances of any Python type and are serialized
30
32
  to S3 objects.
31
33
 
32
- S3Dict supports multiple serialization formats:
34
+ S3Dict_Legacy supports multiple serialization formats:
33
35
  - Binary storage using pickle ('pkl' format)
34
36
  - Human-readable text using jsonpickle ('json' format)
35
37
  - Plain text for string values (other formats)
36
38
 
39
+ Key Features:
40
+ - Local file-based caching for improved read performance
41
+ - ETag-based conditional requests to minimize unnecessary downloads
42
+ - Automatic cache invalidation when S3 objects change
43
+ - Seamless fallback to S3 when cached data is stale
44
+
37
45
  Note:
38
46
  Unlike native Python dictionaries, insertion order is not preserved.
39
- Operations may incur S3 API costs and network latency.
47
+ Operations may incur S3 API costs and network latency, though caching
48
+ significantly reduces this overhead for repeated access patterns.
40
49
  """
41
50
  region: str
42
51
  bucket_name: str
43
52
  root_prefix: str
44
- file_type: str
45
53
  _base_dir: str
46
54
 
47
55
  def __init__(self, bucket_name: str = "my_bucket",
48
56
  region: str = None,
49
57
  root_prefix: str = "",
50
58
  base_dir: str = S3DICT_DEFAULT_BASE_DIR,
51
- file_type: str = "pkl",
52
- immutable_items: bool = False,
59
+ serialization_format: str = "pkl",
53
60
  digest_len: int = 8,
61
+ append_only: bool = False,
54
62
  base_class_for_values: Optional[type] = None,
55
63
  *args, **kwargs):
56
64
  """Initialize an S3-backed persistent dictionary.
@@ -64,15 +72,15 @@ class S3Dict(PersiDict):
64
72
  stored. A trailing slash is automatically added if missing.
65
73
  base_dir: Local directory path used for temporary files and
66
74
  local caching of S3 objects.
67
- file_type: File extension/format for stored values. Supported formats:
75
+ serialization_format: File extension/format for stored values. Supported formats:
68
76
  'pkl' (pickle), 'json' (jsonpickle), or custom text formats.
69
- immutable_items: If True, prevents modification of existing items
77
+ append_only: If True, prevents modification of existing items
70
78
  after they are initially stored.
71
79
  digest_len: Number of base32 MD5 hash characters appended to key
72
80
  elements to prevent case-insensitive filename collisions.
73
81
  Set to 0 to disable collision prevention.
74
82
  base_class_for_values: Optional base class that all stored values
75
- must inherit from. When specified (and not str), file_type
83
+ must inherit from. When specified (and not str), serialization_format
76
84
  must be 'pkl' or 'json' for proper serialization.
77
85
  *args: Additional positional arguments (ignored, reserved for compatibility).
78
86
  **kwargs: Additional keyword arguments (ignored, reserved for compatibility).
@@ -82,28 +90,29 @@ class S3Dict(PersiDict):
82
90
  allow. Network connectivity and valid AWS credentials are required.
83
91
  """
84
92
 
85
- super().__init__(immutable_items = immutable_items
86
- , digest_len = digest_len
87
- , base_class_for_values=base_class_for_values)
88
- self.file_type = file_type
89
- self.etag_file_type = f"{file_type}_etag"
93
+ super().__init__(append_only=append_only,
94
+ base_class_for_values=base_class_for_values,
95
+ serialization_format=serialization_format)
96
+ individual_subdicts_params = {self.serialization_format: {}}
97
+
98
+ if not append_only:
99
+ self.etag_serialization_format = f"{self.serialization_format}_etag"
100
+ individual_subdicts_params[self.etag_serialization_format] = {
101
+ "base_class_for_values": str}
90
102
 
91
103
  self.local_cache = OverlappingMultiDict(
92
104
  dict_type=FileDirDict,
93
105
  shared_subdicts_params={
94
106
  "base_dir": base_dir,
95
- "immutable_items": immutable_items,
107
+ "append_only": append_only,
96
108
  "base_class_for_values": base_class_for_values,
97
109
  "digest_len": digest_len
98
110
  },
99
- **{
100
- self.file_type: {},
101
- self.etag_file_type: {"base_class_for_values": str}
102
- }
103
- )
111
+ **individual_subdicts_params)
104
112
 
105
- self.main_cache = getattr(self.local_cache, self.file_type)
106
- self.etag_cache = getattr(self.local_cache, self.etag_file_type)
113
+ self.main_cache = getattr(self.local_cache, self.serialization_format)
114
+ if not self.append_only:
115
+ self.etag_cache = getattr(self.local_cache, self.etag_serialization_format)
107
116
 
108
117
  self.region = region
109
118
  if region is None:
@@ -138,10 +147,14 @@ class S3Dict(PersiDict):
138
147
 
139
148
  self.bucket_name = bucket_name
140
149
 
141
- self.root_prefix=root_prefix
150
+ self.root_prefix = root_prefix
142
151
  if len(self.root_prefix) and self.root_prefix[-1] != "/":
143
152
  self.root_prefix += "/"
144
153
 
154
+ @property
155
+ def digest_len(self) -> int:
156
+ return self.main_cache.digest_len
157
+
145
158
 
146
159
  def get_params(self):
147
160
  """Return configuration parameters as a dictionary.
@@ -187,23 +200,23 @@ class S3Dict(PersiDict):
187
200
  return self.main_cache.base_dir
188
201
 
189
202
 
190
- def _build_full_objectname(self, key: PersiDictKey) -> str:
203
+ def _build_full_objectname(self, key: NonEmptyPersiDictKey) -> str:
191
204
  """Convert a key into a full S3 object key.
192
205
 
193
206
  Args:
194
207
  key: Dictionary key (string or sequence of strings) or SafeStrTuple.
195
208
 
196
209
  Returns:
197
- str: The complete S3 object key including root_prefix and file_type
210
+ str: The complete S3 object key including root_prefix and serialization_format
198
211
  extension, with digest-based collision prevention applied if enabled.
199
212
  """
200
- key = non_empty_persidict_key(key)
201
- key = sign_safe_str_tuple(key, self.digest_len)
202
- objectname = self.root_prefix + "/".join(key)+ "." + self.file_type
213
+ key = NonEmptySafeStrTuple(key)
214
+ key = sign_safe_str_tuple(key, 0)
215
+ objectname = self.root_prefix + "/".join(key) + "." + self.serialization_format
203
216
  return objectname
204
217
 
205
218
 
206
- def __contains__(self, key: PersiDictKey) -> bool:
219
+ def __contains__(self, key: NonEmptyPersiDictKey) -> bool:
207
220
  """Check if the specified key exists in the dictionary.
208
221
 
209
222
  For immutable dictionaries, checks the local cache first. Otherwise,
@@ -216,8 +229,8 @@ class S3Dict(PersiDict):
216
229
  bool: True if the key exists in S3 (or local cache for immutable
217
230
  items), False otherwise.
218
231
  """
219
- key = non_empty_persidict_key(key)
220
- if self.immutable_items and key in self.main_cache:
232
+ key = NonEmptySafeStrTuple(key)
233
+ if self.append_only and key in self.main_cache:
221
234
  return True
222
235
  try:
223
236
  obj_name = self._build_full_objectname(key)
@@ -225,14 +238,14 @@ class S3Dict(PersiDict):
225
238
  return True
226
239
  except ClientError as e:
227
240
  if e.response['ResponseMetadata']['HTTPStatusCode'] == 404:
228
- self.main_cache.delete_if_exists(key)
229
- self.etag_cache.delete_if_exists(key)
241
+ self.main_cache.discard(key)
242
+ self.etag_cache.discard(key)
230
243
  return False
231
244
  else:
232
245
  raise
233
246
 
234
247
 
235
- def __getitem__(self, key: PersiDictKey) -> Any:
248
+ def __getitem__(self, key: NonEmptyPersiDictKey) -> Any:
236
249
  """Retrieve the value stored for a key.
237
250
 
238
251
  For immutable dictionaries with cached values, returns the cached copy.
@@ -249,15 +262,15 @@ class S3Dict(PersiDict):
249
262
  KeyError: If the key does not exist in S3.
250
263
  """
251
264
 
252
- key = non_empty_persidict_key(key)
265
+ key = NonEmptySafeStrTuple(key)
253
266
 
254
- if self.immutable_items and key in self.main_cache:
267
+ if self.append_only and key in self.main_cache:
255
268
  return self.main_cache[key]
256
269
 
257
270
  obj_name = self._build_full_objectname(key)
258
271
 
259
272
  cached_etag = None
260
- if not self.immutable_items and key in self.main_cache and key in self.etag_cache:
273
+ if not self.append_only and key in self.main_cache and key in self.etag_cache:
261
274
  cached_etag = self.etag_cache[key]
262
275
 
263
276
  try:
@@ -268,19 +281,22 @@ class S3Dict(PersiDict):
268
281
  response = self.s3_client.get_object(**get_kwargs)
269
282
 
270
283
  # 200 OK: object was downloaded, either because it's new or changed.
271
- s3_etag = response.get("ETag")
272
284
  body = response['Body']
273
285
 
274
286
  # Deserialize and cache the S3 object content
275
- if self.file_type == 'json':
287
+ if self.serialization_format == 'json':
276
288
  deserialized_value = jsonpickle.loads(body.read().decode('utf-8'))
277
- elif self.file_type == 'pkl':
289
+ elif self.serialization_format == 'pkl':
278
290
  deserialized_value = joblib.load(body)
279
291
  else:
280
292
  deserialized_value = body.read().decode('utf-8')
281
293
 
282
294
  self.main_cache[key] = deserialized_value
283
- self.etag_cache[key] = s3_etag
295
+
296
+ if not self.append_only:
297
+ # Cache the S3 ETag for future conditional requests
298
+ s3_etag = response.get("ETag")
299
+ self.etag_cache[key] = s3_etag
284
300
 
285
301
  except ClientError as e:
286
302
  if e.response['ResponseMetadata']['HTTPStatusCode'] == 304:
@@ -295,7 +311,7 @@ class S3Dict(PersiDict):
295
311
  return self.main_cache[key]
296
312
 
297
313
 
298
- def __setitem__(self, key: PersiDictKey, value: Any):
314
+ def __setitem__(self, key: NonEmptyPersiDictKey, value: Any):
299
315
  """Store a value for a key in both S3 and local cache.
300
316
 
301
317
  Handles special joker values (KEEP_CURRENT, DELETE_CURRENT) for
@@ -304,22 +320,20 @@ class S3Dict(PersiDict):
304
320
  the S3 ETag for efficient future retrievals.
305
321
 
306
322
  Args:
307
- key: Dictionary key (string or sequence of strings) or SafeStrTuple.
323
+ key: Dictionary key (string or sequence of strings) or NonEmptyPersiDictKey.
308
324
  value: Value to store, or a joker command (KEEP_CURRENT or
309
- DELETE_CURRENT from the jokers module).
325
+ DELETE_CURRENT).
310
326
 
311
327
  Raises:
312
328
  KeyError: If attempting to modify an existing item when
313
- immutable_items is True.
329
+ append_only is True.
314
330
  TypeError: If value is a PersiDict instance or does not match
315
331
  the required base_class_for_values when specified.
316
332
  """
317
333
 
318
- key = non_empty_persidict_key(key)
319
- PersiDict.__setitem__(self, key, value)
320
- if isinstance(value, Joker):
321
- # Joker values (KEEP_CURRENT, DELETE_CURRENT) are handled by base class
322
- return
334
+ key = NonEmptySafeStrTuple(key)
335
+ if self._process_setitem_args(key, value) is EXECUTION_IS_COMPLETE:
336
+ return None
323
337
 
324
338
  obj_name = self._build_full_objectname(key)
325
339
 
@@ -330,6 +344,11 @@ class S3Dict(PersiDict):
330
344
  file_path = self.main_cache._build_full_path(key)
331
345
  self.s3_client.upload_file(file_path, self.bucket_name, obj_name)
332
346
 
347
+ if self.append_only:
348
+ # For immutable items, the local cache is authoritative; no need to
349
+ # verify ETag from S3 as the item cannot change after initial upload
350
+ return
351
+
333
352
  try:
334
353
  # Cache the S3 ETag for efficient conditional requests on future reads
335
354
  head = self.s3_client.head_object(
@@ -337,24 +356,25 @@ class S3Dict(PersiDict):
337
356
  self.etag_cache[key] = head.get("ETag")
338
357
  except ClientError:
339
358
  # Remove stale ETag on failure to force fresh downloads later
340
- self.etag_cache.delete_if_exists(key)
359
+ self.etag_cache.discard(key)
341
360
 
342
361
 
343
- def __delitem__(self, key: PersiDictKey):
362
+ def __delitem__(self, key: NonEmptyPersiDictKey):
344
363
  """Delete the stored value for a key from both S3 and local cache.
345
364
 
346
365
  Args:
347
- key: Dictionary key (string or sequence of strings) or SafeStrTuple.
366
+ key: Dictionary key (string or sequence of strings)
367
+ or NonEmptyPersiDictKey.
348
368
 
349
369
  Raises:
350
- KeyError: If immutable_items is True, or if the key does not exist.
370
+ KeyError: If append_only is True, or if the key does not exist.
351
371
  """
352
- key = non_empty_persidict_key(key)
353
- PersiDict.__delitem__(self, key)
372
+ key = NonEmptySafeStrTuple(key)
373
+ self._process_delitem_args(key)
354
374
  obj_name = self._build_full_objectname(key)
355
- self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
356
- self.etag_cache.delete_if_exists(key)
357
- self.main_cache.delete_if_exists(key)
375
+ self.s3_client.delete_object(Bucket=self.bucket_name, Key=obj_name)
376
+ self.etag_cache.discard(key)
377
+ self.main_cache.discard(key)
358
378
 
359
379
 
360
380
  def __len__(self) -> int:
@@ -370,11 +390,11 @@ class S3Dict(PersiDict):
370
390
  """
371
391
 
372
392
  num_files = 0
373
- suffix = "." + self.file_type
393
+ suffix = "." + self.serialization_format
374
394
 
375
395
  paginator = self.s3_client.get_paginator("list_objects_v2")
376
396
  page_iterator = paginator.paginate(
377
- Bucket=self.bucket_name, Prefix = self.root_prefix)
397
+ Bucket=self.bucket_name, Prefix=self.root_prefix)
378
398
 
379
399
  for page in page_iterator:
380
400
  contents = page.get("Contents")
@@ -412,10 +432,10 @@ class S3Dict(PersiDict):
412
432
  unsupported field names).
413
433
  """
414
434
 
415
- PersiDict._generic_iter(self, result_type)
435
+ self._process_generic_iter_args(result_type)
416
436
 
417
- suffix = "." + self.file_type
418
- ext_len = len(self.file_type) + 1
437
+ suffix = "." + self.serialization_format
438
+ ext_len = len(self.serialization_format) + 1
419
439
  prefix_len = len(self.root_prefix)
420
440
 
421
441
  def splitter(full_name: str) -> SafeStrTuple:
@@ -446,7 +466,7 @@ class S3Dict(PersiDict):
446
466
  """
447
467
  paginator = self.s3_client.get_paginator("list_objects_v2")
448
468
  page_iterator = paginator.paginate(
449
- Bucket=self.bucket_name, Prefix = self.root_prefix)
469
+ Bucket=self.bucket_name, Prefix=self.root_prefix)
450
470
 
451
471
  for page in page_iterator:
452
472
  contents = page.get("Contents")
@@ -462,7 +482,7 @@ class S3Dict(PersiDict):
462
482
 
463
483
  if "keys" in result_type:
464
484
  key_to_return = unsign_safe_str_tuple(
465
- obj_key, self.digest_len)
485
+ obj_key, 0)
466
486
  to_return.append(key_to_return)
467
487
 
468
488
  if "values" in result_type:
@@ -480,10 +500,11 @@ class S3Dict(PersiDict):
480
500
  return step()
481
501
 
482
502
 
483
- def get_subdict(self, key: PersiDictKey) -> S3Dict:
503
+ def get_subdict(self, key: PersiDictKey) -> S3Dict_Legacy:
484
504
  """Create a subdictionary scoped to items with the specified prefix.
485
505
 
486
506
  Returns an empty subdictionary if no items exist under the prefix.
507
+ If the prefix is empty, the entire dictionary is returned.
487
508
  This method is not part of the standard Python dictionary interface.
488
509
 
489
510
  Args:
@@ -491,35 +512,34 @@ class S3Dict(PersiDict):
491
512
  used to scope items stored under this dictionary.
492
513
 
493
514
  Returns:
494
- S3Dict: A new S3Dict instance with root_prefix extended by the given
495
- key, sharing the parent's bucket, region, file_type, and other
515
+ S3Dict_Legacy: A new S3Dict instance with root_prefix extended by the given
516
+ key, sharing the parent's bucket, region, serialization_format, and other
496
517
  configuration settings.
497
518
  """
498
519
 
499
520
  key = SafeStrTuple(key)
500
521
  if len(key):
501
- key = sign_safe_str_tuple(key, self.digest_len)
502
- full_root_prefix = self.root_prefix + "/".join(key)
522
+ key = sign_safe_str_tuple(key, 0)
523
+ full_root_prefix = self.root_prefix + "/".join(key)
503
524
  else:
504
525
  full_root_prefix = self.root_prefix
505
526
 
506
527
  new_dir_path = self.main_cache._build_full_path(
507
- key, create_subdirs = True, is_file_path = False)
508
-
509
- new_dict = S3Dict(
510
- bucket_name = self.bucket_name
511
- , region = self.region
512
- , root_prefix = full_root_prefix
513
- , base_dir = new_dir_path
514
- , file_type = self.file_type
515
- , immutable_items = self.immutable_items
516
- , digest_len = self.digest_len
517
- , base_class_for_values = self.base_class_for_values)
528
+ key, create_subdirs=True, is_file_path=False)
529
+
530
+ new_dict = S3Dict_Legacy(
531
+ bucket_name=self.bucket_name,
532
+ region=self.region,
533
+ root_prefix=full_root_prefix,
534
+ base_dir=new_dir_path,
535
+ serialization_format=self.serialization_format,
536
+ append_only=self.append_only,
537
+ base_class_for_values=self.base_class_for_values)
518
538
 
519
539
  return new_dict
520
540
 
521
541
 
522
- def timestamp(self, key: PersiDictKey) -> float:
542
+ def timestamp(self, key: NonEmptyPersiDictKey) -> float:
523
543
  """Get the last modification timestamp for a key.
524
544
 
525
545
  This method is not part of the standard Python dictionary interface.
@@ -535,10 +555,11 @@ class S3Dict(PersiDict):
535
555
  Raises:
536
556
  KeyError: If the key does not exist in S3.
537
557
  """
538
- key = non_empty_persidict_key(key)
558
+ key = NonEmptySafeStrTuple(key)
539
559
  obj_name = self._build_full_objectname(key)
540
560
  response = self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
541
561
  return response["LastModified"].timestamp()
542
562
 
543
563
 
544
- parameterizable.register_parameterizable_class(S3Dict)
564
+ parameterizable.register_parameterizable_class(S3Dict_Legacy)
565
+