persidict 0.38.0__py3-none-any.whl → 0.104.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- persidict/__init__.py +41 -24
- persidict/basic_s3_dict.py +595 -0
- persidict/cached_appendonly_dict.py +247 -0
- persidict/cached_mutable_dict.py +248 -0
- persidict/empty_dict.py +171 -0
- persidict/file_dir_dict.py +131 -123
- persidict/local_dict.py +502 -0
- persidict/overlapping_multi_dict.py +23 -15
- persidict/persi_dict.py +281 -148
- persidict/s3_dict_file_dir_cached.py +215 -0
- persidict/{s3_dict.py → s3_dict_legacy.py} +111 -90
- persidict/safe_chars.py +13 -0
- persidict/safe_str_tuple.py +28 -6
- persidict/singletons.py +231 -0
- persidict/write_once_dict.py +48 -32
- {persidict-0.38.0.dist-info → persidict-0.104.0.dist-info}/METADATA +34 -24
- persidict-0.104.0.dist-info/RECORD +19 -0
- {persidict-0.38.0.dist-info → persidict-0.104.0.dist-info}/WHEEL +1 -1
- persidict/.DS_Store +0 -0
- persidict/jokers.py +0 -99
- persidict-0.38.0.dist-info/RECORD +0 -14
persidict/local_dict.py
ADDED
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
from typing import Any, Optional, Iterable
|
|
6
|
+
|
|
7
|
+
import parameterizable
|
|
8
|
+
|
|
9
|
+
from .persi_dict import PersiDict, NonEmptyPersiDictKey
|
|
10
|
+
from .safe_str_tuple import SafeStrTuple, NonEmptySafeStrTuple
|
|
11
|
+
from .singletons import EXECUTION_IS_COMPLETE
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class _RAMBackend:
|
|
15
|
+
"""In-memory hierarchical storage backing LocalDict.
|
|
16
|
+
|
|
17
|
+
This lightweight backend models a directory-like tree entirely in RAM and
|
|
18
|
+
is used by LocalDict to provide a PersiDict-compliant interface without any
|
|
19
|
+
disk or network I/O. Keys are sequences of safe strings. Each path segment
|
|
20
|
+
maps to a child RAMBackend node, while leaf entries are stored in a values
|
|
21
|
+
bucket per serialization_format.
|
|
22
|
+
|
|
23
|
+
Attributes:
|
|
24
|
+
subdicts (dict[str, _RAMBackend]):
|
|
25
|
+
Mapping of first-level key segment to a child RAMBackend representing
|
|
26
|
+
the corresponding subtree.
|
|
27
|
+
values (dict[str, dict[str, tuple[Any, float]]]):
|
|
28
|
+
Mapping of serialization_format to a dictionary of leaf-name -> (value, timestamp)
|
|
29
|
+
pairs. The timestamp is a POSIX float seconds value (time.time()).
|
|
30
|
+
|
|
31
|
+
Notes:
|
|
32
|
+
- This backend is intentionally minimal and does not enforce character
|
|
33
|
+
safety of key segments or serialization_format; that validation is handled by
|
|
34
|
+
higher-level classes (e.g., PersiDict/LocalDict).
|
|
35
|
+
- Not thread-safe or process-safe. If used concurrently, external
|
|
36
|
+
synchronization is required.
|
|
37
|
+
- Memory-only: all data is lost when the object is discarded.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def __init__(self):
|
|
41
|
+
"""Initialize an empty in-memory tree.
|
|
42
|
+
|
|
43
|
+
Creates empty containers for child subtrees and for value buckets
|
|
44
|
+
grouped by serialization_format. No arguments; the backend starts empty.
|
|
45
|
+
|
|
46
|
+
Attributes initialized:
|
|
47
|
+
subdicts: Empty mapping for first-level child nodes.
|
|
48
|
+
values: Empty mapping for per-serialization_format value buckets.
|
|
49
|
+
"""
|
|
50
|
+
self.subdicts: dict[str, _RAMBackend] = {}
|
|
51
|
+
self.values: dict[str, dict[str, tuple[Any, float]]] = {}
|
|
52
|
+
|
|
53
|
+
def child(self, name: str) -> "_RAMBackend":
|
|
54
|
+
"""Return a child node for the given path segment, creating if missing.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
name (str): A single safe string segment representing the first-level
|
|
58
|
+
part of a hierarchical key.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
_RAMBackend: The existing or newly created child backend for the
|
|
62
|
+
provided segment.
|
|
63
|
+
|
|
64
|
+
Notes:
|
|
65
|
+
- This method mutates the structure by creating a child node when
|
|
66
|
+
it does not exist yet.
|
|
67
|
+
"""
|
|
68
|
+
child_backend = self.subdicts.get(name)
|
|
69
|
+
if child_backend is None:
|
|
70
|
+
child_backend = _RAMBackend()
|
|
71
|
+
self.subdicts[name] = child_backend
|
|
72
|
+
return child_backend
|
|
73
|
+
|
|
74
|
+
def get_values_bucket(self, serialization_format: str) -> dict[str, tuple[Any, float]]:
|
|
75
|
+
"""Return the per-serialization_format bucket for leaf values, creating if absent.
|
|
76
|
+
|
|
77
|
+
The bucket maps a leaf key (final segment string) to a tuple of
|
|
78
|
+
(value, timestamp). The timestamp is the POSIX time when the value was
|
|
79
|
+
last written.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
serialization_format (str): Object type label under which values are
|
|
83
|
+
grouped (e.g., "pkl", "json"). No validation is performed here.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
dict[str, tuple[Any, float]]: The mutable mapping for this serialization_format.
|
|
87
|
+
Modifications affect the backend state directly.
|
|
88
|
+
"""
|
|
89
|
+
bucket = self.values.get(serialization_format)
|
|
90
|
+
if bucket is None:
|
|
91
|
+
bucket = {}
|
|
92
|
+
self.values[serialization_format] = bucket
|
|
93
|
+
return bucket
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class LocalDict(PersiDict):
    """In-memory PersiDict backed by a RAM-only hierarchical store.

    LocalDict is intended to mirror FileDirDict semantics while keeping all
    data in process memory inside a tree of ``_RAMBackend`` nodes. Keys are
    hierarchical sequences of safe strings (SafeStrTuple). Values are stored
    per serialization_format together with the time of their last write.

    Attributes:
        append_only (bool): If True, items cannot be modified or deleted
            after initial creation.
        base_class_for_values (type | None): Optional base class that all
            stored values must be instances of.
        serialization_format (str): Label (e.g., "pkl", "json") used as a
            namespace for values inside the backend.
        _backend (_RAMBackend): Root node of the tree that holds the data.

    Notes:
        - Not thread-safe or process-safe; use external synchronization if
          accessed concurrently.
        - Memory-only: nothing is written to disk or network.
        - Values are deep-copied on write but returned by reference on
          read, so mutating a returned object mutates the stored one.
    """

    def __init__(self,
                 backend: Optional[_RAMBackend] = None,
                 serialization_format: str = "pkl",
                 append_only: bool = False,
                 base_class_for_values: Optional[type] = None,
                 prune_interval: Optional[int] = 64, *args, **kwargs):
        """Initialize an in-memory persistent dictionary.

        Args:
            backend (_RAMBackend | None): Existing tree node to use as the
                root. If None, a new empty backend is created.
            serialization_format (str): Namespace label for stored values.
                Defaults to "pkl".
            append_only (bool): If True, items cannot be modified or deleted
                after the first write. Defaults to False.
            base_class_for_values (type | None): Optional base class that
                stored values must be instances of. Defaults to None.
            prune_interval (int | None): If None or <= 0, pruning of empty
                nodes is disabled. Otherwise pruning runs once every N
                destructive operations (deletions/clears). A value that
                cannot be converted to int falls back to 64.
            *args, **kwargs: Accepted and ignored.

        Raises:
            ValueError, TypeError: Whatever ``PersiDict.__init__`` raises
                for the forwarded arguments.
        """
        # Explicit None check: an explicitly supplied backend must always be
        # used, independent of how its truthiness is defined.
        self._backend = backend if backend is not None else _RAMBackend()

        # Pruning throttling
        if prune_interval is None:
            self._prune_interval = None
        else:
            try:
                interval = int(prune_interval)
            except (TypeError, ValueError):
                interval = 64
            self._prune_interval = interval if interval > 0 else None
        self._ops_since_prune: int = 0

        PersiDict.__init__(self,
                           append_only=append_only,
                           base_class_for_values=base_class_for_values,
                           serialization_format=serialization_format)

    def get_params(self):
        """Return constructor parameters describing this instance.

        The backend object itself is included by reference, so a LocalDict
        built from these parameters shares the same in-memory store.

        NOTE(review): ``prune_interval`` is not part of the returned
        parameters, so a reconstructed instance uses the default interval.

        Returns:
            dict: Parameters sorted by key.
        """
        params = dict(
            backend=self._backend,
            append_only=self.append_only,
            base_class_for_values=self.base_class_for_values,
            serialization_format=self.serialization_format,
        )
        # Keys are unique strings, so tuple ordering only compares keys.
        return dict(sorted(params.items()))

    # No base_url/base_dir override: keep defaults (None)

    def __len__(self) -> int:
        """Return the number of items stored for this serialization_format.

        Walks the whole tree below the root of this instance and sums the
        sizes of the buckets belonging to the current serialization_format.

        Returns:
            int: Total number of items.
        """
        fmt = self.serialization_format
        total = 0
        pending = [self._backend]
        while pending:
            node = pending.pop()
            total += len(node.values.get(fmt, ()))
            pending.extend(node.subdicts.values())
        return total

    def clear(self) -> None:
        """Remove all items stored under this serialization_format.

        Buckets of other serialization formats are left untouched.

        Raises:
            KeyError: If append_only is True.
        """
        if self.append_only:
            raise KeyError("Can't delete an immutable key-value pair")

        fmt = self.serialization_format
        pending = [self._backend]
        while pending:
            node = pending.pop()
            node.values.pop(fmt, None)
            pending.extend(node.subdicts.values())
        # A clear counts as one destructive operation for throttled pruning.
        self._maybe_prune()

    def _maybe_prune(self) -> None:
        """Count one destructive operation and prune when the threshold is hit.

        Pruning traverses the whole tree, so it is only performed once every
        ``self._prune_interval`` deletions/clears. Does nothing when pruning
        is disabled (``self._prune_interval is None``).
        """
        if self._prune_interval is None:
            return
        self._ops_since_prune += 1
        if self._ops_since_prune >= self._prune_interval:
            self._prune_empty_subtrees(self._backend)
            self._ops_since_prune = 0

    def _prune_empty_subtrees(self, node: Optional[_RAMBackend] = None) -> bool:
        """Remove empty value buckets and empty child nodes below ``node``.

        Children are processed first (depth-first); a child that reports
        itself empty is detached from its parent. Afterwards every empty
        bucket of this node is dropped, for any serialization_format.

        Args:
            node (_RAMBackend | None): Node to prune; defaults to the root.

        Returns:
            bool: True if the node has neither children nor value buckets
            left after pruning; False otherwise.
        """
        if node is None:
            node = self._backend
        # Iterate over snapshots because entries are deleted while looping.
        for name, child in list(node.subdicts.items()):
            if self._prune_empty_subtrees(child):
                del node.subdicts[name]
        for fmt, bucket in list(node.values.items()):
            if not bucket:
                del node.values[fmt]
        return not node.subdicts and not node.values

    def _navigate_to_parent(self
                            , key: SafeStrTuple
                            , create_if_missing: bool = True
                            ) -> tuple[Optional[_RAMBackend], Optional[str]]:
        """Resolve a hierarchical key to its parent node and leaf name.

        Walks all segments of the key except the last one, starting at the
        root of this instance.

        Args:
            key (SafeStrTuple): Full hierarchical key; the last segment is
                treated as the leaf name.
            create_if_missing (bool): If True (default), missing
                intermediate nodes are created (write path). If False,
                the tree is never modified (read path).

        Returns:
            tuple[_RAMBackend | None, str | None]: ``(node, leaf)`` where
            ``node`` would hold the leaf bucket, or ``(None, None)`` when
            ``create_if_missing`` is False and an intermediate node is
            absent.
        """
        node = self._backend
        for segment in key[:-1]:
            if create_if_missing:
                node = node.child(segment)
            else:
                node = node.subdicts.get(segment)
                if node is None:
                    # Path does not exist and must not be created.
                    return None, None
        return node, key[-1]

    def _find_bucket(self, key: SafeStrTuple
                     ) -> Optional[dict[str, tuple[Any, float]]]:
        """Return the bucket that currently holds ``key``, or None.

        Read-only lookup shared by the accessors: never creates nodes or
        buckets. The returned bucket is guaranteed to contain ``key[-1]``.
        """
        parent_node, leaf = self._navigate_to_parent(
            key, create_if_missing=False)
        if parent_node is None:
            return None
        bucket = parent_node.values.get(self.serialization_format)
        if bucket is None or leaf not in bucket:
            return None
        return bucket

    def __contains__(self, key: NonEmptyPersiDictKey) -> bool:
        """Return True if the key exists for the current serialization_format.

        Args:
            key (NonEmptyPersiDictKey): Key (string/sequence or SafeStrTuple).

        Returns:
            bool: True if the key is present; False otherwise.
        """
        key = NonEmptySafeStrTuple(key)
        return self._find_bucket(key) is not None

    def __getitem__(self, key: NonEmptyPersiDictKey) -> Any:
        """Retrieve the value stored for a key.

        The stored object itself is returned (no copy is made).

        Args:
            key (NonEmptyPersiDictKey): Key (string/sequence or SafeStrTuple).

        Returns:
            Any: The stored value.

        Raises:
            KeyError: If the key does not exist.
            TypeError: If base_class_for_values is set and the stored value
                is not an instance of it.
        """
        key = NonEmptySafeStrTuple(key)
        bucket = self._find_bucket(key)
        if bucket is None:
            raise KeyError(f"Key {key} not found")
        value = bucket[key[-1]][0]
        if self.base_class_for_values is not None:
            if not isinstance(value, self.base_class_for_values):
                raise TypeError(
                    f"Value must be of type {self.base_class_for_values},"
                    f" but it is {type(value)} instead.")
        return value

    def __setitem__(self, key: NonEmptyPersiDictKey, value: Any):
        """Store a deep copy of ``value`` under ``key``.

        Argument handling is delegated to
        ``self._process_setitem_args``; when it returns
        EXECUTION_IS_COMPLETE nothing further is done here. Otherwise the
        value is deep-copied and stored with the current ``time.time()``.

        Args:
            key (NonEmptyPersiDictKey): Key (string/sequence or SafeStrTuple).
            value (Any): Value to store, or a joker.

        Raises:
            KeyError, TypeError: Whatever ``_process_setitem_args`` raises
                (per the original documentation: modifying an existing item
                in append_only mode, or a value of a disallowed type).
        """
        key = NonEmptySafeStrTuple(key)
        if self._process_setitem_args(key, value) is EXECUTION_IS_COMPLETE:
            return
        parent_node, leaf = self._navigate_to_parent(key)
        bucket = parent_node.get_values_bucket(self.serialization_format)
        bucket[leaf] = (deepcopy(value), time.time())

    def __delitem__(self, key: NonEmptyPersiDictKey) -> None:
        """Delete the value stored for a key.

        Args:
            key (NonEmptyPersiDictKey): Key (string/sequence or SafeStrTuple).

        Raises:
            KeyError: If the key does not exist, or whatever
                ``_process_delitem_args`` raises (per the original
                documentation: deletion in append_only mode).
        """
        key = NonEmptySafeStrTuple(key)
        self._process_delitem_args(key)
        bucket = self._find_bucket(key)
        if bucket is None:
            raise KeyError(f"Key {key} not found")
        del bucket[key[-1]]
        # Throttled pruning: run only once per prune_interval destructive ops
        self._maybe_prune()

    def _generic_iter(self, result_type: set[str]):
        """Underlying implementation for keys/values/items/timestamps iterators.

        Traverses the tree (values of a node first, then its children) and
        yields, for each stored item, the requested fields in the fixed
        order key, value, timestamp:
            - exactly one requested field -> that field alone
            - several requested fields    -> a tuple of them

        Args:
            result_type (set[str]): Subset of {"keys", "values",
                "timestamps"} selecting the fields to yield. Validation is
                delegated to ``self._process_generic_iter_args``.

        Returns:
            Iterator: A generator over the requested items.

        Raises:
            TypeError, ValueError: Whatever ``_process_generic_iter_args``
                raises for an invalid ``result_type``.
        """
        self._process_generic_iter_args(result_type)
        want_keys = "keys" in result_type
        want_values = "values" in result_type
        want_timestamps = "timestamps" in result_type
        single_field = len(result_type) == 1

        def walk(prefix: tuple[str, ...], node: _RAMBackend):
            # yield values at this level
            bucket = node.values.get(self.serialization_format, {})
            for leaf, (val, ts) in bucket.items():
                fields: list[Any] = []
                if want_keys:
                    fields.append(SafeStrTuple((*prefix, leaf)))
                if want_values:
                    fields.append(val)
                # The timestamp is collected before choosing the output
                # shape so that a lone {"timestamps"} request yields the
                # timestamp instead of indexing an empty list.
                if want_timestamps:
                    fields.append(ts)
                yield fields[0] if single_field else tuple(fields)
            # then recurse into children
            for name, child in node.subdicts.items():
                yield from walk((*prefix, name), child)

        return walk((), self._backend)

    def timestamp(self, key: NonEmptyPersiDictKey) -> float:
        """Return the time of the last write to a key.

        Args:
            key (NonEmptyPersiDictKey): Key (string/sequence or SafeStrTuple).

        Returns:
            float: The ``time.time()`` value recorded when the item was
            last written.

        Raises:
            KeyError: If the key does not exist.
        """
        key = NonEmptySafeStrTuple(key)
        bucket = self._find_bucket(key)
        if bucket is None:
            raise KeyError(f"Key {key} not found")
        return bucket[key[-1]][1]

    def get_subdict(self, prefix_key: Iterable[str] | SafeStrTuple) -> PersiDict:
        """Return a view rooted at the given key prefix.

        The returned LocalDict uses the node reached via ``prefix_key`` as
        its root, so it shares data with this instance. Missing
        intermediate nodes are created, which yields an empty sub-dict.

        NOTE(review): the view keeps a direct reference to its root node.
        If that node is empty when pruning runs on this (ancestor)
        instance, it is detached from the tree, and later writes through
        the view are then not reachable from the ancestor — confirm whether
        this is acceptable.

        Args:
            prefix_key (Iterable[str] | SafeStrTuple): Key prefix
                identifying the subtree. May be empty to refer to the
                current root.

        Returns:
            PersiDict: A LocalDict restricted to keys under the prefix,
            configured like this instance.
        """
        if isinstance(prefix_key, SafeStrTuple):
            prefix = prefix_key
        else:
            prefix = SafeStrTuple(prefix_key)
        root_node = self._backend
        for segment in prefix:
            root_node = root_node.child(segment)
        return LocalDict(backend=root_node,
                         serialization_format=self.serialization_format,
                         append_only=self.append_only,
                         base_class_for_values=self.base_class_for_values,
                         prune_interval=self._prune_interval)
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
# parameterizable.register_parameterizable_class(LocalDict)
|
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
"""Container for multiple PersiDict instances with different serialization formats.
|
|
2
|
+
|
|
3
|
+
This module provides OverlappingMultiDict, which creates and manages multiple
|
|
4
|
+
PersiDict sub-dictionaries that share common parameters but use different
|
|
5
|
+
serialization_format values. Each sub-dictionary is exposed as an attribute named after
|
|
6
|
+
its serialization_format, enabling organized storage of different data formats in the
|
|
7
|
+
same logical location.
|
|
8
|
+
"""
|
|
1
9
|
from __future__ import annotations
|
|
2
10
|
|
|
3
11
|
from typing import Any, Dict, List, Type
|
|
@@ -5,25 +13,25 @@ from typing import Any, Dict, List, Type
|
|
|
5
13
|
from .persi_dict import PersiDict
|
|
6
14
|
|
|
7
15
|
class OverlappingMultiDict:
|
|
8
|
-
"""Container for multiple PersiDict instances
|
|
16
|
+
"""Container for multiple PersiDict instances, differing only by serialization_format.
|
|
9
17
|
|
|
10
18
|
This class instantiates several sub-dictionaries (PersiDict subclasses) that
|
|
11
|
-
share common parameters but differ by their
|
|
12
|
-
exposed as an attribute whose name equals the
|
|
19
|
+
share common parameters but differ by their serialization_format. Each sub-dictionary is
|
|
20
|
+
exposed as an attribute whose name equals the serialization_format (e.g., obj.json, obj.csv).
|
|
13
21
|
All sub-dictionaries typically point to the same underlying base directory or
|
|
14
|
-
bucket and differ only in how items are materialized by
|
|
22
|
+
bucket and differ only in how items are materialized by serialization format.
|
|
15
23
|
|
|
16
24
|
Attributes:
|
|
17
25
|
dict_type (Type[PersiDict]): A subclass of PersiDict used to create each
|
|
18
26
|
sub-dictionary.
|
|
19
27
|
shared_subdicts_params (Dict[str, Any]): Parameters applied to every
|
|
20
|
-
created sub-dictionary (e.g., base_dir, bucket,
|
|
28
|
+
created sub-dictionary (e.g., base_dir, bucket, append_only,
|
|
21
29
|
digest_len).
|
|
22
30
|
individual_subdicts_params (Dict[str, Dict[str, Any]]): Mapping from
|
|
23
|
-
|
|
31
|
+
serialization_format (attribute name) to a dict of parameters that are specific
|
|
24
32
|
to that sub-dictionary. These override or extend shared_subdicts_params
|
|
25
|
-
for the given
|
|
26
|
-
subdicts_names (List[str]): The list of
|
|
33
|
+
for the given serialization_format.
|
|
34
|
+
subdicts_names (List[str]): The list of serialization_format names (i.e., attribute
|
|
27
35
|
names) created.
|
|
28
36
|
|
|
29
37
|
Raises:
|
|
@@ -38,15 +46,15 @@ class OverlappingMultiDict:
|
|
|
38
46
|
|
|
39
47
|
Args:
|
|
40
48
|
dict_type (Type[PersiDict]): A subclass of PersiDict that will be
|
|
41
|
-
instantiated for each
|
|
49
|
+
instantiated for each serialization_format provided via individual_subdicts_params.
|
|
42
50
|
shared_subdicts_params (Dict[str, Any]): Parameters shared by all
|
|
43
51
|
sub-dicts (e.g., base_dir, bucket).
|
|
44
52
|
**individual_subdicts_params (Dict[str, Dict[str, Any]]): Keyword
|
|
45
|
-
arguments where each key is a
|
|
53
|
+
arguments where each key is a serialization_format (also the attribute name
|
|
46
54
|
to be created) and each value is a dict of parameters specific to
|
|
47
55
|
that sub-dict. These are merged with shared_subdicts_params when
|
|
48
56
|
constructing the sub-dict. The resulting dict also receives
|
|
49
|
-
|
|
57
|
+
serialization_format=<key>.
|
|
50
58
|
|
|
51
59
|
Raises:
|
|
52
60
|
TypeError: If dict_type is not a PersiDict subclass, or if
|
|
@@ -68,7 +76,7 @@ class OverlappingMultiDict:
|
|
|
68
76
|
self.__dict__[subdict_name] = dict_type(
|
|
69
77
|
**{**shared_subdicts_params,
|
|
70
78
|
**individual_subdicts_params[subdict_name],
|
|
71
|
-
"
|
|
79
|
+
"serialization_format": subdict_name})
|
|
72
80
|
|
|
73
81
|
def __getstate__(self):
|
|
74
82
|
"""Prevent pickling.
|
|
@@ -99,7 +107,7 @@ class OverlappingMultiDict:
|
|
|
99
107
|
key: The key that would be accessed (ignored).
|
|
100
108
|
|
|
101
109
|
Raises:
|
|
102
|
-
TypeError: Always raised to indicate unsupported operation.
|
|
110
|
+
TypeError: Always raised to indicate an unsupported operation.
|
|
103
111
|
"""
|
|
104
112
|
raise TypeError(
|
|
105
113
|
"OverlappingMultiDict does not support item access by key. "
|
|
@@ -114,7 +122,7 @@ class OverlappingMultiDict:
|
|
|
114
122
|
value: The value that would be assigned (ignored).
|
|
115
123
|
|
|
116
124
|
Raises:
|
|
117
|
-
TypeError: Always raised to indicate unsupported operation.
|
|
125
|
+
TypeError: Always raised to indicate an unsupported operation.
|
|
118
126
|
"""
|
|
119
127
|
raise TypeError(
|
|
120
128
|
"OverlappingMultiDict does not support item assignment by key. "
|
|
@@ -128,7 +136,7 @@ class OverlappingMultiDict:
|
|
|
128
136
|
key: The key that would be deleted (ignored).
|
|
129
137
|
|
|
130
138
|
Raises:
|
|
131
|
-
TypeError: Always raised to indicate unsupported operation.
|
|
139
|
+
TypeError: Always raised to indicate an unsupported operation.
|
|
132
140
|
"""
|
|
133
141
|
raise TypeError(
|
|
134
142
|
"OverlappingMultiDict does not support item deletion by key. "
|