PyperCache 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyperCache/__init__.py +25 -0
- PyperCache/core/__init__.py +7 -0
- PyperCache/core/cache.py +126 -0
- PyperCache/core/cache_record.py +217 -0
- PyperCache/core/request_logger.py +107 -0
- PyperCache/models/apimodel.py +49 -0
- PyperCache/py.typed +1 -0
- PyperCache/query/__init__.py +10 -0
- PyperCache/query/json_injester.py +436 -0
- PyperCache/storage/__init__.py +28 -0
- PyperCache/storage/backends.py +106 -0
- PyperCache/storage/base.py +103 -0
- PyperCache/storage/chunked_dictionary.py +297 -0
- PyperCache/storage/factory.py +40 -0
- PyperCache/storage/sqlite_storage.py +485 -0
- PyperCache/utils/__init__.py +25 -0
- PyperCache/utils/collections.py +28 -0
- PyperCache/utils/fs.py +46 -0
- PyperCache/utils/patterns.py +97 -0
- PyperCache/utils/profiling.py +44 -0
- PyperCache/utils/sentinel.py +26 -0
- PyperCache/utils/serialization.py +175 -0
- PyperCache/utils/typing_cast.py +72 -0
- pypercache-0.1.0.dist-info/METADATA +92 -0
- pypercache-0.1.0.dist-info/RECORD +28 -0
- pypercache-0.1.0.dist-info/WHEEL +5 -0
- pypercache-0.1.0.dist-info/licenses/LICENSE +21 -0
- pypercache-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
|
|
6
|
+
|
|
7
|
+
from lark import Lark, Transformer
|
|
8
|
+
|
|
9
|
+
from ..utils.sentinel import UNSET
|
|
10
|
+
from ..utils.typing_cast import instantiate_type
|
|
11
|
+
|
|
12
|
+
# # ---------------------------------------------------------------------------
|
|
13
|
+
# # Sentinel
|
|
14
|
+
# # ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
# class UNSET:
|
|
17
|
+
# """Sentinel type representing a missing or unresolved value.
|
|
18
|
+
|
|
19
|
+
# Used instead of None so that None can be a legitimate return value
|
|
20
|
+
# from a query.
|
|
21
|
+
# """
|
|
22
|
+
|
|
23
|
+
# _instance: Optional[UNSET] = None
|
|
24
|
+
|
|
25
|
+
# def __new__(cls) -> UNSET:
|
|
26
|
+
# # Singleton — there is only ever one UNSET instance.
|
|
27
|
+
# if cls._instance is None:
|
|
28
|
+
# cls._instance = super().__new__(cls)
|
|
29
|
+
# return cls._instance
|
|
30
|
+
|
|
31
|
+
# def __repr__(self) -> str:
|
|
32
|
+
# return "JsonInjest.UNSET"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# # Convenience singleton so callers can write `is UNSET` rather than `== UNSET`.
|
|
36
|
+
# UNSET = UNSET()
|
|
37
|
+
|
|
38
|
+
T = TypeVar("T")
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Query AST nodes
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class JIPath:
    """An ordered run of keys describing a route through nested JSON.

    The selector text ``"a.b.c"`` is represented as
    ``JIPath(keys=('a', 'b', 'c'))``.
    """

    # One entry per dot-separated segment, in traversal order.
    keys: Tuple[str, ...]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class JIMatch:
    """Equality filter: keep items whose nested key holds a given value.

    The selector fragment ``"?type=admin"`` is represented as
    ``JIMatch(key_path=JIPath(('type',)), value='admin')``.
    """

    # Where to look inside each candidate item.
    key_path: JIPath
    # The value the looked-up entry is compared against with ``==``.
    value: Any
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class JIExistsFilter:
    """Existence filter: keep items that contain a key, whatever its value.

    * **dict** cursor: the cursor is returned unchanged when ``key_path``
      is present, ``UNSET`` otherwise.
    * **list** cursor: only the elements that contain ``key_path`` are kept.

    The selector fragment ``"?name"`` is represented as
    ``JIExistsFilter(key_path=JIPath(('name',)))``.
    """

    # The key path whose presence is tested.
    key_path: JIPath
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class JIPluck:
    """Pluck: pull the value found at a key path out of the cursor.

    * **dict** cursor: navigates to ``key_path``; ``UNSET`` when absent.
    * **list** cursor: reads ``key_path`` from every element and collects
      the values that were found.

    The selector fragment ``"?name*"`` is represented as
    ``JIPluck(key_path=JIPath(('name',)))``.
    """

    # The key path whose value is extracted.
    key_path: JIPath
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
# Grammar & transformer
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
#: Lark grammar for the selector mini-language.
|
|
96
|
+
#:
|
|
97
|
+
#: Syntax examples:
|
|
98
|
+
#: ``"users"`` – navigate to the ``users`` key
|
|
99
|
+
#: ``"users.0.name"`` – navigate nested keys / indices
|
|
100
|
+
#: ``"users?role=admin"`` – filter list items where role == "admin"
|
|
101
|
+
#: ``"?name"`` – exists filter: keep the cursor (dict) or the items (list) that contain ``name``
|
|
102
|
+
#: ``"users?role*"`` – pluck ``role`` from every item in ``users``
|
|
103
|
+
#: ``"users?role*?label*"`` – chain plucks: pluck ``role``, then ``label`` from each result
|
|
104
|
+
_GRAMMAR = r"""
|
|
105
|
+
start: path_expr? (selector tail?)*
|
|
106
|
+
tail: "." ITEM ("." ITEM)*
|
|
107
|
+
selector: exists_expr | pluck_expr | match_expr
|
|
108
|
+
exists_expr: "?" path_expr
|
|
109
|
+
pluck_expr: "?" path_expr "*"
|
|
110
|
+
match_expr: "?" path_expr "=" match_value
|
|
111
|
+
path_expr: ITEM ("." ITEM)*
|
|
112
|
+
|
|
113
|
+
match_value: NUM_LITERAL | ESCAPED_STRING | ITEM
|
|
114
|
+
NUM_LITERAL: "#" /-?[0-9]+(\.[0-9]+)?/
|
|
115
|
+
|
|
116
|
+
ITEM: CNAME | ESCAPED_STRING | INT
|
|
117
|
+
INT: /-?[0-9]+/
|
|
118
|
+
|
|
119
|
+
_STRING_INNER: /[a-zA-Z0-9-]+/
|
|
120
|
+
STRING: _STRING_INNER /(?<!\\)(\\\\)*?/
|
|
121
|
+
|
|
122
|
+
%ignore WS
|
|
123
|
+
|
|
124
|
+
%import common.CNAME
|
|
125
|
+
%import common.ESCAPED_STRING
|
|
126
|
+
%import common.WS
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
QueryParser = Lark(_GRAMMAR, parser="lalr")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _dequote(s: str) -> str:
|
|
133
|
+
"""Strip matching single or double quotes from a string token, if present."""
|
|
134
|
+
if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
|
|
135
|
+
return s[1:-1]
|
|
136
|
+
return s
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class JIQuery(Transformer):
    """Lark transformer that folds a selector parse tree into AST nodes.

    Each callback is named after the grammar rule or terminal it handles.
    Transforming a tree yields the flat list of children of ``start``:
    ``JIPath``, ``JIExistsFilter``, ``JIPluck`` and ``JIMatch`` objects in
    the order they appear in the selector.
    """

    # -- terminal callbacks (names must match the Lark terminals) ------

    def ITEM(self, token: Any) -> str:  # noqa: N802
        return _dequote(str(token))

    def ESCAPED_STRING(self, token: Any) -> str:  # noqa: N802 – must match Lark terminal name
        return _dequote(str(token))

    def NUM_LITERAL(self, token: Any) -> Union[int, float]:  # noqa: N802
        # Numeric literals are written with a leading '#', e.g. ``#42``.
        digits = str(token)[1:]
        if "." in digits:
            return float(digits)
        return int(digits)

    # -- rule callbacks -------------------------------------------------

    def start(self, children: List[Any]) -> List[Any]:
        return children

    def path_expr(self, children: List[str]) -> JIPath:
        return JIPath(keys=tuple(children))

    def tail(self, children: List[str]) -> JIPath:
        return JIPath(keys=tuple(children))

    def selector(self, children: List[Any]) -> Any:
        return children[0]

    def exists_expr(self, children: List[Any]) -> JIExistsFilter:
        return JIExistsFilter(key_path=children[0])

    def pluck_expr(self, children: List[Any]) -> JIPluck:
        return JIPluck(key_path=children[0])

    def match_expr(self, children: List[Any]) -> JIMatch:
        path, expected = children
        return JIMatch(key_path=path, value=expected)

    def match_value(self, children: List[Any]) -> Any:  # noqa: N802
        candidate = children[0]
        # String values may still carry their quotes; _dequote strips a
        # matching pair and leaves every other string untouched. Numbers
        # produced by NUM_LITERAL pass through unchanged.
        return _dequote(candidate) if isinstance(candidate, str) else candidate
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
# Main class
|
|
185
|
+
# ---------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
class JsonInjester:
    """Lightweight query interface for navigating and filtering JSON data.

    Parameters
    ----------
    json_data:
        A raw JSON string, or an already-parsed ``dict`` or ``list``.
    root:
        Optional dot-separated path to use as the starting cursor.
        For example, ``root="data.users"`` walks ``data`` then ``users``
        and uses the value found there as ``self.data``. If the path is
        absent, ``self.data`` becomes ``UNSET``.
    default_tail:
        When a ``get()`` call resolves to a ``dict``, automatically
        follow this additional selector before returning. Useful when
        every value in a collection has the same wrapper key.

    Raises
    ------
    ValueError
        If *json_data* is not a ``str``, ``dict`` or ``list``.
    """

    def __init__(
        self,
        json_data: Union[str, Dict[str, Any], List[Any]],
        root: Optional[str] = None,
        default_tail: Optional[str] = None,
    ) -> None:
        self.default_tail = default_tail

        if isinstance(json_data, str):
            self.data: Any = json.loads(json_data)
        elif isinstance(json_data, (dict, list)):
            self.data = json_data
        else:
            raise ValueError(
                f"json_data must be a str, dict, or list, got {type(json_data).__name__!r}"
            )

        if root is not None:
            self.data = self._move_cursor(self.data, JIPath(tuple(root.split("."))))

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def has(self, selector: str) -> bool:
        """Return ``True`` if ``get(selector)`` yields anything but ``UNSET``."""
        return self.get(selector) is not UNSET

    def get(
        self,
        selector: str,
        default_value: Any = UNSET,
        select_first: bool = False,
        cast: Optional[Type[T]] = None,
    ) -> Any:
        """Evaluate *selector* against the current data and return the result.

        Parameters
        ----------
        selector:
            A dot-separated path, optionally followed by ``?key`` (exists
            filter), ``?key*`` (pluck) or ``?key=value`` (match filter)
            expressions. Examples::

                "name"
                "address.city"
                "users?role=admin"

        default_value:
            Returned when a path, exists filter or pluck step resolves to
            ``UNSET``, or when the final result is ``None``.
            Defaults to ``UNSET`` (the sentinel), which means "no default".
        select_first:
            When ``True`` and the result is a list, return its first item
            instead of the list, or ``UNSET`` if the list is empty.
        cast:
            Applied only when the resolved value is a ``dict``. A callable
            that is not a class is called with the dict as its single
            argument; anything else is handed to ``instantiate_type``.

        Returns
        -------
        Any
            The resolved value, ``default_value`` if the path is missing,
            or ``UNSET`` if no default was supplied and the path is missing.

        Raises
        ------
        TypeError
            If a bare path selector is applied directly to a list root.
        """
        tree = QueryParser.parse(selector)
        actions: List[Union[JIPath, JIExistsFilter, JIPluck, JIMatch]] = (
            JIQuery().transform(tree)
        )

        cursor: Any = self.data
        result: Any = cursor

        for action in actions:
            if isinstance(action, JIPath):
                if isinstance(cursor, list):
                    # A bare path against a list root is ambiguous — require
                    # an explicit '?key*' or '?key=value' selector.
                    if cursor is self.data:
                        raise TypeError(
                            f"Cannot use a bare path selector {'.'.join(action.keys)!r} "
                            "directly on a list root. "
                            "Use '?key*' to pluck a field from each element, "
                            "or '?key=value' to filter."
                        )
                    # Cursor is already a list produced by a prior filter/pluck.
                    result = self._pluck_from_list(cursor, action)
                    cursor = result
                else:
                    cursor = self._move_cursor(cursor, action)
                    if cursor is UNSET:
                        return default_value
                    result = cursor

            elif isinstance(action, JIExistsFilter):
                result = self._apply_exists_filter(cursor, action)
                if result is UNSET:
                    return default_value
                cursor = result

            elif isinstance(action, JIPluck):
                result = self._apply_pluck(cursor, action)
                if result is UNSET:
                    return default_value
                cursor = result

            elif isinstance(action, JIMatch):
                result = self._apply_filter(cursor, action)
                cursor = result

        # Optionally follow a default tail selector when the result is a dict.
        if isinstance(result, dict) and self.default_tail:
            result = JsonInjester(result).get(self.default_tail)

        # Optionally cast a dict result to the requested type using the
        # shared instantiation helper so generics and models are handled.
        if cast is not None and isinstance(result, dict):
            # Preserve previous behaviour for arbitrary callables (e.g. lambdas)
            if callable(cast) and not isinstance(cast, type):
                result = cast(result)
            else:
                result = instantiate_type(cast, result)

        # Fall back to default_value when the resolved result is None.
        if result is None and default_value is not UNSET:
            return default_value

        if select_first is True and isinstance(result, list):
            # First item, or UNSET for an empty list (no list copy needed).
            return result[0] if result else UNSET

        return result

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _move_cursor(self, cursor: Any, path: JIPath) -> Any:
        """Walk *cursor* along each key in *path*.

        Dict nodes are descended by key. List nodes are descended by
        position when the key is an integer literal such as ``"0"`` or
        ``"-1"``, which is what makes ``"users.0.name"`` resolvable.

        Returns
        -------
        Any
            The value reached, or ``UNSET`` if a dict key is absent or a
            list index is out of range.

        Raises
        ------
        AttributeError
            If an intermediate node is neither a dict nor a list addressed
            by an integer key.
        """
        for key in path.keys:
            if isinstance(cursor, dict):
                if key not in cursor:
                    return UNSET
                cursor = cursor[key]
                continue

            if isinstance(cursor, list):
                try:
                    index = int(key)
                except (TypeError, ValueError):
                    index = None  # not an index; reported as an error below
                if index is not None:
                    try:
                        cursor = cursor[index]
                    except IndexError:
                        return UNSET
                    continue

            raise AttributeError(
                f"Expected a dict while navigating key {key!r}, "
                f"got {type(cursor).__name__!r}"
            )
        return cursor

    def _pluck_from_list(self, lst: List[Any], path: JIPath) -> List[Any]:
        """Navigate *path* inside every dict element of *lst*, collecting hits.

        ``(key, value)`` tuples, as produced by ``_apply_filter`` on a
        dict-of-dicts, are unwrapped to their value first. Elements that
        are not dicts, and dicts where *path* is absent, are skipped.
        """
        results: List[Any] = []
        for item in lst:
            if isinstance(item, tuple) and len(item) == 2:
                item = item[1]
            if not isinstance(item, dict):
                continue
            value = self._move_cursor(item, path)
            if value is not UNSET:
                results.append(value)
        return results

    def _apply_exists_filter(self, cursor: Any, cond: JIExistsFilter) -> Any:
        """Return items/cursor where ``cond.key_path`` exists (no value check).

        * **dict** cursor – returns ``UNSET`` if key absent, else the cursor itself.
        * **list** cursor – returns only the dict elements that contain ``key_path``.
        * Scalar – treat as missing, return ``UNSET``.
        """
        if isinstance(cursor, dict):
            value = self._move_cursor(cursor, cond.key_path)
            return UNSET if value is UNSET else cursor

        if isinstance(cursor, list):
            return [
                item
                for item in cursor
                if isinstance(item, dict)
                and self._move_cursor(item, cond.key_path) is not UNSET
            ]

        return UNSET

    def _apply_pluck(self, cursor: Any, pluck: JIPluck) -> Any:
        """Extract ``pluck.key_path`` value from each element.

        * **dict** cursor – navigates and returns ``UNSET`` if key absent.
        * **list** cursor – plucks from every element, collecting non-``UNSET`` hits.
        * Scalar – treat as missing, return ``UNSET``.
        """
        if isinstance(cursor, dict):
            return self._move_cursor(cursor, pluck.key_path)

        if isinstance(cursor, list):
            return self._pluck_from_list(cursor, pluck.key_path)

        return UNSET

    def _apply_filter(
        self,
        cursor: Any,
        match: JIMatch,
    ) -> List[Any]:
        """Return the subset of *cursor* items that satisfy *match*.

        Handles two container shapes:

        * **list of dicts** – each element is checked directly.
        * **dict of dicts** – each ``(key, value)`` pair is checked;
          matching pairs are returned as ``(key, value)`` tuples.

        Entries that are not dicts are skipped rather than raising, and a
        cursor that is neither a dict nor a list/tuple yields ``[]``.
        """
        if isinstance(cursor, dict):
            # Report a hit as the (key, value) pair; test the value.
            candidates = [((key, node), node) for key, node in cursor.items()]
        elif isinstance(cursor, (list, tuple)):
            candidates = [(node, node) for node in cursor]
        else:
            return []

        results: List[Any] = []
        for hit, node in candidates:
            if not isinstance(node, dict):
                continue
            test_value = self._move_cursor(node, match.key_path)
            # _move_cursor signals a missing key with UNSET, not KeyError.
            if test_value is not UNSET and test_value == match.value:
                results.append(hit)
        return results
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Storage submodule: provides cache persistence backends, chunked storage,
|
|
3
|
+
and factory utilities.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
# Base interface
|
|
7
|
+
from .base import StorageMechanism
|
|
8
|
+
|
|
9
|
+
# Concrete backends
|
|
10
|
+
from .backends import JSONStorage, PickleStorage, ChunkedStorage
|
|
11
|
+
|
|
12
|
+
# Chunked storage core
|
|
13
|
+
from .chunked_dictionary import ChunkedDictionary
|
|
14
|
+
|
|
15
|
+
from .sqlite_storage import SQLiteStorage
|
|
16
|
+
|
|
17
|
+
# Factory
|
|
18
|
+
from .factory import get_storage_mechanism
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"StorageMechanism",
|
|
22
|
+
"JSONStorage",
|
|
23
|
+
"PickleStorage",
|
|
24
|
+
"ChunkedStorage",
|
|
25
|
+
"ChunkedDictionary",
|
|
26
|
+
"SQLiteStorage",
|
|
27
|
+
"get_storage_mechanism",
|
|
28
|
+
]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Concrete storage backends: JSON, Pickle, and ChunkedDictionary."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import jsonpickle
|
|
5
|
+
from collections.abc import MutableMapping
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict
|
|
8
|
+
|
|
9
|
+
from PyperCache.storage.base import StorageMechanism
|
|
10
|
+
from PyperCache.storage.chunked_dictionary import ChunkedDictionary
|
|
11
|
+
from PyperCache.utils.serialization import PickleStore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class JSONStorage(StorageMechanism):
    """Storage backend that serialises cache records as a single JSON file.

    Saving tries the standard ``json`` module first and falls back to
    ``jsonpickle`` when ``json`` rejects the data. Loading always goes
    through ``jsonpickle.decode``. The file is read and written as UTF-8
    so its content does not depend on the platform's locale encoding.
    """

    def _impl__touch_store(self, filepath: Path) -> bool:
        """Create an empty file at *filepath* if none exists; always ``True``."""
        filepath.touch(exist_ok=True)
        return True

    def _impl__load(self, filepath: Path) -> Dict[str, dict]:
        """Return the records stored in *filepath*, or ``{}`` for an empty file."""
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read().strip()
        # NOTE(security): jsonpickle.decode can rebuild arbitrary Python
        # objects; only load cache files from a trusted location.
        return jsonpickle.decode(content) if content else {}

    def _impl__save(self, cache_records_dict: Dict[str, dict], filepath: Path) -> None:
        """Write *cache_records_dict* to *filepath*, replacing its content."""
        try:
            json_str = json.dumps(cache_records_dict)
        except (TypeError, ValueError):
            # Plain JSON cannot represent the data; let jsonpickle encode it.
            json_str = jsonpickle.encode(cache_records_dict)
        with open(filepath, "w", encoding="utf-8") as fp:
            fp.write(json_str)

    def _impl__update_record(self, key: str, data: dict) -> None:
        """Merge *data* into the record at *key* and rewrite the whole file."""
        record = self.get_record(key)
        # NOTE(review): this persists the change only if CacheRecord.update
        # mutates the dict held in self.records — confirm against CacheRecord.
        record.update(data)
        self.save(self.records)

    def _impl__erase_everything(self) -> None:
        """Drop every record and persist the now-empty mapping."""
        self.records = {}
        self.save(self.records)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class PickleStorage(StorageMechanism):
    """Storage backend that keeps all cache records in one pickle file.

    Every file operation is delegated to ``PickleStore``.
    """

    def _impl__touch_store(self, filepath: Path) -> bool:
        # Seeds the file with an empty mapping; presumably a no-op when the
        # file already exists — confirm against PickleStore.touch_file.
        PickleStore.touch_file({}, filepath)
        return True

    def _impl__load(self, filepath: Path) -> Dict[str, dict]:
        # NOTE(review): presumably unpickles the file; only load trusted files.
        return PickleStore.load_object(filepath)

    def _impl__save(self, cache_records_dict: Dict[str, dict], filepath: Path):
        PickleStore.save_object(cache_records_dict, filepath)

    def _impl__update_record(self, key: str, data: dict):
        # Merge into the record, then flush the complete mapping to disk.
        target = self.get_record(key)
        target.update(data)
        self.save(self.records)

    def _impl__erase_everything(self):
        # Swap in a fresh empty mapping and persist it.
        self.records = {}
        self.save(self.records)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class ChunkedStorage(StorageMechanism):
    """Storage backend backed by a :class:`ChunkedDictionary`.

    Records are written per key through the chunked dictionary instead of
    rewriting the whole dataset on every change.
    """

    def _impl__touch_store(self, filepath: Path) -> bool:
        # The chunked store lives in the directory that contains *filepath*.
        store_dir = filepath.parent
        exists = ChunkedDictionary.directory_contains_chunked_dictionary(store_dir)
        if not exists:
            # Third argument is presumably the chunk size limit in bytes
            # (15 MiB) — confirm against ChunkedDictionary.from_dict.
            self.chunked_dict = ChunkedDictionary.from_dict(
                {}, store_dir, 15 * 1024 * 1024
            )
        # Re-check so a failed creation is reported as False.
        return ChunkedDictionary.directory_contains_chunked_dictionary(store_dir)

    def _impl__load(self, filepath: Path) -> MutableMapping[str, dict]:
        """Open the store on disk and return the live ChunkedDictionary.

        Unlike the JSON and Pickle backends, the returned object is the
        ChunkedDictionary itself, not a plain dict snapshot, so it becomes
        ``self.records`` and item assignments go straight to the chunked
        dictionary.
        """
        opened = ChunkedDictionary.from_disk(filepath)
        self.chunked_dict = opened
        return opened

    def _impl__save(self, cache_records_dict: Dict[str, dict], filepath: Path):
        # Both arguments are ignored: only the manifest is saved here.
        self.chunked_dict.manifest.save()

    def _impl__update_record(self, key: str, data: dict):
        # Merge into the record and write just that key back.
        target = self.get_record(key)
        target.update(data)
        self.chunked_dict[key] = target.as_dict()

    def _impl__erase_everything(self):
        self.chunked_dict.erase_everything()
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Abstract base class for cache storage backends."""
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from collections.abc import MutableMapping
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, Type
|
|
8
|
+
|
|
9
|
+
from PyperCache.core.cache_record import CacheRecord
|
|
10
|
+
from PyperCache.utils.fs import ensure_dirs_exist
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class StorageMechanism(ABC):
    """Abstract base class defining the interface for cache storage backends.

    Subclasses implement the ``_impl__*`` methods to support different
    serialisation formats (JSON, Pickle, ChunkedDictionary).

    Locking: only :meth:`load` and :meth:`save` hold ``self.lock``. The
    lock is a non-reentrant ``threading.Lock``, so code that already holds
    it must not call :meth:`load` or :meth:`save`. :meth:`get_record`,
    :meth:`update_record`, :meth:`store_record` and :meth:`erase_everything`
    touch ``self.records`` without taking the lock themselves.

    ``self.records`` is typed as ``MutableMapping[str, dict]`` rather than
    ``dict`` so that :class:`ChunkedStorage` can back it with a
    :class:`ChunkedDictionary` instead of a plain in-memory dict.
    """

    def __init__(self, filepath: str):
        """Remember *filepath* and load the records from it immediately."""
        self.lock = threading.Lock()
        self.__filepath = filepath
        self.records: MutableMapping[str, dict] = self.load()

    @property
    def filepath(self) -> Path:
        """The backing store location as a :class:`~pathlib.Path`."""
        return Path(self.__filepath)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def load(self) -> MutableMapping[str, dict]:
        """Ensure the backing store exists, then load and return all records."""
        with self.lock:
            ensure_dirs_exist(self.filepath)
            self.touch_store()
            return self._impl__load(self.filepath)

    def save(self, data: dict):
        """Persist *data* to the backing store, creating it first if needed."""
        with self.lock:
            ensure_dirs_exist(self.filepath)
            self.touch_store()
            return self._impl__save(data, self.filepath)

    def get_record(self, key: str) -> CacheRecord:
        """Return the :class:`CacheRecord` associated with *key*.

        Raises whatever ``self.records[key]`` raises for an unknown key
        (``KeyError`` for a plain dict).
        """
        return CacheRecord(self.records[key])

    def update_record(self, key: str, data: dict):
        """Merge *data* into the existing record at *key*."""
        self._impl__update_record(key, data)

    def store_record(self, key: str, cache_record_dict: dict):
        """Insert or overwrite the record at *key* and persist immediately.

        *key* is converted with ``str()`` before it is used.
        """
        key = str(key)
        self.records[key] = cache_record_dict
        self.save(self.records)

    def erase_everything(self):
        """Delete every record from the backing store."""
        self._impl__erase_everything()

    def touch_store(self):
        """Create the backing store at :attr:`filepath` if it does not exist.

        Raises
        ------
        OSError
            If the backend reports that the store could not be created.
        """
        if not self._impl__touch_store(self.filepath):
            # OSError is still caught by existing ``except Exception``
            # handlers, and lets new callers handle this case specifically.
            raise OSError("New datastore could not be created.")

    # ------------------------------------------------------------------
    # Abstract implementation hooks
    # ------------------------------------------------------------------

    @abstractmethod
    def _impl__touch_store(self, filepath: Path) -> bool:
        """Create an empty store at *filepath* if one does not already exist.

        Return ``True`` when the store exists afterwards; a falsy return
        makes :meth:`touch_store` raise.
        """

    @abstractmethod
    def _impl__load(self, filepath: Path) -> MutableMapping[str, dict]:
        """Deserialise and return all records from *filepath*.

        May return a plain ``dict`` or any ``MutableMapping`` implementation
        (e.g. :class:`ChunkedDictionary`) that satisfies the same protocol.
        """

    @abstractmethod
    def _impl__save(self, cache_records_dict: Dict[str, dict], filepath: Path):
        """Serialise *cache_records_dict* and write it to *filepath*."""

    @abstractmethod
    def _impl__update_record(self, key: str, data: dict):
        """Merge *data* into the record at *key* and persist the change."""

    @abstractmethod
    def _impl__erase_everything(self):
        """Remove every record from the backing store."""
|