PyperCache 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyperCache/__init__.py +25 -0
- PyperCache/core/__init__.py +7 -0
- PyperCache/core/cache.py +126 -0
- PyperCache/core/cache_record.py +217 -0
- PyperCache/core/request_logger.py +107 -0
- PyperCache/models/apimodel.py +49 -0
- PyperCache/py.typed +1 -0
- PyperCache/query/__init__.py +10 -0
- PyperCache/query/json_injester.py +436 -0
- PyperCache/storage/__init__.py +28 -0
- PyperCache/storage/backends.py +106 -0
- PyperCache/storage/base.py +103 -0
- PyperCache/storage/chunked_dictionary.py +297 -0
- PyperCache/storage/factory.py +40 -0
- PyperCache/storage/sqlite_storage.py +485 -0
- PyperCache/utils/__init__.py +25 -0
- PyperCache/utils/collections.py +28 -0
- PyperCache/utils/fs.py +46 -0
- PyperCache/utils/patterns.py +97 -0
- PyperCache/utils/profiling.py +44 -0
- PyperCache/utils/sentinel.py +26 -0
- PyperCache/utils/serialization.py +175 -0
- PyperCache/utils/typing_cast.py +72 -0
- pypercache-0.1.0.dist-info/METADATA +92 -0
- pypercache-0.1.0.dist-info/RECORD +28 -0
- pypercache-0.1.0.dist-info/WHEEL +5 -0
- pypercache-0.1.0.dist-info/licenses/LICENSE +21 -0
- pypercache-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
"""High-performance SQLite cache storage backend.
|
|
2
|
+
|
|
3
|
+
IO Strategy — Write-Behind Buffer
|
|
4
|
+
──────────────────────────────────
|
|
5
|
+
The dominant cost in the naive SQLite backend is per-write fsync: every
|
|
6
|
+
``commit()`` forces the OS to flush pages to disk. For large JSON/HTML
|
|
7
|
+
records this is the single biggest latency killer.
|
|
8
|
+
|
|
9
|
+
This backend eliminates that by:
|
|
10
|
+
|
|
11
|
+
1. **Hot read cache** — all records are loaded into memory on open.
|
|
12
|
+
``get_record`` never touches disk (O(1) dict lookup).
|
|
13
|
+
|
|
14
|
+
2. **Write-behind dirty buffer** — ``store_record`` / ``update_record``
|
|
15
|
+
write into the in-memory dict *and* mark the key dirty. No disk IO at
|
|
16
|
+
all until a flush is triggered.
|
|
17
|
+
|
|
18
|
+
3. **Batch flush** — dirty keys are persisted in a *single* transaction
|
|
19
|
+
(one fsync regardless of how many records changed). Flush is triggered
|
|
20
|
+
by any of:
|
|
21
|
+
a. ``DIRTY_FLUSH_THRESHOLD`` dirty keys accumulated (default 50)
|
|
22
|
+
b. ``FLUSH_INTERVAL_SECONDS`` wall-clock seconds elapsed (default 5 s)
|
|
23
|
+
— a background daemon thread handles this automatically.
|
|
24
|
+
c. Explicit ``flush()`` call.
|
|
25
|
+
d. ``close()`` / context-manager ``__exit__``.
|
|
26
|
+
|
|
27
|
+
4. **Single-commit bulk upsert** — the flush uses ``executemany`` inside
|
|
28
|
+
one ``BEGIN … COMMIT`` block, so N dirty records → 1 fsync.
|
|
29
|
+
|
|
30
|
+
5. **WAL mode** — concurrent readers are never blocked by the writer.
|
|
31
|
+
|
|
32
|
+
Schema
|
|
33
|
+
──────
|
|
34
|
+
Each cache record is stored with its fields in dedicated columns rather
|
|
35
|
+
than a single serialised JSON blob:
|
|
36
|
+
|
|
37
|
+
key TEXT PRIMARY KEY
|
|
38
|
+
cast TEXT — type/cast metadata
|
|
39
|
+
expiry REAL — expiry timestamp (Unix epoch, nullable)
|
|
40
|
+
timestamp REAL — record creation/update time (Unix epoch)
|
|
41
|
+
data BLOB — raw payload bytes (JSON, msgpack-prefixed, or raw BLOB)
|
|
42
|
+
|
|
43
|
+
Trade-off: a process crash between flushes can lose at most
|
|
44
|
+
``FLUSH_INTERVAL_SECONDS`` seconds of writes. For a *cache* this is
|
|
45
|
+
always acceptable — stale-miss on restart is far cheaper than per-write
|
|
46
|
+
fsync latency under load.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
import json
|
|
50
|
+
import jsonpickle
|
|
51
|
+
import msgpack
|
|
52
|
+
import sqlite3
|
|
53
|
+
import threading
|
|
54
|
+
import time
|
|
55
|
+
from collections.abc import MutableMapping
|
|
56
|
+
from pathlib import Path
|
|
57
|
+
from typing import Dict, Iterator, Optional, Set
|
|
58
|
+
|
|
59
|
+
from PyperCache.storage.base import StorageMechanism
|
|
60
|
+
from PyperCache.utils.fs import ensure_dirs_exist
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Tunables
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
DIRTY_FLUSH_THRESHOLD: int = 50 # flush when this many keys are dirty
|
|
68
|
+
FLUSH_INTERVAL_SECONDS: float = 5.0 # background flush cadence in seconds
|
|
69
|
+
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
# Schema helpers
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
# Canonical field names that map to dedicated columns.
|
|
75
|
+
_COLUMNS = ("cast", "expiry", "timestamp", "data")
|
|
76
|
+
|
|
77
|
+
_CREATE_TABLE = """
|
|
78
|
+
CREATE TABLE IF NOT EXISTS cache_records (
|
|
79
|
+
key TEXT PRIMARY KEY,
|
|
80
|
+
"cast" TEXT,
|
|
81
|
+
expiry REAL,
|
|
82
|
+
timestamp REAL,
|
|
83
|
+
data BLOB
|
|
84
|
+
)
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
_UPSERT = """
|
|
88
|
+
INSERT INTO cache_records (key, "cast", expiry, timestamp, data)
|
|
89
|
+
VALUES (:key, :cast, :expiry, :timestamp, :data)
|
|
90
|
+
ON CONFLICT(key) DO UPDATE SET
|
|
91
|
+
"cast" = excluded."cast",
|
|
92
|
+
expiry = excluded.expiry,
|
|
93
|
+
timestamp = excluded.timestamp,
|
|
94
|
+
data = excluded.data
|
|
95
|
+
"""
|
|
96
|
+
|
|
97
|
+
_SELECT_ALL = """SELECT key, "cast", expiry, timestamp, data FROM cache_records"""
|
|
98
|
+
_DELETE_KEY = "DELETE FROM cache_records WHERE key = ?"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
_MSGPACK_PREFIX = b'\x00' # null byte never appears at the start of valid JSON
|
|
102
|
+
_JSONPICKLE_PREFIX = b'\x01' # SOH byte — fallback for types msgpack can't handle
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _serialize_data(value) -> Optional[bytes]:
    """Encode the ``data`` field to bytes for SQLite BLOB storage.

    Encoding ladder (first success wins):
      - ``None``      → NULL in DB
      - ``bytes``     → raw BLOB as-is
      - JSON-able     → plain UTF-8 JSON text
      - msgpack-able  → ``\\x00`` + msgpack bytes (e.g. dicts containing bytes)
      - anything else → ``\\x01`` + jsonpickle JSON (arbitrary Python objects)
    """
    if value is None:
        return None  # stored as SQL NULL
    if isinstance(value, (bytes, bytearray)):
        # Raw payloads go straight into the BLOB column untouched.
        return bytes(value)
    try:
        encoded = json.dumps(value)
    except (TypeError, ValueError):
        pass  # not JSON-serialisable — fall through to msgpack
    else:
        return encoded.encode()
    try:
        packed = msgpack.dumps(value)
    except Exception:
        pass  # msgpack can't handle it either — fall through
    else:
        return _MSGPACK_PREFIX + packed
    # Last resort: jsonpickle round-trips arbitrary Python objects.
    return _JSONPICKLE_PREFIX + jsonpickle.encode(value).encode()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _deserialize_data(raw) -> object:
    """Decode bytes retrieved from the BLOB column back to a Python object.

    - ``None``          → ``None``
    - ``\\x00`` prefix  → msgpack decode
    - ``\\x01`` prefix  → jsonpickle decode
    - valid UTF-8 JSON  → parsed object
    - non-JSON bytes    → returned as raw ``bytes``
    """
    if raw is None:
        return None
    if isinstance(raw, (bytes, bytearray)):
        body = memoryview(raw)[1:]  # zero-copy view past a 1-byte prefix
        if raw.startswith(_MSGPACK_PREFIX):
            return msgpack.loads(body, raw=False)
        if raw.startswith(_JSONPICKLE_PREFIX):
            return jsonpickle.decode(body.tobytes().decode())
        try:
            return json.loads(raw.decode())
        except (UnicodeDecodeError, json.JSONDecodeError):
            # Opaque binary payload — hand it back unchanged.
            return bytes(raw)
    # Column affinity can hand us a str; try JSON, else pass through.
    try:
        return json.loads(raw)
    except (TypeError, json.JSONDecodeError):
        return raw
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _row_to_record(row: tuple) -> dict:
    """Convert a ``(key, cast, expiry, timestamp, data)`` DB row to a dict."""
    _, cast_value, expiry_ts, created_ts, payload = row
    return {
        "cast": cast_value,
        "expiry": expiry_ts,
        "timestamp": created_ts,
        "data": _deserialize_data(payload),
    }
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _record_to_params(key: str, record: dict) -> dict:
    """Convert an in-memory record dict to named params for :data:`_UPSERT`."""
    params = {column: record.get(column) for column in _COLUMNS}
    # The payload column needs byte-encoding before hitting the BLOB column.
    params["data"] = _serialize_data(params["data"])
    params["key"] = key
    return params
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# In-memory MutableMapping with dirty tracking
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
class _BufferedMapping(MutableMapping):
|
|
184
|
+
"""In-memory ``dict`` with a dirty-key set for write-behind flushing.
|
|
185
|
+
|
|
186
|
+
All reads are served from ``_store`` (pure RAM, zero IO).
|
|
187
|
+
All writes update ``_store`` and mark the key in ``_dirty``.
|
|
188
|
+
|
|
189
|
+
The owning :class:`SQLiteStorage` inspects ``_dirty`` and calls
|
|
190
|
+
:meth:`pop_dirty` to drain it during a flush.
|
|
191
|
+
"""
|
|
192
|
+
|
|
193
|
+
def __init__(self, initial: Dict[str, dict]):
|
|
194
|
+
self._store: Dict[str, dict] = initial
|
|
195
|
+
self._dirty: Set[str] = set()
|
|
196
|
+
self._deleted: Set[str] = set()
|
|
197
|
+
|
|
198
|
+
# MutableMapping protocol --------------------------------------------------
|
|
199
|
+
|
|
200
|
+
def __getitem__(self, key: str) -> dict:
|
|
201
|
+
return self._store[key] # pure RAM — zero IO
|
|
202
|
+
|
|
203
|
+
def __setitem__(self, key: str, value: dict):
|
|
204
|
+
self._store[key] = value
|
|
205
|
+
self._dirty.add(key)
|
|
206
|
+
self._deleted.discard(key) # un-delete if re-inserted
|
|
207
|
+
|
|
208
|
+
def __delitem__(self, key: str):
|
|
209
|
+
if key not in self._store:
|
|
210
|
+
raise KeyError(key)
|
|
211
|
+
del self._store[key]
|
|
212
|
+
self._dirty.discard(key)
|
|
213
|
+
self._deleted.add(key)
|
|
214
|
+
|
|
215
|
+
def __iter__(self) -> Iterator[str]:
|
|
216
|
+
return iter(self._store) # pure RAM
|
|
217
|
+
|
|
218
|
+
def __len__(self) -> int:
|
|
219
|
+
return len(self._store) # pure RAM
|
|
220
|
+
|
|
221
|
+
def __contains__(self, key: object) -> bool:
|
|
222
|
+
return key in self._store # pure RAM
|
|
223
|
+
|
|
224
|
+
# Dirty-buffer helpers -----------------------------------------------------
|
|
225
|
+
|
|
226
|
+
@property
|
|
227
|
+
def dirty_count(self) -> int:
|
|
228
|
+
return len(self._dirty)
|
|
229
|
+
|
|
230
|
+
def pop_dirty(self) -> tuple[Dict[str, dict], Set[str]]:
|
|
231
|
+
"""Return pending upserts and deletes, then clear both sets."""
|
|
232
|
+
upserts = {k: self._store[k] for k in self._dirty if k in self._store}
|
|
233
|
+
deletes = set(self._deleted)
|
|
234
|
+
self._dirty.clear()
|
|
235
|
+
self._deleted.clear()
|
|
236
|
+
return upserts, deletes
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
# StorageMechanism implementation
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
class SQLiteStorage(StorageMechanism):
    """Cache storage backend with a write-behind buffer over SQLite.

    Reads are always served from RAM (after a one-time bulk load on open).
    Writes accumulate in a dirty buffer and are flushed to disk in a single
    batched transaction, collapsing N fsyncs into 1.

    Each cache record is stored in a typed, columnar schema::

        key        TEXT PRIMARY KEY
        cast       TEXT
        expiry     REAL (Unix epoch float, nullable)
        timestamp  REAL (Unix epoch float)
        data       BLOB

    Usage::

        # Recommended: use as a context manager so close() always runs.
        with SQLiteStorage("path/to/cache.db") as store:
            store.store_record("page-1", {
                "cast": "html",
                "expiry": 1700000000.0,
                "timestamp": 1699990000.0,
                "data": b"<p>...</p>",
            })
            record = store.get_record("page-1")

        # Manual lifecycle:
        store = SQLiteStorage("cache.db")
        store.store_record("k", {"cast": "json", "timestamp": time.time(), "data": b"{}"})
        store.flush()  # explicit flush without closing
        store.close()  # flushes + closes connection

    Args:
        filepath: Path to the ``.db`` file. Created automatically.
        flush_interval: Seconds between background auto-flushes (default 5 s).
        dirty_threshold: Flush immediately when this many keys are dirty
            (default 50).
    """

    _TABLE = "cache_records"

    def __init__(
        self,
        filepath: str,
        flush_interval: float = FLUSH_INTERVAL_SECONDS,
        dirty_threshold: int = DIRTY_FLUSH_THRESHOLD,
    ):
        self._conn: sqlite3.Connection | None = None
        self._flush_interval = flush_interval
        self._dirty_threshold = dirty_threshold
        self._flush_lock = threading.Lock()  # serialises actual DB writes
        self._closed = False

        # super().__init__ calls load() which opens the connection and
        # populates self.records with a _BufferedMapping.
        # NOTE(review): this ordering is assumed from the base-class
        # contract — confirm StorageMechanism.__init__ invokes load()
        # before returning; the flush thread below relies on it.
        super().__init__(filepath)

        # Start background flush thread after records are ready.
        # daemon=True means the thread never blocks interpreter shutdown,
        # so an explicit flush()/close() is still required for durability.
        self._bg_thread = threading.Thread(
            target=self._background_flush_loop,
            daemon=True,
            name=f"SQLiteStorage-flush-{filepath}",
        )
        self._bg_thread.start()

    # ------------------------------------------------------------------
    # Connection management
    # ------------------------------------------------------------------

    def _open_connection(self, filepath: Path) -> sqlite3.Connection:
        """Open (once) and memoise the SQLite connection with tuned PRAGMAs."""
        if self._conn is None:
            self._conn = sqlite3.connect(
                str(filepath),
                check_same_thread=False,  # serialised by _flush_lock + base lock
                isolation_level=None,  # we manage transactions manually
            )
            # WAL: readers never block writers; writers never block readers.
            self._conn.execute("PRAGMA journal_mode=WAL")
            # Larger pages suit big binary blobs.
            self._conn.execute("PRAGMA page_size=8192")
            # NORMAL skips the per-commit full fsync; with WAL this is safe
            # against app crashes (an OS crash can lose recent commits,
            # which is acceptable for a cache).
            self._conn.execute("PRAGMA cache_size=-65536") if False else None  # placeholder removed below
            self._conn.execute("PRAGMA synchronous=NORMAL")
            # 64 MB page cache (negative value = size in KiB) — reduces
            # repeated page reads.
            self._conn.execute("PRAGMA cache_size=-65536")
        return self._conn

    # ------------------------------------------------------------------
    # Background flush loop
    # ------------------------------------------------------------------

    def _background_flush_loop(self):
        """Daemon thread: flush dirty buffer every ``_flush_interval`` seconds."""
        while not self._closed:
            time.sleep(self._flush_interval)
            # Re-check after the sleep: close() may have run meanwhile.
            if not self._closed:
                self._do_flush()

    # ------------------------------------------------------------------
    # Flush logic
    # ------------------------------------------------------------------

    def _do_flush(self):
        """Write all dirty records to SQLite in one transaction (one fsync)."""
        if not isinstance(self.records, _BufferedMapping):
            return
        # Cheap pre-check outside the lock. A writer that dirties the
        # buffer right after this check is simply caught by the next
        # flush cycle — a benign race.
        if self.records.dirty_count == 0 and not self.records._deleted:
            return

        with self._flush_lock:
            upserts, deletes = self.records.pop_dirty()

            if not upserts and not deletes:
                return

            conn = self._conn
            if conn is None:
                return  # connection already closed — nothing to persist to

            try:
                conn.execute("BEGIN")

                if upserts:
                    conn.executemany(
                        _UPSERT,
                        (_record_to_params(k, v) for k, v in upserts.items()),
                    )

                if deletes:
                    conn.executemany(
                        _DELETE_KEY,
                        ((k,) for k in deletes),
                    )

                conn.execute("COMMIT")  # one fsync for all dirty records
            except sqlite3.Error:
                conn.execute("ROLLBACK")
                raise

    def flush(self):
        """Public API: force an immediate flush of the dirty buffer."""
        self._do_flush()

    def _maybe_flush(self):
        """Flush eagerly if the dirty buffer has hit the threshold."""
        if (
            isinstance(self.records, _BufferedMapping)
            and self.records.dirty_count >= self._dirty_threshold
        ):
            self._do_flush()

    # ------------------------------------------------------------------
    # StorageMechanism public API overrides
    # ------------------------------------------------------------------

    def store_record(self, key: str, cache_record_dict: dict):
        """Insert or overwrite *key* in the in-memory buffer and mark dirty.

        Overrides the base class to skip the synchronous ``save()`` call that
        would otherwise acquire the lock and call ``touch_store()`` on every
        write. Persistence is handled lazily by the dirty-buffer flush cycle.
        """
        self.records[str(key)] = cache_record_dict  # __setitem__ marks dirty
        self._maybe_flush()  # flush only if threshold hit

    # ------------------------------------------------------------------
    # StorageMechanism abstract hooks
    # ------------------------------------------------------------------

    def _impl__touch_store(self, filepath: Path) -> bool:
        """Create the SQLite file and records table if absent.

        Returns:
            ``True`` on success, ``False`` if any sqlite3 error occurred.
        """
        try:
            conn = self._open_connection(filepath)
            conn.execute(_CREATE_TABLE)
            conn.commit()
            return True
        except sqlite3.Error:
            return False

    def _impl__load(self, filepath: Path) -> MutableMapping[str, dict]:
        """Bulk-load every record from disk into a :class:`_BufferedMapping`.

        This is the only full-table scan that ever happens. After this
        point all reads are served from RAM. Each row is unpacked into a
        plain dict with keys ``cast``, ``expiry``, ``timestamp``, ``data``.
        """
        conn = self._open_connection(filepath)
        rows = conn.execute(_SELECT_ALL).fetchall()
        initial = {row[0]: _row_to_record(row) for row in rows}
        return _BufferedMapping(initial)

    def _impl__save(self, cache_records_dict: Dict[str, dict], filepath: Path):
        """Called by the base class after store_record; threshold-check only."""
        self._maybe_flush()

    def _impl__update_record(self, key: str, data: dict):
        """Merge *data* into the existing record (pure RAM) and mark dirty.

        Only the four recognised columns (``cast``, ``expiry``, ``timestamp``,
        ``data``) are merged; unknown keys in *data* are ignored so that
        callers cannot accidentally introduce columns that do not exist in
        the schema.
        """
        existing = self.records.get(key, {})
        for col in _COLUMNS:
            if col in data:
                existing[col] = data[col]
        self.records[key] = existing  # __setitem__ marks dirty — no IO

    def _impl__erase_everything(self):
        """Clear the in-memory buffer and the on-disk table atomically."""
        with self._flush_lock:
            if isinstance(self.records, _BufferedMapping):
                # Drop pending buffer state first so no stale flush revives
                # erased records.
                self.records._dirty.clear()
                self.records._deleted.clear()
                self.records._store.clear()
            if self._conn:
                self._conn.execute("BEGIN")
                self._conn.execute(f"DELETE FROM {self._TABLE}")
                self._conn.execute("COMMIT")

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def close(self):
        """Flush all dirty records, then close the SQLite connection.

        Always call this (or use the context manager) so pending writes
        are not lost when the process exits.
        """
        # Flag first so the background loop exits after its current sleep.
        self._closed = True
        self._do_flush()  # final flush — nothing left behind
        if self._conn:
            # Fold the WAL back into the main DB file and truncate the -wal
            # sidecar before closing.
            self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
            self._conn.close()
            self._conn = None

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""PyperCache.utils — public re-export surface.
|
|
2
|
+
|
|
3
|
+
All symbols that were previously importable from the flat ``utils`` module
|
|
4
|
+
remain importable from here, so existing ``from utils import X`` calls
|
|
5
|
+
continue to work after a simple search-and-replace of the import target.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from PyperCache.utils.collections import convert_defaultdict_to_dict
|
|
9
|
+
from PyperCache.utils.fs import ensure_dirs_exist, open_folder
|
|
10
|
+
from PyperCache.utils.patterns import ClassRepository, singleton
|
|
11
|
+
from PyperCache.utils.profiling import Profiler
|
|
12
|
+
from PyperCache.utils.serialization import DataSerializer, PickleStore
|
|
13
|
+
from PyperCache.utils.sentinel import UNSET
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"ClassRepository",
|
|
17
|
+
"convert_defaultdict_to_dict",
|
|
18
|
+
"DataSerializer",
|
|
19
|
+
"ensure_dirs_exist",
|
|
20
|
+
"open_folder",
|
|
21
|
+
"PickleStore",
|
|
22
|
+
"Profiler",
|
|
23
|
+
"singleton",
|
|
24
|
+
"UNSET",
|
|
25
|
+
]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Collection utilities."""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def convert_defaultdict_to_dict(data: Any) -> Any:
    """Recursively convert nested ``defaultdict`` instances to plain ``dict``.

    Nested lists are traversed as well, so defaultdicts at any depth are
    converted. Dicts and lists are updated in place.

    Args:
        data: The object to convert. Non-dict/list values are returned as-is.

    Returns:
        The same structure with every ``defaultdict`` replaced by a ``dict``.
    """
    if isinstance(data, defaultdict):
        # Top-level swap: a plain dict copy of the defaultdict.
        data = dict(data)

    if isinstance(data, dict):
        for key in data:
            data[key] = convert_defaultdict_to_dict(data[key])
    elif isinstance(data, list):
        # Slice assignment keeps the caller's list object identity.
        data[:] = (convert_defaultdict_to_dict(item) for item in data)

    return data
|
PyperCache/utils/fs.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Filesystem helpers: directory creation and platform file-explorer launcher."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def ensure_dirs_exist(path: str) -> None:
    """Create all intermediate directories required for *path* if they don't exist.

    If the final component of *path* contains a ``.`` it is treated as a
    filename and only its parent directories are created; otherwise the full
    path is treated as a directory tree and created in its entirety.

    Args:
        path: A file or directory path whose parent directories should exist.
    """
    p = Path(path)

    if len(p.parts) <= 1:
        return  # Nothing to create for a bare filename/single component

    # Heuristic: a final component containing "." looks like a filename,
    # so only its parent directories are created.
    dir_path = p.parent if "." in p.parts[-1] else p

    # exist_ok=True removes the check-then-create race (TOCTOU) the old
    # `if not dir_path.exists(): os.makedirs(dir_path)` form had — a
    # concurrent creator no longer triggers FileExistsError. parents=True
    # creates the whole intermediate chain in one call.
    dir_path.mkdir(parents=True, exist_ok=True)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def open_folder(path: Path) -> None:
    """Open *path* in the system file explorer, cross-platform.

    Args:
        path: Directory to open.
    """
    target = str(path.resolve())
    if sys.platform == "win32":
        os.startfile(target)
        return
    # macOS ships `open`; desktop Linux/BSD ship `xdg-open`.
    opener = "open" if sys.platform == "darwin" else "xdg-open"
    subprocess.run([opener, target], check=False)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Design-pattern utilities: singleton decorator and class registry."""
|
|
2
|
+
|
|
3
|
+
from functools import wraps
|
|
4
|
+
from typing import Any, Callable, Dict, List, Type, TypeVar
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
T = TypeVar("T")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def singleton(cls: Type[T]) -> Callable[..., T]:
    """Class decorator that enforces the singleton pattern.

    The first call constructs and caches the instance. If the class defines
    ``__post_init__``, it is invoked immediately after construction. Subsequent
    calls return the cached instance regardless of the arguments passed.

    Args:
        cls: The class to wrap as a singleton.

    Returns:
        A wrapper function that always returns the single shared instance.
    """
    _cache: Dict[type, Any] = {}

    @wraps(cls)
    def _resolve(*args: Any, **kwargs: Any) -> T:
        try:
            return _cache[cls]
        except KeyError:
            obj = cls(*args, **kwargs)
            _cache[cls] = obj
            # Optional post-construction hook, mirroring dataclasses.
            post = getattr(obj, "__post_init__", None)
            if post is not None:
                post()
            return obj

    return _resolve
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@singleton
class ClassRepository:
    """A singleton registry that maps class names to their types.

    Useful for dynamic instantiation by name — e.g. deserialising objects
    whose concrete type is stored as a string.

    Two indexes are maintained: ``classes`` keyed by short name, and
    ``fqclasses`` keyed by fully-qualified ``module.ClassName``.
    """

    def __init__(self) -> None:
        # map short name -> type
        self.classes: Dict[str, type] = {}
        # map fully-qualified name -> type
        self.fqclasses: Dict[str, type] = {}

    def add_module_classes(self, globals_dict: Dict[str, Any]) -> None:
        """Discover and register every class defined in *globals_dict*.

        Typically called with ``globals()`` from the module you want to index.
        ``ClassRepository`` itself is excluded to avoid self-registration.

        Fix: previously only the short-name index was populated here, so a
        class registered via a module scan could never be resolved through
        :meth:`get_class`'s fully-qualified fallback. Both indexes are now
        populated, matching :meth:`add_class`. (The short-name entry still
        uses the *globals* key, preserving lookups by module-level alias.)

        Args:
            globals_dict: The global namespace to scan (pass ``globals()``).
        """
        for name, obj in globals_dict.items():
            if name != "ClassRepository" and isinstance(obj, type):
                self.classes[name] = obj
                self.fqclasses[f"{obj.__module__}.{obj.__name__}"] = obj

    def add_class(self, cls: type) -> None:
        """Register a single class under both its short and qualified names.

        Args:
            cls: The class to register.

        Raises:
            TypeError: If *cls* is not a type.
        """
        if not isinstance(cls, type):
            raise TypeError("'cls' must be a type.")
        self.classes[cls.__name__] = cls
        fq = f"{cls.__module__}.{cls.__name__}"
        self.fqclasses[fq] = cls

    def get_class(self, class_name: str) -> type | None:
        """Return the class registered under *class_name*, or ``None``.

        Args:
            class_name: The ``__name__`` (or ``module.__name__``) of the
                desired class.

        Returns:
            The registered class, or ``None`` if not found.
        """
        # Try short name first, then fully-qualified name.
        if class_name in self.classes:
            return self.classes[class_name]
        return self.fqclasses.get(class_name)

    def list_classes(self) -> List[str]:
        """Return the names of all registered classes.

        Returns:
            A list of class name strings.
        """
        return list(self.classes.keys())
|