PyperCache 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,485 @@
1
+ """High-performance SQLite cache storage backend.
2
+
3
+ IO Strategy — Write-Behind Buffer
4
+ ──────────────────────────────────
5
+ The dominant cost in the naive SQLite backend is per-write fsync: every
6
+ ``commit()`` forces the OS to flush pages to disk. For large JSON/HTML
7
+ records this is the single biggest latency killer.
8
+
9
+ This backend eliminates that by:
10
+
11
+ 1. **Hot read cache** — all records are loaded into memory on open.
12
+ ``get_record`` never touches disk (O(1) dict lookup).
13
+
14
+ 2. **Write-behind dirty buffer** — ``store_record`` / ``update_record``
15
+ write into the in-memory dict *and* mark the key dirty. No disk IO at
16
+ all until a flush is triggered.
17
+
18
+ 3. **Batch flush** — dirty keys are persisted in a *single* transaction
19
+ (one fsync regardless of how many records changed). Flush is triggered
20
+ by any of:
21
+ a. ``DIRTY_FLUSH_THRESHOLD`` dirty keys accumulated (default 50)
22
+ b. ``FLUSH_INTERVAL_SECONDS`` wall-clock seconds elapsed (default 5 s)
23
+ — a background daemon thread handles this automatically.
24
+ c. Explicit ``flush()`` call.
25
+ d. ``close()`` / context-manager ``__exit__``.
26
+
27
+ 4. **Single-commit bulk upsert** — the flush uses ``executemany`` inside
28
+ one ``BEGIN … COMMIT`` block, so N dirty records → 1 fsync.
29
+
30
+ 5. **WAL mode** — concurrent readers are never blocked by the writer.
31
+
32
+ Schema
33
+ ──────
34
+ Each cache record is stored with its fields in dedicated columns rather
35
+ than a single serialised JSON blob:
36
+
37
+ key TEXT PRIMARY KEY
38
+ cast TEXT — type/cast metadata
39
+ expiry REAL — expiry timestamp (Unix epoch, nullable)
40
+ timestamp REAL — record creation/update time (Unix epoch)
41
+ data BLOB — raw payload bytes (JSON, msgpack-prefixed, or raw BLOB)
42
+
43
+ Trade-off: a process crash between flushes can lose at most
44
+ ``FLUSH_INTERVAL_SECONDS`` seconds of writes. For a *cache* this is
45
+ always acceptable — stale-miss on restart is far cheaper than per-write
46
+ fsync latency under load.
47
+ """
48
+
49
+ import json
50
+ import jsonpickle
51
+ import msgpack
52
+ import sqlite3
53
+ import threading
54
+ import time
55
+ from collections.abc import MutableMapping
56
+ from pathlib import Path
57
+ from typing import Dict, Iterator, Optional, Set
58
+
59
+ from PyperCache.storage.base import StorageMechanism
60
+ from PyperCache.utils.fs import ensure_dirs_exist
61
+
62
+
63
# ---------------------------------------------------------------------------
# Tunables
# ---------------------------------------------------------------------------

DIRTY_FLUSH_THRESHOLD: int = 50  # flush when this many keys are dirty
FLUSH_INTERVAL_SECONDS: float = 5.0  # background flush cadence in seconds

# ---------------------------------------------------------------------------
# Schema helpers
# ---------------------------------------------------------------------------

# Canonical field names that map to dedicated columns.
# ("cast" is double-quoted in the SQL below because CAST is an SQL keyword.)
_COLUMNS = ("cast", "expiry", "timestamp", "data")

_CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS cache_records (
    key TEXT PRIMARY KEY,
    "cast" TEXT,
    expiry REAL,
    timestamp REAL,
    data BLOB
)
"""

# Single-statement upsert (INSERT ... ON CONFLICT): lets the flush persist
# each dirty record with one prepared statement inside one transaction.
_UPSERT = """
INSERT INTO cache_records (key, "cast", expiry, timestamp, data)
VALUES (:key, :cast, :expiry, :timestamp, :data)
ON CONFLICT(key) DO UPDATE SET
    "cast" = excluded."cast",
    expiry = excluded.expiry,
    timestamp = excluded.timestamp,
    data = excluded.data
"""

# Full-table scan used once, at load time, to populate the hot read cache.
_SELECT_ALL = """SELECT key, "cast", expiry, timestamp, data FROM cache_records"""
_DELETE_KEY = "DELETE FROM cache_records WHERE key = ?"


# Serialization format markers prepended to the BLOB payload by
# _serialize_data / stripped by _deserialize_data.
_MSGPACK_PREFIX = b'\x00'  # null byte never appears at the start of valid JSON
_JSONPICKLE_PREFIX = b'\x01'  # SOH byte — fallback for types msgpack can't handle
103
+
104
+
105
+ def _serialize_data(value) -> Optional[bytes]:
106
+ """Encode the ``data`` field to bytes for SQLite BLOB storage.
107
+
108
+ Encoding ladder (first success wins):
109
+ - ``None`` → NULL in DB
110
+ - ``bytes`` → raw BLOB as-is
111
+ - JSON-able → plain UTF-8 JSON text
112
+ - msgpack-able → ``\\x00`` + msgpack bytes (e.g. dicts containing bytes)
113
+ - anything else → ``\\x01`` + jsonpickle JSON (arbitrary Python objects)
114
+ """
115
+ if value is None:
116
+ return None
117
+ if isinstance(value, (bytes, bytearray)):
118
+ return bytes(value) # pure bytes → BLOB as-is
119
+ try:
120
+ return json.dumps(value).encode() # plain JSON for normal data
121
+ except (TypeError, ValueError):
122
+ pass
123
+ try:
124
+ return _MSGPACK_PREFIX + msgpack.dumps(value) # dicts containing bytes etc.
125
+ except Exception:
126
+ pass
127
+ return _JSONPICKLE_PREFIX + jsonpickle.encode(value).encode() # arbitrary objects
128
+
129
+
130
+ def _deserialize_data(raw) -> object:
131
+ """Decode bytes retrieved from the BLOB column back to a Python object.
132
+
133
+ - ``None`` → ``None``
134
+ - ``\\x00`` prefix → msgpack decode
135
+ - ``\\x01`` prefix → jsonpickle decode
136
+ - valid UTF-8 JSON → parsed object
137
+ - non-JSON bytes → returned as raw ``bytes``
138
+ """
139
+ if raw is None:
140
+ return None
141
+ if isinstance(raw, (bytes, bytearray)):
142
+ if raw.startswith(_MSGPACK_PREFIX):
143
+ return msgpack.loads(memoryview(raw)[1:], raw=False)
144
+ if raw.startswith(_JSONPICKLE_PREFIX):
145
+ return jsonpickle.decode(memoryview(raw)[1:].tobytes().decode())
146
+ try:
147
+ return json.loads(raw.decode())
148
+ except (UnicodeDecodeError, json.JSONDecodeError):
149
+ return bytes(raw) # pure BLOB fallback
150
+ # SQLite may return a str if the column affinity kicked in
151
+ try:
152
+ return json.loads(raw)
153
+ except (TypeError, json.JSONDecodeError):
154
+ return raw
155
+
156
+
157
def _row_to_record(row: tuple) -> dict:
    """Unpack a ``(key, cast, expiry, timestamp, data)`` DB row into a record dict.

    The key itself is not part of the returned record — it becomes the
    mapping key in the owning buffer.
    """
    _key, cast, expiry, timestamp, raw_data = row
    record = {"cast": cast, "expiry": expiry, "timestamp": timestamp}
    record["data"] = _deserialize_data(raw_data)
    return record
166
+
167
+
168
def _record_to_params(key: str, record: dict) -> dict:
    """Build the named-parameter dict consumed by :data:`_UPSERT`.

    Missing fields default to ``None`` (stored as SQL NULL); the ``data``
    field is serialised to bytes before binding.
    """
    params = {"key": key}
    for column in _COLUMNS:
        params[column] = record.get(column)
    params["data"] = _serialize_data(params["data"])
    return params
177
+
178
+
179
+ # ---------------------------------------------------------------------------
180
+ # In-memory MutableMapping with dirty tracking
181
+ # ---------------------------------------------------------------------------
182
+
183
+ class _BufferedMapping(MutableMapping):
184
+ """In-memory ``dict`` with a dirty-key set for write-behind flushing.
185
+
186
+ All reads are served from ``_store`` (pure RAM, zero IO).
187
+ All writes update ``_store`` and mark the key in ``_dirty``.
188
+
189
+ The owning :class:`SQLiteStorage` inspects ``_dirty`` and calls
190
+ :meth:`pop_dirty` to drain it during a flush.
191
+ """
192
+
193
+ def __init__(self, initial: Dict[str, dict]):
194
+ self._store: Dict[str, dict] = initial
195
+ self._dirty: Set[str] = set()
196
+ self._deleted: Set[str] = set()
197
+
198
+ # MutableMapping protocol --------------------------------------------------
199
+
200
+ def __getitem__(self, key: str) -> dict:
201
+ return self._store[key] # pure RAM — zero IO
202
+
203
+ def __setitem__(self, key: str, value: dict):
204
+ self._store[key] = value
205
+ self._dirty.add(key)
206
+ self._deleted.discard(key) # un-delete if re-inserted
207
+
208
+ def __delitem__(self, key: str):
209
+ if key not in self._store:
210
+ raise KeyError(key)
211
+ del self._store[key]
212
+ self._dirty.discard(key)
213
+ self._deleted.add(key)
214
+
215
+ def __iter__(self) -> Iterator[str]:
216
+ return iter(self._store) # pure RAM
217
+
218
+ def __len__(self) -> int:
219
+ return len(self._store) # pure RAM
220
+
221
+ def __contains__(self, key: object) -> bool:
222
+ return key in self._store # pure RAM
223
+
224
+ # Dirty-buffer helpers -----------------------------------------------------
225
+
226
+ @property
227
+ def dirty_count(self) -> int:
228
+ return len(self._dirty)
229
+
230
+ def pop_dirty(self) -> tuple[Dict[str, dict], Set[str]]:
231
+ """Return pending upserts and deletes, then clear both sets."""
232
+ upserts = {k: self._store[k] for k in self._dirty if k in self._store}
233
+ deletes = set(self._deleted)
234
+ self._dirty.clear()
235
+ self._deleted.clear()
236
+ return upserts, deletes
237
+
238
+
239
+ # ---------------------------------------------------------------------------
240
+ # StorageMechanism implementation
241
+ # ---------------------------------------------------------------------------
242
+
243
class SQLiteStorage(StorageMechanism):
    """Cache storage backend with a write-behind buffer over SQLite.

    Reads are always served from RAM (after a one-time bulk load on open).
    Writes accumulate in a dirty buffer and are flushed to disk in a single
    batched transaction, collapsing N fsyncs into 1.

    Each cache record is stored in a typed, columnar schema::

        key        TEXT PRIMARY KEY
        cast       TEXT
        expiry     REAL (Unix epoch float, nullable)
        timestamp  REAL (Unix epoch float)
        data       BLOB

    Usage::

        # Recommended: use as a context manager so close() always runs.
        with SQLiteStorage("path/to/cache.db") as store:
            store.store_record("page-1", {
                "cast": "html",
                "expiry": 1700000000.0,
                "timestamp": 1699990000.0,
                "data": b"<p>...</p>",
            })
            record = store.get_record("page-1")

        # Manual lifecycle:
        store = SQLiteStorage("cache.db")
        store.store_record("k", {"cast": "json", "timestamp": time.time(), "data": b"{}"})
        store.flush()  # explicit flush without closing
        store.close()  # flushes + closes connection

    Args:
        filepath: Path to the ``.db`` file. Created automatically.
        flush_interval: Seconds between background auto-flushes (default 5 s).
        dirty_threshold: Flush immediately when this many keys are dirty
            (default 50).
    """

    _TABLE = "cache_records"

    def __init__(
        self,
        filepath: str,
        flush_interval: float = FLUSH_INTERVAL_SECONDS,
        dirty_threshold: int = DIRTY_FLUSH_THRESHOLD,
    ):
        self._conn: sqlite3.Connection | None = None
        self._flush_interval = flush_interval
        self._dirty_threshold = dirty_threshold
        # Serialises the ENTIRE pop-dirty + transaction sequence (see
        # _do_flush) so the background thread and an explicit flush() can
        # never interleave BEGIN/COMMIT on the shared connection.
        self._flush_lock = threading.Lock()
        self._closed = False
        # Event instead of a bare sleep in the flusher: close() sets it so
        # shutdown does not have to wait out a full interval.
        self._stop_event = threading.Event()

        # super().__init__ calls load() which opens the connection and
        # populates self.records with a _BufferedMapping.
        super().__init__(filepath)

        # Start the background flush thread only after records are ready.
        self._bg_thread = threading.Thread(
            target=self._background_flush_loop,
            daemon=True,
            name=f"SQLiteStorage-flush-{filepath}",
        )
        self._bg_thread.start()

    # ------------------------------------------------------------------
    # Connection management
    # ------------------------------------------------------------------

    def _open_connection(self, filepath: Path) -> sqlite3.Connection:
        """Open (once) and tune the shared SQLite connection."""
        if self._conn is None:
            self._conn = sqlite3.connect(
                str(filepath),
                check_same_thread=False,  # serialised by _flush_lock + base lock
                isolation_level=None,     # we manage transactions manually
            )
            # WAL: readers never block writers; writers never block readers.
            self._conn.execute("PRAGMA journal_mode=WAL")
            # Larger pages suit big binary blobs.
            self._conn.execute("PRAGMA page_size=8192")
            # NORMAL is durable enough for a cache — avoids per-commit fsync.
            self._conn.execute("PRAGMA synchronous=NORMAL")
            # 64 MB page cache — reduces repeated page reads.
            self._conn.execute("PRAGMA cache_size=-65536")
        return self._conn

    # ------------------------------------------------------------------
    # Background flush loop
    # ------------------------------------------------------------------

    def _background_flush_loop(self):
        """Daemon thread: flush the dirty buffer every ``_flush_interval`` s.

        ``Event.wait`` doubles as an interruptible sleep — ``close()`` sets
        the event so shutdown proceeds immediately instead of waiting out
        the interval (the old ``time.sleep`` loop could not be woken).
        """
        while not self._stop_event.wait(self._flush_interval):
            if self._closed:
                break
            try:
                self._do_flush()
            except sqlite3.Error:
                # _do_flush restored the dirty keys before re-raising, so
                # nothing is lost; retry on the next cycle instead of
                # silently killing the flusher thread.
                pass

    # ------------------------------------------------------------------
    # Flush logic
    # ------------------------------------------------------------------

    def _do_flush(self):
        """Write all dirty records to SQLite in one transaction (one fsync).

        The whole pop-and-commit sequence runs under ``_flush_lock`` —
        previously only the pop was locked, so two concurrent flushes could
        interleave transactions on the shared connection. On failure the
        popped keys are re-marked dirty/deleted so no write is silently
        lost, then the error is re-raised.

        Raises:
            sqlite3.Error: If the batch transaction fails (after rollback
                and dirty-buffer restoration).
        """
        records = self.records
        if not isinstance(records, _BufferedMapping):
            return
        if records.dirty_count == 0 and not records._deleted:
            return
        conn = self._conn
        if conn is None:
            return

        with self._flush_lock:
            upserts, deletes = records.pop_dirty()
            if not upserts and not deletes:
                return  # another flusher drained the buffer first

            try:
                conn.execute("BEGIN")
                if upserts:
                    conn.executemany(
                        _UPSERT,
                        (_record_to_params(k, v) for k, v in upserts.items()),
                    )
                if deletes:
                    conn.executemany(_DELETE_KEY, ((k,) for k in deletes))
                conn.execute("COMMIT")  # one fsync for all dirty records
            except sqlite3.Error:
                try:
                    conn.execute("ROLLBACK")
                except sqlite3.Error:
                    pass  # BEGIN itself may have failed — nothing to roll back
                self._restore_pending(records, upserts, deletes)
                raise

    @staticmethod
    def _restore_pending(records, upserts, deletes):
        """Re-mark keys as pending after a failed flush so nothing is lost.

        Keys mutated again since the failed pop keep their newer state: an
        upsert key deleted meanwhile stays deleted, and a delete key
        re-inserted meanwhile stays an upsert.
        """
        for key in upserts:
            if key in records._store:
                records._dirty.add(key)
        for key in deletes:
            if key not in records._store:
                records._deleted.add(key)

    def flush(self):
        """Public API: force an immediate flush of the dirty buffer."""
        self._do_flush()

    def _maybe_flush(self):
        """Flush eagerly if the dirty buffer has hit the threshold."""
        if (
            isinstance(self.records, _BufferedMapping)
            and self.records.dirty_count >= self._dirty_threshold
        ):
            self._do_flush()

    # ------------------------------------------------------------------
    # StorageMechanism public API overrides
    # ------------------------------------------------------------------

    def store_record(self, key: str, cache_record_dict: dict):
        """Insert or overwrite *key* in the in-memory buffer and mark dirty.

        Overrides the base class to skip the synchronous ``save()`` call that
        would otherwise acquire the lock and call ``touch_store()`` on every
        write. Persistence is handled lazily by the dirty-buffer flush cycle.
        """
        self.records[str(key)] = cache_record_dict  # __setitem__ marks dirty
        self._maybe_flush()  # flush only if threshold hit

    # ------------------------------------------------------------------
    # StorageMechanism abstract hooks
    # ------------------------------------------------------------------

    def _impl__touch_store(self, filepath: Path) -> bool:
        """Create the SQLite file and records table if absent."""
        try:
            conn = self._open_connection(filepath)
            conn.execute(_CREATE_TABLE)
            conn.commit()
            return True
        except sqlite3.Error:
            return False

    def _impl__load(self, filepath: Path) -> MutableMapping[str, dict]:
        """Bulk-load every record from disk into a :class:`_BufferedMapping`.

        This is the only full-table scan that ever happens; afterwards all
        reads are served from RAM. Each row is unpacked into a plain dict
        with keys ``cast``, ``expiry``, ``timestamp``, ``data``.
        """
        conn = self._open_connection(filepath)
        rows = conn.execute(_SELECT_ALL).fetchall()
        return _BufferedMapping({row[0]: _row_to_record(row) for row in rows})

    def _impl__save(self, cache_records_dict: Dict[str, dict], filepath: Path):
        """Called by the base class after mutations; threshold-check only."""
        self._maybe_flush()

    def _impl__update_record(self, key: str, data: dict):
        """Merge *data* into the existing record (pure RAM) and mark dirty.

        Only the four recognised columns (``cast``, ``expiry``,
        ``timestamp``, ``data``) are merged; unknown keys in *data* are
        ignored so callers cannot introduce columns that do not exist in
        the schema.
        """
        existing = self.records.get(key, {})
        for col in _COLUMNS:
            if col in data:
                existing[col] = data[col]
        self.records[key] = existing  # __setitem__ marks dirty — no IO

    def _impl__erase_everything(self):
        """Clear the in-memory buffer and the on-disk table atomically."""
        with self._flush_lock:
            if isinstance(self.records, _BufferedMapping):
                self.records._dirty.clear()
                self.records._deleted.clear()
                self.records._store.clear()
            if self._conn:
                try:
                    self._conn.execute("BEGIN")
                    self._conn.execute(f"DELETE FROM {self._TABLE}")
                    self._conn.execute("COMMIT")
                except sqlite3.Error:
                    try:
                        self._conn.execute("ROLLBACK")
                    except sqlite3.Error:
                        pass
                    raise

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def close(self):
        """Flush all dirty records, stop the flusher, close the connection.

        Idempotent — safe to call more than once (e.g. explicitly and via
        the context manager). Always call this so pending writes are not
        lost when the process exits.
        """
        self._closed = True
        self._stop_event.set()  # wake the background thread immediately
        thread = getattr(self, "_bg_thread", None)
        if thread is not None and thread.is_alive():
            thread.join(timeout=self._flush_interval + 1.0)
        if self._conn is not None:
            self._do_flush()  # final flush — nothing left behind
            self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
            self._conn.close()
            self._conn = None

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()
@@ -0,0 +1,25 @@
1
+ """PyperCache.utils — public re-export surface.
2
+
3
+ All symbols that were previously importable from the flat ``utils`` module
4
+ remain importable from here, so existing ``from utils import X`` calls
5
+ continue to work after a simple search-and-replace of the import target.
6
+ """
7
+
8
+ from PyperCache.utils.collections import convert_defaultdict_to_dict
9
+ from PyperCache.utils.fs import ensure_dirs_exist, open_folder
10
+ from PyperCache.utils.patterns import ClassRepository, singleton
11
+ from PyperCache.utils.profiling import Profiler
12
+ from PyperCache.utils.serialization import DataSerializer, PickleStore
13
+ from PyperCache.utils.sentinel import UNSET
14
+
15
+ __all__ = [
16
+ "ClassRepository",
17
+ "convert_defaultdict_to_dict",
18
+ "DataSerializer",
19
+ "ensure_dirs_exist",
20
+ "open_folder",
21
+ "PickleStore",
22
+ "Profiler",
23
+ "singleton",
24
+ "UNSET",
25
+ ]
@@ -0,0 +1,28 @@
1
+ """Collection utilities."""
2
+
3
+ from collections import defaultdict
4
+ from typing import Any
5
+
6
+
7
def convert_defaultdict_to_dict(data: Any) -> Any:
    """Recursively replace nested ``defaultdict`` instances with plain ``dict``.

    Nested lists are traversed as well, so defaultdicts at any depth are
    converted. Plain dicts and lists are mutated in place; a top-level
    ``defaultdict`` is replaced by a new ``dict``.

    Args:
        data: The object to convert. Non-dict/list values are returned as-is.

    Returns:
        The same structure with every ``defaultdict`` replaced by a ``dict``.
    """
    if isinstance(data, defaultdict):
        data = dict(data)

    if isinstance(data, dict):
        for key in data:
            data[key] = convert_defaultdict_to_dict(data[key])
    elif isinstance(data, list):
        data[:] = [convert_defaultdict_to_dict(item) for item in data]

    return data
PyperCache/utils/fs.py ADDED
@@ -0,0 +1,46 @@
1
+ """Filesystem helpers: directory creation and platform file-explorer launcher."""
2
+
3
+ import os
4
+ import sys
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+
9
def ensure_dirs_exist(path: str) -> None:
    """Create all intermediate directories required for *path* if missing.

    If the final component of *path* contains a ``.`` it is treated as a
    filename and only its parent directories are created; otherwise the full
    path is treated as a directory tree and created in its entirety.
    (Heuristic caveat: a directory named e.g. ``v1.2`` would be mistaken for
    a file — callers are expected to pass filenames or plain directory names.)

    Args:
        path: A file or directory path whose parent directories should exist.
    """
    p = Path(path)

    if len(p.parts) <= 1:
        return  # Nothing to create for a bare filename/single component

    # Determine whether the last part looks like a file (has an extension).
    dir_path = p.parent if "." in p.parts[-1] else p

    # exist_ok avoids the check-then-create race the previous
    # exists()/os.makedirs() pair was vulnerable to: a concurrent caller
    # creating the directory between the two calls raised FileExistsError.
    dir_path.mkdir(parents=True, exist_ok=True)
32
+
33
+
34
def open_folder(path: Path) -> None:
    """Open *path* in the system file explorer, cross-platform.

    Args:
        path: Directory to open.
    """
    target = str(path.resolve())
    if sys.platform == "win32":
        os.startfile(target)
        return
    # macOS ships `open`; other POSIX systems use freedesktop's `xdg-open`.
    launcher = "open" if sys.platform == "darwin" else "xdg-open"
    subprocess.run([launcher, target], check=False)
@@ -0,0 +1,97 @@
1
+ """Design-pattern utilities: singleton decorator and class registry."""
2
+
3
+ from functools import wraps
4
+ from typing import Any, Callable, Dict, List, Type, TypeVar
5
+
6
+
7
T = TypeVar("T")


def singleton(cls: Type[T]) -> Callable[..., T]:
    """Class decorator that enforces the singleton pattern.

    The first call constructs and caches the instance; if the class defines
    ``__post_init__`` it is invoked immediately after construction. Every
    later call returns the cached instance, ignoring its arguments.

    NOTE(review): on a ``@dataclass``, ``__post_init__`` is already run by
    the generated ``__init__`` and would execute twice here — presumably
    intended for plain classes; confirm.

    Args:
        cls: The class to wrap as a singleton.

    Returns:
        A wrapper callable that always returns the single shared instance.
    """
    instances: Dict[type, Any] = {}

    @wraps(cls)
    def get_instance(*args: Any, **kwargs: Any) -> T:
        try:
            return instances[cls]
        except KeyError:
            obj = cls(*args, **kwargs)
            instances[cls] = obj
            if hasattr(obj, "__post_init__"):
                obj.__post_init__()
            return obj

    return get_instance
34
+
35
+
36
@singleton
class ClassRepository:
    """A singleton registry that maps class names to their types.

    Useful for dynamic instantiation by name — e.g. deserialising objects
    whose concrete type is stored as a string. Classes are indexed both by
    short name (``cls.__name__``) and by fully-qualified
    ``module.ClassName`` form.
    """

    def __init__(self) -> None:
        # short name -> type
        self.classes: Dict[str, type] = {}
        # fully-qualified "module.ClassName" -> type
        self.fqclasses: Dict[str, type] = {}

    def add_module_classes(self, globals_dict: Dict[str, Any]) -> None:
        """Discover and register every class defined in *globals_dict*.

        Typically called with ``globals()`` from the module you want to
        index. ``ClassRepository`` itself is excluded to avoid
        self-registration.

        Consistency fix: classes discovered here are now also registered
        under their fully-qualified name, matching :meth:`add_class`, so
        :meth:`get_class` fq lookups work regardless of how the class was
        registered. The short-name entry keeps the *binding* name so any
        aliases in the scanned namespace behave exactly as before.

        Args:
            globals_dict: The global namespace to scan (pass ``globals()``).
        """
        for name, obj in globals_dict.items():
            if name != "ClassRepository" and isinstance(obj, type):
                self.classes[name] = obj
                self.fqclasses[f"{obj.__module__}.{obj.__name__}"] = obj

    def add_class(self, cls: type) -> None:
        """Register a single class under its short and fully-qualified names.

        Args:
            cls: The class to register.

        Raises:
            TypeError: If *cls* is not a type.
        """
        if not isinstance(cls, type):
            raise TypeError("'cls' must be a type.")
        self.classes[cls.__name__] = cls
        fq = f"{cls.__module__}.{cls.__name__}"
        self.fqclasses[fq] = cls

    def get_class(self, class_name: str) -> type | None:
        """Return the class registered under *class_name*, or ``None``.

        Args:
            class_name: The short ``__name__`` or fully-qualified
                ``module.ClassName`` of the desired class.

        Returns:
            The registered class, or ``None`` if not found.
        """
        # Short name takes precedence; fall back to the fq index.
        if class_name in self.classes:
            return self.classes[class_name]
        return self.fqclasses.get(class_name)

    def list_classes(self) -> List[str]:
        """Return the short names of all registered classes.

        Returns:
            A list of class name strings.
        """
        return list(self.classes.keys())