emboss 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
emboss/__init__.py ADDED
@@ -0,0 +1,35 @@
1
+ """emboss — On-Disk Input-keyed Cache.
2
+
3
+ Disk-backed memoization built on `diskcache`, with auto-detection of
4
+ pydantic v2 `BaseModel` return types (encoded via `model_dump`, decoded
5
+ via `model_validate`) so models defined in `__main__` round-trip across
6
+ script invocations.
7
+
8
+ Usage::
9
+
10
+ import diskcache
11
+ from emboss import cached
12
+
13
+ cache = diskcache.Cache("/tmp/my-cache")
14
+
15
+ @cached(cache)
16
+ def expensive(url: str) -> dict:
17
+ return requests.get(url).json()
18
+
19
+ # pydantic BaseModel returns are auto-encoded / decoded
20
+ from pydantic import BaseModel
21
+
22
+ class User(BaseModel):
23
+ name: str
24
+
25
+ @cached(cache)
26
+ def get_user(uid: int) -> User | None:
27
+ return User.model_validate(requests.get(f"/users/{uid}").json())
28
+
29
+ See README.md for the full feature list.
30
+ """
31
+
32
+ from emboss._cached import cached, safe_jsonable_encoder
33
+
34
+ __version__ = "0.1.0"
35
+ __all__ = ["cached", "safe_jsonable_encoder"]
emboss/_cached.py ADDED
@@ -0,0 +1,187 @@
1
+ """Internal: `@cached` decorator implementation. Public API lives in `emboss.__init__`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import functools
7
+ import hashlib
8
+ import inspect
9
+ import json
10
+ import logging
11
+ import types
12
+ import typing
13
+ from collections.abc import Callable
14
+ from typing import Any, TypeVar, Union
15
+
16
+ import diskcache
17
+
18
+ try:
19
+ from pydantic import BaseModel
20
+ except ImportError: # pragma: no cover — pydantic is optional for callers
21
+ BaseModel = None # type: ignore[assignment]
22
+
23
+ T = TypeVar("T")
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Sentinel for "key absent from cache" — lets None be a valid cached value.
27
+ _MISSING = object()
28
+
29
+
30
def safe_jsonable_encoder(obj: Any) -> Any:
    """Convert ``obj`` into a JSON-serializable structure for cache-key hashing.

    Conversion rules, applied in order:

    * ``None``, ``bool``, ``int``, ``float`` and ``str`` are returned unchanged.
    * ``list`` / ``tuple`` become lists with every element encoded.
    * ``dict`` keys are stringified and values encoded.
    * ``set`` / ``frozenset`` become deterministically ordered lists.
    * ``bytes`` are decoded as UTF-8; undecodable bytes are kept as
      backslash escapes so distinct payloads do not collapse to the same key.
    * ``arrow.Arrow`` (when installed), ``datetime``, ``date`` and ``time``
      become ISO-8601 strings; ``Path`` becomes ``str(path)``.
    * pydantic models are dumped and the dump is encoded recursively.
    * Any other object with a ``__dict__`` is encoded via that dict; anything
      left over falls back to ``str(obj)``.

    Args:
        obj: Arbitrary value taken from a cached function's arguments.

    Returns:
        A structure built only from JSON-friendly primitives, lists and
        string-keyed dicts.
    """
    from datetime import date, datetime, time
    from pathlib import Path

    if obj is None or isinstance(obj, (bool, int, float, str)):
        return obj
    if isinstance(obj, (list, tuple)):
        return [safe_jsonable_encoder(item) for item in obj]
    if isinstance(obj, dict):
        return {str(k): safe_jsonable_encoder(v) for k, v in obj.items()}
    if isinstance(obj, (set, frozenset)):
        items = [safe_jsonable_encoder(item) for item in obj]
        try:
            # Natural ordering keeps keys identical for homogeneous sets.
            return sorted(items)
        except TypeError:
            # Mixed or unorderable members (e.g. {1, "a"}): order by their
            # JSON text instead so the result is still deterministic.
            return sorted(
                items,
                key=lambda item: json.dumps(item, sort_keys=True, default=str),
            )
    if isinstance(obj, bytes):
        # Valid UTF-8 decodes exactly as before; invalid bytes are escaped
        # rather than dropped, so b"\xff" and b"" no longer share a key.
        return obj.decode("utf-8", errors="backslashreplace")

    # arrow is an optional third-party dependency.
    try:
        import arrow
    except ImportError:
        arrow = None
    if arrow is not None and isinstance(obj, arrow.Arrow):
        return obj.isoformat()

    if isinstance(obj, (datetime, date, time)):
        return obj.isoformat()
    if isinstance(obj, Path):
        return str(obj)
    if BaseModel is not None and isinstance(obj, BaseModel):
        # Re-encode the dump: model fields may hold datetimes, sets, paths...
        # which json.dumps cannot serialize on its own.
        return safe_jsonable_encoder(obj.model_dump())
    if hasattr(obj, "__dict__"):
        return safe_jsonable_encoder(obj.__dict__)
    return str(obj)
68
+
69
+
70
def _is_basemodel_class(cls: Any) -> bool:
    """Return True when ``cls`` is pydantic's ``BaseModel`` or a subclass of it.

    Returns False when pydantic is not installed or ``cls`` is not a class.
    """
    if BaseModel is None or not isinstance(cls, type):
        return False
    try:
        return issubclass(cls, BaseModel)
    except TypeError:
        # On Python < 3.11 a parameterized generic such as ``list[int]``
        # passes ``isinstance(x, type)`` but is rejected by ``issubclass``.
        return False
72
+
73
+
74
def _model_info(annotation: Any) -> tuple[type | None, str]:
    """Return `(Model class, container)` extracted from a return annotation.

    `container` is one of `"none"` (single value), `"list"`, or `"dict"`.
    Returns `(None, "none")` when no BaseModel is in play (decorator falls back
    to pass-through encode/decode).

    Recognised shapes: `Model`, `list[Model]`, `dict[K, Model]`, a union with
    a bare `Model` member (e.g. `Model | None`), and an optional wrapping one
    of the container shapes (e.g. `list[Model] | None`).
    """
    if BaseModel is None or annotation is inspect.Parameter.empty or annotation is None:
        return None, "none"
    if _is_basemodel_class(annotation):
        return annotation, "none"

    origin = typing.get_origin(annotation)
    args = typing.get_args(annotation)

    if origin in (Union, types.UnionType):
        # A bare model member wins, e.g. `Model | None`.
        for arg in args:
            if _is_basemodel_class(arg):
                return arg, "none"
        # Otherwise only look inside an unambiguous optional such as
        # `list[Model] | None`; unions of several non-None shapes cannot be
        # mapped to a single container and stay pass-through.
        non_none = [arg for arg in args if arg is not type(None)]
        if len(non_none) == 1:
            return _model_info(non_none[0])
        return None, "none"
    if origin is list and args and _is_basemodel_class(args[0]):
        return args[0], "list"
    if origin is dict and len(args) == 2 and _is_basemodel_class(args[1]):
        return args[1], "dict"
    return None, "none"
101
+
102
+
103
+ def _encode(value: Any, model_cls: type | None, container: str) -> Any:
104
+ """Convert pydantic models to plain dicts before pickling."""
105
+ if value is None or model_cls is None:
106
+ return value
107
+ if container == "list":
108
+ return [v.model_dump() if isinstance(v, model_cls) else v for v in value]
109
+ if container == "dict":
110
+ return {k: (v.model_dump() if isinstance(v, model_cls) else v) for k, v in value.items()}
111
+ if isinstance(value, model_cls):
112
+ return value.model_dump()
113
+ return value
114
+
115
+
116
+ def _decode(value: Any, model_cls: type | None, container: str) -> Any:
117
+ """Rehydrate dicts into pydantic models on cache hit."""
118
+ if value is None or model_cls is None:
119
+ return value
120
+ if container == "list":
121
+ return [model_cls.model_validate(v) if isinstance(v, dict) else v for v in value]
122
+ if container == "dict":
123
+ return {k: (model_cls.model_validate(v) if isinstance(v, dict) else v) for k, v in value.items()}
124
+ if isinstance(value, dict):
125
+ return model_cls.model_validate(value)
126
+ return value
127
+
128
+
129
def cached(
    cache: diskcache.Cache | None = None,
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Disk-backed memoization decorator.

    Detects `BaseModel` / `list[Model]` / `dict[str, Model]` return annotations
    and stores them as dicts (rehydrated on read) so model classes defined in
    `__main__` round-trip across script invocations.

    The cache key is an MD5 fingerprint of the function name, the function
    source (so editing the body invalidates old entries) and the encoded
    arguments. Keyword arguments are ordered by name, so `f(a=1, b=2)` and
    `f(b=2, a=1)` share one entry.

    Args:
        cache: The `diskcache.Cache` to store results in. When omitted a new
            `diskcache.Cache()` is created.

    Returns:
        A decorator. Coroutine functions are wrapped by a coroutine function,
        so `inspect.iscoroutinefunction` stays true for the wrapper; other
        callables get a plain synchronous wrapper.
    """
    if cache is None:
        cache = diskcache.Cache()

    def _digest(text: str) -> str:
        # MD5 only fingerprints keys here. Marking it as non-security keeps
        # the call usable on FIPS-restricted builds; digests are unchanged.
        return hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        func_name = getattr(func, "__name__", type(func).__name__)
        try:
            func_source = inspect.getsource(func)
        except (OSError, TypeError):
            # No retrievable source (REPL / exec'd code, builtins, partials):
            # fall back to the bytecode, or just the name as a last resort.
            code = getattr(func, "__code__", None)
            func_source = code.co_code.hex() if code is not None else func_name
        func_hash = _digest(func_source)

        try:
            # eval_str resolves string annotations, which is what every
            # annotation becomes under `from __future__ import annotations`.
            return_anno = inspect.signature(func, eval_str=True).return_annotation
        except Exception:
            # Evaluating annotations can raise anything (NameError for
            # TYPE_CHECKING-only or local names, ...); retry unevaluated.
            try:
                return_anno = inspect.signature(func).return_annotation
            except (TypeError, ValueError):
                return_anno = inspect.Parameter.empty
        model_cls, container = _model_info(return_anno)

        def make_key(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
            json_args = [safe_jsonable_encoder(arg) for arg in args]
            # Sorted by name so keyword order does not change the key.
            json_kwargs = {
                name: safe_jsonable_encoder(kwargs[name]) for name in sorted(kwargs)
            }
            arg_hash = _digest(f"{json.dumps(json_args)}{json.dumps(json_kwargs)}")
            return _digest(f"{func_name}{func_hash}{arg_hash}")

        if inspect.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                key = make_key(args, kwargs)
                raw = cache.get(key, default=_MISSING)
                if raw is not _MISSING:
                    return _decode(raw, model_cls, container)
                result = await func(*args, **kwargs)  # type: ignore[misc]
                cache.set(key, _encode(result, model_cls, container))
                return result

            return async_wrapper  # type: ignore[return-value]

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            key = make_key(args, kwargs)
            # _MISSING (not None) marks a miss, so a cached None is a real hit.
            raw = cache.get(key, default=_MISSING)
            if raw is not _MISSING:
                return _decode(raw, model_cls, container)  # type: ignore[return-value]
            result = func(*args, **kwargs)
            cache.set(key, _encode(result, model_cls, container))
            return result

        return wrapper

    return decorator
@@ -0,0 +1,118 @@
1
+ Metadata-Version: 2.4
2
+ Name: emboss
3
+ Version: 0.1.0
4
+ Summary: On-Disk Input-keyed Cache — disk-backed memoization with pydantic-aware encoding
5
+ Author-email: Daniel Hails <emboss@hails.info>
6
+ Project-URL: Homepage, https://github.com/DJRHails/emboss
7
+ Project-URL: Bug Tracker, https://github.com/DJRHails/emboss/issues
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: diskcache>=5
12
+ Provides-Extra: pydantic
13
+ Requires-Dist: pydantic>=2; extra == "pydantic"
14
+ Dynamic: license-file
15
+
16
+ # emboss
17
+
18
+ **O**n-**D**isk **I**nput-keyed **C**ache — disk-backed memoization with pydantic-aware encoding.
19
+
20
+ Version: 0.1.0
21
+
22
+ ```bash
23
+ pip install emboss # core (just diskcache)
24
+ pip install emboss[pydantic] # + pydantic v2 BaseModel support
25
+ ```
26
+
27
+ ## Why
28
+
29
+ `functools.lru_cache` is per-process. `diskcache` survives invocations but pickles values as-is — which breaks the moment your cached return type is a pydantic `BaseModel` defined in `__main__` (the new process can't unpickle `__main__.MyModel`). `emboss` fixes that by detecting BaseModel return annotations and converting to/from plain dicts at the cache boundary.
30
+
31
+ Plus: a `None`-aware sentinel so functions returning `None` actually cache instead of re-running every call.
32
+
33
+ ## Quick start
34
+
35
+ ```python
36
+ import diskcache
37
+ from emboss import cached
38
+
39
+ cache = diskcache.Cache("/tmp/my-cache")
40
+
41
+ @cached(cache)
42
+ def fetch(url: str) -> dict:
43
+ import requests
44
+ return requests.get(url).json()
45
+
46
+ fetch("https://api.example.com/users/1") # network
47
+ fetch("https://api.example.com/users/1") # cached, no network
48
+ ```
49
+
50
+ ## Pydantic BaseModel returns
51
+
52
+ `emboss` reads the function's return type annotation. If it sees a `BaseModel`, `list[BaseModel]`, `dict[str, BaseModel]`, or `BaseModel | None`, it serialises via `model.model_dump()` before pickling and rehydrates via `Model.model_validate(...)` on read. The cached value on disk is a plain dict — round-trips cleanly across process boundaries, even for models defined in `__main__`.
53
+
54
+ ```python
55
+ from pydantic import BaseModel
56
+
57
+ class User(BaseModel):
58
+ id: int
59
+ name: str
60
+
61
+ @cached(cache)
62
+ def get_user(uid: int) -> User | None:
63
+ ...
64
+
65
+ @cached(cache)
66
+ def list_users() -> list[User]:
67
+ ...
68
+
69
+ @cached(cache)
70
+ def users_by_id() -> dict[str, User]:
71
+ ...
72
+ ```
73
+
74
+ Functions returning non-BaseModel types continue to pickle as-is — fully backward-compatible.
75
+
76
+ ## None caching
77
+
78
+ ```python
79
+ @cached(cache)
80
+ def lookup(query: str) -> str | None:
81
+ return external_api(query)
82
+
83
+ lookup("missing") # returns None, cached
84
+ lookup("missing") # returns cached None, no re-run
85
+ ```
86
+
87
+ The previous behaviour (skip-cache-on-None) is replaced by a `_MISSING` sentinel internally so `None` is a valid cached value.
88
+
89
+ ## Cache key
90
+
91
+ Arguments are converted via `safe_jsonable_encoder` (recursive JSON-friendly conversion handling sets, bytes, dates, `Path`, BaseModel, and objects with `__dict__`), then hashed with the function source + name. Re-decorating the same function body → same key; changing the function body → new key (transparent cache invalidation on code change).
92
+
93
+ ## Async support
94
+
95
+ ```python
96
+ @cached(cache)
97
+ async def fetch_async(url: str) -> dict:
98
+ async with httpx.AsyncClient() as c:
99
+ return (await c.get(url)).json()
100
+ ```
101
+
102
+ Cache hits return a fresh awaitable wrapping the cached value, so the call site keeps `await`-ing as normal.
103
+
104
+ ## Daily-rolling caches
105
+
106
+ The `diskcache.Cache` instance you pass is yours to manage. A common pattern for "expire daily" without thinking about it:
107
+
108
+ ```python
109
+ from datetime import date
110
+ import diskcache
111
+ cache = diskcache.Cache(f"/tmp/my-cache-{date.today()}")
112
+ ```
113
+
114
+ Each new day → new dir → effectively fresh cache. Old dirs land in `/tmp` and get reaped by the OS.
115
+
116
+ ## License
117
+
118
+ MIT.
@@ -0,0 +1,8 @@
1
+ emboss/__init__.py,sha256=P-gRsT4MWIoeWW9KMeb3_t6NmTJlqmSQxtZCFrQQWeM,930
2
+ emboss/_cached.py,sha256=x-99bi6j7_hqXorTeMy3g4u1b94mG5_H8H6f9oQ3r1Q,6504
3
+ emboss-0.1.0.dist-info/licenses/LICENSE,sha256=RjexM-UPby8fRPlfzBjGcbGkb-awdO-A5pngqiggAAw,1069
4
+ tests/test_cached.py,sha256=DtkCO6IRhaZH6E8j_eWDF6p_IzmyzCsnfADwdGQUe0k,2195
5
+ emboss-0.1.0.dist-info/METADATA,sha256=xaGH2YeSre0bLFD-CxtZiisM0wHj9ddYTVv9KQT40Ks,3773
6
+ emboss-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ emboss-0.1.0.dist-info/top_level.txt,sha256=d5kCPMCoP_GksIdWZk4T9oTU6NiPm2Z6cWuJgyHY3Yo,13
8
+ emboss-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Daniel Hails
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ emboss
2
+ tests
tests/test_cached.py ADDED
@@ -0,0 +1,106 @@
1
+ """End-to-end round-trip tests for emboss.cached."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import diskcache
6
+ import pytest
7
+ from pydantic import BaseModel
8
+
9
+ from emboss import cached
10
+
11
+
12
# Minimal pydantic model shared by the round-trip tests below.
class M(BaseModel):
    name: str
    n: int = 0
15
+
16
+
17
@pytest.fixture
def cache(tmp_path):
    """Yield a diskcache.Cache under pytest's `tmp_path`; close it afterwards."""
    cache_dir = tmp_path / "cache"
    store = diskcache.Cache(str(cache_dir))
    yield store
    store.close()
22
+
23
+
24
def test_plain_dict_round_trip(cache):
    """A plain dict result is computed once, then served from the cache."""
    seen = []

    @cached(cache)
    def double(x: int) -> dict:
        seen.append(x)
        return {"value": x * 2}

    first = double(3)
    second = double(3)

    assert first == {"value": 6}
    assert second == {"value": 6}
    assert len(seen) == 1, "second call should be cached"
35
+
36
+
37
def test_basemodel_round_trip(cache):
    """A single model result comes back as an equal model on the cached call."""
    seen = []

    @cached(cache)
    def build() -> M:
        seen.append(None)
        return M(name="solo", n=1)

    fresh = build()
    from_cache = build()

    assert isinstance(fresh, M) and isinstance(from_cache, M)
    assert fresh == from_cache
    assert len(seen) == 1
50
+
51
+
52
def test_list_of_basemodel_round_trip(cache):
    """A list of models round-trips through the cache as a list of models."""
    seen = []

    @cached(cache)
    def build() -> list[M]:
        seen.append(None)
        return [M(name="a"), M(name="b", n=2)]

    assert build() == build()
    assert len(seen) == 1
    # All elements are still pydantic models, not dicts
    for item in build():
        assert isinstance(item, M)
64
+
65
+
66
def test_dict_of_basemodel_round_trip(cache):
    """A dict of models round-trips through the cache with model values intact."""
    seen = []

    @cached(cache)
    def build() -> dict[str, M]:
        seen.append(None)
        return {"x": M(name="x", n=9)}

    assert build() == build()
    assert len(seen) == 1
    cached_value = build()["x"]
    assert isinstance(cached_value, M)
77
+
78
+
79
def test_optional_basemodel_none_caches(cache):
    """With a `M | None` return, both the None and the model result are cached."""
    seen = []

    @cached(cache)
    def maybe(x: int) -> M | None:
        seen.append(x)
        if x < 0:
            return None
        return M(name="opt", n=x)

    for _ in range(2):
        assert maybe(-1) is None
    for _ in range(2):
        assert maybe(5).n == 5
    # 2 distinct keys (one for -1, one for 5), each computed once
    assert len(seen) == 2
93
+
94
+
95
def test_none_return_caches(cache):
    """Pre-emboss behaviour skipped caching None; we want None cached too.

    Checks the returned values as well as the call count, so a cache hit that
    yields something other than None cannot pass unnoticed.
    """
    calls = {"n": 0}

    @cached(cache)
    def f(x: str) -> str | None:
        calls["n"] += 1
        return None

    assert f("any") is None
    assert f("any") is None
    assert calls["n"] == 1
106
+ assert calls["n"] == 1