checkpointer 2.14.9__tar.gz → 2.14.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {checkpointer-2.14.9 → checkpointer-2.14.11}/PKG-INFO +19 -1
- {checkpointer-2.14.9 → checkpointer-2.14.11}/README.md +18 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/checkpoint.py +16 -18
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/object_hash.py +11 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/storages/memory_storage.py +2 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/pyproject.toml +1 -1
- {checkpointer-2.14.9 → checkpointer-2.14.11}/uv.lock +1 -1
- {checkpointer-2.14.9 → checkpointer-2.14.11}/.gitignore +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/.python-version +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/ATTRIBUTION.md +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/LICENSE +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/__init__.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/fn_ident.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/fn_string.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/import_mappings.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/print_checkpoint.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/storages/__init__.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/storages/pickle_storage.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/storages/storage.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/types.py +0 -0
- {checkpointer-2.14.9 → checkpointer-2.14.11}/checkpointer/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: checkpointer
|
|
3
|
-
Version: 2.14.9
|
|
3
|
+
Version: 2.14.11
|
|
4
4
|
Summary: checkpointer adds code-aware caching to Python functions, maintaining correctness and speeding up execution as your code changes.
|
|
5
5
|
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
|
6
6
|
Author: Hampus Hallman
|
|
@@ -177,6 +177,24 @@ def process(
|
|
|
177
177
|
|
|
178
178
|
In this example, the hash for `numbers` ignores order, `data_file` is hashed based on its contents rather than path, and changes to `log` don't affect caching.
|
|
179
179
|
|
|
180
|
+
## 🔑 Custom Instance Hashing with `__objecthash__`
|
|
181
|
+
|
|
182
|
+
Any class can implement `__objecthash__` to control how its instances are hashed by `checkpointer`. When an instance is encountered during hashing, `checkpointer` calls `__objecthash__()` and hashes the returned value instead of inspecting the object's internals.
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
class Model:
|
|
186
|
+
def __init__(self, id: str, weights: list[float]):
|
|
187
|
+
self.id = id
|
|
188
|
+
self.weights = weights
|
|
189
|
+
|
|
190
|
+
def __objecthash__(self):
|
|
191
|
+
return self.id # Hash instances by their id only
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
The return value of `__objecthash__` can be any Python value — a string, int, tuple, dict, or anything else `checkpointer` knows how to hash. This makes it easy to define a precise, stable identity for your objects without relying on pickle or attribute inspection.
|
|
195
|
+
|
|
196
|
+
Once defined, `__objecthash__` applies everywhere the class is used — as a function argument, captured variable, or nested value — with no need for `HashBy` annotations at each call site.
|
|
197
|
+
|
|
180
198
|
## 🎯 Capturing Global Variables
|
|
181
199
|
|
|
182
200
|
`checkpointer` can include **captured global variables** in call hashes - these are globals your function reads during execution that may affect results.
|
|
@@ -160,6 +160,24 @@ def process(
|
|
|
160
160
|
|
|
161
161
|
In this example, the hash for `numbers` ignores order, `data_file` is hashed based on its contents rather than path, and changes to `log` don't affect caching.
|
|
162
162
|
|
|
163
|
+
## 🔑 Custom Instance Hashing with `__objecthash__`
|
|
164
|
+
|
|
165
|
+
Any class can implement `__objecthash__` to control how its instances are hashed by `checkpointer`. When an instance is encountered during hashing, `checkpointer` calls `__objecthash__()` and hashes the returned value instead of inspecting the object's internals.
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
class Model:
|
|
169
|
+
def __init__(self, id: str, weights: list[float]):
|
|
170
|
+
self.id = id
|
|
171
|
+
self.weights = weights
|
|
172
|
+
|
|
173
|
+
def __objecthash__(self):
|
|
174
|
+
return self.id # Hash instances by their id only
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
The return value of `__objecthash__` can be any Python value — a string, int, tuple, dict, or anything else `checkpointer` knows how to hash. This makes it easy to define a precise, stable identity for your objects without relying on pickle or attribute inspection.
|
|
178
|
+
|
|
179
|
+
Once defined, `__objecthash__` applies everywhere the class is used — as a function argument, captured variable, or nested value — with no need for `HashBy` annotations at each call site.
|
|
180
|
+
|
|
163
181
|
## 🎯 Capturing Global Variables
|
|
164
182
|
|
|
165
183
|
`checkpointer` can include **captured global variables** in call hashes - these are globals your function reads during execution that may affect results.
|
|
@@ -5,6 +5,7 @@ from datetime import datetime, timedelta
|
|
|
5
5
|
from functools import cached_property, update_wrapper
|
|
6
6
|
from inspect import Parameter, iscoroutine, signature, unwrap
|
|
7
7
|
from pathlib import Path
|
|
8
|
+
from types import MethodType
|
|
8
9
|
from typing import (
|
|
9
10
|
Callable, Concatenate, Coroutine, Generic, Iterable,
|
|
10
11
|
Literal, Self, Type, TypedDict, Unpack, overload,
|
|
@@ -96,10 +97,10 @@ class FunctionIdent:
|
|
|
96
97
|
@cached_property
|
|
97
98
|
def fn_hash(self) -> str:
|
|
98
99
|
if self.is_static():
|
|
99
|
-
return
|
|
100
|
+
return ObjectHash(self.checkpointer.fn_hash_from, digest_size=16).hexdigest()
|
|
100
101
|
depends = self.deep_idents(past_static=False)
|
|
101
102
|
deep_hashes = [d.fn_hash if d.is_static() else d.raw_ident.fn_hash for d in depends]
|
|
102
|
-
return
|
|
103
|
+
return ObjectHash(digest_size=16).write_text(iter=deep_hashes).hexdigest()
|
|
103
104
|
|
|
104
105
|
@cached_property
|
|
105
106
|
def capturables(self) -> list[Capturable]:
|
|
@@ -151,7 +152,7 @@ class CachedFunction(Generic[Fn]):
|
|
|
151
152
|
@property
|
|
152
153
|
def fn(self) -> Fn:
|
|
153
154
|
if self.bound:
|
|
154
|
-
return self.ident.fn
|
|
155
|
+
return MethodType(self.ident.fn, self.bound[0]) # type: ignore
|
|
155
156
|
return self.ident.fn # type: ignore
|
|
156
157
|
|
|
157
158
|
@cached_property
|
|
@@ -184,11 +185,11 @@ class CachedFunction(Generic[Fn]):
|
|
|
184
185
|
elif key == b"**":
|
|
185
186
|
for key in kw.keys() - ident.arg_names:
|
|
186
187
|
named_args[key] = hash_by(named_args[key])
|
|
187
|
-
|
|
188
|
+
return ObjectHash(digest_size=16) \
|
|
188
189
|
.update(header="NAMED", iter=flatten(sorted(named_args.items()))) \
|
|
189
190
|
.update(header="POS", iter=pos_args) \
|
|
190
|
-
.update(header="CAPTURED", iter=flatten(c.capture() for c in ident.capturables))
|
|
191
|
-
|
|
191
|
+
.update(header="CAPTURED", iter=flatten(c.capture() for c in ident.capturables)) \
|
|
192
|
+
.hexdigest()
|
|
192
193
|
|
|
193
194
|
def get_call_hash(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> str:
|
|
194
195
|
return self._get_call_hash(args, kw)
|
|
@@ -203,15 +204,13 @@ class CachedFunction(Generic[Fn]):
|
|
|
203
204
|
def is_expired(self, call_hash: str) -> bool:
|
|
204
205
|
return not self.storage.exists(call_hash) or self.storage.expired(call_hash)
|
|
205
206
|
|
|
206
|
-
def _call(self: CachedFunction[Callable[P, R]], args: tuple, kw: dict,
|
|
207
|
-
params = self.ident.checkpointer
|
|
208
|
-
if not cached:
|
|
209
|
-
return self.fn(*args, **kw)
|
|
207
|
+
def _call(self: CachedFunction[Callable[P, R]], args: tuple, kw: dict, rerun=False) -> R:
|
|
210
208
|
call_hash = self._get_call_hash(args, kw)
|
|
211
209
|
call_id = f"{self.storage.fn_id()}/{call_hash}"
|
|
210
|
+
verbosity = self.ident.checkpointer.verbosity
|
|
212
211
|
|
|
213
212
|
if rerun or self.is_expired(call_hash):
|
|
214
|
-
print_checkpoint(
|
|
213
|
+
print_checkpoint(verbosity >= 1, "MEMORIZING", call_id, "blue")
|
|
215
214
|
data = self.fn(*args, **kw)
|
|
216
215
|
if iscoroutine(data):
|
|
217
216
|
return self._store_coroutine(call_hash, data)
|
|
@@ -219,23 +218,22 @@ class CachedFunction(Generic[Fn]):
|
|
|
219
218
|
|
|
220
219
|
try:
|
|
221
220
|
data = self.storage.load(call_hash)
|
|
222
|
-
print_checkpoint(
|
|
221
|
+
print_checkpoint(verbosity >= 2, "REMEMBERED", call_id, "green")
|
|
223
222
|
if isinstance(data, AwaitableValue):
|
|
224
223
|
return to_coroutine(data.value) # type: ignore
|
|
225
224
|
return data
|
|
226
225
|
except (EOFError, FileNotFoundError):
|
|
227
226
|
pass
|
|
228
|
-
print_checkpoint(
|
|
227
|
+
print_checkpoint(verbosity >= 1, "CORRUPTED", call_id, "yellow")
|
|
229
228
|
return self._call(args, kw, rerun=True)
|
|
230
229
|
|
|
231
230
|
def __call__(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
|
|
232
|
-
|
|
231
|
+
if not self.ident.checkpointer.when:
|
|
232
|
+
return self.fn(*args, **kw)
|
|
233
|
+
return self._call(args, kw)
|
|
233
234
|
|
|
234
235
|
def cached(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
|
|
235
|
-
return self._call(args, kw
|
|
236
|
-
|
|
237
|
-
def uncached(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
|
|
238
|
-
return self._call(args, kw, False)
|
|
236
|
+
return self._call(args, kw)
|
|
239
237
|
|
|
240
238
|
def rerun(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
|
|
241
239
|
return self._call(args, kw, rerun=True)
|
|
@@ -68,6 +68,17 @@ class ObjectHash:
|
|
|
68
68
|
def __eq__(self, value: object) -> bool:
|
|
69
69
|
return isinstance(value, ObjectHash) and str(self) == str(value)
|
|
70
70
|
|
|
71
|
+
def __hash__(self) -> int:
|
|
72
|
+
self.update = self.write_bytes = self.write_text = self._update_immutable
|
|
73
|
+
return hash(str(self))
|
|
74
|
+
|
|
75
|
+
def _update_immutable(self, *_, **__) -> Self:
|
|
76
|
+
raise TypeError(
|
|
77
|
+
"This ObjectHash instance is now immutable and can’t accept more data. "
|
|
78
|
+
"You already called __hash__, which freezes it. "
|
|
79
|
+
"Create a new ObjectHash or use .copy() if you need to continue hashing."
|
|
80
|
+
)
|
|
81
|
+
|
|
71
82
|
def nested_hash(self, *objs: object) -> str:
|
|
72
83
|
return ObjectHash(iter=objs, tolerable=self.tolerable.value).hexdigest()
|
|
73
84
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|