checkpointer 2.14.6__tar.gz → 2.14.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {checkpointer-2.14.6 → checkpointer-2.14.8}/PKG-INFO +2 -1
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/checkpoint.py +25 -18
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/fn_ident.py +3 -2
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/object_hash.py +5 -7
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/pickle_storage.py +10 -10
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/storage.py +3 -3
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/types.py +1 -1
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/utils.py +8 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/pyproject.toml +5 -4
- {checkpointer-2.14.6 → checkpointer-2.14.8}/uv.lock +272 -237
- {checkpointer-2.14.6 → checkpointer-2.14.8}/.gitignore +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/.python-version +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/ATTRIBUTION.md +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/LICENSE +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/README.md +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/__init__.py +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/fn_string.py +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/import_mappings.py +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/print_checkpoint.py +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/__init__.py +0 -0
- {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/memory_storage.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: checkpointer
|
|
3
|
-
Version: 2.14.6
|
|
3
|
+
Version: 2.14.8
|
|
4
4
|
Summary: checkpointer adds code-aware caching to Python functions, maintaining correctness and speeding up execution as your code changes.
|
|
5
5
|
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
|
6
6
|
Author: Hampus Hallman
|
|
@@ -11,6 +11,7 @@ Keywords: async,cache,caching,data analysis,data processing,fast,hashing,invalid
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
15
|
Requires-Python: >=3.11
|
|
15
16
|
Description-Content-Type: text/markdown
|
|
16
17
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
import asyncio
|
|
2
3
|
import re
|
|
3
4
|
from datetime import datetime, timedelta
|
|
4
5
|
from functools import cached_property, update_wrapper
|
|
@@ -13,7 +14,7 @@ from .object_hash import ObjectHash
|
|
|
13
14
|
from .print_checkpoint import print_checkpoint
|
|
14
15
|
from .storages import STORAGE_MAP, Storage, StorageType
|
|
15
16
|
from .types import AwaitableValue, C, Coro, Fn, P, R, T, hash_by_from_annotation
|
|
16
|
-
from .utils import flatten, to_coroutine
|
|
17
|
+
from .utils import flatten, is_asyncio, to_coroutine
|
|
17
18
|
|
|
18
19
|
DEFAULT_DIR = Path.home() / ".cache/checkpoints"
|
|
19
20
|
|
|
@@ -161,7 +162,7 @@ class CachedFunction(Generic[Fn]):
|
|
|
161
162
|
def cleanup(self):
|
|
162
163
|
return self.storage.cleanup
|
|
163
164
|
|
|
164
|
-
def reinit(self, recursive=True) ->
|
|
165
|
+
def reinit(self, recursive=True) -> Self:
|
|
165
166
|
depend_idents = list(self.ident.deep_idents()) if recursive else [self.ident]
|
|
166
167
|
for ident in depend_idents: ident.reset()
|
|
167
168
|
for ident in depend_idents: ident.fn_hash
|
|
@@ -191,28 +192,32 @@ class CachedFunction(Generic[Fn]):
|
|
|
191
192
|
return self._get_call_hash(args, kw)
|
|
192
193
|
|
|
193
194
|
async def _store_coroutine(self, call_hash: str, coroutine: Coroutine):
|
|
194
|
-
|
|
195
|
+
if is_asyncio():
|
|
196
|
+
data = await asyncio.to_thread(self.storage.store, call_hash, AwaitableValue(await coroutine))
|
|
197
|
+
return data.value
|
|
198
|
+
else:
|
|
199
|
+
return self.storage.store(call_hash, AwaitableValue(await coroutine)).value
|
|
195
200
|
|
|
196
|
-
def
|
|
201
|
+
def is_expired(self, call_hash: str) -> bool:
|
|
202
|
+
return not self.storage.exists(call_hash) or self.storage.expired(call_hash)
|
|
203
|
+
|
|
204
|
+
def _call(self: CachedFunction[Callable[P, R]], args: tuple, kw: dict, rerun=False, cached=False) -> R:
|
|
197
205
|
full_args = self.bound + args
|
|
198
206
|
params = self.ident.checkpointer
|
|
199
|
-
|
|
200
|
-
if not params.when:
|
|
207
|
+
if not params.when and not cached:
|
|
201
208
|
return self.fn(*full_args, **kw)
|
|
202
|
-
|
|
203
209
|
call_hash = self._get_call_hash(args, kw)
|
|
204
|
-
call_id = f"{storage.fn_id()}/{call_hash}"
|
|
205
|
-
refresh = rerun or not storage.exists(call_hash) or storage.expired(call_hash)
|
|
210
|
+
call_id = f"{self.storage.fn_id()}/{call_hash}"
|
|
206
211
|
|
|
207
|
-
if
|
|
212
|
+
if rerun or self.is_expired(call_hash):
|
|
208
213
|
print_checkpoint(params.verbosity >= 1, "MEMORIZING", call_id, "blue")
|
|
209
214
|
data = self.fn(*full_args, **kw)
|
|
210
215
|
if iscoroutine(data):
|
|
211
216
|
return self._store_coroutine(call_hash, data)
|
|
212
|
-
return storage.store(call_hash, data)
|
|
217
|
+
return self.storage.store(call_hash, data)
|
|
213
218
|
|
|
214
219
|
try:
|
|
215
|
-
data = storage.load(call_hash)
|
|
220
|
+
data = self.storage.load(call_hash)
|
|
216
221
|
print_checkpoint(params.verbosity >= 2, "REMEMBERED", call_id, "green")
|
|
217
222
|
if isinstance(data, AwaitableValue):
|
|
218
223
|
return to_coroutine(data.value) # type: ignore
|
|
@@ -220,18 +225,21 @@ class CachedFunction(Generic[Fn]):
|
|
|
220
225
|
except (EOFError, FileNotFoundError):
|
|
221
226
|
pass
|
|
222
227
|
print_checkpoint(params.verbosity >= 1, "CORRUPTED", call_id, "yellow")
|
|
223
|
-
return self._call(args, kw, True)
|
|
228
|
+
return self._call(args, kw, True, cached)
|
|
224
229
|
|
|
225
230
|
def __call__(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
|
|
226
231
|
return self._call(args, kw)
|
|
227
232
|
|
|
233
|
+
def cached(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
|
|
234
|
+
return self._call(args, kw, False, True)
|
|
235
|
+
|
|
228
236
|
def rerun(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
|
|
229
|
-
return self._call(args, kw, True)
|
|
237
|
+
return self._call(args, kw, True, True)
|
|
230
238
|
|
|
231
239
|
def exists(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> bool:
|
|
232
240
|
return self.storage.exists(self._get_call_hash(args, kw))
|
|
233
241
|
|
|
234
|
-
def delete(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs):
|
|
242
|
+
def delete(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> None:
|
|
235
243
|
self.storage.delete(self._get_call_hash(args, kw))
|
|
236
244
|
|
|
237
245
|
@overload
|
|
@@ -267,6 +275,5 @@ class CachedFunction(Generic[Fn]):
|
|
|
267
275
|
self.set(AwaitableValue(value), *args, **kw)
|
|
268
276
|
|
|
269
277
|
def __repr__(self) -> str:
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
return f"<CachedFunction {self.fn.__name__} {fn_hash}>"
|
|
278
|
+
ident = self.ident.__dict__.get("fn_hash", "")[:6] or "- uninitialized"
|
|
279
|
+
return f"<CachedFunction {self.fn.__name__} {ident}>"
|
|
@@ -69,9 +69,10 @@ def extract_scope_values(code: CodeType, scope_vars: dict) -> Iterable[tuple[Att
|
|
|
69
69
|
scope_vars = {**scope_vars, **{k: {**scope_vars[k], **v} for k, v in classvars.items()}}
|
|
70
70
|
instructs = seekable(dis.get_instructions(code))
|
|
71
71
|
for instruct in instructs:
|
|
72
|
-
|
|
72
|
+
opname = instruct.opname.replace("LOAD_FAST_BORROW", "LOAD_FAST")
|
|
73
|
+
if opname in scope_vars:
|
|
73
74
|
attrs = takewhile((x.opname in ("LOAD_ATTR", "LOAD_METHOD"), x.argval) for x in instructs)
|
|
74
|
-
attr_path = AttrPath((
|
|
75
|
+
attr_path = AttrPath((opname, instruct.argval, *attrs))
|
|
75
76
|
parent_path = attr_path[:-1]
|
|
76
77
|
instructs.step(-1)
|
|
77
78
|
obj = get_at(scope_vars, *attr_path)
|
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
import ctypes
|
|
2
2
|
import hashlib
|
|
3
|
-
import inspect
|
|
4
|
-
import io
|
|
5
3
|
import re
|
|
6
4
|
import sys
|
|
7
|
-
import tokenize
|
|
8
5
|
import sysconfig
|
|
6
|
+
import tokenize
|
|
9
7
|
from collections import OrderedDict
|
|
10
8
|
from collections.abc import Iterable
|
|
11
9
|
from contextlib import nullcontext, suppress
|
|
12
10
|
from decimal import Decimal
|
|
13
|
-
from
|
|
14
|
-
from
|
|
11
|
+
from inspect import getfile, getsource
|
|
12
|
+
from io import BufferedRandom, BufferedReader, BufferedWriter, FileIO, StringIO, TextIOWrapper
|
|
15
13
|
from itertools import chain
|
|
16
14
|
from pathlib import Path
|
|
17
15
|
from pickle import HIGHEST_PROTOCOL as PICKLE_PROTOCOL
|
|
@@ -146,7 +144,7 @@ class ObjectHash:
|
|
|
146
144
|
case GeneratorType():
|
|
147
145
|
self.header("generator", obj.__qualname__)._update_iterator(obj)
|
|
148
146
|
|
|
149
|
-
case
|
|
147
|
+
case TextIOWrapper() | FileIO() | BufferedRandom() | BufferedWriter() | BufferedReader():
|
|
150
148
|
self.header("file", encode_type_of(obj)).update(obj.name, obj.mode, obj.tell())
|
|
151
149
|
|
|
152
150
|
case type():
|
|
@@ -228,7 +226,7 @@ class ObjectHash:
|
|
|
228
226
|
|
|
229
227
|
def get_fn_body(fn: Callable) -> str:
|
|
230
228
|
try:
|
|
231
|
-
source =
|
|
229
|
+
source = getsource(fn)
|
|
232
230
|
except OSError:
|
|
233
231
|
return ""
|
|
234
232
|
tokens = tokenize.generate_tokens(StringIO(source).readline)
|
|
@@ -8,7 +8,7 @@ from .storage import Storage
|
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
10
|
import polars as pl
|
|
11
|
-
except:
|
|
11
|
+
except Exception:
|
|
12
12
|
pl = None
|
|
13
13
|
|
|
14
14
|
def filedate(path: Path) -> datetime:
|
|
@@ -51,18 +51,18 @@ class PickleStorage(Storage):
|
|
|
51
51
|
version_path = self.fn_dir()
|
|
52
52
|
fn_path = version_path.parent
|
|
53
53
|
if invalidated and fn_path.exists():
|
|
54
|
-
|
|
55
|
-
for path in
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
if
|
|
59
|
-
print(f"Removed {len(
|
|
54
|
+
invalidated_dirs = [path for path in fn_path.iterdir() if path.is_dir() and path != version_path]
|
|
55
|
+
pkls = [pkl for path in invalidated_dirs for pkl in path.glob("**/*.pkl")]
|
|
56
|
+
for pkl in pkls:
|
|
57
|
+
pkl.unlink(missing_ok=True)
|
|
58
|
+
if pkls:
|
|
59
|
+
print(f"Removed {len(pkls)} checkpoints from {len(invalidated_dirs)} invalidated directories for {self.cached_fn.__qualname__}")
|
|
60
60
|
if expired and self.checkpointer.expiry:
|
|
61
61
|
count = 0
|
|
62
|
-
for
|
|
63
|
-
if self.expired_dt(filedate(
|
|
62
|
+
for pkl in fn_path.glob("**/*.pkl"):
|
|
63
|
+
if self.expired_dt(filedate(pkl)):
|
|
64
64
|
count += 1
|
|
65
|
-
|
|
65
|
+
pkl.unlink(missing_ok=True)
|
|
66
66
|
if count:
|
|
67
67
|
print(f"Removed {count} expired checkpoints for {self.cached_fn.__qualname__}")
|
|
68
68
|
clear_directory(fn_path)
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from datetime import datetime, timedelta
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import TYPE_CHECKING, Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Protocol
|
|
5
5
|
|
|
6
6
|
if TYPE_CHECKING:
|
|
7
7
|
from ..checkpoint import CachedFunction, Checkpointer
|
|
8
8
|
|
|
9
|
-
class Storage:
|
|
9
|
+
class Storage(Protocol):
|
|
10
10
|
checkpointer: Checkpointer
|
|
11
|
-
|
|
11
|
+
cached_fn: CachedFunction
|
|
12
12
|
|
|
13
13
|
def __init__(self, cached_fn: CachedFunction):
|
|
14
14
|
self.checkpointer = cached_fn.ident.checkpointer
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
import asyncio
|
|
2
3
|
import inspect
|
|
3
4
|
from contextlib import contextmanager, suppress
|
|
4
5
|
from itertools import chain, islice
|
|
@@ -10,6 +11,13 @@ from .types import T
|
|
|
10
11
|
cwd = Path.cwd().resolve()
|
|
11
12
|
flatten = chain.from_iterable
|
|
12
13
|
|
|
14
|
+
def is_asyncio() -> bool:
|
|
15
|
+
try:
|
|
16
|
+
loop = asyncio.get_running_loop()
|
|
17
|
+
return isinstance(loop, asyncio.AbstractEventLoop)
|
|
18
|
+
except Exception:
|
|
19
|
+
return False
|
|
20
|
+
|
|
13
21
|
async def to_coroutine(value: T) -> T:
|
|
14
22
|
return value
|
|
15
23
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "checkpointer"
|
|
3
|
-
version = "2.14.6"
|
|
3
|
+
version = "2.14.8"
|
|
4
4
|
requires-python = ">=3.11"
|
|
5
5
|
dependencies = []
|
|
6
6
|
authors = [
|
|
@@ -14,6 +14,7 @@ classifiers = [
|
|
|
14
14
|
"Programming Language :: Python :: 3.11",
|
|
15
15
|
"Programming Language :: Python :: 3.12",
|
|
16
16
|
"Programming Language :: Python :: 3.13",
|
|
17
|
+
"Programming Language :: Python :: 3.14",
|
|
17
18
|
]
|
|
18
19
|
keywords = [
|
|
19
20
|
"data processing",
|
|
@@ -39,10 +40,10 @@ dev = [
|
|
|
39
40
|
"omg>=1.3.9",
|
|
40
41
|
"poethepoet>=0.30.0",
|
|
41
42
|
"polars>=1.31.0",
|
|
42
|
-
"pytest>=
|
|
43
|
-
"pytest-asyncio>=
|
|
43
|
+
"pytest>=9.0.0",
|
|
44
|
+
"pytest-asyncio>=1.3.0",
|
|
44
45
|
"rich>=14.0.0",
|
|
45
|
-
"torch>=2.
|
|
46
|
+
"torch>=2.9.1",
|
|
46
47
|
]
|
|
47
48
|
|
|
48
49
|
[tool.poe.tasks]
|