checkpointer 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checkpointer/__init__.py +2 -1
- checkpointer/checkpoint.py +33 -29
- checkpointer/function_body.py +68 -34
- checkpointer/storages/__init__.py +11 -0
- checkpointer/storages/bcolz_storage.py +7 -12
- checkpointer/storages/memory_storage.py +16 -19
- checkpointer/storages/pickle_storage.py +5 -10
- checkpointer/types.py +16 -12
- checkpointer/utils.py +41 -6
- checkpointer-2.1.0.dist-info/METADATA +248 -0
- checkpointer-2.1.0.dist-info/RECORD +14 -0
- checkpointer-2.0.1.dist-info/METADATA +0 -270
- checkpointer-2.0.1.dist-info/RECORD +0 -13
- {checkpointer-2.0.1.dist-info → checkpointer-2.1.0.dist-info}/WHEEL +0 -0
- {checkpointer-2.0.1.dist-info → checkpointer-2.1.0.dist-info}/licenses/LICENSE +0 -0
checkpointer/__init__.py
CHANGED
@@ -5,5 +5,6 @@ import tempfile
|
|
5
5
|
|
6
6
|
create_checkpointer = Checkpointer
|
7
7
|
checkpoint = Checkpointer()
|
8
|
-
|
8
|
+
capture_checkpoint = Checkpointer(capture=True)
|
9
|
+
memory_checkpoint = Checkpointer(format="memory", verbosity=0)
|
9
10
|
tmp_checkpoint = Checkpointer(root_path=tempfile.gettempdir() + "/checkpoints")
|
checkpointer/checkpoint.py
CHANGED
@@ -1,32 +1,31 @@
|
|
1
|
+
from __future__ import annotations
|
1
2
|
import inspect
|
2
3
|
import relib.hashing as hashing
|
3
|
-
from typing import Generic, TypeVar, TypedDict, Callable, Unpack, Literal,
|
4
|
-
from datetime import datetime
|
4
|
+
from typing import Generic, TypeVar, Type, TypedDict, Callable, Unpack, Literal, Any, cast, overload
|
5
5
|
from pathlib import Path
|
6
|
+
from datetime import datetime
|
6
7
|
from functools import update_wrapper
|
7
8
|
from .types import Storage
|
8
9
|
from .function_body import get_function_hash
|
9
|
-
from .utils import unwrap_fn, sync_resolve_coroutine
|
10
|
-
from .storages
|
11
|
-
from .storages.memory_storage import MemoryStorage
|
12
|
-
from .storages.bcolz_storage import BcolzStorage
|
10
|
+
from .utils import unwrap_fn, sync_resolve_coroutine, resolved_awaitable
|
11
|
+
from .storages import STORAGE_MAP
|
13
12
|
from .print_checkpoint import print_checkpoint
|
14
13
|
|
15
14
|
Fn = TypeVar("Fn", bound=Callable)
|
16
15
|
|
17
16
|
DEFAULT_DIR = Path.home() / ".cache/checkpoints"
|
18
|
-
STORAGE_MAP = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzStorage}
|
19
17
|
|
20
18
|
class CheckpointError(Exception):
|
21
19
|
pass
|
22
20
|
|
23
21
|
class CheckpointerOpts(TypedDict, total=False):
|
24
|
-
format: Storage | Literal["pickle", "memory", "bcolz"]
|
22
|
+
format: Type[Storage] | Literal["pickle", "memory", "bcolz"]
|
25
23
|
root_path: Path | str | None
|
26
24
|
when: bool
|
27
25
|
verbosity: Literal[0, 1]
|
28
26
|
path: Callable[..., str] | None
|
29
27
|
should_expire: Callable[[datetime], bool] | None
|
28
|
+
capture: bool
|
30
29
|
|
31
30
|
class Checkpointer:
|
32
31
|
def __init__(self, **opts: Unpack[CheckpointerOpts]):
|
@@ -36,15 +35,13 @@ class Checkpointer:
|
|
36
35
|
self.verbosity = opts.get("verbosity", 1)
|
37
36
|
self.path = opts.get("path")
|
38
37
|
self.should_expire = opts.get("should_expire")
|
39
|
-
|
40
|
-
def get_storage(self) -> Storage:
|
41
|
-
return STORAGE_MAP[self.format] if isinstance(self.format, str) else self.format
|
38
|
+
self.capture = opts.get("capture", False)
|
42
39
|
|
43
40
|
@overload
|
44
|
-
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) ->
|
41
|
+
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) -> CheckpointFn[Fn]: ...
|
45
42
|
@overload
|
46
|
-
def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
47
|
-
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
43
|
+
def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer: ...
|
44
|
+
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer | CheckpointFn[Fn]:
|
48
45
|
if override_opts:
|
49
46
|
opts = CheckpointerOpts(**{**self.__dict__, **override_opts})
|
50
47
|
return Checkpointer(**opts)(fn)
|
@@ -56,15 +53,19 @@ class CheckpointFn(Generic[Fn]):
|
|
56
53
|
wrapped = unwrap_fn(fn)
|
57
54
|
file_name = Path(wrapped.__code__.co_filename).name
|
58
55
|
update_wrapper(cast(Callable, self), wrapped)
|
56
|
+
storage = STORAGE_MAP[checkpointer.format] if isinstance(checkpointer.format, str) else checkpointer.format
|
59
57
|
self.checkpointer = checkpointer
|
60
58
|
self.fn = fn
|
61
|
-
self.fn_hash = get_function_hash(wrapped)
|
59
|
+
self.fn_hash, self.depends = get_function_hash(wrapped, self.checkpointer.capture)
|
62
60
|
self.fn_id = f"{file_name}/{wrapped.__name__}"
|
63
61
|
self.is_async = inspect.iscoroutinefunction(wrapped)
|
62
|
+
self.storage = storage(checkpointer)
|
64
63
|
|
65
64
|
def get_checkpoint_id(self, args: tuple, kw: dict) -> str:
|
66
65
|
if not callable(self.checkpointer.path):
|
67
|
-
|
66
|
+
# TODO: use digest size before digesting instead of truncating the hash
|
67
|
+
call_hash = hashing.hash((self.fn_hash, args, kw), "blake2b")[:32]
|
68
|
+
return f"{self.fn_id}/{call_hash}"
|
68
69
|
checkpoint_id = self.checkpointer.path(*args, **kw)
|
69
70
|
if not isinstance(checkpoint_id, str):
|
70
71
|
raise CheckpointError(f"path function must return a string, got {type(checkpoint_id)}")
|
@@ -73,27 +74,26 @@ class CheckpointFn(Generic[Fn]):
|
|
73
74
|
async def _store_on_demand(self, args: tuple, kw: dict, rerun: bool):
|
74
75
|
checkpoint_id = self.get_checkpoint_id(args, kw)
|
75
76
|
checkpoint_path = self.checkpointer.root_path / checkpoint_id
|
76
|
-
|
77
|
-
should_log = storage is not MemoryStorage and self.checkpointer.verbosity > 0
|
77
|
+
should_log = self.checkpointer.verbosity > 0
|
78
78
|
refresh = rerun \
|
79
|
-
or not storage.exists(checkpoint_path) \
|
80
|
-
or (self.checkpointer.should_expire and self.checkpointer.should_expire(storage.checkpoint_date(checkpoint_path)))
|
79
|
+
or not self.storage.exists(checkpoint_path) \
|
80
|
+
or (self.checkpointer.should_expire and self.checkpointer.should_expire(self.storage.checkpoint_date(checkpoint_path)))
|
81
81
|
|
82
82
|
if refresh:
|
83
83
|
print_checkpoint(should_log, "MEMORIZING", checkpoint_id, "blue")
|
84
84
|
data = self.fn(*args, **kw)
|
85
85
|
if inspect.iscoroutine(data):
|
86
86
|
data = await data
|
87
|
-
storage.store(checkpoint_path, data)
|
87
|
+
self.storage.store(checkpoint_path, data)
|
88
88
|
return data
|
89
89
|
|
90
90
|
try:
|
91
|
-
data = storage.load(checkpoint_path)
|
91
|
+
data = self.storage.load(checkpoint_path)
|
92
92
|
print_checkpoint(should_log, "REMEMBERED", checkpoint_id, "green")
|
93
93
|
return data
|
94
94
|
except (EOFError, FileNotFoundError):
|
95
95
|
print_checkpoint(should_log, "CORRUPTED", checkpoint_id, "yellow")
|
96
|
-
storage.delete(checkpoint_path)
|
96
|
+
self.storage.delete(checkpoint_path)
|
97
97
|
return await self._store_on_demand(args, kw, rerun)
|
98
98
|
|
99
99
|
def _call(self, args: tuple, kw: dict, rerun=False):
|
@@ -102,13 +102,17 @@ class CheckpointFn(Generic[Fn]):
|
|
102
102
|
coroutine = self._store_on_demand(args, kw, rerun)
|
103
103
|
return coroutine if self.is_async else sync_resolve_coroutine(coroutine)
|
104
104
|
|
105
|
-
|
106
|
-
rerun: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw, True))
|
107
|
-
|
108
|
-
def get(self, *args, **kw) -> Any:
|
105
|
+
def _get(self, args, kw) -> Any:
|
109
106
|
checkpoint_path = self.checkpointer.root_path / self.get_checkpoint_id(args, kw)
|
110
|
-
storage = self.checkpointer.get_storage()
|
111
107
|
try:
|
112
|
-
|
108
|
+
val = self.storage.load(checkpoint_path)
|
109
|
+
return resolved_awaitable(val) if self.is_async else val
|
113
110
|
except:
|
114
111
|
raise CheckpointError("Could not load checkpoint")
|
112
|
+
|
113
|
+
def exists(self, *args: tuple, **kw: dict) -> bool:
|
114
|
+
return self.storage.exists(self.checkpointer.root_path / self.get_checkpoint_id(args, kw))
|
115
|
+
|
116
|
+
__call__: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw))
|
117
|
+
rerun: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw, True))
|
118
|
+
get: Fn = cast(Fn, lambda self, *args, **kw: self._get(args, kw))
|
checkpointer/function_body.py
CHANGED
@@ -1,46 +1,80 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
import dis
|
1
3
|
import inspect
|
2
|
-
import
|
4
|
+
import tokenize
|
5
|
+
from io import StringIO
|
3
6
|
from collections.abc import Callable
|
4
|
-
from
|
7
|
+
from itertools import chain, takewhile
|
8
|
+
from operator import itemgetter
|
5
9
|
from pathlib import Path
|
6
|
-
from
|
10
|
+
from typing import Any, TypeGuard, TYPE_CHECKING
|
11
|
+
from types import CodeType, FunctionType
|
12
|
+
from relib import transpose, hashing, merge_dicts, drop_none
|
13
|
+
from .utils import unwrap_fn, iterate_and_upcoming, get_cell_contents, AttrDict, get_at_attr
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from .checkpoint import CheckpointFn
|
7
17
|
|
8
18
|
cwd = Path.cwd()
|
9
19
|
|
10
|
-
def
|
11
|
-
|
20
|
+
def extract_scope_values(code: CodeType, scope_vars: dict[str, Any], closure = False) -> dict[tuple[str, ...], Any]:
|
21
|
+
opname = "LOAD_GLOBAL" if not closure else "LOAD_DEREF"
|
22
|
+
scope_values_by_path: dict[tuple[str, ...], Any] = {}
|
23
|
+
instructions = list(dis.get_instructions(code))
|
24
|
+
|
25
|
+
for instr, upcoming_instrs in iterate_and_upcoming(instructions):
|
26
|
+
if instr.opname == opname:
|
27
|
+
name = instr.argval
|
28
|
+
attrs = takewhile(lambda instr: instr.opname == "LOAD_ATTR", upcoming_instrs)
|
29
|
+
attr_path = (name, *(instr.argval for instr in attrs))
|
30
|
+
scope_values_by_path[attr_path] = get_at_attr(scope_vars, attr_path)
|
31
|
+
|
32
|
+
children = (extract_scope_values(const, scope_vars, closure) for const in code.co_consts if isinstance(const, CodeType))
|
33
|
+
return merge_dicts(scope_values_by_path, *children)
|
12
34
|
|
13
|
-
def
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
return
|
35
|
+
def get_fn_captured_vals(fn: Callable) -> list[Any]:
|
36
|
+
closure_scope = {k: get_cell_contents(v) for k, v in zip(fn.__code__.co_freevars, fn.__closure__ or [])}
|
37
|
+
global_vals = extract_scope_values(fn.__code__, AttrDict(fn.__globals__), closure=False)
|
38
|
+
closure_vals = extract_scope_values(fn.__code__, AttrDict(closure_scope), closure=True)
|
39
|
+
sorted_items = chain(sorted(global_vals.items()), sorted(closure_vals.items()))
|
40
|
+
return drop_none(map(itemgetter(1), sorted_items))
|
19
41
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
42
|
+
def get_fn_body(fn: Callable) -> str:
|
43
|
+
source = "".join(inspect.getsourcelines(fn)[0])
|
44
|
+
tokens = tokenize.generate_tokens(StringIO(source).readline)
|
45
|
+
ignore_types = (tokenize.COMMENT, tokenize.NL)
|
46
|
+
return "".join("\0" + token.string for token in tokens if token.type not in ignore_types)
|
24
47
|
|
25
|
-
def
|
48
|
+
def get_fn_path(fn: Callable) -> Path:
|
49
|
+
return Path(inspect.getfile(fn)).resolve()
|
50
|
+
|
51
|
+
def is_user_fn(candidate_fn) -> TypeGuard[Callable]:
|
26
52
|
return isinstance(candidate_fn, FunctionType) \
|
27
|
-
and candidate_fn not in cleared_fns \
|
28
53
|
and cwd in get_fn_path(candidate_fn).parents
|
29
54
|
|
30
|
-
def
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
for
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
return
|
55
|
+
def append_fn_depends(checkpoint_fns: set[CheckpointFn], captured_vals_by_fn: dict[Callable, list[Any]], fn: Callable, capture: bool) -> None:
|
56
|
+
from .checkpoint import CheckpointFn
|
57
|
+
captured_vals = get_fn_captured_vals(fn)
|
58
|
+
captured_vals_by_fn[fn] = [v for v in captured_vals if capture and not callable(v)]
|
59
|
+
callables = [unwrap_fn(val, checkpoint_fn=True) for val in captured_vals if callable(val)]
|
60
|
+
depends = {val for val in callables if is_user_fn(val)}
|
61
|
+
checkpoint_fns.update({val for val in callables if isinstance(val, CheckpointFn)})
|
62
|
+
not_appended = depends - captured_vals_by_fn.keys()
|
63
|
+
captured_vals_by_fn.update({fn: [] for fn in not_appended})
|
64
|
+
for child_fn in not_appended:
|
65
|
+
append_fn_depends(checkpoint_fns, captured_vals_by_fn, child_fn, capture)
|
66
|
+
|
67
|
+
def get_depend_fns(fn: Callable, capture: bool) -> tuple[set[CheckpointFn], dict[Callable, list[Any]]]:
|
68
|
+
checkpoint_fns: set[CheckpointFn] = set()
|
69
|
+
captured_vals_by_fn: dict[Callable, list[Any]] = {}
|
70
|
+
append_fn_depends(checkpoint_fns, captured_vals_by_fn, fn, capture)
|
71
|
+
return checkpoint_fns, captured_vals_by_fn
|
72
|
+
|
73
|
+
def get_function_hash(fn: Callable, capture: bool) -> tuple[str, list[Callable]]:
|
74
|
+
checkpoint_fns, captured_vals_by_fn = get_depend_fns(fn, capture)
|
75
|
+
checkpoint_fns = sorted(checkpoint_fns, key=lambda fn: unwrap_fn(fn).__qualname__)
|
76
|
+
checkpoint_hashes = [check.fn_hash for check in checkpoint_fns]
|
77
|
+
depend_fns, depend_captured_vals = transpose(sorted(captured_vals_by_fn.items(), key=lambda x: x[0].__qualname__), 2)
|
78
|
+
fn_bodies = list(map(get_fn_body, [fn] + depend_fns))
|
79
|
+
fn_hash = hashing.hash((fn_bodies, depend_captured_vals, checkpoint_hashes), "blake2b")
|
80
|
+
return fn_hash, checkpoint_fns + depend_fns
|
@@ -0,0 +1,11 @@
|
|
1
|
+
from typing import Type
|
2
|
+
from ..types import Storage
|
3
|
+
from .pickle_storage import PickleStorage
|
4
|
+
from .memory_storage import MemoryStorage
|
5
|
+
from .bcolz_storage import BcolzStorage
|
6
|
+
|
7
|
+
STORAGE_MAP: dict[str, Type[Storage]] = {
|
8
|
+
"pickle": PickleStorage,
|
9
|
+
"memory": MemoryStorage,
|
10
|
+
"bcolz": BcolzStorage,
|
11
|
+
}
|
@@ -24,16 +24,13 @@ def insert_data(path: Path, data):
|
|
24
24
|
c.flush()
|
25
25
|
|
26
26
|
class BcolzStorage(Storage):
|
27
|
-
|
28
|
-
def exists(path):
|
27
|
+
def exists(self, path):
|
29
28
|
return path.exists()
|
30
29
|
|
31
|
-
|
32
|
-
def checkpoint_date(path):
|
30
|
+
def checkpoint_date(self, path):
|
33
31
|
return datetime.fromtimestamp(path.stat().st_mtime)
|
34
32
|
|
35
|
-
|
36
|
-
def store(path, data):
|
33
|
+
def store(self, path, data):
|
37
34
|
metapath = get_metapath(path)
|
38
35
|
path.parent.mkdir(parents=True, exist_ok=True)
|
39
36
|
data_type_str = get_data_type_str(data)
|
@@ -48,12 +45,11 @@ class BcolzStorage(Storage):
|
|
48
45
|
if data_type_str in ["tuple", "dict"]:
|
49
46
|
for i in range(len(fields)):
|
50
47
|
child_path = Path(f"{path} ({i})")
|
51
|
-
|
48
|
+
self.store(child_path, data[fields[i]])
|
52
49
|
else:
|
53
50
|
insert_data(path, data)
|
54
51
|
|
55
|
-
|
56
|
-
def load(path):
|
52
|
+
def load(self, path):
|
57
53
|
import bcolz
|
58
54
|
metapath = get_metapath(path)
|
59
55
|
meta_data = bcolz.open(metapath)[:][0]
|
@@ -61,7 +57,7 @@ class BcolzStorage(Storage):
|
|
61
57
|
if data_type_str in ["tuple", "dict"]:
|
62
58
|
fields = meta_data["fields"]
|
63
59
|
partitions = range(len(fields))
|
64
|
-
data = [
|
60
|
+
data = [self.load(Path(f"{path} ({i})")) for i in partitions]
|
65
61
|
if data_type_str == "tuple":
|
66
62
|
return tuple(data)
|
67
63
|
else:
|
@@ -75,8 +71,7 @@ class BcolzStorage(Storage):
|
|
75
71
|
else:
|
76
72
|
return data[:]
|
77
73
|
|
78
|
-
|
79
|
-
def delete(path):
|
74
|
+
def delete(self, path):
|
80
75
|
# NOTE: Not recursive
|
81
76
|
metapath = get_metapath(path)
|
82
77
|
try:
|
@@ -1,28 +1,25 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from pathlib import Path
|
1
3
|
from datetime import datetime
|
2
4
|
from ..types import Storage
|
3
5
|
|
4
|
-
|
5
|
-
date_stored = {}
|
6
|
+
item_map: dict[str, tuple[datetime, Any]] = {}
|
6
7
|
|
7
8
|
class MemoryStorage(Storage):
|
8
|
-
|
9
|
-
|
10
|
-
return str(path) in store
|
9
|
+
def get_short_path(self, path: Path):
|
10
|
+
return str(path.relative_to(self.checkpointer.root_path))
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
return date_stored[str(path)]
|
12
|
+
def exists(self, path):
|
13
|
+
return self.get_short_path(path) in item_map
|
15
14
|
|
16
|
-
|
17
|
-
|
18
|
-
store[str(path)] = data
|
19
|
-
date_stored[str(path)] = datetime.now()
|
15
|
+
def checkpoint_date(self, path):
|
16
|
+
return item_map[self.get_short_path(path)][0]
|
20
17
|
|
21
|
-
|
22
|
-
|
23
|
-
return store[str(path)]
|
18
|
+
def store(self, path, data):
|
19
|
+
item_map[self.get_short_path(path)] = (datetime.now(), data)
|
24
20
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
21
|
+
def load(self, path):
|
22
|
+
return item_map[self.get_short_path(path)][1]
|
23
|
+
|
24
|
+
def delete(self, path):
|
25
|
+
del item_map[self.get_short_path(path)]
|
@@ -7,29 +7,24 @@ def get_path(path: Path):
|
|
7
7
|
return path.with_name(f"{path.name}.pkl")
|
8
8
|
|
9
9
|
class PickleStorage(Storage):
|
10
|
-
|
11
|
-
def exists(path):
|
10
|
+
def exists(self, path):
|
12
11
|
return get_path(path).exists()
|
13
12
|
|
14
|
-
|
15
|
-
def checkpoint_date(path):
|
13
|
+
def checkpoint_date(self, path):
|
16
14
|
return datetime.fromtimestamp(get_path(path).stat().st_mtime)
|
17
15
|
|
18
|
-
|
19
|
-
def store(path, data):
|
16
|
+
def store(self, path, data):
|
20
17
|
full_path = get_path(path)
|
21
18
|
full_path.parent.mkdir(parents=True, exist_ok=True)
|
22
19
|
with full_path.open("wb") as file:
|
23
20
|
pickle.dump(data, file, -1)
|
24
21
|
|
25
|
-
|
26
|
-
def load(path):
|
22
|
+
def load(self, path):
|
27
23
|
full_path = get_path(path)
|
28
24
|
with full_path.open("rb") as file:
|
29
25
|
return pickle.load(file)
|
30
26
|
|
31
|
-
|
32
|
-
def delete(path):
|
27
|
+
def delete(self, path):
|
33
28
|
try:
|
34
29
|
get_path(path).unlink()
|
35
30
|
except FileNotFoundError:
|
checkpointer/types.py
CHANGED
@@ -1,19 +1,23 @@
|
|
1
|
-
from
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any, TYPE_CHECKING
|
2
3
|
from pathlib import Path
|
3
4
|
from datetime import datetime
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
def exists(path: Path) -> bool: ...
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .checkpoint import Checkpointer
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
class Storage:
|
10
|
+
checkpointer: Checkpointer
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
def __init__(self, checkpointer: Checkpointer):
|
13
|
+
self.checkpointer = checkpointer
|
14
14
|
|
15
|
-
|
16
|
-
def load(path: Path) -> Any: ...
|
15
|
+
def exists(self, path: Path) -> bool: ...
|
17
16
|
|
18
|
-
|
19
|
-
|
17
|
+
def checkpoint_date(self, path: Path) -> datetime: ...
|
18
|
+
|
19
|
+
def store(self, path: Path, data: Any) -> None: ...
|
20
|
+
|
21
|
+
def load(self, path: Path) -> Any: ...
|
22
|
+
|
23
|
+
def delete(self, path: Path) -> None: ...
|
checkpointer/utils.py
CHANGED
@@ -1,17 +1,52 @@
|
|
1
|
-
import
|
1
|
+
from typing import Generator, Coroutine, Iterable, Any, cast
|
2
|
+
from types import CellType, coroutine
|
3
|
+
from itertools import islice
|
2
4
|
|
3
|
-
|
5
|
+
class AttrDict(dict):
|
6
|
+
def __init__(self, *args, **kwargs):
|
7
|
+
super(AttrDict, self).__init__(*args, **kwargs)
|
8
|
+
self.__dict__ = self
|
9
|
+
|
10
|
+
def __getattribute__(self, name: str) -> Any:
|
11
|
+
return super().__getattribute__(name)
|
12
|
+
|
13
|
+
def unwrap_fn[T](fn: T, checkpoint_fn=False) -> T:
|
14
|
+
from .checkpoint import CheckpointFn
|
4
15
|
while hasattr(fn, "__wrapped__"):
|
16
|
+
if checkpoint_fn and isinstance(fn, CheckpointFn):
|
17
|
+
return fn
|
5
18
|
fn = getattr(fn, "__wrapped__")
|
6
19
|
return fn
|
7
20
|
|
8
|
-
@
|
9
|
-
def coroutine_as_generator(coroutine):
|
21
|
+
@coroutine
|
22
|
+
def coroutine_as_generator[T](coroutine: Coroutine[None, None, T]) -> Generator[None, None, T]:
|
10
23
|
val = yield from coroutine
|
11
24
|
return val
|
12
25
|
|
13
|
-
def sync_resolve_coroutine(coroutine):
|
26
|
+
def sync_resolve_coroutine[T](coroutine: Coroutine[None, None, T]) -> T:
|
27
|
+
gen = cast(Generator, coroutine_as_generator(coroutine))
|
14
28
|
try:
|
15
|
-
next(
|
29
|
+
while True: next(gen)
|
16
30
|
except StopIteration as ex:
|
17
31
|
return ex.value
|
32
|
+
|
33
|
+
async def resolved_awaitable[T](value: T) -> T:
|
34
|
+
return value
|
35
|
+
|
36
|
+
def iterate_and_upcoming[T](l: list[T]) -> Iterable[tuple[T, Iterable[T]]]:
|
37
|
+
for i, item in enumerate(l):
|
38
|
+
yield item, islice(l, i + 1, None)
|
39
|
+
|
40
|
+
def get_at_attr(d: dict, keys: tuple[str, ...]) -> Any:
|
41
|
+
try:
|
42
|
+
for key in keys:
|
43
|
+
d = getattr(d, key)
|
44
|
+
except AttributeError:
|
45
|
+
return None
|
46
|
+
return d
|
47
|
+
|
48
|
+
def get_cell_contents(cell: CellType) -> Any:
|
49
|
+
try:
|
50
|
+
return cell.cell_contents
|
51
|
+
except ValueError:
|
52
|
+
return None
|
@@ -0,0 +1,248 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: checkpointer
|
3
|
+
Version: 2.1.0
|
4
|
+
Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
|
5
|
+
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
6
|
+
Author: Hampus Hallman
|
7
|
+
License: Copyright 2024 Hampus Hallman
|
8
|
+
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
14
|
+
Requires-Python: >=3.12
|
15
|
+
Requires-Dist: relib
|
16
|
+
Description-Content-Type: text/markdown
|
17
|
+
|
18
|
+
# checkpointer · [License](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [PyPI](https://pypi.org/project/checkpointer/) [Python 3.12+](https://pypi.org/project/checkpointer/)
|
19
|
+
|
20
|
+
`checkpointer` is a Python library for memoizing function results. It provides a decorator-based API with support for multiple storage backends. Use it for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
21
|
+
|
22
|
+
Adding or removing `@checkpoint` doesn't change how your code works. You can apply it to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
23
|
+
|
24
|
+
### Key Features:
|
25
|
+
- 🗂️ **Multiple Storage Backends**: Built-in support for in-memory and pickle-based storage, or create your own.
|
26
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions without boilerplate.
|
27
|
+
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
+
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
+
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
30
|
+
- 📦 **Captured Variables Handling**: Optionally include captured variables in cache invalidation.
|
31
|
+
|
32
|
+
---
|
33
|
+
|
34
|
+
## Installation
|
35
|
+
|
36
|
+
```bash
|
37
|
+
pip install checkpointer
|
38
|
+
```
|
39
|
+
|
40
|
+
---
|
41
|
+
|
42
|
+
## Quick Start 🚀
|
43
|
+
|
44
|
+
```python
|
45
|
+
from checkpointer import checkpoint
|
46
|
+
|
47
|
+
@checkpoint
|
48
|
+
def expensive_function(x: int) -> int:
|
49
|
+
print("Computing...")
|
50
|
+
return x ** 2
|
51
|
+
|
52
|
+
result = expensive_function(4) # Computes and stores the result
|
53
|
+
result = expensive_function(4) # Loads from the cache
|
54
|
+
```
|
55
|
+
|
56
|
+
---
|
57
|
+
|
58
|
+
## How It Works
|
59
|
+
|
60
|
+
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` loads the cached result instead of recomputing.
|
61
|
+
|
62
|
+
Additionally, `checkpointer` ensures that caches are invalidated when a function's implementation or any of its dependencies change. Each function is assigned a hash based on:
|
63
|
+
|
64
|
+
1. **Its source code**: Changes to the function's code update its hash.
|
65
|
+
2. **Dependent functions**: If a function calls others, changes in those dependencies will also update the hash.
|
66
|
+
3. **Captured variables**: (Optional) If `capture=True`, changes to captured variables and global variables will also update the hash.
|
67
|
+
|
68
|
+
### Example: Cache Invalidation
|
69
|
+
|
70
|
+
```python
|
71
|
+
def multiply(a, b):
|
72
|
+
return a * b
|
73
|
+
|
74
|
+
@checkpoint
|
75
|
+
def helper(x):
|
76
|
+
return multiply(x + 1, 2)
|
77
|
+
|
78
|
+
@checkpoint
|
79
|
+
def compute(a, b):
|
80
|
+
return helper(a) + helper(b)
|
81
|
+
```
|
82
|
+
|
83
|
+
If you modify `multiply`, caches for both `helper` and `compute` are invalidated and recomputed.
|
84
|
+
|
85
|
+
---
|
86
|
+
|
87
|
+
## Parameterization
|
88
|
+
|
89
|
+
### Custom Configuration
|
90
|
+
|
91
|
+
Set up a `Checkpointer` instance with custom settings, and extend it by calling itself with overrides:
|
92
|
+
|
93
|
+
```python
|
94
|
+
from checkpointer import checkpoint
|
95
|
+
|
96
|
+
IS_DEVELOPMENT = True # Toggle based on your environment
|
97
|
+
|
98
|
+
tmp_checkpoint = checkpoint(root_path="/tmp/checkpoints")
|
99
|
+
dev_checkpoint = tmp_checkpoint(when=IS_DEVELOPMENT) # Adds development-specific behavior
|
100
|
+
```
|
101
|
+
|
102
|
+
### Per-Function Customization & Layered Caching
|
103
|
+
|
104
|
+
Layer caches by stacking checkpoints:
|
105
|
+
|
106
|
+
```python
|
107
|
+
@checkpoint(format="memory") # Always use memory storage
|
108
|
+
@dev_checkpoint # Adds caching during development
|
109
|
+
def some_expensive_function():
|
110
|
+
print("Performing a time-consuming operation...")
|
111
|
+
return sum(i * i for i in range(10**6))
|
112
|
+
```
|
113
|
+
|
114
|
+
- **In development**: Both `dev_checkpoint` and `memory` caches are active.
|
115
|
+
- **In production**: Only the `memory` cache is active.
|
116
|
+
|
117
|
+
---
|
118
|
+
|
119
|
+
## Usage
|
120
|
+
|
121
|
+
### Basic Invocation and Caching
|
122
|
+
|
123
|
+
Call the decorated function as usual. On the first call, the result is computed and stored in the cache. Subsequent calls with the same arguments load the result from the cache:
|
124
|
+
|
125
|
+
```python
|
126
|
+
result = expensive_function(4) # Computes and stores the result
|
127
|
+
result = expensive_function(4) # Loads the result from the cache
|
128
|
+
```
|
129
|
+
|
130
|
+
### Force Recalculation
|
131
|
+
|
132
|
+
Force a recalculation and overwrite the stored checkpoint:
|
133
|
+
|
134
|
+
```python
|
135
|
+
result = expensive_function.rerun(4)
|
136
|
+
```
|
137
|
+
|
138
|
+
### Call the Original Function
|
139
|
+
|
140
|
+
Use `fn` to directly call the original, undecorated function:
|
141
|
+
|
142
|
+
```python
|
143
|
+
result = expensive_function.fn(4)
|
144
|
+
```
|
145
|
+
|
146
|
+
This is especially useful **inside recursive functions** to avoid redundant caching of intermediate steps while still caching the final result.
|
147
|
+
|
148
|
+
### Retrieve Stored Checkpoints
|
149
|
+
|
150
|
+
Access cached results without recalculating:
|
151
|
+
|
152
|
+
```python
|
153
|
+
stored_result = expensive_function.get(4)
|
154
|
+
```
|
155
|
+
|
156
|
+
---
|
157
|
+
|
158
|
+
## Storage Backends
|
159
|
+
|
160
|
+
`checkpointer` works with both built-in and custom storage backends, so you can use what's provided or roll your own as needed.
|
161
|
+
|
162
|
+
### Built-In Backends
|
163
|
+
|
164
|
+
1. **PickleStorage**: Stores checkpoints on disk using Python's `pickle`.
|
165
|
+
2. **MemoryStorage**: Keeps checkpoints in memory for non-persistent, fast caching.
|
166
|
+
|
167
|
+
You can specify a storage backend using either its name (`"pickle"` or `"memory"`) or its corresponding class (`PickleStorage` or `MemoryStorage`) in the `format` parameter:
|
168
|
+
|
169
|
+
```python
|
170
|
+
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
171
|
+
|
172
|
+
@checkpoint(format="pickle") # Short for format=PickleStorage
|
173
|
+
def disk_cached(x: int) -> int:
|
174
|
+
return x ** 2
|
175
|
+
|
176
|
+
@checkpoint(format="memory") # Short for format=MemoryStorage
|
177
|
+
def memory_cached(x: int) -> int:
|
178
|
+
return x * 10
|
179
|
+
```
|
180
|
+
|
181
|
+
### Custom Storage Backends
|
182
|
+
|
183
|
+
Create a custom storage backend by inheriting from the `Storage` class and implementing its methods. Access configuration options through the `self.checkpointer` attribute, an instance of `Checkpointer`.
|
184
|
+
|
185
|
+
#### Example: Custom Storage Backend
|
186
|
+
|
187
|
+
```python
|
188
|
+
from checkpointer import checkpoint, Storage
|
189
|
+
from datetime import datetime
|
190
|
+
|
191
|
+
class CustomStorage(Storage):
|
192
|
+
def exists(self, path) -> bool: ... # Check if a checkpoint exists at the given path
|
193
|
+
def checkpoint_date(self, path) -> datetime: ... # Return the date the checkpoint was created
|
194
|
+
def store(self, path, data): ... # Save the checkpoint data
|
195
|
+
def load(self, path): ... # Return the checkpoint data
|
196
|
+
def delete(self, path): ... # Delete the checkpoint
|
197
|
+
|
198
|
+
@checkpoint(format=CustomStorage)
|
199
|
+
def custom_cached(x: int):
|
200
|
+
return x ** 2
|
201
|
+
```
|
202
|
+
|
203
|
+
Using a custom backend lets you tailor storage to your application, whether it involves databases, cloud storage, or custom file formats.
|
204
|
+
|
205
|
+
---
|
206
|
+
|
207
|
+
## Configuration Options ⚙️
|
208
|
+
|
209
|
+
| Option | Type | Default | Description |
|
210
|
+
|-----------------|-----------------------------------|----------------------|------------------------------------------------|
|
211
|
+
| `capture` | `bool` | `False` | Include captured variables in function hashes. |
|
212
|
+
| `format`        | `"pickle"`, `"memory"`, `Storage` | `"pickle"`           | Storage backend name or `Storage` subclass.    |
|
213
|
+
| `root_path` | `Path`, `str`, or `None` | ~/.cache/checkpoints | Root directory for storing checkpoints. |
|
214
|
+
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
215
|
+
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
216
|
+
| `path`          | `Callable[..., str]`              | `None`               | Function returning a custom checkpoint path.   |
|
217
|
+
| `should_expire` | `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
218
|
+
|
219
|
+
---
|
220
|
+
|
221
|
+
## Full Example 🛠️
|
222
|
+
|
223
|
+
```python
|
224
|
+
import asyncio
|
225
|
+
from checkpointer import checkpoint
|
226
|
+
|
227
|
+
@checkpoint
|
228
|
+
def compute_square(n: int) -> int:
|
229
|
+
print(f"Computing {n}^2...")
|
230
|
+
return n ** 2
|
231
|
+
|
232
|
+
@checkpoint(format="memory")
|
233
|
+
async def async_compute_sum(a: int, b: int) -> int:
|
234
|
+
await asyncio.sleep(1)
|
235
|
+
return a + b
|
236
|
+
|
237
|
+
async def main():
|
238
|
+
result1 = compute_square(5)
|
239
|
+
print(result1) # Outputs 25
|
240
|
+
|
241
|
+
result2 = await async_compute_sum(3, 7)
|
242
|
+
print(result2) # Outputs 10
|
243
|
+
|
244
|
+
result3 = await async_compute_sum.get(3, 7)
|
245
|
+
print(result3) # Outputs 10
|
246
|
+
|
247
|
+
asyncio.run(main())
|
248
|
+
```
|
@@ -0,0 +1,14 @@
|
|
1
|
+
checkpointer/__init__.py,sha256=t-dv0hIfgJHFx2M8tjCUMC9DlucPM8hvJOwGv86owUo,411
|
2
|
+
checkpointer/checkpoint.py,sha256=NHY_63EzlY3X6eqbOBE-dIprMZ_-X_GRC-nhy6cI1QQ,4990
|
3
|
+
checkpointer/function_body.py,sha256=DAq5fj1MMgb3az_Pfdxzqg6woJ6esFgvaqkkKqogJBY,4074
|
4
|
+
checkpointer/print_checkpoint.py,sha256=21aeqgM9CMjNAJyScqFmXCWWfh3jBIn7o7i5zJkZGaA,1369
|
5
|
+
checkpointer/types.py,sha256=SslunQTXxovFuGOR_VKfL7z5Vif9RD1PPx0J1FQdGLw,564
|
6
|
+
checkpointer/utils.py,sha256=qT7pk3o6GkX-1ylfi6I-DJO5fdVOIHMuaEK8dTEAoVw,1465
|
7
|
+
checkpointer/storages/__init__.py,sha256=G7JrOAyCGITd1wOz-u6_4RZVgxzxGLVLHPwBuW1sx1U,300
|
8
|
+
checkpointer/storages/bcolz_storage.py,sha256=UoeREc3oS8skFClu9sULpgpqbIVcp3tVd8CeYfAe5yM,2220
|
9
|
+
checkpointer/storages/memory_storage.py,sha256=RQ4WTVapxJGVPv1DNlb9VFTifxtyQy8YVo8fwaRLfdk,692
|
10
|
+
checkpointer/storages/pickle_storage.py,sha256=nyrBWLXKnyzXgZIMwrpWUOAGRozpX3jL9pCyCV29e4E,787
|
11
|
+
checkpointer-2.1.0.dist-info/METADATA,sha256=tdpLxisGi4Wx3gvs_W52t1SQqRwgDuke26pkfzHVuKY,9926
|
12
|
+
checkpointer-2.1.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
13
|
+
checkpointer-2.1.0.dist-info/licenses/LICENSE,sha256=0cmUKqBotzbBcysIexd52AhjwbphhlGYiWbvg5l2QAU,1054
|
14
|
+
checkpointer-2.1.0.dist-info/RECORD,,
|
@@ -1,270 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.3
|
2
|
-
Name: checkpointer
|
3
|
-
Version: 2.0.1
|
4
|
-
Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
|
5
|
-
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
6
|
-
Author: Hampus Hallman
|
7
|
-
License: Copyright 2024 Hampus Hallman
|
8
|
-
|
9
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
10
|
-
|
11
|
-
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
12
|
-
|
13
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
14
|
-
Requires-Python: >=3.12
|
15
|
-
Requires-Dist: relib
|
16
|
-
Description-Content-Type: text/markdown
|
17
|
-
|
18
|
-
# checkpointer · [License](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [PyPI](https://pypi.org/project/checkpointer/) [Python 3.12+](https://pypi.org/project/checkpointer/)
|
19
|
-
|
20
|
-
`checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations. ⚡️
|
21
|
-
|
22
|
-
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
23
|
-
|
24
|
-
### Key Features:
|
25
|
-
- 🗂️ **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
|
26
|
-
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions.
|
27
|
-
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
-
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
-
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
30
|
-
|
31
|
-
---
|
32
|
-
|
33
|
-
## Installation
|
34
|
-
|
35
|
-
```bash
|
36
|
-
pip install checkpointer
|
37
|
-
```
|
38
|
-
|
39
|
-
---
|
40
|
-
|
41
|
-
## Quick Start 🚀
|
42
|
-
|
43
|
-
```python
|
44
|
-
from checkpointer import checkpoint
|
45
|
-
|
46
|
-
@checkpoint
|
47
|
-
def expensive_function(x: int) -> int:
|
48
|
-
print("Computing...")
|
49
|
-
return x ** 2
|
50
|
-
|
51
|
-
result = expensive_function(4) # Computes and stores result
|
52
|
-
result = expensive_function(4) # Loads from checkpoint
|
53
|
-
```
|
54
|
-
|
55
|
-
---
|
56
|
-
|
57
|
-
## How It Works
|
58
|
-
|
59
|
-
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
|
60
|
-
|
61
|
-
Additionally, `checkpointer` ensures that caches are invalidated when a function’s implementation or any of its dependencies change. Each function is assigned a hash based on:
|
62
|
-
1. **Its source code**: Changes to the function’s code update its hash.
|
63
|
-
2. **Dependent functions**: If a function calls others, changes to those will also update the hash.
|
64
|
-
|
65
|
-
### Example: Cache Invalidation by Function Dependencies
|
66
|
-
|
67
|
-
```python
|
68
|
-
def multiply(a, b):
|
69
|
-
return a * b
|
70
|
-
|
71
|
-
@checkpoint
|
72
|
-
def helper(x):
|
73
|
-
return multiply(x + 1, 2)
|
74
|
-
|
75
|
-
@checkpoint
|
76
|
-
def compute(a, b):
|
77
|
-
return helper(a) + helper(b)
|
78
|
-
```
|
79
|
-
|
80
|
-
If you change `multiply`, the checkpoints for both `helper` and `compute` will be invalidated and recomputed.
|
81
|
-
|
82
|
-
---
|
83
|
-
|
84
|
-
## Parameterization
|
85
|
-
|
86
|
-
### Global Configuration
|
87
|
-
|
88
|
-
You can configure a custom `Checkpointer`:
|
89
|
-
|
90
|
-
```python
|
91
|
-
from checkpointer import checkpoint
|
92
|
-
|
93
|
-
checkpoint = checkpoint(format="memory", root_path="/tmp/checkpoints")
|
94
|
-
```
|
95
|
-
|
96
|
-
Extend this configuration by calling the configured `checkpoint` again with additional options:
|
97
|
-
|
98
|
-
```python
|
99
|
-
extended_checkpoint = checkpoint(format="pickle", verbosity=0)
|
100
|
-
```
|
101
|
-
|
102
|
-
### Per-Function Customization
|
103
|
-
|
104
|
-
```python
|
105
|
-
@checkpoint(format="pickle", verbosity=0)
|
106
|
-
def my_function(x, y):
|
107
|
-
return x + y
|
108
|
-
```
|
109
|
-
|
110
|
-
### Combining Configurations
|
111
|
-
|
112
|
-
```python
|
113
|
-
checkpoint = checkpoint(format="memory", verbosity=1)
|
114
|
-
quiet_checkpoint = checkpoint(verbosity=0)
|
115
|
-
pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
|
116
|
-
|
117
|
-
@checkpoint
|
118
|
-
def compute_square(n: int) -> int:
|
119
|
-
return n ** 2
|
120
|
-
|
121
|
-
@quiet_checkpoint
|
122
|
-
def compute_quietly(n: int) -> int:
|
123
|
-
return n ** 3
|
124
|
-
|
125
|
-
@pickle_checkpoint
|
126
|
-
def compute_sum(a: int, b: int) -> int:
|
127
|
-
return a + b
|
128
|
-
```
|
129
|
-
|
130
|
-
### Layered Caching
|
131
|
-
|
132
|
-
```python
|
133
|
-
IS_DEVELOPMENT = True # Toggle based on environment
|
134
|
-
|
135
|
-
dev_checkpoint = checkpoint(when=IS_DEVELOPMENT)
|
136
|
-
|
137
|
-
@checkpoint(format="memory")
|
138
|
-
@dev_checkpoint
|
139
|
-
def some_expensive_function():
|
140
|
-
print("Performing a time-consuming operation...")
|
141
|
-
return sum(i * i for i in range(10**6))
|
142
|
-
```
|
143
|
-
|
144
|
-
- In development: Both `dev_checkpoint` and `memory` caches are active.
|
145
|
-
- In production: Only the `memory` cache is active.
|
146
|
-
|
147
|
-
---
|
148
|
-
|
149
|
-
## Usage
|
150
|
-
|
151
|
-
### Force Recalculation
|
152
|
-
Use `rerun` to force a recalculation and overwrite the stored checkpoint:
|
153
|
-
|
154
|
-
```python
|
155
|
-
result = expensive_function.rerun(4)
|
156
|
-
```
|
157
|
-
|
158
|
-
### Bypass Checkpointer
|
159
|
-
Use `fn` to directly call the original, undecorated function:
|
160
|
-
|
161
|
-
```python
|
162
|
-
result = expensive_function.fn(4)
|
163
|
-
```
|
164
|
-
|
165
|
-
This is especially useful **inside recursive functions**. By using `.fn` within the function itself, you avoid redundant caching of intermediate recursive calls while still caching the final result at the top level.
|
166
|
-
|
167
|
-
### Retrieve Stored Checkpoints
|
168
|
-
Access stored results without recalculating:
|
169
|
-
|
170
|
-
```python
|
171
|
-
stored_result = expensive_function.get(4)
|
172
|
-
```
|
173
|
-
|
174
|
-
---
|
175
|
-
|
176
|
-
## Storage Backends
|
177
|
-
|
178
|
-
`checkpointer` supports flexible storage backends, including built-in options and custom implementations.
|
179
|
-
|
180
|
-
### Built-In Backends
|
181
|
-
|
182
|
-
1. **PickleStorage**: Saves checkpoints to disk using Python's `pickle` module.
|
183
|
-
2. **MemoryStorage**: Caches checkpoints in memory for fast, non-persistent use.
|
184
|
-
|
185
|
-
To use these backends, pass either `"pickle"` or `PickleStorage` (and similarly for `"memory"` or `MemoryStorage`) to the `format` parameter:
|
186
|
-
```python
|
187
|
-
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
188
|
-
|
189
|
-
@checkpoint(format="pickle") # Equivalent to format=PickleStorage
|
190
|
-
def disk_cached(x: int) -> int:
|
191
|
-
return x ** 2
|
192
|
-
|
193
|
-
@checkpoint(format="memory") # Equivalent to format=MemoryStorage
|
194
|
-
def memory_cached(x: int) -> int:
|
195
|
-
return x * 10
|
196
|
-
```
|
197
|
-
|
198
|
-
### Custom Storage Backends
|
199
|
-
|
200
|
-
Create custom storage backends by implementing methods for storing, loading, and managing checkpoints. For example, a custom storage backend might use a database, cloud storage, or a specialized format.
|
201
|
-
|
202
|
-
Example usage:
|
203
|
-
```python
|
204
|
-
from checkpointer import checkpoint, Storage
|
205
|
-
from typing import Any
|
206
|
-
from pathlib import Path
|
207
|
-
from datetime import datetime
|
208
|
-
|
209
|
-
class CustomStorage(Storage): # Optional for type hinting
|
210
|
-
@staticmethod
|
211
|
-
def exists(path: Path) -> bool: ...
|
212
|
-
@staticmethod
|
213
|
-
def checkpoint_date(path: Path) -> datetime: ...
|
214
|
-
@staticmethod
|
215
|
-
def store(path: Path, data: Any) -> None: ...
|
216
|
-
@staticmethod
|
217
|
-
def load(path: Path) -> Any: ...
|
218
|
-
@staticmethod
|
219
|
-
def delete(path: Path) -> None: ...
|
220
|
-
|
221
|
-
@checkpoint(format=CustomStorage)
|
222
|
-
def custom_cached(x: int):
|
223
|
-
return x ** 2
|
224
|
-
```
|
225
|
-
|
226
|
-
This flexibility allows you to adapt `checkpointer` to meet any storage requirement, whether persistent or in-memory.
|
227
|
-
|
228
|
-
---
|
229
|
-
|
230
|
-
## Configuration Options ⚙️
|
231
|
-
|
232
|
-
| Option | Type | Default | Description |
|
233
|
-
|----------------|-------------------------------------|-------------|---------------------------------------------|
|
234
|
-
| `format` | `"pickle"`, `"memory"`, `Storage` | `"pickle"` | Storage backend format. |
|
235
|
-
| `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
|
236
|
-
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
237
|
-
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
238
|
-
| `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
|
239
|
-
| `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
240
|
-
|
241
|
-
---
|
242
|
-
|
243
|
-
## Full Example 🛠️
|
244
|
-
|
245
|
-
```python
|
246
|
-
import asyncio
|
247
|
-
from checkpointer import checkpoint
|
248
|
-
|
249
|
-
@checkpoint
|
250
|
-
def compute_square(n: int) -> int:
|
251
|
-
print(f"Computing {n}^2...")
|
252
|
-
return n ** 2
|
253
|
-
|
254
|
-
@checkpoint(format="memory")
|
255
|
-
async def async_compute_sum(a: int, b: int) -> int:
|
256
|
-
await asyncio.sleep(1)
|
257
|
-
return a + b
|
258
|
-
|
259
|
-
async def main():
|
260
|
-
result1 = compute_square(5)
|
261
|
-
print(result1)
|
262
|
-
|
263
|
-
result2 = await async_compute_sum(3, 7)
|
264
|
-
print(result2)
|
265
|
-
|
266
|
-
result3 = async_compute_sum.get(3, 7)
|
267
|
-
print(result3)
|
268
|
-
|
269
|
-
asyncio.run(main())
|
270
|
-
```
|
@@ -1,13 +0,0 @@
|
|
1
|
-
checkpointer/__init__.py,sha256=2o-pOMXC_wVcjDtyjyapAdeTh6jyYwKYE0--C5XsKdc,350
|
2
|
-
checkpointer/checkpoint.py,sha256=-09sz8sZdYFwxfb8_O3L2PmdCN_lDXdcwKKTkFlOAtw,4715
|
3
|
-
checkpointer/function_body.py,sha256=92mnTY9d_JhKnKugeySYRP6qhU4fH6F6zesb7h2pEi0,1720
|
4
|
-
checkpointer/print_checkpoint.py,sha256=21aeqgM9CMjNAJyScqFmXCWWfh3jBIn7o7i5zJkZGaA,1369
|
5
|
-
checkpointer/types.py,sha256=n1AspKywTQhurCy7V_3t1HKIxYm0T6qOwuoDYfamO0E,408
|
6
|
-
checkpointer/utils.py,sha256=UrQt689UHUjl7kXpTbUCGkHUgQZllByX2rbuvZdt9vk,368
|
7
|
-
checkpointer/storages/bcolz_storage.py,sha256=F1JahTAgYmSpeE5mL1kPcANWTVxDgvb2YY8fgWRxt2U,2286
|
8
|
-
checkpointer/storages/memory_storage.py,sha256=EmXwscJ2D31Sekr4n0ONNaeiQWMf7SHfpHoVwRb1Ec8,534
|
9
|
-
checkpointer/storages/pickle_storage.py,sha256=YOndlnUdCaRUDWkzvQrU79j6FkGyp44WrSjl4kIs8RA,837
|
10
|
-
checkpointer-2.0.1.dist-info/METADATA,sha256=yHkb_PR1Js26cqT5UW1g2rmar_RnRabNjw4cE59tSlA,9568
|
11
|
-
checkpointer-2.0.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
12
|
-
checkpointer-2.0.1.dist-info/licenses/LICENSE,sha256=0cmUKqBotzbBcysIexd52AhjwbphhlGYiWbvg5l2QAU,1054
|
13
|
-
checkpointer-2.0.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|