checkpointer 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checkpointer/__init__.py +4 -4
- checkpointer/checkpoint.py +34 -45
- checkpointer/print_checkpoint.py +1 -1
- checkpointer/storages/bcolz_storage.py +10 -27
- checkpointer/storages/memory_storage.py +16 -20
- checkpointer/storages/pickle_storage.py +14 -38
- checkpointer/types.py +16 -12
- {checkpointer-2.0.0.dist-info → checkpointer-2.0.2.dist-info}/METADATA +99 -79
- checkpointer-2.0.2.dist-info/RECORD +13 -0
- checkpointer-2.0.0.dist-info/RECORD +0 -13
- {checkpointer-2.0.0.dist-info → checkpointer-2.0.2.dist-info}/WHEEL +0 -0
- {checkpointer-2.0.0.dist-info → checkpointer-2.0.2.dist-info}/licenses/LICENSE +0 -0
checkpointer/__init__.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
from .checkpoint import Checkpointer, CheckpointFn
|
2
|
-
from .checkpoint import CheckpointError, CheckpointReadFail
|
1
|
+
from .checkpoint import Checkpointer, CheckpointFn, CheckpointError
|
3
2
|
from .types import Storage
|
4
3
|
from .function_body import get_function_hash
|
4
|
+
import tempfile
|
5
5
|
|
6
6
|
create_checkpointer = Checkpointer
|
7
7
|
checkpoint = Checkpointer()
|
8
|
-
memory_checkpoint = Checkpointer(format="memory")
|
9
|
-
tmp_checkpoint = Checkpointer(root_path="/
|
8
|
+
memory_checkpoint = Checkpointer(format="memory", verbosity=0)
|
9
|
+
tmp_checkpoint = Checkpointer(root_path=tempfile.gettempdir() + "/checkpoints")
|
checkpointer/checkpoint.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
from __future__ import annotations
|
1
2
|
import inspect
|
2
3
|
import relib.hashing as hashing
|
3
|
-
from typing import Generic, TypeVar, TypedDict, Unpack, Literal,
|
4
|
-
from collections.abc import Callable
|
5
|
-
from datetime import datetime
|
4
|
+
from typing import Generic, TypeVar, Type, TypedDict, Callable, Unpack, Literal, Any, cast, overload
|
6
5
|
from pathlib import Path
|
6
|
+
from datetime import datetime
|
7
7
|
from functools import update_wrapper
|
8
8
|
from .types import Storage
|
9
9
|
from .function_body import get_function_hash
|
@@ -16,25 +16,18 @@ from .print_checkpoint import print_checkpoint
|
|
16
16
|
Fn = TypeVar("Fn", bound=Callable)
|
17
17
|
|
18
18
|
DEFAULT_DIR = Path.home() / ".cache/checkpoints"
|
19
|
-
STORAGE_MAP = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzStorage}
|
19
|
+
STORAGE_MAP: dict[str, Type[Storage]] = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzStorage}
|
20
20
|
|
21
21
|
class CheckpointError(Exception):
|
22
22
|
pass
|
23
23
|
|
24
|
-
class CheckpointReadFail(CheckpointError):
|
25
|
-
pass
|
26
|
-
|
27
|
-
StorageType = Literal["pickle", "memory", "bcolz"] | Storage
|
28
|
-
CheckpointPath = str | Callable[..., str] | None
|
29
|
-
ShouldExpire = Callable[[datetime], bool]
|
30
|
-
|
31
24
|
class CheckpointerOpts(TypedDict, total=False):
|
32
|
-
format:
|
25
|
+
format: Type[Storage] | Literal["pickle", "memory", "bcolz"]
|
33
26
|
root_path: Path | str | None
|
34
27
|
when: bool
|
35
28
|
verbosity: Literal[0, 1]
|
36
|
-
path:
|
37
|
-
should_expire:
|
29
|
+
path: Callable[..., str] | None
|
30
|
+
should_expire: Callable[[datetime], bool] | None
|
38
31
|
|
39
32
|
class Checkpointer:
|
40
33
|
def __init__(self, **opts: Unpack[CheckpointerOpts]):
|
@@ -45,14 +38,11 @@ class Checkpointer:
|
|
45
38
|
self.path = opts.get("path")
|
46
39
|
self.should_expire = opts.get("should_expire")
|
47
40
|
|
48
|
-
def get_storage(self) -> Storage:
|
49
|
-
return STORAGE_MAP[self.format] if isinstance(self.format, str) else self.format
|
50
|
-
|
51
41
|
@overload
|
52
|
-
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) ->
|
42
|
+
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) -> CheckpointFn[Fn]: ...
|
53
43
|
@overload
|
54
|
-
def __call__(self, fn=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
55
|
-
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
44
|
+
def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer: ...
|
45
|
+
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer | CheckpointFn[Fn]:
|
56
46
|
if override_opts:
|
57
47
|
opts = CheckpointerOpts(**{**self.__dict__, **override_opts})
|
58
48
|
return Checkpointer(**opts)(fn)
|
@@ -64,52 +54,51 @@ class CheckpointFn(Generic[Fn]):
|
|
64
54
|
wrapped = unwrap_fn(fn)
|
65
55
|
file_name = Path(wrapped.__code__.co_filename).name
|
66
56
|
update_wrapper(cast(Callable, self), wrapped)
|
57
|
+
storage = STORAGE_MAP[checkpointer.format] if isinstance(checkpointer.format, str) else checkpointer.format
|
67
58
|
self.checkpointer = checkpointer
|
68
59
|
self.fn = fn
|
69
60
|
self.fn_hash = get_function_hash(wrapped)
|
70
61
|
self.fn_id = f"{file_name}/{wrapped.__name__}"
|
71
|
-
self.is_async = inspect.iscoroutinefunction(
|
62
|
+
self.is_async = inspect.iscoroutinefunction(wrapped)
|
63
|
+
self.storage = storage(checkpointer)
|
72
64
|
|
73
65
|
def get_checkpoint_id(self, args: tuple, kw: dict) -> str:
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
return f"{self.fn_id}/{hashing.hash([self.fn_hash, args, kw or 0])}"
|
83
|
-
|
84
|
-
async def _store_on_demand(self, args: tuple, kw: dict, force: bool):
|
66
|
+
if not callable(self.checkpointer.path):
|
67
|
+
return f"{self.fn_id}/{hashing.hash([self.fn_hash, args, kw or 0])}"
|
68
|
+
checkpoint_id = self.checkpointer.path(*args, **kw)
|
69
|
+
if not isinstance(checkpoint_id, str):
|
70
|
+
raise CheckpointError(f"path function must return a string, got {type(checkpoint_id)}")
|
71
|
+
return checkpoint_id
|
72
|
+
|
73
|
+
async def _store_on_demand(self, args: tuple, kw: dict, rerun: bool):
|
85
74
|
checkpoint_id = self.get_checkpoint_id(args, kw)
|
86
75
|
checkpoint_path = self.checkpointer.root_path / checkpoint_id
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
or storage.
|
91
|
-
or (self.checkpointer.should_expire and storage.should_expire(checkpoint_path, self.checkpointer.should_expire))
|
76
|
+
should_log = self.checkpointer.verbosity > 0
|
77
|
+
refresh = rerun \
|
78
|
+
or not self.storage.exists(checkpoint_path) \
|
79
|
+
or (self.checkpointer.should_expire and self.checkpointer.should_expire(self.storage.checkpoint_date(checkpoint_path)))
|
92
80
|
|
93
81
|
if refresh:
|
94
82
|
print_checkpoint(should_log, "MEMORIZING", checkpoint_id, "blue")
|
95
83
|
data = self.fn(*args, **kw)
|
96
84
|
if inspect.iscoroutine(data):
|
97
85
|
data = await data
|
98
|
-
|
86
|
+
self.storage.store(checkpoint_path, data)
|
87
|
+
return data
|
99
88
|
|
100
89
|
try:
|
101
|
-
data = storage.
|
90
|
+
data = self.storage.load(checkpoint_path)
|
102
91
|
print_checkpoint(should_log, "REMEMBERED", checkpoint_id, "green")
|
103
92
|
return data
|
104
93
|
except (EOFError, FileNotFoundError):
|
105
94
|
print_checkpoint(should_log, "CORRUPTED", checkpoint_id, "yellow")
|
106
|
-
storage.
|
107
|
-
return await self._store_on_demand(args, kw,
|
95
|
+
self.storage.delete(checkpoint_path)
|
96
|
+
return await self._store_on_demand(args, kw, rerun)
|
108
97
|
|
109
|
-
def _call(self, args: tuple, kw: dict,
|
98
|
+
def _call(self, args: tuple, kw: dict, rerun=False):
|
110
99
|
if not self.checkpointer.when:
|
111
100
|
return self.fn(*args, **kw)
|
112
|
-
coroutine = self._store_on_demand(args, kw,
|
101
|
+
coroutine = self._store_on_demand(args, kw, rerun)
|
113
102
|
return coroutine if self.is_async else sync_resolve_coroutine(coroutine)
|
114
103
|
|
115
104
|
__call__: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw))
|
@@ -118,6 +107,6 @@ class CheckpointFn(Generic[Fn]):
|
|
118
107
|
def get(self, *args, **kw) -> Any:
|
119
108
|
checkpoint_path = self.checkpointer.root_path / self.get_checkpoint_id(args, kw)
|
120
109
|
try:
|
121
|
-
return self.
|
110
|
+
return self.storage.load(checkpoint_path)
|
122
111
|
except:
|
123
|
-
raise
|
112
|
+
raise CheckpointError("Could not load checkpoint")
|
checkpointer/print_checkpoint.py
CHANGED
@@ -44,7 +44,7 @@ def colored_(text: str, color: Color | None = None, on_color: Color | None = Non
|
|
44
44
|
text = f"\033[{COLOR_MAP[on_color] + 10}m{text}"
|
45
45
|
return text + "\033[0m"
|
46
46
|
|
47
|
-
noop = lambda *
|
47
|
+
noop = lambda text, *a, **k: text
|
48
48
|
colored = colored_ if allow_color() else noop
|
49
49
|
|
50
50
|
def print_checkpoint(should_log: bool, title: str, text: str, color: Color):
|
@@ -18,35 +18,21 @@ def get_data_type_str(x):
|
|
18
18
|
def get_metapath(path: Path):
|
19
19
|
return path.with_name(f"{path.name}_meta")
|
20
20
|
|
21
|
-
def get_collection_timestamp(path: Path):
|
22
|
-
import bcolz
|
23
|
-
metapath = get_metapath(path)
|
24
|
-
meta_data = bcolz.open(metapath)[:][0]
|
25
|
-
return meta_data["created"]
|
26
|
-
|
27
21
|
def insert_data(path: Path, data):
|
28
22
|
import bcolz
|
29
23
|
c = bcolz.carray(data, rootdir=path, mode="w")
|
30
24
|
c.flush()
|
31
25
|
|
32
26
|
class BcolzStorage(Storage):
|
33
|
-
|
34
|
-
|
35
|
-
try:
|
36
|
-
get_collection_timestamp(path)
|
37
|
-
return False
|
38
|
-
except (FileNotFoundError, EOFError):
|
39
|
-
return True
|
27
|
+
def exists(self, path):
|
28
|
+
return path.exists()
|
40
29
|
|
41
|
-
|
42
|
-
|
43
|
-
return expire_fn(get_collection_timestamp(path))
|
30
|
+
def checkpoint_date(self, path):
|
31
|
+
return datetime.fromtimestamp(path.stat().st_mtime)
|
44
32
|
|
45
|
-
|
46
|
-
def store_data(path, data):
|
33
|
+
def store(self, path, data):
|
47
34
|
metapath = get_metapath(path)
|
48
35
|
path.parent.mkdir(parents=True, exist_ok=True)
|
49
|
-
created = datetime.now()
|
50
36
|
data_type_str = get_data_type_str(data)
|
51
37
|
if data_type_str == "tuple":
|
52
38
|
fields = list(range(len(data)))
|
@@ -54,18 +40,16 @@ class BcolzStorage(Storage):
|
|
54
40
|
fields = sorted(data.keys())
|
55
41
|
else:
|
56
42
|
fields = []
|
57
|
-
meta_data = {"
|
43
|
+
meta_data = {"data_type_str": data_type_str, "fields": fields}
|
58
44
|
insert_data(metapath, meta_data)
|
59
45
|
if data_type_str in ["tuple", "dict"]:
|
60
46
|
for i in range(len(fields)):
|
61
47
|
child_path = Path(f"{path} ({i})")
|
62
|
-
|
48
|
+
self.store(child_path, data[fields[i]])
|
63
49
|
else:
|
64
50
|
insert_data(path, data)
|
65
|
-
return data
|
66
51
|
|
67
|
-
|
68
|
-
def load_data(path):
|
52
|
+
def load(self, path):
|
69
53
|
import bcolz
|
70
54
|
metapath = get_metapath(path)
|
71
55
|
meta_data = bcolz.open(metapath)[:][0]
|
@@ -73,7 +57,7 @@ class BcolzStorage(Storage):
|
|
73
57
|
if data_type_str in ["tuple", "dict"]:
|
74
58
|
fields = meta_data["fields"]
|
75
59
|
partitions = range(len(fields))
|
76
|
-
data = [
|
60
|
+
data = [self.load(Path(f"{path} ({i})")) for i in partitions]
|
77
61
|
if data_type_str == "tuple":
|
78
62
|
return tuple(data)
|
79
63
|
else:
|
@@ -87,8 +71,7 @@ class BcolzStorage(Storage):
|
|
87
71
|
else:
|
88
72
|
return data[:]
|
89
73
|
|
90
|
-
|
91
|
-
def delete_data(path):
|
74
|
+
def delete(self, path):
|
92
75
|
# NOTE: Not recursive
|
93
76
|
metapath = get_metapath(path)
|
94
77
|
try:
|
@@ -1,29 +1,25 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from pathlib import Path
|
1
3
|
from datetime import datetime
|
2
4
|
from ..types import Storage
|
3
5
|
|
4
|
-
|
5
|
-
date_stored = {}
|
6
|
+
item_map: dict[str, tuple[datetime, Any]] = {}
|
6
7
|
|
7
8
|
class MemoryStorage(Storage):
|
8
|
-
|
9
|
-
|
10
|
-
return path not in store
|
9
|
+
def get_short_path(self, path: Path):
|
10
|
+
return str(path.relative_to(self.checkpointer.root_path))
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
return expire_fn(date_stored[path])
|
12
|
+
def exists(self, path):
|
13
|
+
return self.get_short_path(path) in item_map
|
15
14
|
|
16
|
-
|
17
|
-
|
18
|
-
store[path] = data
|
19
|
-
date_stored[path] = datetime.now()
|
20
|
-
return data
|
15
|
+
def checkpoint_date(self, path):
|
16
|
+
return item_map[self.get_short_path(path)][0]
|
21
17
|
|
22
|
-
|
23
|
-
|
24
|
-
return store[path]
|
18
|
+
def store(self, path, data):
|
19
|
+
item_map[self.get_short_path(path)] = (datetime.now(), data)
|
25
20
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
21
|
+
def load(self, path):
|
22
|
+
return item_map[self.get_short_path(path)][1]
|
23
|
+
|
24
|
+
def delete(self, path):
|
25
|
+
del item_map[self.get_short_path(path)]
|
@@ -3,53 +3,29 @@ from pathlib import Path
|
|
3
3
|
from datetime import datetime
|
4
4
|
from ..types import Storage
|
5
5
|
|
6
|
-
def
|
7
|
-
|
8
|
-
pkl_full_path = path.with_name(f"{path.name}.pkl")
|
9
|
-
return meta_full_path, pkl_full_path
|
10
|
-
|
11
|
-
def get_collection_timestamp(path: Path):
|
12
|
-
meta_full_path, _ = get_paths(path)
|
13
|
-
with meta_full_path.open("rb") as file:
|
14
|
-
meta_data = pickle.load(file)
|
15
|
-
return meta_data["created"]
|
6
|
+
def get_path(path: Path):
|
7
|
+
return path.with_name(f"{path.name}.pkl")
|
16
8
|
|
17
9
|
class PickleStorage(Storage):
|
18
|
-
|
19
|
-
|
20
|
-
try:
|
21
|
-
get_collection_timestamp(path)
|
22
|
-
return False
|
23
|
-
except (FileNotFoundError, EOFError):
|
24
|
-
return True
|
10
|
+
def exists(self, path):
|
11
|
+
return get_path(path).exists()
|
25
12
|
|
26
|
-
|
27
|
-
|
28
|
-
return expire_fn(get_collection_timestamp(path))
|
13
|
+
def checkpoint_date(self, path):
|
14
|
+
return datetime.fromtimestamp(get_path(path).stat().st_mtime)
|
29
15
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
meta_full_path, pkl_full_path = get_paths(path)
|
35
|
-
pkl_full_path.parent.mkdir(parents=True, exist_ok=True)
|
36
|
-
with pkl_full_path.open("wb") as file:
|
16
|
+
def store(self, path, data):
|
17
|
+
full_path = get_path(path)
|
18
|
+
full_path.parent.mkdir(parents=True, exist_ok=True)
|
19
|
+
with full_path.open("wb") as file:
|
37
20
|
pickle.dump(data, file, -1)
|
38
|
-
with meta_full_path.open("wb") as file:
|
39
|
-
pickle.dump(meta_data, file, -1)
|
40
|
-
return data
|
41
21
|
|
42
|
-
|
43
|
-
|
44
|
-
_, full_path = get_paths(path)
|
22
|
+
def load(self, path):
|
23
|
+
full_path = get_path(path)
|
45
24
|
with full_path.open("rb") as file:
|
46
25
|
return pickle.load(file)
|
47
26
|
|
48
|
-
|
49
|
-
def delete_data(path):
|
50
|
-
meta_full_path, pkl_full_path = get_paths(path)
|
27
|
+
def delete(self, path):
|
51
28
|
try:
|
52
|
-
|
53
|
-
pkl_full_path.unlink()
|
29
|
+
get_path(path).unlink()
|
54
30
|
except FileNotFoundError:
|
55
31
|
pass
|
checkpointer/types.py
CHANGED
@@ -1,19 +1,23 @@
|
|
1
|
-
from
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any, TYPE_CHECKING
|
2
3
|
from pathlib import Path
|
3
4
|
from datetime import datetime
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
def is_expired(path: Path) -> bool: ...
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .checkpoint import Checkpointer
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
class Storage:
|
10
|
+
checkpointer: Checkpointer
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
def __init__(self, checkpointer: Checkpointer):
|
13
|
+
self.checkpointer = checkpointer
|
14
14
|
|
15
|
-
|
16
|
-
def load_data(path: Path) -> Any: ...
|
15
|
+
def exists(self, path: Path) -> bool: ...
|
17
16
|
|
18
|
-
|
19
|
-
|
17
|
+
def checkpoint_date(self, path: Path) -> datetime: ...
|
18
|
+
|
19
|
+
def store(self, path: Path, data: Any) -> None: ...
|
20
|
+
|
21
|
+
def load(self, path: Path) -> Any: ...
|
22
|
+
|
23
|
+
def delete(self, path: Path) -> None: ...
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: checkpointer
|
3
|
-
Version: 2.0.
|
3
|
+
Version: 2.0.2
|
4
4
|
Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
|
5
5
|
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
6
6
|
Author: Hampus Hallman
|
@@ -17,41 +17,16 @@ Description-Content-Type: text/markdown
|
|
17
17
|
|
18
18
|
# checkpointer · [](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [](https://pypi.org/project/checkpointer/) [](https://pypi.org/project/checkpointer/)
|
19
19
|
|
20
|
-
`checkpointer` is a Python library for memoizing function results. It
|
20
|
+
`checkpointer` is a Python library for memoizing function results. It provides a decorator-based API with support for multiple storage backends. Use it for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
21
21
|
|
22
22
|
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
23
23
|
|
24
24
|
### Key Features:
|
25
|
-
- **Multiple Storage Backends**:
|
26
|
-
- **Simple Decorator API**: Apply `@checkpoint` to functions.
|
27
|
-
- **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
-
- **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
-
- **Flexible Path Configuration**: Control where checkpoints are stored.
|
30
|
-
|
31
|
-
### How It Works
|
32
|
-
|
33
|
-
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
|
34
|
-
|
35
|
-
Additionally, `checkpointer` ensures that caches are invalidated when a function’s implementation or any of its dependencies change. Each function is assigned a hash based on:
|
36
|
-
1. **Its source code**: Changes to the function’s code update its hash.
|
37
|
-
2. **Dependent functions**: If a function calls others, changes to those will also update the hash.
|
38
|
-
|
39
|
-
### Example: Cache Invalidation by Function Dependencies
|
40
|
-
|
41
|
-
```python
|
42
|
-
def multiply(a, b):
|
43
|
-
return a * b
|
44
|
-
|
45
|
-
@checkpoint
|
46
|
-
def helper(x):
|
47
|
-
return multiply(x + 1, 2)
|
48
|
-
|
49
|
-
@checkpoint
|
50
|
-
def compute(a, b):
|
51
|
-
return helper(a) + helper(b)
|
52
|
-
```
|
53
|
-
|
54
|
-
If you change `multiply`, the checkpoints for both `helper` and `compute` will be invalidated and recomputed.
|
25
|
+
- 🗂️ **Multiple Storage Backends**: Built-in support for in-memory and pickle-based storage, or create your own.
|
26
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions without boilerplate.
|
27
|
+
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
+
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
+
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
55
30
|
|
56
31
|
---
|
57
32
|
|
@@ -63,7 +38,7 @@ pip install checkpointer
|
|
63
38
|
|
64
39
|
---
|
65
40
|
|
66
|
-
## Quick Start
|
41
|
+
## Quick Start 🚀
|
67
42
|
|
68
43
|
```python
|
69
44
|
from checkpointer import checkpoint
|
@@ -73,95 +48,91 @@ def expensive_function(x: int) -> int:
|
|
73
48
|
print("Computing...")
|
74
49
|
return x ** 2
|
75
50
|
|
76
|
-
result = expensive_function(4) # Computes and stores result
|
77
|
-
result = expensive_function(4) # Loads from
|
51
|
+
result = expensive_function(4) # Computes and stores the result
|
52
|
+
result = expensive_function(4) # Loads from the cache
|
78
53
|
```
|
79
54
|
|
80
55
|
---
|
81
56
|
|
82
|
-
##
|
57
|
+
## How It Works
|
58
|
+
|
59
|
+
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` loads the cached result instead of recomputing.
|
83
60
|
|
84
|
-
|
61
|
+
Additionally, `checkpointer` ensures that caches are invalidated when a function's implementation or any of its dependencies change. Each function is assigned a hash based on:
|
62
|
+
1. **Its source code**: Changes to the function's code update its hash.
|
63
|
+
2. **Dependent functions**: If a function calls others, changes in those dependencies will also update the hash.
|
85
64
|
|
86
|
-
|
65
|
+
### Example: Cache Invalidation
|
87
66
|
|
88
67
|
```python
|
89
|
-
|
68
|
+
def multiply(a, b):
|
69
|
+
return a * b
|
90
70
|
|
91
|
-
checkpoint
|
71
|
+
@checkpoint
|
72
|
+
def helper(x):
|
73
|
+
return multiply(x + 1, 2)
|
74
|
+
|
75
|
+
@checkpoint
|
76
|
+
def compute(a, b):
|
77
|
+
return helper(a) + helper(b)
|
92
78
|
```
|
93
79
|
|
94
|
-
|
80
|
+
If you modify `multiply`, caches for both `helper` and `compute` are invalidated and recomputed.
|
95
81
|
|
96
|
-
|
97
|
-
extended_checkpoint = checkpoint(format="pickle", verbosity=0)
|
98
|
-
```
|
82
|
+
---
|
99
83
|
|
100
|
-
|
84
|
+
## Parameterization
|
101
85
|
|
102
|
-
|
103
|
-
@checkpoint(format="pickle", verbosity=0)
|
104
|
-
def my_function(x, y):
|
105
|
-
return x + y
|
106
|
-
```
|
86
|
+
### Custom Configuration
|
107
87
|
|
108
|
-
|
88
|
+
Set up a `Checkpointer` instance with custom settings, and extend it by calling itself with overrides:
|
109
89
|
|
110
90
|
```python
|
111
|
-
|
112
|
-
quiet_checkpoint = checkpoint(verbosity=0)
|
113
|
-
pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
|
114
|
-
|
115
|
-
@checkpoint
|
116
|
-
def compute_square(n: int) -> int:
|
117
|
-
return n ** 2
|
91
|
+
from checkpointer import checkpoint
|
118
92
|
|
119
|
-
|
120
|
-
def compute_quietly(n: int) -> int:
|
121
|
-
return n ** 3
|
93
|
+
IS_DEVELOPMENT = True # Toggle based on your environment
|
122
94
|
|
123
|
-
|
124
|
-
|
125
|
-
return a + b
|
95
|
+
tmp_checkpoint = checkpoint(root_path="/tmp/checkpoints")
|
96
|
+
dev_checkpoint = tmp_checkpoint(when=IS_DEVELOPMENT) # Adds development-specific behavior
|
126
97
|
```
|
127
98
|
|
128
|
-
### Layered Caching
|
99
|
+
### Per-Function Customization & Layered Caching
|
129
100
|
|
130
|
-
|
131
|
-
IS_DEVELOPMENT = True # Toggle based on environment
|
101
|
+
Layer caches by stacking checkpoints:
|
132
102
|
|
133
|
-
|
134
|
-
|
135
|
-
@
|
136
|
-
@dev_checkpoint
|
103
|
+
```python
|
104
|
+
@checkpoint(format="memory") # Always use memory storage
|
105
|
+
@dev_checkpoint # Adds caching during development
|
137
106
|
def some_expensive_function():
|
138
107
|
print("Performing a time-consuming operation...")
|
139
108
|
return sum(i * i for i in range(10**6))
|
140
109
|
```
|
141
110
|
|
142
|
-
- In development
|
143
|
-
- In production
|
111
|
+
- **In development**: Both `dev_checkpoint` and `memory` caches are active.
|
112
|
+
- **In production**: Only the `memory` cache is active.
|
144
113
|
|
145
114
|
---
|
146
115
|
|
147
116
|
## Usage
|
148
117
|
|
149
118
|
### Force Recalculation
|
150
|
-
|
119
|
+
Force a recalculation and overwrite the stored checkpoint:
|
151
120
|
|
152
121
|
```python
|
153
122
|
result = expensive_function.rerun(4)
|
154
123
|
```
|
155
124
|
|
156
|
-
###
|
125
|
+
### Call the Original Function
|
157
126
|
Use `fn` to directly call the original, undecorated function:
|
158
127
|
|
159
128
|
```python
|
160
129
|
result = expensive_function.fn(4)
|
161
130
|
```
|
162
131
|
|
132
|
+
This is especially useful **inside recursive functions** to avoid redundant caching of intermediate steps while still caching the final result.
|
133
|
+
|
163
134
|
### Retrieve Stored Checkpoints
|
164
|
-
Access
|
135
|
+
Access cached results without recalculating:
|
165
136
|
|
166
137
|
```python
|
167
138
|
stored_result = expensive_function.get(4)
|
@@ -169,7 +140,56 @@ stored_result = expensive_function.get(4)
|
|
169
140
|
|
170
141
|
---
|
171
142
|
|
172
|
-
##
|
143
|
+
## Storage Backends
|
144
|
+
|
145
|
+
`checkpointer` works with both built-in and custom storage backends, so you can use what's provided or roll your own as needed.
|
146
|
+
|
147
|
+
### Built-In Backends
|
148
|
+
|
149
|
+
1. **PickleStorage**: Stores checkpoints on disk using Python's `pickle`.
|
150
|
+
2. **MemoryStorage**: Keeps checkpoints in memory for non-persistent, fast caching.
|
151
|
+
|
152
|
+
You can specify a storage backend using either its name (`"pickle"` or `"memory"`) or its corresponding class (`PickleStorage` or `MemoryStorage`) in the `format` parameter:
|
153
|
+
|
154
|
+
```python
|
155
|
+
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
156
|
+
|
157
|
+
@checkpoint(format="pickle") # Equivalent to format=PickleStorage
|
158
|
+
def disk_cached(x: int) -> int:
|
159
|
+
return x ** 2
|
160
|
+
|
161
|
+
@checkpoint(format="memory") # Equivalent to format=MemoryStorage
|
162
|
+
def memory_cached(x: int) -> int:
|
163
|
+
return x * 10
|
164
|
+
```
|
165
|
+
|
166
|
+
### Custom Storage Backends
|
167
|
+
|
168
|
+
Create a custom storage backend by inheriting from the `Storage` class and implementing its methods. Access configuration options through the `self.checkpointer` attribute, an instance of `Checkpointer`.
|
169
|
+
|
170
|
+
#### Example: Custom Storage Backend
|
171
|
+
|
172
|
+
```python
|
173
|
+
from checkpointer import checkpoint, Storage
|
174
|
+
from datetime import datetime
|
175
|
+
|
176
|
+
class CustomStorage(Storage):
|
177
|
+
def exists(self, path) -> bool: ... # Check if a checkpoint exists at the given path
|
178
|
+
def checkpoint_date(self, path) -> datetime: ... # Return the date the checkpoint was created
|
179
|
+
def store(self, path, data): ... # Save the checkpoint data
|
180
|
+
def load(self, path): ... # Return the checkpoint data
|
181
|
+
def delete(self, path): ... # Delete the checkpoint
|
182
|
+
|
183
|
+
@checkpoint(format=CustomStorage)
|
184
|
+
def custom_cached(x: int):
|
185
|
+
return x ** 2
|
186
|
+
```
|
187
|
+
|
188
|
+
Using a custom backend lets you tailor storage to your application, whether it involves databases, cloud storage, or custom file formats.
|
189
|
+
|
190
|
+
---
|
191
|
+
|
192
|
+
## Configuration Options ⚙️
|
173
193
|
|
174
194
|
| Option | Type | Default | Description |
|
175
195
|
|----------------|-------------------------------------|-------------|---------------------------------------------|
|
@@ -177,12 +197,12 @@ stored_result = expensive_function.get(4)
|
|
177
197
|
| `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
|
178
198
|
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
179
199
|
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
180
|
-
| `path` | `
|
200
|
+
| `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
|
181
201
|
| `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
182
202
|
|
183
203
|
---
|
184
204
|
|
185
|
-
## Full Example
|
205
|
+
## Full Example 🛠️
|
186
206
|
|
187
207
|
```python
|
188
208
|
import asyncio
|
@@ -0,0 +1,13 @@
|
|
1
|
+
checkpointer/__init__.py,sha256=22K2KXw5OV6wARX_tC0JwOBjFolcAgetarPg8thN4pk,363
|
2
|
+
checkpointer/checkpoint.py,sha256=NE_3f0qGabovELFUespUZ31CnKJIbNuH6SllNL_dais,4703
|
3
|
+
checkpointer/function_body.py,sha256=92mnTY9d_JhKnKugeySYRP6qhU4fH6F6zesb7h2pEi0,1720
|
4
|
+
checkpointer/print_checkpoint.py,sha256=21aeqgM9CMjNAJyScqFmXCWWfh3jBIn7o7i5zJkZGaA,1369
|
5
|
+
checkpointer/types.py,sha256=SslunQTXxovFuGOR_VKfL7z5Vif9RD1PPx0J1FQdGLw,564
|
6
|
+
checkpointer/utils.py,sha256=UrQt689UHUjl7kXpTbUCGkHUgQZllByX2rbuvZdt9vk,368
|
7
|
+
checkpointer/storages/bcolz_storage.py,sha256=UoeREc3oS8skFClu9sULpgpqbIVcp3tVd8CeYfAe5yM,2220
|
8
|
+
checkpointer/storages/memory_storage.py,sha256=RQ4WTVapxJGVPv1DNlb9VFTifxtyQy8YVo8fwaRLfdk,692
|
9
|
+
checkpointer/storages/pickle_storage.py,sha256=nyrBWLXKnyzXgZIMwrpWUOAGRozpX3jL9pCyCV29e4E,787
|
10
|
+
checkpointer-2.0.2.dist-info/METADATA,sha256=kRrURoPrW0vf1xkMTHTxKR4QbclBF1zhUxbD-m8FsM4,9076
|
11
|
+
checkpointer-2.0.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
12
|
+
checkpointer-2.0.2.dist-info/licenses/LICENSE,sha256=0cmUKqBotzbBcysIexd52AhjwbphhlGYiWbvg5l2QAU,1054
|
13
|
+
checkpointer-2.0.2.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
checkpointer/__init__.py,sha256=TODGBGbZYBJ5LIpz5t6tnQNJ7ODPRUvXjv3Ooqb8-cc,357
|
2
|
-
checkpointer/checkpoint.py,sha256=V8JL8ibmMeqZjLjaigeAWa-8c948VfIiYqO8t5OFk48,4812
|
3
|
-
checkpointer/function_body.py,sha256=92mnTY9d_JhKnKugeySYRP6qhU4fH6F6zesb7h2pEi0,1720
|
4
|
-
checkpointer/print_checkpoint.py,sha256=wHC2xWNwNfFhRHyhrmLkadYoyThRTJWiox3NjgE9Ubc,1369
|
5
|
-
checkpointer/types.py,sha256=yoNPnN_QJHfyK_Gs8c0SoywHHDUlU7uhKqPPTTWjRTE,469
|
6
|
-
checkpointer/utils.py,sha256=UrQt689UHUjl7kXpTbUCGkHUgQZllByX2rbuvZdt9vk,368
|
7
|
-
checkpointer/storages/bcolz_storage.py,sha256=5hbJB0VJ2k-FHf7rItywMXP74WT-JTqeNK5N8yftcnw,2647
|
8
|
-
checkpointer/storages/memory_storage.py,sha256=5ITKjh_bVNfj1C6pcyMgB4YU4sy6jOLlvH0_3Pl1Elo,558
|
9
|
-
checkpointer/storages/pickle_storage.py,sha256=ipXG2dht8YQAXJFEK5-OwOb8xP8ij_v7K0Qu5Xz9aVE,1622
|
10
|
-
checkpointer-2.0.0.dist-info/METADATA,sha256=5DZmJ0rMnPeRZ9b5REXv1h7z8tbj0llgTO9yp8xEbnQ,7561
|
11
|
-
checkpointer-2.0.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
12
|
-
checkpointer-2.0.0.dist-info/licenses/LICENSE,sha256=0cmUKqBotzbBcysIexd52AhjwbphhlGYiWbvg5l2QAU,1054
|
13
|
-
checkpointer-2.0.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|