checkpointer 1.2.0__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,270 @@
1
+ Metadata-Version: 2.3
2
+ Name: checkpointer
3
+ Version: 2.0.1
4
+ Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
5
+ Project-URL: Repository, https://github.com/Reddan/checkpointer.git
6
+ Author: Hampus Hallman
7
+ License: Copyright 2024 Hampus Hallman
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
14
+ Requires-Python: >=3.12
15
+ Requires-Dist: relib
16
+ Description-Content-Type: text/markdown
17
+
18
+ # checkpointer · [![License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [![pypi](https://img.shields.io/pypi/v/checkpointer)](https://pypi.org/project/checkpointer/) [![Python 3.12](https://img.shields.io/badge/python-3.12-blue)](https://pypi.org/project/checkpointer/)
19
+
20
+ `checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations. ⚡️
21
+
22
+ Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
23
+
24
+ ### Key Features:
25
+ - 🗂️ **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
26
+ - 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions.
27
+ - 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
28
+ - ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
29
+ - 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
30
+
31
+ ---
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install checkpointer
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Quick Start 🚀
42
+
43
+ ```python
44
+ from checkpointer import checkpoint
45
+
46
+ @checkpoint
47
+ def expensive_function(x: int) -> int:
48
+ print("Computing...")
49
+ return x ** 2
50
+
51
+ result = expensive_function(4) # Computes and stores result
52
+ result = expensive_function(4) # Loads from checkpoint
53
+ ```
54
+
55
+ ---
56
+
57
+ ## How It Works
58
+
59
+ When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
60
+
61
+ Additionally, `checkpointer` ensures that caches are invalidated when a function’s implementation or any of its dependencies change. Each function is assigned a hash based on:
62
+ 1. **Its source code**: Changes to the function’s code update its hash.
63
+ 2. **Dependent functions**: If a function calls others, changes to those will also update the hash.
64
+
65
+ ### Example: Cache Invalidation by Function Dependencies
66
+
67
+ ```python
68
+ def multiply(a, b):
69
+ return a * b
70
+
71
+ @checkpoint
72
+ def helper(x):
73
+ return multiply(x + 1, 2)
74
+
75
+ @checkpoint
76
+ def compute(a, b):
77
+ return helper(a) + helper(b)
78
+ ```
79
+
80
+ If you change `multiply`, the checkpoints for both `helper` and `compute` will be invalidated and recomputed.
81
+
82
+ ---
83
+
84
+ ## Parameterization
85
+
86
+ ### Global Configuration
87
+
88
+ You can configure a custom `Checkpointer`:
89
+
90
+ ```python
91
+ from checkpointer import checkpoint
92
+
93
+ checkpoint = checkpoint(format="memory", root_path="/tmp/checkpoints")
94
+ ```
95
+
96
+ Extend this configuration by calling the configured checkpointer again with additional options:
97
+
98
+ ```python
99
+ extended_checkpoint = checkpoint(format="pickle", verbosity=0)
100
+ ```
101
+
102
+ ### Per-Function Customization
103
+
104
+ ```python
105
+ @checkpoint(format="pickle", verbosity=0)
106
+ def my_function(x, y):
107
+ return x + y
108
+ ```
109
+
110
+ ### Combining Configurations
111
+
112
+ ```python
113
+ checkpoint = checkpoint(format="memory", verbosity=1)
114
+ quiet_checkpoint = checkpoint(verbosity=0)
115
+ pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
116
+
117
+ @checkpoint
118
+ def compute_square(n: int) -> int:
119
+ return n ** 2
120
+
121
+ @quiet_checkpoint
122
+ def compute_quietly(n: int) -> int:
123
+ return n ** 3
124
+
125
+ @pickle_checkpoint
126
+ def compute_sum(a: int, b: int) -> int:
127
+ return a + b
128
+ ```
129
+
130
+ ### Layered Caching
131
+
132
+ ```python
133
+ IS_DEVELOPMENT = True # Toggle based on environment
134
+
135
+ dev_checkpoint = checkpoint(when=IS_DEVELOPMENT)
136
+
137
+ @checkpoint(format="memory")
138
+ @dev_checkpoint
139
+ def some_expensive_function():
140
+ print("Performing a time-consuming operation...")
141
+ return sum(i * i for i in range(10**6))
142
+ ```
143
+
144
+ - In development: Both `dev_checkpoint` and `memory` caches are active.
145
+ - In production: Only the `memory` cache is active.
146
+
147
+ ---
148
+
149
+ ## Usage
150
+
151
+ ### Force Recalculation
152
+ Use `rerun` to force a recalculation and overwrite the stored checkpoint:
153
+
154
+ ```python
155
+ result = expensive_function.rerun(4)
156
+ ```
157
+
158
+ ### Bypass Checkpointer
159
+ Use `fn` to directly call the original, undecorated function:
160
+
161
+ ```python
162
+ result = expensive_function.fn(4)
163
+ ```
164
+
165
+ This is especially useful **inside recursive functions**. By using `.fn` within the function itself, you avoid redundant caching of intermediate recursive calls while still caching the final result at the top level.
166
+
167
+ ### Retrieve Stored Checkpoints
168
+ Access stored results without recalculating:
169
+
170
+ ```python
171
+ stored_result = expensive_function.get(4)
172
+ ```
173
+
174
+ ---
175
+
176
+ ## Storage Backends
177
+
178
+ `checkpointer` supports flexible storage backends, including built-in options and custom implementations.
179
+
180
+ ### Built-In Backends
181
+
182
+ 1. **PickleStorage**: Saves checkpoints to disk using Python's `pickle` module.
183
+ 2. **MemoryStorage**: Caches checkpoints in memory for fast, non-persistent use.
184
+
185
+ To use these backends, pass either `"pickle"` or `PickleStorage` (and similarly for `"memory"` or `MemoryStorage`) to the `format` parameter:
186
+ ```python
187
+ from checkpointer import checkpoint, PickleStorage, MemoryStorage
188
+
189
+ @checkpoint(format="pickle") # Equivalent to format=PickleStorage
190
+ def disk_cached(x: int) -> int:
191
+ return x ** 2
192
+
193
+ @checkpoint(format="memory") # Equivalent to format=MemoryStorage
194
+ def memory_cached(x: int) -> int:
195
+ return x * 10
196
+ ```
197
+
198
+ ### Custom Storage Backends
199
+
200
+ Create custom storage backends by implementing methods for storing, loading, and managing checkpoints. For example, a custom storage backend might use a database, cloud storage, or a specialized format.
201
+
202
+ Example usage:
203
+ ```python
204
+ from checkpointer import checkpoint, Storage
205
+ from typing import Any
206
+ from pathlib import Path
207
+ from datetime import datetime
208
+
209
+ class CustomStorage(Storage): # Optional for type hinting
210
+ @staticmethod
211
+ def exists(path: Path) -> bool: ...
212
+ @staticmethod
213
+ def checkpoint_date(path: Path) -> datetime: ...
214
+ @staticmethod
215
+ def store(path: Path, data: Any) -> None: ...
216
+ @staticmethod
217
+ def load(path: Path) -> Any: ...
218
+ @staticmethod
219
+ def delete(path: Path) -> None: ...
220
+
221
+ @checkpoint(format=CustomStorage)
222
+ def custom_cached(x: int):
223
+ return x ** 2
224
+ ```
225
+
226
+ This flexibility allows you to adapt `checkpointer` to meet any storage requirement, whether persistent or in-memory.
227
+
228
+ ---
229
+
230
+ ## Configuration Options ⚙️
231
+
232
+ | Option | Type | Default | Description |
233
+ |----------------|-------------------------------------|-------------|---------------------------------------------|
234
+ | `format` | `"pickle"`, `"memory"`, `Storage` | `"pickle"` | Storage backend format. |
235
+ | `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
236
+ | `when` | `bool` | `True` | Enable or disable checkpointing. |
237
+ | `verbosity` | `0` or `1` | `1` | Logging verbosity. |
238
+ | `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
239
+ | `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
240
+
241
+ ---
242
+
243
+ ## Full Example 🛠️
244
+
245
+ ```python
246
+ import asyncio
247
+ from checkpointer import checkpoint
248
+
249
+ @checkpoint
250
+ def compute_square(n: int) -> int:
251
+ print(f"Computing {n}^2...")
252
+ return n ** 2
253
+
254
+ @checkpoint(format="memory")
255
+ async def async_compute_sum(a: int, b: int) -> int:
256
+ await asyncio.sleep(1)
257
+ return a + b
258
+
259
+ async def main():
260
+ result1 = compute_square(5)
261
+ print(result1)
262
+
263
+ result2 = await async_compute_sum(3, 7)
264
+ print(result2)
265
+
266
+ result3 = async_compute_sum.get(3, 7)
267
+ print(result3)
268
+
269
+ asyncio.run(main())
270
+ ```
@@ -0,0 +1,253 @@
1
+ # checkpointer · [![License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [![pypi](https://img.shields.io/pypi/v/checkpointer)](https://pypi.org/project/checkpointer/) [![Python 3.12](https://img.shields.io/badge/python-3.12-blue)](https://pypi.org/project/checkpointer/)
2
+
3
+ `checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations. ⚡️
4
+
5
+ Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
6
+
7
+ ### Key Features:
8
+ - 🗂️ **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
9
+ - 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions.
10
+ - 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
11
+ - ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
12
+ - 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
13
+
14
+ ---
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install checkpointer
20
+ ```
21
+
22
+ ---
23
+
24
+ ## Quick Start 🚀
25
+
26
+ ```python
27
+ from checkpointer import checkpoint
28
+
29
+ @checkpoint
30
+ def expensive_function(x: int) -> int:
31
+ print("Computing...")
32
+ return x ** 2
33
+
34
+ result = expensive_function(4) # Computes and stores result
35
+ result = expensive_function(4) # Loads from checkpoint
36
+ ```
37
+
38
+ ---
39
+
40
+ ## How It Works
41
+
42
+ When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
43
+
44
+ Additionally, `checkpointer` ensures that caches are invalidated when a function’s implementation or any of its dependencies change. Each function is assigned a hash based on:
45
+ 1. **Its source code**: Changes to the function’s code update its hash.
46
+ 2. **Dependent functions**: If a function calls others, changes to those will also update the hash.
47
+
48
+ ### Example: Cache Invalidation by Function Dependencies
49
+
50
+ ```python
51
+ def multiply(a, b):
52
+ return a * b
53
+
54
+ @checkpoint
55
+ def helper(x):
56
+ return multiply(x + 1, 2)
57
+
58
+ @checkpoint
59
+ def compute(a, b):
60
+ return helper(a) + helper(b)
61
+ ```
62
+
63
+ If you change `multiply`, the checkpoints for both `helper` and `compute` will be invalidated and recomputed.
64
+
65
+ ---
66
+
67
+ ## Parameterization
68
+
69
+ ### Global Configuration
70
+
71
+ You can configure a custom `Checkpointer`:
72
+
73
+ ```python
74
+ from checkpointer import checkpoint
75
+
76
+ checkpoint = checkpoint(format="memory", root_path="/tmp/checkpoints")
77
+ ```
78
+
79
+ Extend this configuration by calling the configured checkpointer again with additional options:
80
+
81
+ ```python
82
+ extended_checkpoint = checkpoint(format="pickle", verbosity=0)
83
+ ```
84
+
85
+ ### Per-Function Customization
86
+
87
+ ```python
88
+ @checkpoint(format="pickle", verbosity=0)
89
+ def my_function(x, y):
90
+ return x + y
91
+ ```
92
+
93
+ ### Combining Configurations
94
+
95
+ ```python
96
+ checkpoint = checkpoint(format="memory", verbosity=1)
97
+ quiet_checkpoint = checkpoint(verbosity=0)
98
+ pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
99
+
100
+ @checkpoint
101
+ def compute_square(n: int) -> int:
102
+ return n ** 2
103
+
104
+ @quiet_checkpoint
105
+ def compute_quietly(n: int) -> int:
106
+ return n ** 3
107
+
108
+ @pickle_checkpoint
109
+ def compute_sum(a: int, b: int) -> int:
110
+ return a + b
111
+ ```
112
+
113
+ ### Layered Caching
114
+
115
+ ```python
116
+ IS_DEVELOPMENT = True # Toggle based on environment
117
+
118
+ dev_checkpoint = checkpoint(when=IS_DEVELOPMENT)
119
+
120
+ @checkpoint(format="memory")
121
+ @dev_checkpoint
122
+ def some_expensive_function():
123
+ print("Performing a time-consuming operation...")
124
+ return sum(i * i for i in range(10**6))
125
+ ```
126
+
127
+ - In development: Both `dev_checkpoint` and `memory` caches are active.
128
+ - In production: Only the `memory` cache is active.
129
+
130
+ ---
131
+
132
+ ## Usage
133
+
134
+ ### Force Recalculation
135
+ Use `rerun` to force a recalculation and overwrite the stored checkpoint:
136
+
137
+ ```python
138
+ result = expensive_function.rerun(4)
139
+ ```
140
+
141
+ ### Bypass Checkpointer
142
+ Use `fn` to directly call the original, undecorated function:
143
+
144
+ ```python
145
+ result = expensive_function.fn(4)
146
+ ```
147
+
148
+ This is especially useful **inside recursive functions**. By using `.fn` within the function itself, you avoid redundant caching of intermediate recursive calls while still caching the final result at the top level.
149
+
150
+ ### Retrieve Stored Checkpoints
151
+ Access stored results without recalculating:
152
+
153
+ ```python
154
+ stored_result = expensive_function.get(4)
155
+ ```
156
+
157
+ ---
158
+
159
+ ## Storage Backends
160
+
161
+ `checkpointer` supports flexible storage backends, including built-in options and custom implementations.
162
+
163
+ ### Built-In Backends
164
+
165
+ 1. **PickleStorage**: Saves checkpoints to disk using Python's `pickle` module.
166
+ 2. **MemoryStorage**: Caches checkpoints in memory for fast, non-persistent use.
167
+
168
+ To use these backends, pass either `"pickle"` or `PickleStorage` (and similarly for `"memory"` or `MemoryStorage`) to the `format` parameter:
169
+ ```python
170
+ from checkpointer import checkpoint, PickleStorage, MemoryStorage
171
+
172
+ @checkpoint(format="pickle") # Equivalent to format=PickleStorage
173
+ def disk_cached(x: int) -> int:
174
+ return x ** 2
175
+
176
+ @checkpoint(format="memory") # Equivalent to format=MemoryStorage
177
+ def memory_cached(x: int) -> int:
178
+ return x * 10
179
+ ```
180
+
181
+ ### Custom Storage Backends
182
+
183
+ Create custom storage backends by implementing methods for storing, loading, and managing checkpoints. For example, a custom storage backend might use a database, cloud storage, or a specialized format.
184
+
185
+ Example usage:
186
+ ```python
187
+ from checkpointer import checkpoint, Storage
188
+ from typing import Any
189
+ from pathlib import Path
190
+ from datetime import datetime
191
+
192
+ class CustomStorage(Storage): # Optional for type hinting
193
+ @staticmethod
194
+ def exists(path: Path) -> bool: ...
195
+ @staticmethod
196
+ def checkpoint_date(path: Path) -> datetime: ...
197
+ @staticmethod
198
+ def store(path: Path, data: Any) -> None: ...
199
+ @staticmethod
200
+ def load(path: Path) -> Any: ...
201
+ @staticmethod
202
+ def delete(path: Path) -> None: ...
203
+
204
+ @checkpoint(format=CustomStorage)
205
+ def custom_cached(x: int):
206
+ return x ** 2
207
+ ```
208
+
209
+ This flexibility allows you to adapt `checkpointer` to meet any storage requirement, whether persistent or in-memory.
210
+
211
+ ---
212
+
213
+ ## Configuration Options ⚙️
214
+
215
+ | Option | Type | Default | Description |
216
+ |----------------|-------------------------------------|-------------|---------------------------------------------|
217
+ | `format` | `"pickle"`, `"memory"`, `Storage` | `"pickle"` | Storage backend format. |
218
+ | `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
219
+ | `when` | `bool` | `True` | Enable or disable checkpointing. |
220
+ | `verbosity` | `0` or `1` | `1` | Logging verbosity. |
221
+ | `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
222
+ | `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
223
+
224
+ ---
225
+
226
+ ## Full Example 🛠️
227
+
228
+ ```python
229
+ import asyncio
230
+ from checkpointer import checkpoint
231
+
232
+ @checkpoint
233
+ def compute_square(n: int) -> int:
234
+ print(f"Computing {n}^2...")
235
+ return n ** 2
236
+
237
+ @checkpoint(format="memory")
238
+ async def async_compute_sum(a: int, b: int) -> int:
239
+ await asyncio.sleep(1)
240
+ return a + b
241
+
242
+ async def main():
243
+ result1 = compute_square(5)
244
+ print(result1)
245
+
246
+ result2 = await async_compute_sum(3, 7)
247
+ print(result2)
248
+
249
+ result3 = async_compute_sum.get(3, 7)
250
+ print(result3)
251
+
252
+ asyncio.run(main())
253
+ ```
@@ -0,0 +1,9 @@
1
# Public entry points of the package. PickleStorage and MemoryStorage are
# re-exported here because the README documents importing them from the
# package root (``from checkpointer import checkpoint, PickleStorage, MemoryStorage``).
from .checkpoint import Checkpointer, CheckpointFn, CheckpointError
from .types import Storage
from .function_body import get_function_hash
from .storages.pickle_storage import PickleStorage
from .storages.memory_storage import MemoryStorage
import tempfile

# Factory-style alias for the Checkpointer class.
create_checkpointer = Checkpointer
# Checkpointer built with no options, i.e. every option at its default.
checkpoint = Checkpointer()
# Checkpointer that selects the "memory" storage format.
memory_checkpoint = Checkpointer(format="memory")
# Checkpointer rooted in a "checkpoints" folder under the system temp directory.
tmp_checkpoint = Checkpointer(root_path=tempfile.gettempdir() + "/checkpoints")
@@ -0,0 +1,114 @@
1
+ import inspect
2
+ import relib.hashing as hashing
3
+ from typing import Generic, TypeVar, TypedDict, Callable, Unpack, Literal, Union, Any, cast, overload
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from functools import update_wrapper
7
+ from .types import Storage
8
+ from .function_body import get_function_hash
9
+ from .utils import unwrap_fn, sync_resolve_coroutine
10
+ from .storages.pickle_storage import PickleStorage
11
+ from .storages.memory_storage import MemoryStorage
12
+ from .storages.bcolz_storage import BcolzStorage
13
+ from .print_checkpoint import print_checkpoint
14
+
15
# Type variable standing for the decorated callable; CheckpointFn is generic
# over it so its call signature can mirror the wrapped function's.
Fn = TypeVar("Fn", bound=Callable)

# Root directory used by Checkpointer when no ``root_path`` option is passed.
DEFAULT_DIR = Path.home() / ".cache" / "checkpoints"

# String shorthands accepted by the ``format`` option, mapped to their
# storage backends (looked up in Checkpointer.get_storage).
STORAGE_MAP = {
    "memory": MemoryStorage,
    "pickle": PickleStorage,
    "bcolz": BcolzStorage,
}
19
+
20
class CheckpointError(Exception):
    """Error raised by the checkpointing machinery itself.

    Within this module it is raised when a custom ``path`` function returns
    a non-string and when a stored checkpoint cannot be loaded.
    """
22
+
23
class CheckpointerOpts(TypedDict, total=False):
    """Keyword options accepted by ``Checkpointer``; every key is optional."""

    # Storage backend: a Storage implementation, or a string shorthand that
    # is resolved through STORAGE_MAP.
    format: Storage | Literal["pickle", "memory", "bcolz"]
    # Directory that checkpoint ids are joined onto; a falsy value (such as
    # None) makes Checkpointer fall back to ".".
    root_path: Path | str | None
    # When False, calls go straight to the wrapped function and nothing is
    # stored or loaded.
    when: bool
    # Values above 0 enable checkpoint log lines (never for MemoryStorage).
    verbosity: Literal[0, 1]
    # Optional function called with the call's arguments; it must return the
    # checkpoint id as a string.
    path: Callable[..., str] | None
    # Optional predicate given the checkpoint's date; a truthy result forces
    # the value to be recomputed.
    should_expire: Callable[[datetime], bool] | None
30
+
31
class Checkpointer:
    """Holds checkpointing options and acts as a decorator (factory).

    Calling an instance with a function wraps it in a ``CheckpointFn``.
    Calling it with keyword options returns a new ``Checkpointer`` whose
    options are this instance's options overridden by the given ones.
    """

    def __init__(self, **opts: Unpack[CheckpointerOpts]):
        """Store each option on the instance, applying defaults for missing keys."""
        configured_root = opts.get("root_path", DEFAULT_DIR)
        self.format = opts.get("format", "pickle")
        # A falsy root (None or "") resolves to the current directory.
        self.root_path = Path(configured_root or ".")
        self.when = opts.get("when", True)
        self.verbosity = opts.get("verbosity", 1)
        self.path = opts.get("path")
        self.should_expire = opts.get("should_expire")

    def get_storage(self) -> Storage:
        """Return the storage backend selected by ``self.format``.

        String formats are looked up in STORAGE_MAP; anything else is
        returned unchanged.
        """
        if isinstance(self.format, str):
            return STORAGE_MAP[self.format]
        return self.format

    @overload
    def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) -> "CheckpointFn[Fn]": ...
    @overload
    def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) -> "Checkpointer": ...
    def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) -> Union["Checkpointer", "CheckpointFn[Fn]"]:
        """Wrap ``fn``, or derive a re-configured checkpointer.

        Without overrides: returns ``CheckpointFn(self, fn)`` when ``fn`` is
        callable, otherwise returns ``self``. With overrides: builds a new
        ``Checkpointer`` from the merged options and applies it to ``fn``.
        """
        if not override_opts:
            return CheckpointFn(self, fn) if callable(fn) else self

        # The instance attributes carry the same names as the option keys, so
        # the attribute dict doubles as the current option set.
        merged_opts = CheckpointerOpts(**(vars(self) | override_opts))
        return Checkpointer(**merged_opts)(fn)
53
+
54
class CheckpointFn(Generic[Fn]):
    """Callable wrapper that stores and reuses the results of ``fn``.

    Attributes:
        checkpointer: The ``Checkpointer`` supplying the options.
        fn: The function as it was passed in (possibly itself a wrapper).
        fn_hash: Hash from ``get_function_hash`` of the unwrapped function.
        fn_id: ``"<source file name>/<function name>"`` of the unwrapped function.
        is_async: Whether the unwrapped function is a coroutine function.
    """

    def __init__(self, checkpointer: Checkpointer, fn: Fn):
        wrapped = unwrap_fn(fn)
        file_name = Path(wrapped.__code__.co_filename).name
        # update_wrapper runs before the assignments below, so those
        # attributes take precedence over anything copied from ``wrapped``.
        update_wrapper(cast(Callable, self), wrapped)
        self.checkpointer = checkpointer
        self.fn = fn
        self.fn_hash = get_function_hash(wrapped)
        self.fn_id = f"{file_name}/{wrapped.__name__}"
        self.is_async = inspect.iscoroutinefunction(wrapped)

    def get_checkpoint_id(self, args: tuple, kw: dict) -> str:
        """Return the checkpoint id for a call with ``args`` and ``kw``.

        Without a callable ``path`` option the id is ``fn_id`` followed by a
        hash of the function hash and the arguments (empty ``kw`` is replaced
        by ``0`` before hashing). Otherwise the ``path`` function is called
        with the arguments and its result is used as the id.

        Raises:
            CheckpointError: If the ``path`` function returns a non-string.
        """
        if not callable(self.checkpointer.path):
            return f"{self.fn_id}/{hashing.hash([self.fn_hash, args, kw or 0])}"
        checkpoint_id = self.checkpointer.path(*args, **kw)
        if not isinstance(checkpoint_id, str):
            raise CheckpointError(f"path function must return a string, got {type(checkpoint_id)}")
        return checkpoint_id

    async def _store_on_demand(self, args: tuple, kw: dict, rerun: bool):
        """Return the checkpointed value, computing and storing it if needed.

        The value is recomputed when ``rerun`` is set, when no checkpoint
        exists, or when ``should_expire`` returns a truthy value for the
        checkpoint's date.
        """
        checkpoint_id = self.get_checkpoint_id(args, kw)
        checkpoint_path = self.checkpointer.root_path / checkpoint_id
        storage = self.checkpointer.get_storage()
        # Memory-backed checkpoints are never logged, whatever the verbosity.
        should_log = storage is not MemoryStorage and self.checkpointer.verbosity > 0
        refresh = rerun \
            or not storage.exists(checkpoint_path) \
            or (self.checkpointer.should_expire and self.checkpointer.should_expire(storage.checkpoint_date(checkpoint_path)))

        if refresh:
            print_checkpoint(should_log, "MEMORIZING", checkpoint_id, "blue")
            data = self.fn(*args, **kw)
            # Async functions hand back a coroutine; await it so the result,
            # not the coroutine object, is what gets stored.
            if inspect.iscoroutine(data):
                data = await data
            storage.store(checkpoint_path, data)
            return data

        try:
            data = storage.load(checkpoint_path)
            print_checkpoint(should_log, "REMEMBERED", checkpoint_id, "green")
            return data
        except (EOFError, FileNotFoundError):
            # Unreadable checkpoint: delete it and go through the store path again.
            print_checkpoint(should_log, "CORRUPTED", checkpoint_id, "yellow")
            storage.delete(checkpoint_path)
            return await self._store_on_demand(args, kw, rerun)

    def _call(self, args: tuple, kw: dict, rerun=False):
        """Dispatch a call: bypass, return a coroutine, or resolve synchronously."""
        if not self.checkpointer.when:
            # Checkpointing disabled: behave exactly like the wrapped function.
            return self.fn(*args, **kw)
        coroutine = self._store_on_demand(args, kw, rerun)
        return coroutine if self.is_async else sync_resolve_coroutine(coroutine)

    # Typed as ``Fn`` so type checkers see the wrapped function's signature.
    __call__: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw))
    # Same as calling, but always recomputes and overwrites the checkpoint.
    rerun: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw, True))

    def get(self, *args, **kw) -> Any:
        """Load the stored result for these arguments without calling ``fn``.

        Raises:
            CheckpointError: If the storage backend fails to load the
                checkpoint; the underlying error is attached as the cause.
        """
        checkpoint_path = self.checkpointer.root_path / self.get_checkpoint_id(args, kw)
        storage = self.checkpointer.get_storage()
        try:
            return storage.load(checkpoint_path)
        except Exception as err:
            # Catch ``Exception`` rather than everything so KeyboardInterrupt
            # and SystemExit are not disguised as a missing checkpoint.
            raise CheckpointError("Could not load checkpoint") from err
@@ -0,0 +1,46 @@
1
+ import inspect
2
+ import relib.hashing as hashing
3
+ from collections.abc import Callable
4
+ from types import FunctionType, CodeType
5
+ from pathlib import Path
6
+ from .utils import unwrap_fn
7
+
8
# Working directory captured once at import time; is_user_fn compares source
# file locations against it.
cwd = Path.cwd()
9
+
10
def get_fn_path(fn: Callable) -> Path:
    """Return the resolved path of the file that ``inspect`` reports for ``fn``."""
    source_file = inspect.getfile(fn)
    return Path(source_file).resolve()
12
+
13
def get_function_body(fn: Callable) -> str:
    """Return the source of ``fn`` with trailing whitespace and blank lines removed.

    Each source line is right-stripped; lines left empty are dropped and the
    remainder is joined with newlines.
    """
    # TODO: Strip comments
    source_lines, _ = inspect.getsourcelines(fn)
    trimmed = [source_line.rstrip() for source_line in source_lines]
    return "\n".join(source_line for source_line in trimmed if source_line)
19
+
20
def get_code_children(code: CodeType) -> list[str]:
    """Collect the names referenced by ``code`` and by code objects nested in it.

    The result starts with ``code.co_names`` and is followed, depth-first and
    in constant order, by the names of every code object found in
    ``code.co_consts``.
    """
    names = list(code.co_names)
    for const in code.co_consts:
        if isinstance(const, CodeType):
            names.extend(get_code_children(const))
    return names
24
+
25
def is_user_fn(candidate_fn, cleared_fns: set[Callable]) -> bool:
    """Tell whether ``candidate_fn`` is a not-yet-seen function defined under ``cwd``.

    True only when the candidate is a plain Python function, is not already
    in ``cleared_fns``, and its source file sits somewhere below ``cwd``.
    """
    if not isinstance(candidate_fn, FunctionType):
        return False
    if candidate_fn in cleared_fns:
        return False
    return cwd in get_fn_path(candidate_fn).parents
29
+
30
def append_fn_children(cleared_fns: set[Callable], fn: Callable) -> None:
    """Add to ``cleared_fns`` every user function reachable from ``fn``.

    Names referenced by ``fn``'s code are looked up in its module globals,
    unwrapped, and filtered with ``is_user_fn``; the survivors are recorded
    and then explored recursively. ``cleared_fns`` is mutated in place.
    """
    module_globals = fn.__globals__
    referenced_names = get_code_children(fn.__code__)
    candidates = [unwrap_fn(module_globals.get(name, None)) for name in referenced_names]
    discovered = [candidate for candidate in candidates if is_user_fn(candidate, cleared_fns)]
    # Record before recursing so functions already found are not explored again.
    cleared_fns.update(discovered)
    for discovered_fn in discovered:
        append_fn_children(cleared_fns, discovered_fn)
37
+
38
def get_fn_children(fn: Callable) -> list[Callable]:
    """Return the user functions reachable from ``fn`` in a deterministic order.

    Functions are ordered by ``__name__`` first, as before. Module and
    qualified name break ties, so two same-named functions no longer end up
    in whatever order the set happens to iterate in. That order can differ
    between interpreter runs and would change the resulting function hash.
    """
    cleared_fns: set[Callable] = set()
    append_fn_children(cleared_fns, fn)
    return sorted(
        cleared_fns,
        key=lambda child: (child.__name__, child.__module__ or "", child.__qualname__),
    )
42
+
43
def get_function_hash(fn: Callable) -> str:
    """Hash the source of ``fn`` together with the source of its user-defined children.

    The bodies are hashed as a list with ``fn`` first, followed by the
    functions returned by ``get_fn_children``.
    """
    bodies = [get_function_body(member) for member in (fn, *get_fn_children(fn))]
    return hashing.hash(bodies)