checkpointer 2.0.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {checkpointer-2.0.0 → checkpointer-2.0.2}/PKG-INFO +99 -79
- checkpointer-2.0.2/README.md +215 -0
- checkpointer-2.0.2/checkpointer/__init__.py +9 -0
- {checkpointer-2.0.0 → checkpointer-2.0.2}/checkpointer/checkpoint.py +34 -45
- {checkpointer-2.0.0 → checkpointer-2.0.2}/checkpointer/print_checkpoint.py +1 -1
- {checkpointer-2.0.0 → checkpointer-2.0.2}/checkpointer/storages/bcolz_storage.py +10 -27
- checkpointer-2.0.2/checkpointer/storages/memory_storage.py +25 -0
- checkpointer-2.0.2/checkpointer/storages/pickle_storage.py +31 -0
- checkpointer-2.0.2/checkpointer/types.py +23 -0
- {checkpointer-2.0.0 → checkpointer-2.0.2}/pyproject.toml +1 -1
- {checkpointer-2.0.0 → checkpointer-2.0.2}/uv.lock +1 -1
- checkpointer-2.0.0/README.md +0 -195
- checkpointer-2.0.0/checkpointer/__init__.py +0 -9
- checkpointer-2.0.0/checkpointer/storages/memory_storage.py +0 -29
- checkpointer-2.0.0/checkpointer/storages/pickle_storage.py +0 -55
- checkpointer-2.0.0/checkpointer/types.py +0 -19
- {checkpointer-2.0.0 → checkpointer-2.0.2}/.gitignore +0 -0
- {checkpointer-2.0.0 → checkpointer-2.0.2}/LICENSE +0 -0
- {checkpointer-2.0.0 → checkpointer-2.0.2}/checkpointer/function_body.py +0 -0
- {checkpointer-2.0.0 → checkpointer-2.0.2}/checkpointer/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: checkpointer
|
3
|
-
Version: 2.0.0
|
3
|
+
Version: 2.0.2
|
4
4
|
Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
|
5
5
|
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
6
6
|
Author: Hampus Hallman
|
@@ -17,41 +17,16 @@ Description-Content-Type: text/markdown
|
|
17
17
|
|
18
18
|
# checkpointer · [License](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [PyPI](https://pypi.org/project/checkpointer/) [PyPI](https://pypi.org/project/checkpointer/)
|
19
19
|
|
20
|
-
`checkpointer` is a Python library for memoizing function results. It
|
20
|
+
`checkpointer` is a Python library for memoizing function results. It provides a decorator-based API with support for multiple storage backends. Use it for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
21
21
|
|
22
22
|
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
23
23
|
|
24
24
|
### Key Features:
|
25
|
-
- **Multiple Storage Backends**:
|
26
|
-
- **Simple Decorator API**: Apply `@checkpoint` to functions.
|
27
|
-
- **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
-
- **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
-
- **Flexible Path Configuration**: Control where checkpoints are stored.
|
30
|
-
|
31
|
-
### How It Works
|
32
|
-
|
33
|
-
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
|
34
|
-
|
35
|
-
Additionally, `checkpointer` ensures that caches are invalidated when a function’s implementation or any of its dependencies change. Each function is assigned a hash based on:
|
36
|
-
1. **Its source code**: Changes to the function’s code update its hash.
|
37
|
-
2. **Dependent functions**: If a function calls others, changes to those will also update the hash.
|
38
|
-
|
39
|
-
### Example: Cache Invalidation by Function Dependencies
|
40
|
-
|
41
|
-
```python
|
42
|
-
def multiply(a, b):
|
43
|
-
return a * b
|
44
|
-
|
45
|
-
@checkpoint
|
46
|
-
def helper(x):
|
47
|
-
return multiply(x + 1, 2)
|
48
|
-
|
49
|
-
@checkpoint
|
50
|
-
def compute(a, b):
|
51
|
-
return helper(a) + helper(b)
|
52
|
-
```
|
53
|
-
|
54
|
-
If you change `multiply`, the checkpoints for both `helper` and `compute` will be invalidated and recomputed.
|
25
|
+
- 🗂️ **Multiple Storage Backends**: Built-in support for in-memory and pickle-based storage, or create your own.
|
26
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions without boilerplate.
|
27
|
+
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
+
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
+
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
55
30
|
|
56
31
|
---
|
57
32
|
|
@@ -63,7 +38,7 @@ pip install checkpointer
|
|
63
38
|
|
64
39
|
---
|
65
40
|
|
66
|
-
## Quick Start
|
41
|
+
## Quick Start 🚀
|
67
42
|
|
68
43
|
```python
|
69
44
|
from checkpointer import checkpoint
|
@@ -73,95 +48,91 @@ def expensive_function(x: int) -> int:
|
|
73
48
|
print("Computing...")
|
74
49
|
return x ** 2
|
75
50
|
|
76
|
-
result = expensive_function(4) # Computes and stores result
|
77
|
-
result = expensive_function(4) # Loads from
|
51
|
+
result = expensive_function(4) # Computes and stores the result
|
52
|
+
result = expensive_function(4) # Loads from the cache
|
78
53
|
```
|
79
54
|
|
80
55
|
---
|
81
56
|
|
82
|
-
##
|
57
|
+
## How It Works
|
58
|
+
|
59
|
+
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` loads the cached result instead of recomputing.
|
83
60
|
|
84
|
-
|
61
|
+
Additionally, `checkpointer` ensures that caches are invalidated when a function's implementation or any of its dependencies change. Each function is assigned a hash based on:
|
62
|
+
1. **Its source code**: Changes to the function's code update its hash.
|
63
|
+
2. **Dependent functions**: If a function calls others, changes in those dependencies will also update the hash.
|
85
64
|
|
86
|
-
|
65
|
+
### Example: Cache Invalidation
|
87
66
|
|
88
67
|
```python
|
89
|
-
|
68
|
+
def multiply(a, b):
|
69
|
+
return a * b
|
90
70
|
|
91
|
-
checkpoint
|
71
|
+
@checkpoint
|
72
|
+
def helper(x):
|
73
|
+
return multiply(x + 1, 2)
|
74
|
+
|
75
|
+
@checkpoint
|
76
|
+
def compute(a, b):
|
77
|
+
return helper(a) + helper(b)
|
92
78
|
```
|
93
79
|
|
94
|
-
|
80
|
+
If you modify `multiply`, caches for both `helper` and `compute` are invalidated and recomputed.
|
95
81
|
|
96
|
-
|
97
|
-
extended_checkpoint = checkpoint(format="pickle", verbosity=0)
|
98
|
-
```
|
82
|
+
---
|
99
83
|
|
100
|
-
|
84
|
+
## Parameterization
|
101
85
|
|
102
|
-
|
103
|
-
@checkpoint(format="pickle", verbosity=0)
|
104
|
-
def my_function(x, y):
|
105
|
-
return x + y
|
106
|
-
```
|
86
|
+
### Custom Configuration
|
107
87
|
|
108
|
-
|
88
|
+
Set up a `Checkpointer` instance with custom settings, then derive a new one by calling that instance with overrides:
|
109
89
|
|
110
90
|
```python
|
111
|
-
|
112
|
-
quiet_checkpoint = checkpoint(verbosity=0)
|
113
|
-
pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
|
114
|
-
|
115
|
-
@checkpoint
|
116
|
-
def compute_square(n: int) -> int:
|
117
|
-
return n ** 2
|
91
|
+
from checkpointer import checkpoint
|
118
92
|
|
119
|
-
|
120
|
-
def compute_quietly(n: int) -> int:
|
121
|
-
return n ** 3
|
93
|
+
IS_DEVELOPMENT = True # Toggle based on your environment
|
122
94
|
|
123
|
-
|
124
|
-
|
125
|
-
return a + b
|
95
|
+
tmp_checkpoint = checkpoint(root_path="/tmp/checkpoints")
|
96
|
+
dev_checkpoint = tmp_checkpoint(when=IS_DEVELOPMENT) # Adds development-specific behavior
|
126
97
|
```
|
127
98
|
|
128
|
-
### Layered Caching
|
99
|
+
### Per-Function Customization & Layered Caching
|
129
100
|
|
130
|
-
|
131
|
-
IS_DEVELOPMENT = True # Toggle based on environment
|
101
|
+
Layer caches by stacking checkpoints:
|
132
102
|
|
133
|
-
|
134
|
-
|
135
|
-
@
|
136
|
-
@dev_checkpoint
|
103
|
+
```python
|
104
|
+
@checkpoint(format="memory") # Always use memory storage
|
105
|
+
@dev_checkpoint # Adds caching during development
|
137
106
|
def some_expensive_function():
|
138
107
|
print("Performing a time-consuming operation...")
|
139
108
|
return sum(i * i for i in range(10**6))
|
140
109
|
```
|
141
110
|
|
142
|
-
- In development
|
143
|
-
- In production
|
111
|
+
- **In development**: Both `dev_checkpoint` and `memory` caches are active.
|
112
|
+
- **In production**: Only the `memory` cache is active.
|
144
113
|
|
145
114
|
---
|
146
115
|
|
147
116
|
## Usage
|
148
117
|
|
149
118
|
### Force Recalculation
|
150
|
-
|
119
|
+
Force a recalculation and overwrite the stored checkpoint:
|
151
120
|
|
152
121
|
```python
|
153
122
|
result = expensive_function.rerun(4)
|
154
123
|
```
|
155
124
|
|
156
|
-
###
|
125
|
+
### Call the Original Function
|
157
126
|
Use `fn` to directly call the original, undecorated function:
|
158
127
|
|
159
128
|
```python
|
160
129
|
result = expensive_function.fn(4)
|
161
130
|
```
|
162
131
|
|
132
|
+
This is especially useful **inside recursive functions** to avoid redundant caching of intermediate steps while still caching the final result.
|
133
|
+
|
163
134
|
### Retrieve Stored Checkpoints
|
164
|
-
Access
|
135
|
+
Access cached results without recalculating:
|
165
136
|
|
166
137
|
```python
|
167
138
|
stored_result = expensive_function.get(4)
|
@@ -169,7 +140,56 @@ stored_result = expensive_function.get(4)
|
|
169
140
|
|
170
141
|
---
|
171
142
|
|
172
|
-
## Configuration Options
|
143
|
+
## Storage Backends
|
144
|
+
|
145
|
+
`checkpointer` works with both built-in and custom storage backends, so you can use what's provided or roll your own as needed.
|
146
|
+
|
147
|
+
### Built-In Backends
|
148
|
+
|
149
|
+
1. **PickleStorage**: Stores checkpoints on disk using Python's `pickle`.
|
150
|
+
2. **MemoryStorage**: Keeps checkpoints in memory for non-persistent, fast caching.
|
151
|
+
|
152
|
+
You can specify a storage backend using either its name (`"pickle"` or `"memory"`) or its corresponding class (`PickleStorage` or `MemoryStorage`) in the `format` parameter:
|
153
|
+
|
154
|
+
```python
|
155
|
+
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
156
|
+
|
157
|
+
@checkpoint(format="pickle") # Equivalent to format=PickleStorage
|
158
|
+
def disk_cached(x: int) -> int:
|
159
|
+
return x ** 2
|
160
|
+
|
161
|
+
@checkpoint(format="memory") # Equivalent to format=MemoryStorage
|
162
|
+
def memory_cached(x: int) -> int:
|
163
|
+
return x * 10
|
164
|
+
```
|
165
|
+
|
166
|
+
### Custom Storage Backends
|
167
|
+
|
168
|
+
Create a custom storage backend by inheriting from the `Storage` class and implementing its methods. Access configuration options through the `self.checkpointer` attribute, an instance of `Checkpointer`.
|
169
|
+
|
170
|
+
#### Example: Custom Storage Backend
|
171
|
+
|
172
|
+
```python
|
173
|
+
from checkpointer import checkpoint, Storage
|
174
|
+
from datetime import datetime
|
175
|
+
|
176
|
+
class CustomStorage(Storage):
|
177
|
+
def exists(self, path) -> bool: ... # Check if a checkpoint exists at the given path
|
178
|
+
def checkpoint_date(self, path) -> datetime: ... # Return the date the checkpoint was created
|
179
|
+
def store(self, path, data): ... # Save the checkpoint data
|
180
|
+
def load(self, path): ... # Return the checkpoint data
|
181
|
+
def delete(self, path): ... # Delete the checkpoint
|
182
|
+
|
183
|
+
@checkpoint(format=CustomStorage)
|
184
|
+
def custom_cached(x: int):
|
185
|
+
return x ** 2
|
186
|
+
```
|
187
|
+
|
188
|
+
Using a custom backend lets you tailor storage to your application, whether it involves databases, cloud storage, or custom file formats.
|
189
|
+
|
190
|
+
---
|
191
|
+
|
192
|
+
## Configuration Options ⚙️
|
173
193
|
|
174
194
|
| Option | Type | Default | Description |
|
175
195
|
|----------------|-------------------------------------|-------------|---------------------------------------------|
|
@@ -177,12 +197,12 @@ stored_result = expensive_function.get(4)
|
|
177
197
|
| `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
|
178
198
|
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
179
199
|
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
180
|
-
| `path` | `
|
200
|
+
| `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
|
181
201
|
| `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
182
202
|
|
183
203
|
---
|
184
204
|
|
185
|
-
## Full Example
|
205
|
+
## Full Example 🛠️
|
186
206
|
|
187
207
|
```python
|
188
208
|
import asyncio
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# checkpointer · [License](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [PyPI](https://pypi.org/project/checkpointer/) [PyPI](https://pypi.org/project/checkpointer/)
|
2
|
+
|
3
|
+
`checkpointer` is a Python library for memoizing function results. It provides a decorator-based API with support for multiple storage backends. Use it for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
4
|
+
|
5
|
+
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
6
|
+
|
7
|
+
### Key Features:
|
8
|
+
- 🗂️ **Multiple Storage Backends**: Built-in support for in-memory and pickle-based storage, or create your own.
|
9
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions without boilerplate.
|
10
|
+
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
11
|
+
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
12
|
+
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
13
|
+
|
14
|
+
---
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
|
18
|
+
```bash
|
19
|
+
pip install checkpointer
|
20
|
+
```
|
21
|
+
|
22
|
+
---
|
23
|
+
|
24
|
+
## Quick Start 🚀
|
25
|
+
|
26
|
+
```python
|
27
|
+
from checkpointer import checkpoint
|
28
|
+
|
29
|
+
@checkpoint
|
30
|
+
def expensive_function(x: int) -> int:
|
31
|
+
print("Computing...")
|
32
|
+
return x ** 2
|
33
|
+
|
34
|
+
result = expensive_function(4) # Computes and stores the result
|
35
|
+
result = expensive_function(4) # Loads from the cache
|
36
|
+
```
|
37
|
+
|
38
|
+
---
|
39
|
+
|
40
|
+
## How It Works
|
41
|
+
|
42
|
+
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` loads the cached result instead of recomputing.
|
43
|
+
|
44
|
+
Additionally, `checkpointer` ensures that caches are invalidated when a function's implementation or any of its dependencies change. Each function is assigned a hash based on:
|
45
|
+
1. **Its source code**: Changes to the function's code update its hash.
|
46
|
+
2. **Dependent functions**: If a function calls others, changes in those dependencies will also update the hash.
|
47
|
+
|
48
|
+
### Example: Cache Invalidation
|
49
|
+
|
50
|
+
```python
|
51
|
+
def multiply(a, b):
|
52
|
+
return a * b
|
53
|
+
|
54
|
+
@checkpoint
|
55
|
+
def helper(x):
|
56
|
+
return multiply(x + 1, 2)
|
57
|
+
|
58
|
+
@checkpoint
|
59
|
+
def compute(a, b):
|
60
|
+
return helper(a) + helper(b)
|
61
|
+
```
|
62
|
+
|
63
|
+
If you modify `multiply`, caches for both `helper` and `compute` are invalidated and recomputed.
|
64
|
+
|
65
|
+
---
|
66
|
+
|
67
|
+
## Parameterization
|
68
|
+
|
69
|
+
### Custom Configuration
|
70
|
+
|
71
|
+
Set up a `Checkpointer` instance with custom settings, then derive a new one by calling that instance with overrides:
|
72
|
+
|
73
|
+
```python
|
74
|
+
from checkpointer import checkpoint
|
75
|
+
|
76
|
+
IS_DEVELOPMENT = True # Toggle based on your environment
|
77
|
+
|
78
|
+
tmp_checkpoint = checkpoint(root_path="/tmp/checkpoints")
|
79
|
+
dev_checkpoint = tmp_checkpoint(when=IS_DEVELOPMENT) # Adds development-specific behavior
|
80
|
+
```
|
81
|
+
|
82
|
+
### Per-Function Customization & Layered Caching
|
83
|
+
|
84
|
+
Layer caches by stacking checkpoints:
|
85
|
+
|
86
|
+
```python
|
87
|
+
@checkpoint(format="memory") # Always use memory storage
|
88
|
+
@dev_checkpoint # Adds caching during development
|
89
|
+
def some_expensive_function():
|
90
|
+
print("Performing a time-consuming operation...")
|
91
|
+
return sum(i * i for i in range(10**6))
|
92
|
+
```
|
93
|
+
|
94
|
+
- **In development**: Both `dev_checkpoint` and `memory` caches are active.
|
95
|
+
- **In production**: Only the `memory` cache is active.
|
96
|
+
|
97
|
+
---
|
98
|
+
|
99
|
+
## Usage
|
100
|
+
|
101
|
+
### Force Recalculation
|
102
|
+
Force a recalculation and overwrite the stored checkpoint:
|
103
|
+
|
104
|
+
```python
|
105
|
+
result = expensive_function.rerun(4)
|
106
|
+
```
|
107
|
+
|
108
|
+
### Call the Original Function
|
109
|
+
Use `fn` to directly call the original, undecorated function:
|
110
|
+
|
111
|
+
```python
|
112
|
+
result = expensive_function.fn(4)
|
113
|
+
```
|
114
|
+
|
115
|
+
This is especially useful **inside recursive functions** to avoid redundant caching of intermediate steps while still caching the final result.
|
116
|
+
|
117
|
+
### Retrieve Stored Checkpoints
|
118
|
+
Access cached results without recalculating:
|
119
|
+
|
120
|
+
```python
|
121
|
+
stored_result = expensive_function.get(4)
|
122
|
+
```
|
123
|
+
|
124
|
+
---
|
125
|
+
|
126
|
+
## Storage Backends
|
127
|
+
|
128
|
+
`checkpointer` works with both built-in and custom storage backends, so you can use what's provided or roll your own as needed.
|
129
|
+
|
130
|
+
### Built-In Backends
|
131
|
+
|
132
|
+
1. **PickleStorage**: Stores checkpoints on disk using Python's `pickle`.
|
133
|
+
2. **MemoryStorage**: Keeps checkpoints in memory for non-persistent, fast caching.
|
134
|
+
|
135
|
+
You can specify a storage backend using either its name (`"pickle"` or `"memory"`) or its corresponding class (`PickleStorage` or `MemoryStorage`) in the `format` parameter:
|
136
|
+
|
137
|
+
```python
|
138
|
+
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
139
|
+
|
140
|
+
@checkpoint(format="pickle") # Equivalent to format=PickleStorage
|
141
|
+
def disk_cached(x: int) -> int:
|
142
|
+
return x ** 2
|
143
|
+
|
144
|
+
@checkpoint(format="memory") # Equivalent to format=MemoryStorage
|
145
|
+
def memory_cached(x: int) -> int:
|
146
|
+
return x * 10
|
147
|
+
```
|
148
|
+
|
149
|
+
### Custom Storage Backends
|
150
|
+
|
151
|
+
Create a custom storage backend by inheriting from the `Storage` class and implementing its methods. Access configuration options through the `self.checkpointer` attribute, an instance of `Checkpointer`.
|
152
|
+
|
153
|
+
#### Example: Custom Storage Backend
|
154
|
+
|
155
|
+
```python
|
156
|
+
from checkpointer import checkpoint, Storage
|
157
|
+
from datetime import datetime
|
158
|
+
|
159
|
+
class CustomStorage(Storage):
|
160
|
+
def exists(self, path) -> bool: ... # Check if a checkpoint exists at the given path
|
161
|
+
def checkpoint_date(self, path) -> datetime: ... # Return the date the checkpoint was created
|
162
|
+
def store(self, path, data): ... # Save the checkpoint data
|
163
|
+
def load(self, path): ... # Return the checkpoint data
|
164
|
+
def delete(self, path): ... # Delete the checkpoint
|
165
|
+
|
166
|
+
@checkpoint(format=CustomStorage)
|
167
|
+
def custom_cached(x: int):
|
168
|
+
return x ** 2
|
169
|
+
```
|
170
|
+
|
171
|
+
Using a custom backend lets you tailor storage to your application, whether it involves databases, cloud storage, or custom file formats.
|
172
|
+
|
173
|
+
---
|
174
|
+
|
175
|
+
## Configuration Options ⚙️
|
176
|
+
|
177
|
+
| Option | Type | Default | Description |
|
178
|
+
|----------------|-------------------------------------|-------------|---------------------------------------------|
|
179
|
+
| `format` | `"pickle"`, `"memory"`, `Storage` | `"pickle"` | Storage backend format. |
|
180
|
+
| `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
|
181
|
+
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
182
|
+
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
183
|
+
| `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
|
184
|
+
| `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
185
|
+
|
186
|
+
---
|
187
|
+
|
188
|
+
## Full Example 🛠️
|
189
|
+
|
190
|
+
```python
|
191
|
+
import asyncio
|
192
|
+
from checkpointer import checkpoint
|
193
|
+
|
194
|
+
@checkpoint
|
195
|
+
def compute_square(n: int) -> int:
|
196
|
+
print(f"Computing {n}^2...")
|
197
|
+
return n ** 2
|
198
|
+
|
199
|
+
@checkpoint(format="memory")
|
200
|
+
async def async_compute_sum(a: int, b: int) -> int:
|
201
|
+
await asyncio.sleep(1)
|
202
|
+
return a + b
|
203
|
+
|
204
|
+
async def main():
|
205
|
+
result1 = compute_square(5)
|
206
|
+
print(result1)
|
207
|
+
|
208
|
+
result2 = await async_compute_sum(3, 7)
|
209
|
+
print(result2)
|
210
|
+
|
211
|
+
result3 = async_compute_sum.get(3, 7)
|
212
|
+
print(result3)
|
213
|
+
|
214
|
+
asyncio.run(main())
|
215
|
+
```
|
@@ -0,0 +1,9 @@
|
|
1
|
+
from .checkpoint import Checkpointer, CheckpointFn, CheckpointError
|
2
|
+
from .types import Storage
|
3
|
+
from .function_body import get_function_hash
|
4
|
+
import tempfile
|
5
|
+
|
6
|
+
create_checkpointer = Checkpointer
|
7
|
+
checkpoint = Checkpointer()
|
8
|
+
memory_checkpoint = Checkpointer(format="memory", verbosity=0)
|
9
|
+
tmp_checkpoint = Checkpointer(root_path=tempfile.gettempdir() + "/checkpoints")
|
@@ -1,9 +1,9 @@
|
|
1
|
+
from __future__ import annotations
|
1
2
|
import inspect
|
2
3
|
import relib.hashing as hashing
|
3
|
-
from typing import Generic, TypeVar, TypedDict, Unpack, Literal,
|
4
|
-
from collections.abc import Callable
|
5
|
-
from datetime import datetime
|
4
|
+
from typing import Generic, TypeVar, Type, TypedDict, Callable, Unpack, Literal, Any, cast, overload
|
6
5
|
from pathlib import Path
|
6
|
+
from datetime import datetime
|
7
7
|
from functools import update_wrapper
|
8
8
|
from .types import Storage
|
9
9
|
from .function_body import get_function_hash
|
@@ -16,25 +16,18 @@ from .print_checkpoint import print_checkpoint
|
|
16
16
|
Fn = TypeVar("Fn", bound=Callable)
|
17
17
|
|
18
18
|
DEFAULT_DIR = Path.home() / ".cache/checkpoints"
|
19
|
-
STORAGE_MAP = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzStorage}
|
19
|
+
STORAGE_MAP: dict[str, Type[Storage]] = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzStorage}
|
20
20
|
|
21
21
|
class CheckpointError(Exception):
|
22
22
|
pass
|
23
23
|
|
24
|
-
class CheckpointReadFail(CheckpointError):
|
25
|
-
pass
|
26
|
-
|
27
|
-
StorageType = Literal["pickle", "memory", "bcolz"] | Storage
|
28
|
-
CheckpointPath = str | Callable[..., str] | None
|
29
|
-
ShouldExpire = Callable[[datetime], bool]
|
30
|
-
|
31
24
|
class CheckpointerOpts(TypedDict, total=False):
|
32
|
-
format:
|
25
|
+
format: Type[Storage] | Literal["pickle", "memory", "bcolz"]
|
33
26
|
root_path: Path | str | None
|
34
27
|
when: bool
|
35
28
|
verbosity: Literal[0, 1]
|
36
|
-
path:
|
37
|
-
should_expire:
|
29
|
+
path: Callable[..., str] | None
|
30
|
+
should_expire: Callable[[datetime], bool] | None
|
38
31
|
|
39
32
|
class Checkpointer:
|
40
33
|
def __init__(self, **opts: Unpack[CheckpointerOpts]):
|
@@ -45,14 +38,11 @@ class Checkpointer:
|
|
45
38
|
self.path = opts.get("path")
|
46
39
|
self.should_expire = opts.get("should_expire")
|
47
40
|
|
48
|
-
def get_storage(self) -> Storage:
|
49
|
-
return STORAGE_MAP[self.format] if isinstance(self.format, str) else self.format
|
50
|
-
|
51
41
|
@overload
|
52
|
-
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) ->
|
42
|
+
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) -> CheckpointFn[Fn]: ...
|
53
43
|
@overload
|
54
|
-
def __call__(self, fn=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
55
|
-
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
44
|
+
def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer: ...
|
45
|
+
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer | CheckpointFn[Fn]:
|
56
46
|
if override_opts:
|
57
47
|
opts = CheckpointerOpts(**{**self.__dict__, **override_opts})
|
58
48
|
return Checkpointer(**opts)(fn)
|
@@ -64,52 +54,51 @@ class CheckpointFn(Generic[Fn]):
|
|
64
54
|
wrapped = unwrap_fn(fn)
|
65
55
|
file_name = Path(wrapped.__code__.co_filename).name
|
66
56
|
update_wrapper(cast(Callable, self), wrapped)
|
57
|
+
storage = STORAGE_MAP[checkpointer.format] if isinstance(checkpointer.format, str) else checkpointer.format
|
67
58
|
self.checkpointer = checkpointer
|
68
59
|
self.fn = fn
|
69
60
|
self.fn_hash = get_function_hash(wrapped)
|
70
61
|
self.fn_id = f"{file_name}/{wrapped.__name__}"
|
71
|
-
self.is_async = inspect.iscoroutinefunction(
|
62
|
+
self.is_async = inspect.iscoroutinefunction(wrapped)
|
63
|
+
self.storage = storage(checkpointer)
|
72
64
|
|
73
65
|
def get_checkpoint_id(self, args: tuple, kw: dict) -> str:
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
return f"{self.fn_id}/{hashing.hash([self.fn_hash, args, kw or 0])}"
|
83
|
-
|
84
|
-
async def _store_on_demand(self, args: tuple, kw: dict, force: bool):
|
66
|
+
if not callable(self.checkpointer.path):
|
67
|
+
return f"{self.fn_id}/{hashing.hash([self.fn_hash, args, kw or 0])}"
|
68
|
+
checkpoint_id = self.checkpointer.path(*args, **kw)
|
69
|
+
if not isinstance(checkpoint_id, str):
|
70
|
+
raise CheckpointError(f"path function must return a string, got {type(checkpoint_id)}")
|
71
|
+
return checkpoint_id
|
72
|
+
|
73
|
+
async def _store_on_demand(self, args: tuple, kw: dict, rerun: bool):
|
85
74
|
checkpoint_id = self.get_checkpoint_id(args, kw)
|
86
75
|
checkpoint_path = self.checkpointer.root_path / checkpoint_id
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
or storage.
|
91
|
-
or (self.checkpointer.should_expire and storage.should_expire(checkpoint_path, self.checkpointer.should_expire))
|
76
|
+
should_log = self.checkpointer.verbosity > 0
|
77
|
+
refresh = rerun \
|
78
|
+
or not self.storage.exists(checkpoint_path) \
|
79
|
+
or (self.checkpointer.should_expire and self.checkpointer.should_expire(self.storage.checkpoint_date(checkpoint_path)))
|
92
80
|
|
93
81
|
if refresh:
|
94
82
|
print_checkpoint(should_log, "MEMORIZING", checkpoint_id, "blue")
|
95
83
|
data = self.fn(*args, **kw)
|
96
84
|
if inspect.iscoroutine(data):
|
97
85
|
data = await data
|
98
|
-
|
86
|
+
self.storage.store(checkpoint_path, data)
|
87
|
+
return data
|
99
88
|
|
100
89
|
try:
|
101
|
-
data = storage.
|
90
|
+
data = self.storage.load(checkpoint_path)
|
102
91
|
print_checkpoint(should_log, "REMEMBERED", checkpoint_id, "green")
|
103
92
|
return data
|
104
93
|
except (EOFError, FileNotFoundError):
|
105
94
|
print_checkpoint(should_log, "CORRUPTED", checkpoint_id, "yellow")
|
106
|
-
storage.
|
107
|
-
return await self._store_on_demand(args, kw,
|
95
|
+
self.storage.delete(checkpoint_path)
|
96
|
+
return await self._store_on_demand(args, kw, rerun)
|
108
97
|
|
109
|
-
def _call(self, args: tuple, kw: dict,
|
98
|
+
def _call(self, args: tuple, kw: dict, rerun=False):
|
110
99
|
if not self.checkpointer.when:
|
111
100
|
return self.fn(*args, **kw)
|
112
|
-
coroutine = self._store_on_demand(args, kw,
|
101
|
+
coroutine = self._store_on_demand(args, kw, rerun)
|
113
102
|
return coroutine if self.is_async else sync_resolve_coroutine(coroutine)
|
114
103
|
|
115
104
|
__call__: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw))
|
@@ -118,6 +107,6 @@ class CheckpointFn(Generic[Fn]):
|
|
118
107
|
def get(self, *args, **kw) -> Any:
|
119
108
|
checkpoint_path = self.checkpointer.root_path / self.get_checkpoint_id(args, kw)
|
120
109
|
try:
|
121
|
-
return self.
|
110
|
+
return self.storage.load(checkpoint_path)
|
122
111
|
except:
|
123
|
-
raise
|
112
|
+
raise CheckpointError("Could not load checkpoint")
|
@@ -44,7 +44,7 @@ def colored_(text: str, color: Color | None = None, on_color: Color | None = Non
|
|
44
44
|
text = f"\033[{COLOR_MAP[on_color] + 10}m{text}"
|
45
45
|
return text + "\033[0m"
|
46
46
|
|
47
|
-
noop = lambda *
|
47
|
+
noop = lambda text, *a, **k: text
|
48
48
|
colored = colored_ if allow_color() else noop
|
49
49
|
|
50
50
|
def print_checkpoint(should_log: bool, title: str, text: str, color: Color):
|
@@ -18,35 +18,21 @@ def get_data_type_str(x):
|
|
18
18
|
def get_metapath(path: Path):
|
19
19
|
return path.with_name(f"{path.name}_meta")
|
20
20
|
|
21
|
-
def get_collection_timestamp(path: Path):
|
22
|
-
import bcolz
|
23
|
-
metapath = get_metapath(path)
|
24
|
-
meta_data = bcolz.open(metapath)[:][0]
|
25
|
-
return meta_data["created"]
|
26
|
-
|
27
21
|
def insert_data(path: Path, data):
|
28
22
|
import bcolz
|
29
23
|
c = bcolz.carray(data, rootdir=path, mode="w")
|
30
24
|
c.flush()
|
31
25
|
|
32
26
|
class BcolzStorage(Storage):
|
33
|
-
|
34
|
-
|
35
|
-
try:
|
36
|
-
get_collection_timestamp(path)
|
37
|
-
return False
|
38
|
-
except (FileNotFoundError, EOFError):
|
39
|
-
return True
|
27
|
+
def exists(self, path):
|
28
|
+
return path.exists()
|
40
29
|
|
41
|
-
|
42
|
-
|
43
|
-
return expire_fn(get_collection_timestamp(path))
|
30
|
+
def checkpoint_date(self, path):
|
31
|
+
return datetime.fromtimestamp(path.stat().st_mtime)
|
44
32
|
|
45
|
-
|
46
|
-
def store_data(path, data):
|
33
|
+
def store(self, path, data):
|
47
34
|
metapath = get_metapath(path)
|
48
35
|
path.parent.mkdir(parents=True, exist_ok=True)
|
49
|
-
created = datetime.now()
|
50
36
|
data_type_str = get_data_type_str(data)
|
51
37
|
if data_type_str == "tuple":
|
52
38
|
fields = list(range(len(data)))
|
@@ -54,18 +40,16 @@ class BcolzStorage(Storage):
|
|
54
40
|
fields = sorted(data.keys())
|
55
41
|
else:
|
56
42
|
fields = []
|
57
|
-
meta_data = {"
|
43
|
+
meta_data = {"data_type_str": data_type_str, "fields": fields}
|
58
44
|
insert_data(metapath, meta_data)
|
59
45
|
if data_type_str in ["tuple", "dict"]:
|
60
46
|
for i in range(len(fields)):
|
61
47
|
child_path = Path(f"{path} ({i})")
|
62
|
-
|
48
|
+
self.store(child_path, data[fields[i]])
|
63
49
|
else:
|
64
50
|
insert_data(path, data)
|
65
|
-
return data
|
66
51
|
|
67
|
-
|
68
|
-
def load_data(path):
|
52
|
+
def load(self, path):
|
69
53
|
import bcolz
|
70
54
|
metapath = get_metapath(path)
|
71
55
|
meta_data = bcolz.open(metapath)[:][0]
|
@@ -73,7 +57,7 @@ class BcolzStorage(Storage):
|
|
73
57
|
if data_type_str in ["tuple", "dict"]:
|
74
58
|
fields = meta_data["fields"]
|
75
59
|
partitions = range(len(fields))
|
76
|
-
data = [
|
60
|
+
data = [self.load(Path(f"{path} ({i})")) for i in partitions]
|
77
61
|
if data_type_str == "tuple":
|
78
62
|
return tuple(data)
|
79
63
|
else:
|
@@ -87,8 +71,7 @@ class BcolzStorage(Storage):
|
|
87
71
|
else:
|
88
72
|
return data[:]
|
89
73
|
|
90
|
-
|
91
|
-
def delete_data(path):
|
74
|
+
def delete(self, path):
|
92
75
|
# NOTE: Not recursive
|
93
76
|
metapath = get_metapath(path)
|
94
77
|
try:
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from pathlib import Path
|
3
|
+
from datetime import datetime
|
4
|
+
from ..types import Storage
|
5
|
+
|
6
|
+
item_map: dict[str, tuple[datetime, Any]] = {}
|
7
|
+
|
8
|
+
class MemoryStorage(Storage):
|
9
|
+
def get_short_path(self, path: Path):
|
10
|
+
return str(path.relative_to(self.checkpointer.root_path))
|
11
|
+
|
12
|
+
def exists(self, path):
|
13
|
+
return self.get_short_path(path) in item_map
|
14
|
+
|
15
|
+
def checkpoint_date(self, path):
|
16
|
+
return item_map[self.get_short_path(path)][0]
|
17
|
+
|
18
|
+
def store(self, path, data):
|
19
|
+
item_map[self.get_short_path(path)] = (datetime.now(), data)
|
20
|
+
|
21
|
+
def load(self, path):
|
22
|
+
return item_map[self.get_short_path(path)][1]
|
23
|
+
|
24
|
+
def delete(self, path):
|
25
|
+
del item_map[self.get_short_path(path)]
|
@@ -0,0 +1,31 @@
|
|
1
|
+
import pickle
|
2
|
+
from pathlib import Path
|
3
|
+
from datetime import datetime
|
4
|
+
from ..types import Storage
|
5
|
+
|
6
|
+
def get_path(path: Path):
|
7
|
+
return path.with_name(f"{path.name}.pkl")
|
8
|
+
|
9
|
+
class PickleStorage(Storage):
|
10
|
+
def exists(self, path):
|
11
|
+
return get_path(path).exists()
|
12
|
+
|
13
|
+
def checkpoint_date(self, path):
|
14
|
+
return datetime.fromtimestamp(get_path(path).stat().st_mtime)
|
15
|
+
|
16
|
+
def store(self, path, data):
|
17
|
+
full_path = get_path(path)
|
18
|
+
full_path.parent.mkdir(parents=True, exist_ok=True)
|
19
|
+
with full_path.open("wb") as file:
|
20
|
+
pickle.dump(data, file, -1)
|
21
|
+
|
22
|
+
def load(self, path):
|
23
|
+
full_path = get_path(path)
|
24
|
+
with full_path.open("rb") as file:
|
25
|
+
return pickle.load(file)
|
26
|
+
|
27
|
+
def delete(self, path):
|
28
|
+
try:
|
29
|
+
get_path(path).unlink()
|
30
|
+
except FileNotFoundError:
|
31
|
+
pass
|
@@ -0,0 +1,23 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any, TYPE_CHECKING
|
3
|
+
from pathlib import Path
|
4
|
+
from datetime import datetime
|
5
|
+
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .checkpoint import Checkpointer
|
8
|
+
|
9
|
+
class Storage:
|
10
|
+
checkpointer: Checkpointer
|
11
|
+
|
12
|
+
def __init__(self, checkpointer: Checkpointer):
|
13
|
+
self.checkpointer = checkpointer
|
14
|
+
|
15
|
+
def exists(self, path: Path) -> bool: ...
|
16
|
+
|
17
|
+
def checkpoint_date(self, path: Path) -> datetime: ...
|
18
|
+
|
19
|
+
def store(self, path: Path, data: Any) -> None: ...
|
20
|
+
|
21
|
+
def load(self, path: Path) -> Any: ...
|
22
|
+
|
23
|
+
def delete(self, path: Path) -> None: ...
|
checkpointer-2.0.0/README.md
DELETED
@@ -1,195 +0,0 @@
|
|
1
|
-
# checkpointer · [License](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [PyPI](https://pypi.org/project/checkpointer/) [Python](https://pypi.org/project/checkpointer/)
|
2
|
-
|
3
|
-
`checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations. 🚀
|
4
|
-
|
5
|
-
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
6
|
-
|
7
|
-
### Key Features:
|
8
|
-
- **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
|
9
|
-
- **Simple Decorator API**: Apply `@checkpoint` to functions.
|
10
|
-
- **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
11
|
-
- **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
12
|
-
- **Flexible Path Configuration**: Control where checkpoints are stored.
|
13
|
-
|
14
|
-
### How It Works
|
15
|
-
|
16
|
-
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
|
17
|
-
|
18
|
-
Additionally, `checkpointer` ensures that caches are invalidated when a function’s implementation or any of its dependencies change. Each function is assigned a hash based on:
|
19
|
-
1. **Its source code**: Changes to the function’s code update its hash.
|
20
|
-
2. **Dependent functions**: If a function calls others, changes to those will also update the hash.
|
21
|
-
|
22
|
-
### Example: Cache Invalidation by Function Dependencies
|
23
|
-
|
24
|
-
```python
|
25
|
-
def multiply(a, b):
|
26
|
-
return a * b
|
27
|
-
|
28
|
-
@checkpoint
|
29
|
-
def helper(x):
|
30
|
-
return multiply(x + 1, 2)
|
31
|
-
|
32
|
-
@checkpoint
|
33
|
-
def compute(a, b):
|
34
|
-
return helper(a) + helper(b)
|
35
|
-
```
|
36
|
-
|
37
|
-
If you change `multiply`, the checkpoints for both `helper` and `compute` will be invalidated and recomputed.
|
38
|
-
|
39
|
-
---
|
40
|
-
|
41
|
-
## Installation
|
42
|
-
|
43
|
-
```bash
|
44
|
-
pip install checkpointer
|
45
|
-
```
|
46
|
-
|
47
|
-
---
|
48
|
-
|
49
|
-
## Quick Start
|
50
|
-
|
51
|
-
```python
|
52
|
-
from checkpointer import checkpoint
|
53
|
-
|
54
|
-
@checkpoint
|
55
|
-
def expensive_function(x: int) -> int:
|
56
|
-
print("Computing...")
|
57
|
-
return x ** 2
|
58
|
-
|
59
|
-
result = expensive_function(4) # Computes and stores result
|
60
|
-
result = expensive_function(4) # Loads from checkpoint
|
61
|
-
```
|
62
|
-
|
63
|
-
---
|
64
|
-
|
65
|
-
## Parameterization
|
66
|
-
|
67
|
-
### Global Configuration
|
68
|
-
|
69
|
-
You can configure a custom `Checkpointer`:
|
70
|
-
|
71
|
-
```python
|
72
|
-
from checkpointer import checkpoint
|
73
|
-
|
74
|
-
checkpoint = checkpoint(format="memory", root_path="/tmp/checkpoints")
|
75
|
-
```
|
76
|
-
|
77
|
-
Extend this configuration by calling itself again:
|
78
|
-
|
79
|
-
```python
|
80
|
-
extended_checkpoint = checkpoint(format="pickle", verbosity=0)
|
81
|
-
```
|
82
|
-
|
83
|
-
### Per-Function Customization
|
84
|
-
|
85
|
-
```python
|
86
|
-
@checkpoint(format="pickle", verbosity=0)
|
87
|
-
def my_function(x, y):
|
88
|
-
return x + y
|
89
|
-
```
|
90
|
-
|
91
|
-
### Combining Configurations
|
92
|
-
|
93
|
-
```python
|
94
|
-
checkpoint = checkpoint(format="memory", verbosity=1)
|
95
|
-
quiet_checkpoint = checkpoint(verbosity=0)
|
96
|
-
pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
|
97
|
-
|
98
|
-
@checkpoint
|
99
|
-
def compute_square(n: int) -> int:
|
100
|
-
return n ** 2
|
101
|
-
|
102
|
-
@quiet_checkpoint
|
103
|
-
def compute_quietly(n: int) -> int:
|
104
|
-
return n ** 3
|
105
|
-
|
106
|
-
@pickle_checkpoint
|
107
|
-
def compute_sum(a: int, b: int) -> int:
|
108
|
-
return a + b
|
109
|
-
```
|
110
|
-
|
111
|
-
### Layered Caching
|
112
|
-
|
113
|
-
```python
|
114
|
-
IS_DEVELOPMENT = True # Toggle based on environment
|
115
|
-
|
116
|
-
dev_checkpoint = checkpoint(when=IS_DEVELOPMENT)
|
117
|
-
|
118
|
-
@checkpoint(format="memory")
|
119
|
-
@dev_checkpoint
|
120
|
-
def some_expensive_function():
|
121
|
-
print("Performing a time-consuming operation...")
|
122
|
-
return sum(i * i for i in range(10**6))
|
123
|
-
```
|
124
|
-
|
125
|
-
- In development: Both `dev_checkpoint` and `memory` caches are active.
|
126
|
-
- In production: Only the `memory` cache is active.
|
127
|
-
|
128
|
-
---
|
129
|
-
|
130
|
-
## Usage
|
131
|
-
|
132
|
-
### Force Recalculation
|
133
|
-
Use `rerun` to force a recalculation and overwrite the stored checkpoint:
|
134
|
-
|
135
|
-
```python
|
136
|
-
result = expensive_function.rerun(4)
|
137
|
-
```
|
138
|
-
|
139
|
-
### Bypass Checkpointer
|
140
|
-
Use `fn` to directly call the original, undecorated function:
|
141
|
-
|
142
|
-
```python
|
143
|
-
result = expensive_function.fn(4)
|
144
|
-
```
|
145
|
-
|
146
|
-
### Retrieve Stored Checkpoints
|
147
|
-
Access stored results without recalculating:
|
148
|
-
|
149
|
-
```python
|
150
|
-
stored_result = expensive_function.get(4)
|
151
|
-
```
|
152
|
-
|
153
|
-
---
|
154
|
-
|
155
|
-
## Configuration Options
|
156
|
-
|
157
|
-
| Option | Type | Default | Description |
|
158
|
-
|----------------|-------------------------------------|-------------|---------------------------------------------|
|
159
|
-
| `format` | `"pickle"`, `"memory"`, `Storage` | `"pickle"` | Storage backend format. |
|
160
|
-
| `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
|
161
|
-
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
162
|
-
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
163
|
-
| `path` | `str` or `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
|
164
|
-
| `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
165
|
-
|
166
|
-
---
|
167
|
-
|
168
|
-
## Full Example
|
169
|
-
|
170
|
-
```python
|
171
|
-
import asyncio
|
172
|
-
from checkpointer import checkpoint
|
173
|
-
|
174
|
-
@checkpoint
|
175
|
-
def compute_square(n: int) -> int:
|
176
|
-
print(f"Computing {n}^2...")
|
177
|
-
return n ** 2
|
178
|
-
|
179
|
-
@checkpoint(format="memory")
|
180
|
-
async def async_compute_sum(a: int, b: int) -> int:
|
181
|
-
await asyncio.sleep(1)
|
182
|
-
return a + b
|
183
|
-
|
184
|
-
async def main():
|
185
|
-
result1 = compute_square(5)
|
186
|
-
print(result1)
|
187
|
-
|
188
|
-
result2 = await async_compute_sum(3, 7)
|
189
|
-
print(result2)
|
190
|
-
|
191
|
-
result3 = async_compute_sum.get(3, 7)
|
192
|
-
print(result3)
|
193
|
-
|
194
|
-
asyncio.run(main())
|
195
|
-
```
|
@@ -1,9 +0,0 @@
|
|
1
|
-
from .checkpoint import Checkpointer, CheckpointFn
|
2
|
-
from .checkpoint import CheckpointError, CheckpointReadFail
|
3
|
-
from .types import Storage
|
4
|
-
from .function_body import get_function_hash
|
5
|
-
|
6
|
-
create_checkpointer = Checkpointer
|
7
|
-
checkpoint = Checkpointer()
|
8
|
-
memory_checkpoint = Checkpointer(format="memory")
|
9
|
-
tmp_checkpoint = Checkpointer(root_path="/tmp/checkpoints")
|
@@ -1,29 +0,0 @@
|
|
1
|
-
from datetime import datetime
|
2
|
-
from ..types import Storage
|
3
|
-
|
4
|
-
store = {}
|
5
|
-
date_stored = {}
|
6
|
-
|
7
|
-
class MemoryStorage(Storage):
|
8
|
-
@staticmethod
|
9
|
-
def is_expired(path):
|
10
|
-
return path not in store
|
11
|
-
|
12
|
-
@staticmethod
|
13
|
-
def should_expire(path, expire_fn):
|
14
|
-
return expire_fn(date_stored[path])
|
15
|
-
|
16
|
-
@staticmethod
|
17
|
-
def store_data(path, data):
|
18
|
-
store[path] = data
|
19
|
-
date_stored[path] = datetime.now()
|
20
|
-
return data
|
21
|
-
|
22
|
-
@staticmethod
|
23
|
-
def load_data(path):
|
24
|
-
return store[path]
|
25
|
-
|
26
|
-
@staticmethod
|
27
|
-
def delete_data(path):
|
28
|
-
del store[path]
|
29
|
-
del date_stored[path]
|
@@ -1,55 +0,0 @@
|
|
1
|
-
import pickle
|
2
|
-
from pathlib import Path
|
3
|
-
from datetime import datetime
|
4
|
-
from ..types import Storage
|
5
|
-
|
6
|
-
def get_paths(path: Path):
|
7
|
-
meta_full_path = path.with_name(f"{path.name}_meta.pkl")
|
8
|
-
pkl_full_path = path.with_name(f"{path.name}.pkl")
|
9
|
-
return meta_full_path, pkl_full_path
|
10
|
-
|
11
|
-
def get_collection_timestamp(path: Path):
|
12
|
-
meta_full_path, _ = get_paths(path)
|
13
|
-
with meta_full_path.open("rb") as file:
|
14
|
-
meta_data = pickle.load(file)
|
15
|
-
return meta_data["created"]
|
16
|
-
|
17
|
-
class PickleStorage(Storage):
|
18
|
-
@staticmethod
|
19
|
-
def is_expired(path):
|
20
|
-
try:
|
21
|
-
get_collection_timestamp(path)
|
22
|
-
return False
|
23
|
-
except (FileNotFoundError, EOFError):
|
24
|
-
return True
|
25
|
-
|
26
|
-
@staticmethod
|
27
|
-
def should_expire(path, expire_fn):
|
28
|
-
return expire_fn(get_collection_timestamp(path))
|
29
|
-
|
30
|
-
@staticmethod
|
31
|
-
def store_data(path, data):
|
32
|
-
created = datetime.now()
|
33
|
-
meta_data = {"created": created} # TODO: this should just be a JSON or binary dump of the unix timestamp and other metadata - not pickle
|
34
|
-
meta_full_path, pkl_full_path = get_paths(path)
|
35
|
-
pkl_full_path.parent.mkdir(parents=True, exist_ok=True)
|
36
|
-
with pkl_full_path.open("wb") as file:
|
37
|
-
pickle.dump(data, file, -1)
|
38
|
-
with meta_full_path.open("wb") as file:
|
39
|
-
pickle.dump(meta_data, file, -1)
|
40
|
-
return data
|
41
|
-
|
42
|
-
@staticmethod
|
43
|
-
def load_data(path):
|
44
|
-
_, full_path = get_paths(path)
|
45
|
-
with full_path.open("rb") as file:
|
46
|
-
return pickle.load(file)
|
47
|
-
|
48
|
-
@staticmethod
|
49
|
-
def delete_data(path):
|
50
|
-
meta_full_path, pkl_full_path = get_paths(path)
|
51
|
-
try:
|
52
|
-
meta_full_path.unlink()
|
53
|
-
pkl_full_path.unlink()
|
54
|
-
except FileNotFoundError:
|
55
|
-
pass
|
@@ -1,19 +0,0 @@
|
|
1
|
-
from typing import Callable, Protocol, Any
|
2
|
-
from pathlib import Path
|
3
|
-
from datetime import datetime
|
4
|
-
|
5
|
-
class Storage(Protocol):
|
6
|
-
@staticmethod
|
7
|
-
def is_expired(path: Path) -> bool: ...
|
8
|
-
|
9
|
-
@staticmethod
|
10
|
-
def should_expire(path: Path, expire_fn: Callable[[datetime], bool]) -> bool: ...
|
11
|
-
|
12
|
-
@staticmethod
|
13
|
-
def store_data(path: Path, data: Any) -> Any: ...
|
14
|
-
|
15
|
-
@staticmethod
|
16
|
-
def load_data(path: Path) -> Any: ...
|
17
|
-
|
18
|
-
@staticmethod
|
19
|
-
def delete_data(path: Path) -> None: ...
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|