checkpointer 2.0.1__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {checkpointer-2.0.1 → checkpointer-2.0.2}/PKG-INFO +42 -80
- {checkpointer-2.0.1 → checkpointer-2.0.2}/README.md +41 -79
- {checkpointer-2.0.1 → checkpointer-2.0.2}/checkpointer/__init__.py +1 -1
- {checkpointer-2.0.1 → checkpointer-2.0.2}/checkpointer/checkpoint.py +17 -19
- {checkpointer-2.0.1 → checkpointer-2.0.2}/checkpointer/storages/bcolz_storage.py +7 -12
- checkpointer-2.0.2/checkpointer/storages/memory_storage.py +25 -0
- {checkpointer-2.0.1 → checkpointer-2.0.2}/checkpointer/storages/pickle_storage.py +5 -10
- checkpointer-2.0.2/checkpointer/types.py +23 -0
- {checkpointer-2.0.1 → checkpointer-2.0.2}/pyproject.toml +1 -1
- {checkpointer-2.0.1 → checkpointer-2.0.2}/uv.lock +1 -1
- checkpointer-2.0.1/checkpointer/storages/memory_storage.py +0 -28
- checkpointer-2.0.1/checkpointer/types.py +0 -19
- {checkpointer-2.0.1 → checkpointer-2.0.2}/.gitignore +0 -0
- {checkpointer-2.0.1 → checkpointer-2.0.2}/LICENSE +0 -0
- {checkpointer-2.0.1 → checkpointer-2.0.2}/checkpointer/function_body.py +0 -0
- {checkpointer-2.0.1 → checkpointer-2.0.2}/checkpointer/print_checkpoint.py +0 -0
- {checkpointer-2.0.1 → checkpointer-2.0.2}/checkpointer/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: checkpointer
|
3
|
-
Version: 2.0.1
|
3
|
+
Version: 2.0.2
|
4
4
|
Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
|
5
5
|
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
6
6
|
Author: Hampus Hallman
|
@@ -17,13 +17,13 @@ Description-Content-Type: text/markdown
|
|
17
17
|
|
18
18
|
# checkpointer · [License](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [PyPI](https://pypi.org/project/checkpointer/) [PyPI](https://pypi.org/project/checkpointer/)
|
19
19
|
|
20
|
-
`checkpointer` is a Python library for memoizing function results. It
|
20
|
+
`checkpointer` is a Python library for memoizing function results. It provides a decorator-based API with support for multiple storage backends. Use it for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
21
21
|
|
22
22
|
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
23
23
|
|
24
24
|
### Key Features:
|
25
|
-
- 🗂️ **Multiple Storage Backends**:
|
26
|
-
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions.
|
25
|
+
- 🗂️ **Multiple Storage Backends**: Built-in support for in-memory and pickle-based storage, or create your own.
|
26
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions without boilerplate.
|
27
27
|
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
28
|
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
29
|
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
@@ -48,21 +48,21 @@ def expensive_function(x: int) -> int:
|
|
48
48
|
print("Computing...")
|
49
49
|
return x ** 2
|
50
50
|
|
51
|
-
result = expensive_function(4) # Computes and stores result
|
52
|
-
result = expensive_function(4) # Loads from
|
51
|
+
result = expensive_function(4) # Computes and stores the result
|
52
|
+
result = expensive_function(4) # Loads from the cache
|
53
53
|
```
|
54
54
|
|
55
55
|
---
|
56
56
|
|
57
57
|
## How It Works
|
58
58
|
|
59
|
-
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer`
|
59
|
+
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` loads the cached result instead of recomputing.
|
60
60
|
|
61
|
-
Additionally, `checkpointer` ensures that caches are invalidated when a function
|
62
|
-
1. **Its source code**: Changes to the function
|
63
|
-
2. **Dependent functions**: If a function calls others, changes
|
61
|
+
Additionally, `checkpointer` ensures that caches are invalidated when a function's implementation or any of its dependencies change. Each function is assigned a hash based on:
|
62
|
+
1. **Its source code**: Changes to the function's code update its hash.
|
63
|
+
2. **Dependent functions**: If a function calls others, changes in those dependencies will also update the hash.
|
64
64
|
|
65
|
-
### Example: Cache Invalidation
|
65
|
+
### Example: Cache Invalidation
|
66
66
|
|
67
67
|
```python
|
68
68
|
def multiply(a, b):
|
@@ -77,95 +77,62 @@ def compute(a, b):
|
|
77
77
|
return helper(a) + helper(b)
|
78
78
|
```
|
79
79
|
|
80
|
-
If you
|
80
|
+
If you modify `multiply`, caches for both `helper` and `compute` are invalidated and recomputed.
|
81
81
|
|
82
82
|
---
|
83
83
|
|
84
84
|
## Parameterization
|
85
85
|
|
86
|
-
###
|
86
|
+
### Custom Configuration
|
87
87
|
|
88
|
-
|
88
|
+
Set up a `Checkpointer` instance with custom settings, and extend it by calling itself with overrides:
|
89
89
|
|
90
90
|
```python
|
91
91
|
from checkpointer import checkpoint
|
92
92
|
|
93
|
-
|
94
|
-
```
|
95
|
-
|
96
|
-
Extend this configuration by calling itself again:
|
93
|
+
IS_DEVELOPMENT = True # Toggle based on your environment
|
97
94
|
|
98
|
-
|
99
|
-
|
95
|
+
tmp_checkpoint = checkpoint(root_path="/tmp/checkpoints")
|
96
|
+
dev_checkpoint = tmp_checkpoint(when=IS_DEVELOPMENT) # Adds development-specific behavior
|
100
97
|
```
|
101
98
|
|
102
|
-
### Per-Function Customization
|
103
|
-
|
104
|
-
```python
|
105
|
-
@checkpoint(format="pickle", verbosity=0)
|
106
|
-
def my_function(x, y):
|
107
|
-
return x + y
|
108
|
-
```
|
109
|
-
|
110
|
-
### Combining Configurations
|
111
|
-
|
112
|
-
```python
|
113
|
-
checkpoint = checkpoint(format="memory", verbosity=1)
|
114
|
-
quiet_checkpoint = checkpoint(verbosity=0)
|
115
|
-
pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
|
116
|
-
|
117
|
-
@checkpoint
|
118
|
-
def compute_square(n: int) -> int:
|
119
|
-
return n ** 2
|
120
|
-
|
121
|
-
@quiet_checkpoint
|
122
|
-
def compute_quietly(n: int) -> int:
|
123
|
-
return n ** 3
|
124
|
-
|
125
|
-
@pickle_checkpoint
|
126
|
-
def compute_sum(a: int, b: int) -> int:
|
127
|
-
return a + b
|
128
|
-
```
|
99
|
+
### Per-Function Customization & Layered Caching
|
129
100
|
|
130
|
-
|
101
|
+
Layer caches by stacking checkpoints:
|
131
102
|
|
132
103
|
```python
|
133
|
-
|
134
|
-
|
135
|
-
dev_checkpoint = checkpoint(when=IS_DEVELOPMENT)
|
136
|
-
|
137
|
-
@checkpoint(format="memory")
|
138
|
-
@dev_checkpoint
|
104
|
+
@checkpoint(format="memory") # Always use memory storage
|
105
|
+
@dev_checkpoint # Adds caching during development
|
139
106
|
def some_expensive_function():
|
140
107
|
print("Performing a time-consuming operation...")
|
141
108
|
return sum(i * i for i in range(10**6))
|
142
109
|
```
|
143
110
|
|
144
|
-
- In development
|
145
|
-
- In production
|
111
|
+
- **In development**: Both `dev_checkpoint` and `memory` caches are active.
|
112
|
+
- **In production**: Only the `memory` cache is active.
|
146
113
|
|
147
114
|
---
|
148
115
|
|
149
116
|
## Usage
|
150
117
|
|
151
118
|
### Force Recalculation
|
152
|
-
|
119
|
+
Force a recalculation and overwrite the stored checkpoint:
|
153
120
|
|
154
121
|
```python
|
155
122
|
result = expensive_function.rerun(4)
|
156
123
|
```
|
157
124
|
|
158
|
-
###
|
125
|
+
### Call the Original Function
|
159
126
|
Use `fn` to directly call the original, undecorated function:
|
160
127
|
|
161
128
|
```python
|
162
129
|
result = expensive_function.fn(4)
|
163
130
|
```
|
164
131
|
|
165
|
-
This is especially useful **inside recursive functions
|
132
|
+
This is especially useful **inside recursive functions** to avoid redundant caching of intermediate steps while still caching the final result.
|
166
133
|
|
167
134
|
### Retrieve Stored Checkpoints
|
168
|
-
Access
|
135
|
+
Access cached results without recalculating:
|
169
136
|
|
170
137
|
```python
|
171
138
|
stored_result = expensive_function.get(4)
|
@@ -175,14 +142,15 @@ stored_result = expensive_function.get(4)
|
|
175
142
|
|
176
143
|
## Storage Backends
|
177
144
|
|
178
|
-
`checkpointer`
|
145
|
+
`checkpointer` works with both built-in and custom storage backends, so you can use what's provided or roll your own as needed.
|
179
146
|
|
180
147
|
### Built-In Backends
|
181
148
|
|
182
|
-
1. **PickleStorage**:
|
183
|
-
2. **MemoryStorage**:
|
149
|
+
1. **PickleStorage**: Stores checkpoints on disk using Python's `pickle`.
|
150
|
+
2. **MemoryStorage**: Keeps checkpoints in memory for non-persistent, fast caching.
|
151
|
+
|
152
|
+
You can specify a storage backend using either its name (`"pickle"` or `"memory"`) or its corresponding class (`PickleStorage` or `MemoryStorage`) in the `format` parameter:
|
184
153
|
|
185
|
-
To use these backends, pass either `"pickle"` or `PickleStorage` (and similarly for `"memory"` or `MemoryStorage`) to the `format` parameter:
|
186
154
|
```python
|
187
155
|
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
188
156
|
|
@@ -197,33 +165,27 @@ def memory_cached(x: int) -> int:
|
|
197
165
|
|
198
166
|
### Custom Storage Backends
|
199
167
|
|
200
|
-
Create custom storage
|
168
|
+
Create a custom storage backend by inheriting from the `Storage` class and implementing its methods. Access configuration options through the `self.checkpointer` attribute, an instance of `Checkpointer`.
|
169
|
+
|
170
|
+
#### Example: Custom Storage Backend
|
201
171
|
|
202
|
-
Example usage:
|
203
172
|
```python
|
204
173
|
from checkpointer import checkpoint, Storage
|
205
|
-
from typing import Any
|
206
|
-
from pathlib import Path
|
207
174
|
from datetime import datetime
|
208
175
|
|
209
|
-
class CustomStorage(Storage):
|
210
|
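-
@staticmethod
|
211
|
-
def exists(path: Path) -> bool: ...
|
212
|
-
@staticmethod
|
213
|
-
def checkpoint_date(path: Path) -> datetime: ...
|
214
|
-
@staticmethod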
|
215
|
-
def store(path: Path, data: Any) -> None: ...
|
216
|
-
@staticmethod
|
217
|
-
def load(path: Path) -> Any: ...
|
218
|
-
@staticmethod
|
219
|
-
def delete(path: Path) -> None: ...
|
176
|
+
class CustomStorage(Storage):
|
177
|
+
def exists(self, path) -> bool: ... # Check if a checkpoint exists at the given path
|
178
|
+
def checkpoint_date(self, path) -> datetime: ... # Return the date the checkpoint was created
|
179
|
+
def store(self, path, data): ... # Save the checkpoint data
|
180
|
+
def load(self, path): ... # Return the checkpoint data
|
181
|
+
def delete(self, path): ... # Delete the checkpoint
|
220
182
|
|
221
183
|
@checkpoint(format=CustomStorage)
|
222
184
|
def custom_cached(x: int):
|
223
185
|
return x ** 2
|
224
186
|
```
|
225
187
|
|
226
|
-
|
188
|
+
Using a custom backend lets you tailor storage to your application, whether it involves databases, cloud storage, or custom file formats.
|
227
189
|
|
228
190
|
---
|
229
191
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
# checkpointer · [License](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [PyPI](https://pypi.org/project/checkpointer/) [PyPI](https://pypi.org/project/checkpointer/)
|
2
2
|
|
3
|
-
`checkpointer` is a Python library for memoizing function results. It
|
3
|
+
`checkpointer` is a Python library for memoizing function results. It provides a decorator-based API with support for multiple storage backends. Use it for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
4
4
|
|
5
5
|
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
6
6
|
|
7
7
|
### Key Features:
|
8
|
-
- 🗂️ **Multiple Storage Backends**:
|
9
|
-
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions.
|
8
|
+
- 🗂️ **Multiple Storage Backends**: Built-in support for in-memory and pickle-based storage, or create your own.
|
9
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions without boilerplate.
|
10
10
|
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
11
11
|
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
12
12
|
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
@@ -31,21 +31,21 @@ def expensive_function(x: int) -> int:
|
|
31
31
|
print("Computing...")
|
32
32
|
return x ** 2
|
33
33
|
|
34
|
-
result = expensive_function(4) # Computes and stores result
|
35
|
-
result = expensive_function(4) # Loads from
|
34
|
+
result = expensive_function(4) # Computes and stores the result
|
35
|
+
result = expensive_function(4) # Loads from the cache
|
36
36
|
```
|
37
37
|
|
38
38
|
---
|
39
39
|
|
40
40
|
## How It Works
|
41
41
|
|
42
|
-
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer`
|
42
|
+
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` loads the cached result instead of recomputing.
|
43
43
|
|
44
|
-
Additionally, `checkpointer` ensures that caches are invalidated when a function
|
45
|
-
1. **Its source code**: Changes to the function
|
46
|
-
2. **Dependent functions**: If a function calls others, changes
|
44
|
+
Additionally, `checkpointer` ensures that caches are invalidated when a function's implementation or any of its dependencies change. Each function is assigned a hash based on:
|
45
|
+
1. **Its source code**: Changes to the function's code update its hash.
|
46
|
+
2. **Dependent functions**: If a function calls others, changes in those dependencies will also update the hash.
|
47
47
|
|
48
|
-
### Example: Cache Invalidation
|
48
|
+
### Example: Cache Invalidation
|
49
49
|
|
50
50
|
```python
|
51
51
|
def multiply(a, b):
|
@@ -60,95 +60,62 @@ def compute(a, b):
|
|
60
60
|
return helper(a) + helper(b)
|
61
61
|
```
|
62
62
|
|
63
|
-
If you
|
63
|
+
If you modify `multiply`, caches for both `helper` and `compute` are invalidated and recomputed.
|
64
64
|
|
65
65
|
---
|
66
66
|
|
67
67
|
## Parameterization
|
68
68
|
|
69
|
-
###
|
69
|
+
### Custom Configuration
|
70
70
|
|
71
|
-
|
71
|
+
Set up a `Checkpointer` instance with custom settings, and extend it by calling itself with overrides:
|
72
72
|
|
73
73
|
```python
|
74
74
|
from checkpointer import checkpoint
|
75
75
|
|
76
|
-
|
77
|
-
```
|
78
|
-
|
79
|
-
Extend this configuration by calling itself again:
|
76
|
+
IS_DEVELOPMENT = True # Toggle based on your environment
|
80
77
|
|
81
|
-
|
82
|
-
|
78
|
+
tmp_checkpoint = checkpoint(root_path="/tmp/checkpoints")
|
79
|
+
dev_checkpoint = tmp_checkpoint(when=IS_DEVELOPMENT) # Adds development-specific behavior
|
83
80
|
```
|
84
81
|
|
85
|
-
### Per-Function Customization
|
86
|
-
|
87
|
-
```python
|
88
|
-
@checkpoint(format="pickle", verbosity=0)
|
89
|
-
def my_function(x, y):
|
90
|
-
return x + y
|
91
|
-
```
|
92
|
-
|
93
|
-
### Combining Configurations
|
94
|
-
|
95
|
-
```python
|
96
|
-
checkpoint = checkpoint(format="memory", verbosity=1)
|
97
|
-
quiet_checkpoint = checkpoint(verbosity=0)
|
98
|
-
pickle_checkpoint = checkpoint(format="pickle", root_path="/tmp/pickle_checkpoints")
|
99
|
-
|
100
|
-
@checkpoint
|
101
|
-
def compute_square(n: int) -> int:
|
102
|
-
return n ** 2
|
103
|
-
|
104
|
-
@quiet_checkpoint
|
105
|
-
def compute_quietly(n: int) -> int:
|
106
|
-
return n ** 3
|
107
|
-
|
108
|
-
@pickle_checkpoint
|
109
|
-
def compute_sum(a: int, b: int) -> int:
|
110
|
-
return a + b
|
111
|
-
```
|
82
|
+
### Per-Function Customization & Layered Caching
|
112
83
|
|
113
|
-
|
84
|
+
Layer caches by stacking checkpoints:
|
114
85
|
|
115
86
|
```python
|
116
|
-
|
117
|
-
|
118
|
-
dev_checkpoint = checkpoint(when=IS_DEVELOPMENT)
|
119
|
-
|
120
|
-
@checkpoint(format="memory")
|
121
|
-
@dev_checkpoint
|
87
|
+
@checkpoint(format="memory") # Always use memory storage
|
88
|
+
@dev_checkpoint # Adds caching during development
|
122
89
|
def some_expensive_function():
|
123
90
|
print("Performing a time-consuming operation...")
|
124
91
|
return sum(i * i for i in range(10**6))
|
125
92
|
```
|
126
93
|
|
127
|
-
- In development
|
128
|
-
- In production
|
94
|
+
- **In development**: Both `dev_checkpoint` and `memory` caches are active.
|
95
|
+
- **In production**: Only the `memory` cache is active.
|
129
96
|
|
130
97
|
---
|
131
98
|
|
132
99
|
## Usage
|
133
100
|
|
134
101
|
### Force Recalculation
|
135
|
-
|
102
|
+
Force a recalculation and overwrite the stored checkpoint:
|
136
103
|
|
137
104
|
```python
|
138
105
|
result = expensive_function.rerun(4)
|
139
106
|
```
|
140
107
|
|
141
|
-
###
|
108
|
+
### Call the Original Function
|
142
109
|
Use `fn` to directly call the original, undecorated function:
|
143
110
|
|
144
111
|
```python
|
145
112
|
result = expensive_function.fn(4)
|
146
113
|
```
|
147
114
|
|
148
|
-
This is especially useful **inside recursive functions
|
115
|
+
This is especially useful **inside recursive functions** to avoid redundant caching of intermediate steps while still caching the final result.
|
149
116
|
|
150
117
|
### Retrieve Stored Checkpoints
|
151
|
-
Access
|
118
|
+
Access cached results without recalculating:
|
152
119
|
|
153
120
|
```python
|
154
121
|
stored_result = expensive_function.get(4)
|
@@ -158,14 +125,15 @@ stored_result = expensive_function.get(4)
|
|
158
125
|
|
159
126
|
## Storage Backends
|
160
127
|
|
161
|
-
`checkpointer`
|
128
|
+
`checkpointer` works with both built-in and custom storage backends, so you can use what's provided or roll your own as needed.
|
162
129
|
|
163
130
|
### Built-In Backends
|
164
131
|
|
165
|
-
1. **PickleStorage**:
|
166
|
-
2. **MemoryStorage**:
|
132
|
+
1. **PickleStorage**: Stores checkpoints on disk using Python's `pickle`.
|
133
|
+
2. **MemoryStorage**: Keeps checkpoints in memory for non-persistent, fast caching.
|
134
|
+
|
135
|
+
You can specify a storage backend using either its name (`"pickle"` or `"memory"`) or its corresponding class (`PickleStorage` or `MemoryStorage`) in the `format` parameter:
|
167
136
|
|
168
|
-
To use these backends, pass either `"pickle"` or `PickleStorage` (and similarly for `"memory"` or `MemoryStorage`) to the `format` parameter:
|
169
137
|
```python
|
170
138
|
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
171
139
|
|
@@ -180,33 +148,27 @@ def memory_cached(x: int) -> int:
|
|
180
148
|
|
181
149
|
### Custom Storage Backends
|
182
150
|
|
183
|
-
Create custom storage
|
151
|
+
Create a custom storage backend by inheriting from the `Storage` class and implementing its methods. Access configuration options through the `self.checkpointer` attribute, an instance of `Checkpointer`.
|
152
|
+
|
153
|
+
#### Example: Custom Storage Backend
|
184
154
|
|
185
|
-
Example usage:
|
186
155
|
```python
|
187
156
|
from checkpointer import checkpoint, Storage
|
188
|
-
from typing import Any
|
189
|
-
from pathlib import Path
|
190
157
|
from datetime import datetime
|
191
158
|
|
192
|
-
class CustomStorage(Storage):
|
193
|
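-
@staticmethod
|
194
|
-
def exists(path: Path) -> bool: ...
|
195
|
-
@staticmethod
|
196
|
-
def checkpoint_date(path: Path) -> datetime: ...
|
197
|
-
@staticmethod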
|
198
|
-
def store(path: Path, data: Any) -> None: ...
|
199
|
-
@staticmethod
|
200
|
-
def load(path: Path) -> Any: ...
|
201
|
-
@staticmethod
|
202
|
-
def delete(path: Path) -> None: ...
|
159
|
+
class CustomStorage(Storage):
|
160
|
+
def exists(self, path) -> bool: ... # Check if a checkpoint exists at the given path
|
161
|
+
def checkpoint_date(self, path) -> datetime: ... # Return the date the checkpoint was created
|
162
|
+
def store(self, path, data): ... # Save the checkpoint data
|
163
|
+
def load(self, path): ... # Return the checkpoint data
|
164
|
+
def delete(self, path): ... # Delete the checkpoint
|
203
165
|
|
204
166
|
@checkpoint(format=CustomStorage)
|
205
167
|
def custom_cached(x: int):
|
206
168
|
return x ** 2
|
207
169
|
```
|
208
170
|
|
209
|
-
|
171
|
+
Using a custom backend lets you tailor storage to your application, whether it involves databases, cloud storage, or custom file formats.
|
210
172
|
|
211
173
|
---
|
212
174
|
|
@@ -5,5 +5,5 @@ import tempfile
|
|
5
5
|
|
6
6
|
create_checkpointer = Checkpointer
|
7
7
|
checkpoint = Checkpointer()
|
8
|
-
memory_checkpoint = Checkpointer(format="memory")
|
8
|
+
memory_checkpoint = Checkpointer(format="memory", verbosity=0)
|
9
9
|
tmp_checkpoint = Checkpointer(root_path=tempfile.gettempdir() + "/checkpoints")
|
@@ -1,8 +1,9 @@
|
|
1
|
+
from __future__ import annotations
|
1
2
|
import inspect
|
2
3
|
import relib.hashing as hashing
|
3
|
-
from typing import Generic, TypeVar, TypedDict, Callable, Unpack, Literal,
|
4
|
-
from datetime import datetime
|
4
|
+
from typing import Generic, TypeVar, Type, TypedDict, Callable, Unpack, Literal, Any, cast, overload
|
5
5
|
from pathlib import Path
|
6
|
+
from datetime import datetime
|
6
7
|
from functools import update_wrapper
|
7
8
|
from .types import Storage
|
8
9
|
from .function_body import get_function_hash
|
@@ -15,13 +16,13 @@ from .print_checkpoint import print_checkpoint
|
|
15
16
|
Fn = TypeVar("Fn", bound=Callable)
|
16
17
|
|
17
18
|
DEFAULT_DIR = Path.home() / ".cache/checkpoints"
|
18
|
-
STORAGE_MAP = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzStorage}
|
19
|
+
STORAGE_MAP: dict[str, Type[Storage]] = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzStorage}
|
19
20
|
|
20
21
|
class CheckpointError(Exception):
|
21
22
|
pass
|
22
23
|
|
23
24
|
class CheckpointerOpts(TypedDict, total=False):
|
24
|
-
format: Storage | Literal["pickle", "memory", "bcolz"]
|
25
|
+
format: Type[Storage] | Literal["pickle", "memory", "bcolz"]
|
25
26
|
root_path: Path | str | None
|
26
27
|
when: bool
|
27
28
|
verbosity: Literal[0, 1]
|
@@ -37,14 +38,11 @@ class Checkpointer:
|
|
37
38
|
self.path = opts.get("path")
|
38
39
|
self.should_expire = opts.get("should_expire")
|
39
40
|
|
40
|
-
def get_storage(self) -> Storage:
|
41
|
-
return STORAGE_MAP[self.format] if isinstance(self.format, str) else self.format
|
42
|
-
|
43
41
|
@overload
|
44
|
-
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) ->
|
42
|
+
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) -> CheckpointFn[Fn]: ...
|
45
43
|
@overload
|
46
|
-
def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
47
|
-
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) ->
|
44
|
+
def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer: ...
|
45
|
+
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) -> Checkpointer | CheckpointFn[Fn]:
|
48
46
|
if override_opts:
|
49
47
|
opts = CheckpointerOpts(**{**self.__dict__, **override_opts})
|
50
48
|
return Checkpointer(**opts)(fn)
|
@@ -56,11 +54,13 @@ class CheckpointFn(Generic[Fn]):
|
|
56
54
|
wrapped = unwrap_fn(fn)
|
57
55
|
file_name = Path(wrapped.__code__.co_filename).name
|
58
56
|
update_wrapper(cast(Callable, self), wrapped)
|
57
|
+
storage = STORAGE_MAP[checkpointer.format] if isinstance(checkpointer.format, str) else checkpointer.format
|
59
58
|
self.checkpointer = checkpointer
|
60
59
|
self.fn = fn
|
61
60
|
self.fn_hash = get_function_hash(wrapped)
|
62
61
|
self.fn_id = f"{file_name}/{wrapped.__name__}"
|
63
62
|
self.is_async = inspect.iscoroutinefunction(wrapped)
|
63
|
+
self.storage = storage(checkpointer)
|
64
64
|
|
65
65
|
def get_checkpoint_id(self, args: tuple, kw: dict) -> str:
|
66
66
|
if not callable(self.checkpointer.path):
|
@@ -73,27 +73,26 @@ class CheckpointFn(Generic[Fn]):
|
|
73
73
|
async def _store_on_demand(self, args: tuple, kw: dict, rerun: bool):
|
74
74
|
checkpoint_id = self.get_checkpoint_id(args, kw)
|
75
75
|
checkpoint_path = self.checkpointer.root_path / checkpoint_id
|
76
|
-
|
77
|
-
should_log = storage is not MemoryStorage and self.checkpointer.verbosity > 0
|
76
|
+
should_log = self.checkpointer.verbosity > 0
|
78
77
|
refresh = rerun \
|
79
|
-
or not storage.exists(checkpoint_path) \
|
80
|
-
or (self.checkpointer.should_expire and self.checkpointer.should_expire(storage.checkpoint_date(checkpoint_path)))
|
78
|
+
or not self.storage.exists(checkpoint_path) \
|
79
|
+
or (self.checkpointer.should_expire and self.checkpointer.should_expire(self.storage.checkpoint_date(checkpoint_path)))
|
81
80
|
|
82
81
|
if refresh:
|
83
82
|
print_checkpoint(should_log, "MEMORIZING", checkpoint_id, "blue")
|
84
83
|
data = self.fn(*args, **kw)
|
85
84
|
if inspect.iscoroutine(data):
|
86
85
|
data = await data
|
87
|
-
storage.store(checkpoint_path, data)
|
86
|
+
self.storage.store(checkpoint_path, data)
|
88
87
|
return data
|
89
88
|
|
90
89
|
try:
|
91
|
-
data = storage.load(checkpoint_path)
|
90
|
+
data = self.storage.load(checkpoint_path)
|
92
91
|
print_checkpoint(should_log, "REMEMBERED", checkpoint_id, "green")
|
93
92
|
return data
|
94
93
|
except (EOFError, FileNotFoundError):
|
95
94
|
print_checkpoint(should_log, "CORRUPTED", checkpoint_id, "yellow")
|
96
|
-
storage.delete(checkpoint_path)
|
95
|
+
self.storage.delete(checkpoint_path)
|
97
96
|
return await self._store_on_demand(args, kw, rerun)
|
98
97
|
|
99
98
|
def _call(self, args: tuple, kw: dict, rerun=False):
|
@@ -107,8 +106,7 @@ class CheckpointFn(Generic[Fn]):
|
|
107
106
|
|
108
107
|
def get(self, *args, **kw) -> Any:
|
109
108
|
checkpoint_path = self.checkpointer.root_path / self.get_checkpoint_id(args, kw)
|
110
|
-
storage = self.checkpointer.get_storage()
|
111
109
|
try:
|
112
|
-
return storage.load(checkpoint_path)
|
110
|
+
return self.storage.load(checkpoint_path)
|
113
111
|
except:
|
114
112
|
raise CheckpointError("Could not load checkpoint")
|
@@ -24,16 +24,13 @@ def insert_data(path: Path, data):
|
|
24
24
|
c.flush()
|
25
25
|
|
26
26
|
class BcolzStorage(Storage):
|
27
|
-
|
28
|
-
def exists(path):
|
27
|
+
def exists(self, path):
|
29
28
|
return path.exists()
|
30
29
|
|
31
|
-
|
32
|
-
def checkpoint_date(path):
|
30
|
+
def checkpoint_date(self, path):
|
33
31
|
return datetime.fromtimestamp(path.stat().st_mtime)
|
34
32
|
|
35
|
-
|
36
|
-
def store(path, data):
|
33
|
+
def store(self, path, data):
|
37
34
|
metapath = get_metapath(path)
|
38
35
|
path.parent.mkdir(parents=True, exist_ok=True)
|
39
36
|
data_type_str = get_data_type_str(data)
|
@@ -48,12 +45,11 @@ class BcolzStorage(Storage):
|
|
48
45
|
if data_type_str in ["tuple", "dict"]:
|
49
46
|
for i in range(len(fields)):
|
50
47
|
child_path = Path(f"{path} ({i})")
|
51
|
-
|
48
|
+
self.store(child_path, data[fields[i]])
|
52
49
|
else:
|
53
50
|
insert_data(path, data)
|
54
51
|
|
55
|
-
|
56
|
-
def load(path):
|
52
|
+
def load(self, path):
|
57
53
|
import bcolz
|
58
54
|
metapath = get_metapath(path)
|
59
55
|
meta_data = bcolz.open(metapath)[:][0]
|
@@ -61,7 +57,7 @@ class BcolzStorage(Storage):
|
|
61
57
|
if data_type_str in ["tuple", "dict"]:
|
62
58
|
fields = meta_data["fields"]
|
63
59
|
partitions = range(len(fields))
|
64
|
-
data = [
|
60
|
+
data = [self.load(Path(f"{path} ({i})")) for i in partitions]
|
65
61
|
if data_type_str == "tuple":
|
66
62
|
return tuple(data)
|
67
63
|
else:
|
@@ -75,8 +71,7 @@ class BcolzStorage(Storage):
|
|
75
71
|
else:
|
76
72
|
return data[:]
|
77
73
|
|
78
|
-
|
79
|
-
def delete(path):
|
74
|
+
def delete(self, path):
|
80
75
|
# NOTE: Not recursive
|
81
76
|
metapath = get_metapath(path)
|
82
77
|
try:
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from pathlib import Path
|
3
|
+
from datetime import datetime
|
4
|
+
from ..types import Storage
|
5
|
+
|
6
|
+
item_map: dict[str, tuple[datetime, Any]] = {}
|
7
|
+
|
8
|
+
class MemoryStorage(Storage):
|
9
|
+
def get_short_path(self, path: Path):
|
10
|
+
return str(path.relative_to(self.checkpointer.root_path))
|
11
|
+
|
12
|
+
def exists(self, path):
|
13
|
+
return self.get_short_path(path) in item_map
|
14
|
+
|
15
|
+
def checkpoint_date(self, path):
|
16
|
+
return item_map[self.get_short_path(path)][0]
|
17
|
+
|
18
|
+
def store(self, path, data):
|
19
|
+
item_map[self.get_short_path(path)] = (datetime.now(), data)
|
20
|
+
|
21
|
+
def load(self, path):
|
22
|
+
return item_map[self.get_short_path(path)][1]
|
23
|
+
|
24
|
+
def delete(self, path):
|
25
|
+
del item_map[self.get_short_path(path)]
|
@@ -7,29 +7,24 @@ def get_path(path: Path):
|
|
7
7
|
return path.with_name(f"{path.name}.pkl")
|
8
8
|
|
9
9
|
class PickleStorage(Storage):
|
10
|
-
|
11
|
-
def exists(path):
|
10
|
+
def exists(self, path):
|
12
11
|
return get_path(path).exists()
|
13
12
|
|
14
|
-
|
15
|
-
def checkpoint_date(path):
|
13
|
+
def checkpoint_date(self, path):
|
16
14
|
return datetime.fromtimestamp(get_path(path).stat().st_mtime)
|
17
15
|
|
18
|
-
|
19
|
-
def store(path, data):
|
16
|
+
def store(self, path, data):
|
20
17
|
full_path = get_path(path)
|
21
18
|
full_path.parent.mkdir(parents=True, exist_ok=True)
|
22
19
|
with full_path.open("wb") as file:
|
23
20
|
pickle.dump(data, file, -1)
|
24
21
|
|
25
|
-
|
26
|
-
def load(path):
|
22
|
+
def load(self, path):
|
27
23
|
full_path = get_path(path)
|
28
24
|
with full_path.open("rb") as file:
|
29
25
|
return pickle.load(file)
|
30
26
|
|
31
|
-
|
32
|
-
def delete(path):
|
27
|
+
def delete(self, path):
|
33
28
|
try:
|
34
29
|
get_path(path).unlink()
|
35
30
|
except FileNotFoundError:
|
@@ -0,0 +1,23 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any, TYPE_CHECKING
|
3
|
+
from pathlib import Path
|
4
|
+
from datetime import datetime
|
5
|
+
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .checkpoint import Checkpointer
|
8
|
+
|
9
|
+
class Storage:
|
10
|
+
checkpointer: Checkpointer
|
11
|
+
|
12
|
+
def __init__(self, checkpointer: Checkpointer):
|
13
|
+
self.checkpointer = checkpointer
|
14
|
+
|
15
|
+
def exists(self, path: Path) -> bool: ...
|
16
|
+
|
17
|
+
def checkpoint_date(self, path: Path) -> datetime: ...
|
18
|
+
|
19
|
+
def store(self, path: Path, data: Any) -> None: ...
|
20
|
+
|
21
|
+
def load(self, path: Path) -> Any: ...
|
22
|
+
|
23
|
+
def delete(self, path: Path) -> None: ...
|
@@ -1,28 +0,0 @@
|
|
1
|
-
from datetime import datetime
|
2
|
-
from ..types import Storage
|
3
|
-
|
4
|
-
store = {}
|
5
|
-
date_stored = {}
|
6
|
-
|
7
|
-
class MemoryStorage(Storage):
|
8
|
-
@staticmethod
|
9
|
-
def exists(path):
|
10
|
-
return str(path) in store
|
11
|
-
|
12
|
-
@staticmethod
|
13
|
-
def checkpoint_date(path):
|
14
|
-
return date_stored[str(path)]
|
15
|
-
|
16
|
-
@staticmethod
|
17
|
-
def store(path, data):
|
18
|
-
store[str(path)] = data
|
19
|
-
date_stored[str(path)] = datetime.now()
|
20
|
-
|
21
|
-
@staticmethod
|
22
|
-
def load(path):
|
23
|
-
return store[str(path)]
|
24
|
-
|
25
|
-
@staticmethod
|
26
|
-
def delete(path):
|
27
|
-
del store[str(path)]
|
28
|
-
del date_stored[str(path)]
|
@@ -1,19 +0,0 @@
|
|
1
|
-
from typing import Protocol, Any
|
2
|
-
from pathlib import Path
|
3
|
-
from datetime import datetime
|
4
|
-
|
5
|
-
class Storage(Protocol):
|
6
|
-
@staticmethod
|
7
|
-
def exists(path: Path) -> bool: ...
|
8
|
-
|
9
|
-
@staticmethod
|
10
|
-
def checkpoint_date(path: Path) -> datetime: ...
|
11
|
-
|
12
|
-
@staticmethod
|
13
|
-
def store(path: Path, data: Any) -> None: ...
|
14
|
-
|
15
|
-
@staticmethod
|
16
|
-
def load(path: Path) -> Any: ...
|
17
|
-
|
18
|
-
@staticmethod
|
19
|
-
def delete(path: Path) -> None: ...
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|