checkpointer 2.0.0__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {checkpointer-2.0.0 → checkpointer-2.0.1}/PKG-INFO +93 -35
- {checkpointer-2.0.0 → checkpointer-2.0.1}/README.md +92 -34
- {checkpointer-2.0.0 → checkpointer-2.0.1}/checkpointer/__init__.py +3 -3
- {checkpointer-2.0.0 → checkpointer-2.0.1}/checkpointer/checkpoint.py +27 -36
- {checkpointer-2.0.0 → checkpointer-2.0.1}/checkpointer/print_checkpoint.py +1 -1
- {checkpointer-2.0.0 → checkpointer-2.0.1}/checkpointer/storages/bcolz_storage.py +10 -22
- checkpointer-2.0.1/checkpointer/storages/memory_storage.py +28 -0
- checkpointer-2.0.1/checkpointer/storages/pickle_storage.py +36 -0
- checkpointer-2.0.1/checkpointer/types.py +19 -0
- {checkpointer-2.0.0 → checkpointer-2.0.1}/pyproject.toml +1 -1
- {checkpointer-2.0.0 → checkpointer-2.0.1}/uv.lock +1 -1
- checkpointer-2.0.0/checkpointer/storages/memory_storage.py +0 -29
- checkpointer-2.0.0/checkpointer/storages/pickle_storage.py +0 -55
- checkpointer-2.0.0/checkpointer/types.py +0 -19
- {checkpointer-2.0.0 → checkpointer-2.0.1}/.gitignore +0 -0
- {checkpointer-2.0.0 → checkpointer-2.0.1}/LICENSE +0 -0
- {checkpointer-2.0.0 → checkpointer-2.0.1}/checkpointer/function_body.py +0 -0
- {checkpointer-2.0.0 → checkpointer-2.0.1}/checkpointer/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: checkpointer
|
3
|
-
Version: 2.0.0
|
3
|
+
Version: 2.0.1
|
4
4
|
Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
|
5
5
|
Project-URL: Repository, https://github.com/Reddan/checkpointer.git
|
6
6
|
Author: Hampus Hallman
|
@@ -17,18 +17,44 @@ Description-Content-Type: text/markdown
|
|
17
17
|
|
18
18
|
# checkpointer · [](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [](https://pypi.org/project/checkpointer/) [](https://pypi.org/project/checkpointer/)
|
19
19
|
|
20
|
-
`checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
20
|
+
`checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations. ⚡️
|
21
21
|
|
22
22
|
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
23
23
|
|
24
24
|
### Key Features:
|
25
|
-
- **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
|
26
|
-
- **Simple Decorator API**: Apply `@checkpoint` to functions.
|
27
|
-
- **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
-
- **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
-
- **Flexible Path Configuration**: Control where checkpoints are stored.
|
25
|
+
- 🗂️ **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
|
26
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions.
|
27
|
+
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
28
|
+
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
29
|
+
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
30
30
|
|
31
|
-
|
31
|
+
---
|
32
|
+
|
33
|
+
## Installation
|
34
|
+
|
35
|
+
```bash
|
36
|
+
pip install checkpointer
|
37
|
+
```
|
38
|
+
|
39
|
+
---
|
40
|
+
|
41
|
+
## Quick Start 🚀
|
42
|
+
|
43
|
+
```python
|
44
|
+
from checkpointer import checkpoint
|
45
|
+
|
46
|
+
@checkpoint
|
47
|
+
def expensive_function(x: int) -> int:
|
48
|
+
print("Computing...")
|
49
|
+
return x ** 2
|
50
|
+
|
51
|
+
result = expensive_function(4) # Computes and stores result
|
52
|
+
result = expensive_function(4) # Loads from checkpoint
|
53
|
+
```
|
54
|
+
|
55
|
+
---
|
56
|
+
|
57
|
+
## How It Works
|
32
58
|
|
33
59
|
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
|
34
60
|
|
@@ -55,30 +81,6 @@ If you change `multiply`, the checkpoints for both `helper` and `compute` will b
|
|
55
81
|
|
56
82
|
---
|
57
83
|
|
58
|
-
## Installation
|
59
|
-
|
60
|
-
```bash
|
61
|
-
pip install checkpointer
|
62
|
-
```
|
63
|
-
|
64
|
-
---
|
65
|
-
|
66
|
-
## Quick Start
|
67
|
-
|
68
|
-
```python
|
69
|
-
from checkpointer import checkpoint
|
70
|
-
|
71
|
-
@checkpoint
|
72
|
-
def expensive_function(x: int) -> int:
|
73
|
-
print("Computing...")
|
74
|
-
return x ** 2
|
75
|
-
|
76
|
-
result = expensive_function(4) # Computes and stores result
|
77
|
-
result = expensive_function(4) # Loads from checkpoint
|
78
|
-
```
|
79
|
-
|
80
|
-
---
|
81
|
-
|
82
84
|
## Parameterization
|
83
85
|
|
84
86
|
### Global Configuration
|
@@ -160,6 +162,8 @@ Use `fn` to directly call the original, undecorated function:
|
|
160
162
|
result = expensive_function.fn(4)
|
161
163
|
```
|
162
164
|
|
165
|
+
This is especially useful **inside recursive functions**. By using `.fn` within the function itself, you avoid redundant caching of intermediate recursive calls while still caching the final result at the top level.
|
166
|
+
|
163
167
|
### Retrieve Stored Checkpoints
|
164
168
|
Access stored results without recalculating:
|
165
169
|
|
@@ -169,7 +173,61 @@ stored_result = expensive_function.get(4)
|
|
169
173
|
|
170
174
|
---
|
171
175
|
|
172
|
-
## Configuration Options
|
176
|
+
## Storage Backends
|
177
|
+
|
178
|
+
`checkpointer` supports flexible storage backends, including built-in options and custom implementations.
|
179
|
+
|
180
|
+
### Built-In Backends
|
181
|
+
|
182
|
+
1. **PickleStorage**: Saves checkpoints to disk using Python's `pickle` module.
|
183
|
+
2. **MemoryStorage**: Caches checkpoints in memory for fast, non-persistent use.
|
184
|
+
|
185
|
+
To use these backends, pass either `"pickle"` or `PickleStorage` (and similarly for `"memory"` or `MemoryStorage`) to the `format` parameter:
|
186
|
+
```python
|
187
|
+
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
188
|
+
|
189
|
+
@checkpoint(format="pickle") # Equivalent to format=PickleStorage
|
190
|
+
def disk_cached(x: int) -> int:
|
191
|
+
return x ** 2
|
192
|
+
|
193
|
+
@checkpoint(format="memory") # Equivalent to format=MemoryStorage
|
194
|
+
def memory_cached(x: int) -> int:
|
195
|
+
return x * 10
|
196
|
+
```
|
197
|
+
|
198
|
+
### Custom Storage Backends
|
199
|
+
|
200
|
+
Create custom storage backends by implementing methods for storing, loading, and managing checkpoints. For example, a custom storage backend might use a database, cloud storage, or a specialized format.
|
201
|
+
|
202
|
+
Example usage:
|
203
|
+
```python
|
204
|
+
from checkpointer import checkpoint, Storage
|
205
|
+
from typing import Any
|
206
|
+
from pathlib import Path
|
207
|
+
from datetime import datetime
|
208
|
+
|
209
|
+
class CustomStorage(Storage): # Optional for type hinting
|
210
|
+
@staticmethod
|
211
|
+
def exists(path: Path) -> bool: ...
|
212
|
+
@staticmethod
|
213
|
+
def checkpoint_date(path: Path) -> datetime: ...
|
214
|
+
@staticmethod
|
215
|
+
def store(path: Path, data: Any) -> None: ...
|
216
|
+
@staticmethod
|
217
|
+
def load(path: Path) -> Any: ...
|
218
|
+
@staticmethod
|
219
|
+
def delete(path: Path) -> None: ...
|
220
|
+
|
221
|
+
@checkpoint(format=CustomStorage)
|
222
|
+
def custom_cached(x: int):
|
223
|
+
return x ** 2
|
224
|
+
```
|
225
|
+
|
226
|
+
This flexibility allows you to adapt `checkpointer` to meet any storage requirement, whether persistent or in-memory.
|
227
|
+
|
228
|
+
---
|
229
|
+
|
230
|
+
## Configuration Options ⚙️
|
173
231
|
|
174
232
|
| Option | Type | Default | Description |
|
175
233
|
|----------------|-------------------------------------|-------------|---------------------------------------------|
|
@@ -177,12 +235,12 @@ stored_result = expensive_function.get(4)
|
|
177
235
|
| `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
|
178
236
|
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
179
237
|
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
180
|
-
| `path` | `
|
238
|
+
| `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
|
181
239
|
| `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
182
240
|
|
183
241
|
---
|
184
242
|
|
185
|
-
## Full Example
|
243
|
+
## Full Example 🛠️
|
186
244
|
|
187
245
|
```python
|
188
246
|
import asyncio
|
@@ -1,17 +1,43 @@
|
|
1
1
|
# checkpointer · [](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [](https://pypi.org/project/checkpointer/) [](https://pypi.org/project/checkpointer/)
|
2
2
|
|
3
|
-
`checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations.
|
3
|
+
`checkpointer` is a Python library for memoizing function results. It simplifies caching by providing a decorator-based API and supports various storage backends. It's designed for computationally expensive operations where caching can save time, or during development to avoid waiting for redundant computations. ⚡️
|
4
4
|
|
5
5
|
Adding or removing `@checkpoint` doesn't change how your code works, and it can be applied to any function, including ones you've already written, without altering their behavior or introducing side effects. The original function remains unchanged and can still be called directly when needed.
|
6
6
|
|
7
7
|
### Key Features:
|
8
|
-
- **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
|
9
|
-
- **Simple Decorator API**: Apply `@checkpoint` to functions.
|
10
|
-
- **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
11
|
-
- **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
12
|
-
- **Flexible Path Configuration**: Control where checkpoints are stored.
|
8
|
+
- 🗂️ **Multiple Storage Backends**: Supports in-memory, pickle, or your own custom storage.
|
9
|
+
- 🎯 **Simple Decorator API**: Apply `@checkpoint` to functions.
|
10
|
+
- 🔄 **Async and Sync Compatibility**: Works with synchronous functions and any Python async runtime (e.g., `asyncio`, `Trio`, `Curio`).
|
11
|
+
- ⏲️ **Custom Expiration Logic**: Automatically invalidate old checkpoints.
|
12
|
+
- 📂 **Flexible Path Configuration**: Control where checkpoints are stored.
|
13
13
|
|
14
|
-
|
14
|
+
---
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
|
18
|
+
```bash
|
19
|
+
pip install checkpointer
|
20
|
+
```
|
21
|
+
|
22
|
+
---
|
23
|
+
|
24
|
+
## Quick Start 🚀
|
25
|
+
|
26
|
+
```python
|
27
|
+
from checkpointer import checkpoint
|
28
|
+
|
29
|
+
@checkpoint
|
30
|
+
def expensive_function(x: int) -> int:
|
31
|
+
print("Computing...")
|
32
|
+
return x ** 2
|
33
|
+
|
34
|
+
result = expensive_function(4) # Computes and stores result
|
35
|
+
result = expensive_function(4) # Loads from checkpoint
|
36
|
+
```
|
37
|
+
|
38
|
+
---
|
39
|
+
|
40
|
+
## How It Works
|
15
41
|
|
16
42
|
When you use `@checkpoint`, the function's **arguments** (`args`, `kwargs`) are hashed to create a unique identifier for each call. This identifier is used to store and retrieve cached results. If the same arguments are passed again, `checkpointer` will return the cached result instead of recomputing.
|
17
43
|
|
@@ -38,30 +64,6 @@ If you change `multiply`, the checkpoints for both `helper` and `compute` will b
|
|
38
64
|
|
39
65
|
---
|
40
66
|
|
41
|
-
## Installation
|
42
|
-
|
43
|
-
```bash
|
44
|
-
pip install checkpointer
|
45
|
-
```
|
46
|
-
|
47
|
-
---
|
48
|
-
|
49
|
-
## Quick Start
|
50
|
-
|
51
|
-
```python
|
52
|
-
from checkpointer import checkpoint
|
53
|
-
|
54
|
-
@checkpoint
|
55
|
-
def expensive_function(x: int) -> int:
|
56
|
-
print("Computing...")
|
57
|
-
return x ** 2
|
58
|
-
|
59
|
-
result = expensive_function(4) # Computes and stores result
|
60
|
-
result = expensive_function(4) # Loads from checkpoint
|
61
|
-
```
|
62
|
-
|
63
|
-
---
|
64
|
-
|
65
67
|
## Parameterization
|
66
68
|
|
67
69
|
### Global Configuration
|
@@ -143,6 +145,8 @@ Use `fn` to directly call the original, undecorated function:
|
|
143
145
|
result = expensive_function.fn(4)
|
144
146
|
```
|
145
147
|
|
148
|
+
This is especially useful **inside recursive functions**. By using `.fn` within the function itself, you avoid redundant caching of intermediate recursive calls while still caching the final result at the top level.
|
149
|
+
|
146
150
|
### Retrieve Stored Checkpoints
|
147
151
|
Access stored results without recalculating:
|
148
152
|
|
@@ -152,7 +156,61 @@ stored_result = expensive_function.get(4)
|
|
152
156
|
|
153
157
|
---
|
154
158
|
|
155
|
-
## Configuration Options
|
159
|
+
## Storage Backends
|
160
|
+
|
161
|
+
`checkpointer` supports flexible storage backends, including built-in options and custom implementations.
|
162
|
+
|
163
|
+
### Built-In Backends
|
164
|
+
|
165
|
+
1. **PickleStorage**: Saves checkpoints to disk using Python's `pickle` module.
|
166
|
+
2. **MemoryStorage**: Caches checkpoints in memory for fast, non-persistent use.
|
167
|
+
|
168
|
+
To use these backends, pass either `"pickle"` or `PickleStorage` (and similarly for `"memory"` or `MemoryStorage`) to the `format` parameter:
|
169
|
+
```python
|
170
|
+
from checkpointer import checkpoint, PickleStorage, MemoryStorage
|
171
|
+
|
172
|
+
@checkpoint(format="pickle") # Equivalent to format=PickleStorage
|
173
|
+
def disk_cached(x: int) -> int:
|
174
|
+
return x ** 2
|
175
|
+
|
176
|
+
@checkpoint(format="memory") # Equivalent to format=MemoryStorage
|
177
|
+
def memory_cached(x: int) -> int:
|
178
|
+
return x * 10
|
179
|
+
```
|
180
|
+
|
181
|
+
### Custom Storage Backends
|
182
|
+
|
183
|
+
Create custom storage backends by implementing methods for storing, loading, and managing checkpoints. For example, a custom storage backend might use a database, cloud storage, or a specialized format.
|
184
|
+
|
185
|
+
Example usage:
|
186
|
+
```python
|
187
|
+
from checkpointer import checkpoint, Storage
|
188
|
+
from typing import Any
|
189
|
+
from pathlib import Path
|
190
|
+
from datetime import datetime
|
191
|
+
|
192
|
+
class CustomStorage(Storage): # Optional for type hinting
|
193
|
+
@staticmethod
|
194
|
+
def exists(path: Path) -> bool: ...
|
195
|
+
@staticmethod
|
196
|
+
def checkpoint_date(path: Path) -> datetime: ...
|
197
|
+
@staticmethod
|
198
|
+
def store(path: Path, data: Any) -> None: ...
|
199
|
+
@staticmethod
|
200
|
+
def load(path: Path) -> Any: ...
|
201
|
+
@staticmethod
|
202
|
+
def delete(path: Path) -> None: ...
|
203
|
+
|
204
|
+
@checkpoint(format=CustomStorage)
|
205
|
+
def custom_cached(x: int):
|
206
|
+
return x ** 2
|
207
|
+
```
|
208
|
+
|
209
|
+
This flexibility allows you to adapt `checkpointer` to meet any storage requirement, whether persistent or in-memory.
|
210
|
+
|
211
|
+
---
|
212
|
+
|
213
|
+
## Configuration Options ⚙️
|
156
214
|
|
157
215
|
| Option | Type | Default | Description |
|
158
216
|
|----------------|-------------------------------------|-------------|---------------------------------------------|
|
@@ -160,12 +218,12 @@ stored_result = expensive_function.get(4)
|
|
160
218
|
| `root_path` | `Path`, `str`, or `None` | User Cache | Root directory for storing checkpoints. |
|
161
219
|
| `when` | `bool` | `True` | Enable or disable checkpointing. |
|
162
220
|
| `verbosity` | `0` or `1` | `1` | Logging verbosity. |
|
163
|
-
| `path` | `
|
221
|
+
| `path` | `Callable[..., str]` | `None` | Custom path for checkpoint storage. |
|
164
222
|
| `should_expire`| `Callable[[datetime], bool]` | `None` | Custom expiration logic. |
|
165
223
|
|
166
224
|
---
|
167
225
|
|
168
|
-
## Full Example
|
226
|
+
## Full Example 🛠️
|
169
227
|
|
170
228
|
```python
|
171
229
|
import asyncio
|
@@ -1,9 +1,9 @@
|
|
1
|
-
from .checkpoint import Checkpointer, CheckpointFn
|
2
|
-
from .checkpoint import CheckpointError, CheckpointReadFail
|
1
|
+
from .checkpoint import Checkpointer, CheckpointFn, CheckpointError
|
3
2
|
from .types import Storage
|
4
3
|
from .function_body import get_function_hash
|
4
|
+
import tempfile
|
5
5
|
|
6
6
|
create_checkpointer = Checkpointer
|
7
7
|
checkpoint = Checkpointer()
|
8
8
|
memory_checkpoint = Checkpointer(format="memory")
|
9
|
-
tmp_checkpoint = Checkpointer(root_path="/
|
9
|
+
tmp_checkpoint = Checkpointer(root_path=tempfile.gettempdir() + "/checkpoints")
|
@@ -1,7 +1,6 @@
|
|
1
1
|
import inspect
|
2
2
|
import relib.hashing as hashing
|
3
|
-
from typing import Generic, TypeVar, TypedDict, Unpack, Literal, Union, Any, cast, overload
|
4
|
-
from collections.abc import Callable
|
3
|
+
from typing import Generic, TypeVar, TypedDict, Callable, Unpack, Literal, Union, Any, cast, overload
|
5
4
|
from datetime import datetime
|
6
5
|
from pathlib import Path
|
7
6
|
from functools import update_wrapper
|
@@ -21,20 +20,13 @@ STORAGE_MAP = {"memory": MemoryStorage, "pickle": PickleStorage, "bcolz": BcolzS
|
|
21
20
|
class CheckpointError(Exception):
|
22
21
|
pass
|
23
22
|
|
24
|
-
class CheckpointReadFail(CheckpointError):
|
25
|
-
pass
|
26
|
-
|
27
|
-
StorageType = Literal["pickle", "memory", "bcolz"] | Storage
|
28
|
-
CheckpointPath = str | Callable[..., str] | None
|
29
|
-
ShouldExpire = Callable[[datetime], bool]
|
30
|
-
|
31
23
|
class CheckpointerOpts(TypedDict, total=False):
|
32
|
-
format:
|
24
|
+
format: Storage | Literal["pickle", "memory", "bcolz"]
|
33
25
|
root_path: Path | str | None
|
34
26
|
when: bool
|
35
27
|
verbosity: Literal[0, 1]
|
36
|
-
path:
|
37
|
-
should_expire:
|
28
|
+
path: Callable[..., str] | None
|
29
|
+
should_expire: Callable[[datetime], bool] | None
|
38
30
|
|
39
31
|
class Checkpointer:
|
40
32
|
def __init__(self, **opts: Unpack[CheckpointerOpts]):
|
@@ -51,7 +43,7 @@ class Checkpointer:
|
|
51
43
|
@overload
|
52
44
|
def __call__(self, fn: Fn, **override_opts: Unpack[CheckpointerOpts]) -> "CheckpointFn[Fn]": ...
|
53
45
|
@overload
|
54
|
-
def __call__(self, fn=None, **override_opts: Unpack[CheckpointerOpts]) -> "Checkpointer": ...
|
46
|
+
def __call__(self, fn: None=None, **override_opts: Unpack[CheckpointerOpts]) -> "Checkpointer": ...
|
55
47
|
def __call__(self, fn: Fn | None=None, **override_opts: Unpack[CheckpointerOpts]) -> Union["Checkpointer", "CheckpointFn[Fn]"]:
|
56
48
|
if override_opts:
|
57
49
|
opts = CheckpointerOpts(**{**self.__dict__, **override_opts})
|
@@ -68,48 +60,46 @@ class CheckpointFn(Generic[Fn]):
|
|
68
60
|
self.fn = fn
|
69
61
|
self.fn_hash = get_function_hash(wrapped)
|
70
62
|
self.fn_id = f"{file_name}/{wrapped.__name__}"
|
71
|
-
self.is_async = inspect.iscoroutinefunction(
|
63
|
+
self.is_async = inspect.iscoroutinefunction(wrapped)
|
72
64
|
|
73
65
|
def get_checkpoint_id(self, args: tuple, kw: dict) -> str:
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
return f"{self.fn_id}/{hashing.hash([self.fn_hash, args, kw or 0])}"
|
83
|
-
|
84
|
-
async def _store_on_demand(self, args: tuple, kw: dict, force: bool):
|
66
|
+
if not callable(self.checkpointer.path):
|
67
|
+
return f"{self.fn_id}/{hashing.hash([self.fn_hash, args, kw or 0])}"
|
68
|
+
checkpoint_id = self.checkpointer.path(*args, **kw)
|
69
|
+
if not isinstance(checkpoint_id, str):
|
70
|
+
raise CheckpointError(f"path function must return a string, got {type(checkpoint_id)}")
|
71
|
+
return checkpoint_id
|
72
|
+
|
73
|
+
async def _store_on_demand(self, args: tuple, kw: dict, rerun: bool):
|
85
74
|
checkpoint_id = self.get_checkpoint_id(args, kw)
|
86
75
|
checkpoint_path = self.checkpointer.root_path / checkpoint_id
|
87
76
|
storage = self.checkpointer.get_storage()
|
88
77
|
should_log = storage is not MemoryStorage and self.checkpointer.verbosity > 0
|
89
|
-
refresh =
|
90
|
-
or storage.
|
91
|
-
or (self.checkpointer.should_expire and
|
78
|
+
refresh = rerun \
|
79
|
+
or not storage.exists(checkpoint_path) \
|
80
|
+
or (self.checkpointer.should_expire and self.checkpointer.should_expire(storage.checkpoint_date(checkpoint_path)))
|
92
81
|
|
93
82
|
if refresh:
|
94
83
|
print_checkpoint(should_log, "MEMORIZING", checkpoint_id, "blue")
|
95
84
|
data = self.fn(*args, **kw)
|
96
85
|
if inspect.iscoroutine(data):
|
97
86
|
data = await data
|
98
|
-
|
87
|
+
storage.store(checkpoint_path, data)
|
88
|
+
return data
|
99
89
|
|
100
90
|
try:
|
101
|
-
data = storage.
|
91
|
+
data = storage.load(checkpoint_path)
|
102
92
|
print_checkpoint(should_log, "REMEMBERED", checkpoint_id, "green")
|
103
93
|
return data
|
104
94
|
except (EOFError, FileNotFoundError):
|
105
95
|
print_checkpoint(should_log, "CORRUPTED", checkpoint_id, "yellow")
|
106
|
-
storage.
|
107
|
-
return await self._store_on_demand(args, kw,
|
96
|
+
storage.delete(checkpoint_path)
|
97
|
+
return await self._store_on_demand(args, kw, rerun)
|
108
98
|
|
109
|
-
def _call(self, args: tuple, kw: dict,
|
99
|
+
def _call(self, args: tuple, kw: dict, rerun=False):
|
110
100
|
if not self.checkpointer.when:
|
111
101
|
return self.fn(*args, **kw)
|
112
|
-
coroutine = self._store_on_demand(args, kw,
|
102
|
+
coroutine = self._store_on_demand(args, kw, rerun)
|
113
103
|
return coroutine if self.is_async else sync_resolve_coroutine(coroutine)
|
114
104
|
|
115
105
|
__call__: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw))
|
@@ -117,7 +107,8 @@ class CheckpointFn(Generic[Fn]):
|
|
117
107
|
|
118
108
|
def get(self, *args, **kw) -> Any:
|
119
109
|
checkpoint_path = self.checkpointer.root_path / self.get_checkpoint_id(args, kw)
|
110
|
+
storage = self.checkpointer.get_storage()
|
120
111
|
try:
|
121
|
-
return
|
112
|
+
return storage.load(checkpoint_path)
|
122
113
|
except:
|
123
|
-
raise
|
114
|
+
raise CheckpointError("Could not load checkpoint")
|
@@ -44,7 +44,7 @@ def colored_(text: str, color: Color | None = None, on_color: Color | None = Non
|
|
44
44
|
text = f"\033[{COLOR_MAP[on_color] + 10}m{text}"
|
45
45
|
return text + "\033[0m"
|
46
46
|
|
47
|
-
noop = lambda *
|
47
|
+
noop = lambda text, *a, **k: text
|
48
48
|
colored = colored_ if allow_color() else noop
|
49
49
|
|
50
50
|
def print_checkpoint(should_log: bool, title: str, text: str, color: Color):
|
@@ -18,12 +18,6 @@ def get_data_type_str(x):
|
|
18
18
|
def get_metapath(path: Path):
|
19
19
|
return path.with_name(f"{path.name}_meta")
|
20
20
|
|
21
|
-
def get_collection_timestamp(path: Path):
|
22
|
-
import bcolz
|
23
|
-
metapath = get_metapath(path)
|
24
|
-
meta_data = bcolz.open(metapath)[:][0]
|
25
|
-
return meta_data["created"]
|
26
|
-
|
27
21
|
def insert_data(path: Path, data):
|
28
22
|
import bcolz
|
29
23
|
c = bcolz.carray(data, rootdir=path, mode="w")
|
@@ -31,22 +25,17 @@ def insert_data(path: Path, data):
|
|
31
25
|
|
32
26
|
class BcolzStorage(Storage):
|
33
27
|
@staticmethod
|
34
|
-
def
|
35
|
-
|
36
|
-
get_collection_timestamp(path)
|
37
|
-
return False
|
38
|
-
except (FileNotFoundError, EOFError):
|
39
|
-
return True
|
28
|
+
def exists(path):
|
29
|
+
return path.exists()
|
40
30
|
|
41
31
|
@staticmethod
|
42
|
-
def
|
43
|
-
return
|
32
|
+
def checkpoint_date(path):
|
33
|
+
return datetime.fromtimestamp(path.stat().st_mtime)
|
44
34
|
|
45
35
|
@staticmethod
|
46
|
-
def
|
36
|
+
def store(path, data):
|
47
37
|
metapath = get_metapath(path)
|
48
38
|
path.parent.mkdir(parents=True, exist_ok=True)
|
49
|
-
created = datetime.now()
|
50
39
|
data_type_str = get_data_type_str(data)
|
51
40
|
if data_type_str == "tuple":
|
52
41
|
fields = list(range(len(data)))
|
@@ -54,18 +43,17 @@ class BcolzStorage(Storage):
|
|
54
43
|
fields = sorted(data.keys())
|
55
44
|
else:
|
56
45
|
fields = []
|
57
|
-
meta_data = {"
|
46
|
+
meta_data = {"data_type_str": data_type_str, "fields": fields}
|
58
47
|
insert_data(metapath, meta_data)
|
59
48
|
if data_type_str in ["tuple", "dict"]:
|
60
49
|
for i in range(len(fields)):
|
61
50
|
child_path = Path(f"{path} ({i})")
|
62
|
-
BcolzStorage.
|
51
|
+
BcolzStorage.store(child_path, data[fields[i]])
|
63
52
|
else:
|
64
53
|
insert_data(path, data)
|
65
|
-
return data
|
66
54
|
|
67
55
|
@staticmethod
|
68
|
-
def
|
56
|
+
def load(path):
|
69
57
|
import bcolz
|
70
58
|
metapath = get_metapath(path)
|
71
59
|
meta_data = bcolz.open(metapath)[:][0]
|
@@ -73,7 +61,7 @@ class BcolzStorage(Storage):
|
|
73
61
|
if data_type_str in ["tuple", "dict"]:
|
74
62
|
fields = meta_data["fields"]
|
75
63
|
partitions = range(len(fields))
|
76
|
-
data = [BcolzStorage.
|
64
|
+
data = [BcolzStorage.load(Path(f"{path} ({i})")) for i in partitions]
|
77
65
|
if data_type_str == "tuple":
|
78
66
|
return tuple(data)
|
79
67
|
else:
|
@@ -88,7 +76,7 @@ class BcolzStorage(Storage):
|
|
88
76
|
return data[:]
|
89
77
|
|
90
78
|
@staticmethod
|
91
|
-
def
|
79
|
+
def delete(path):
|
92
80
|
# NOTE: Not recursive
|
93
81
|
metapath = get_metapath(path)
|
94
82
|
try:
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from ..types import Storage
|
3
|
+
|
4
|
+
store = {}
|
5
|
+
date_stored = {}
|
6
|
+
|
7
|
+
class MemoryStorage(Storage):
|
8
|
+
@staticmethod
|
9
|
+
def exists(path):
|
10
|
+
return str(path) in store
|
11
|
+
|
12
|
+
@staticmethod
|
13
|
+
def checkpoint_date(path):
|
14
|
+
return date_stored[str(path)]
|
15
|
+
|
16
|
+
@staticmethod
|
17
|
+
def store(path, data):
|
18
|
+
store[str(path)] = data
|
19
|
+
date_stored[str(path)] = datetime.now()
|
20
|
+
|
21
|
+
@staticmethod
|
22
|
+
def load(path):
|
23
|
+
return store[str(path)]
|
24
|
+
|
25
|
+
@staticmethod
|
26
|
+
def delete(path):
|
27
|
+
del store[str(path)]
|
28
|
+
del date_stored[str(path)]
|
@@ -0,0 +1,36 @@
|
|
1
|
+
import pickle
|
2
|
+
from pathlib import Path
|
3
|
+
from datetime import datetime
|
4
|
+
from ..types import Storage
|
5
|
+
|
6
|
+
def get_path(path: Path):
|
7
|
+
return path.with_name(f"{path.name}.pkl")
|
8
|
+
|
9
|
+
class PickleStorage(Storage):
|
10
|
+
@staticmethod
|
11
|
+
def exists(path):
|
12
|
+
return get_path(path).exists()
|
13
|
+
|
14
|
+
@staticmethod
|
15
|
+
def checkpoint_date(path):
|
16
|
+
return datetime.fromtimestamp(get_path(path).stat().st_mtime)
|
17
|
+
|
18
|
+
@staticmethod
|
19
|
+
def store(path, data):
|
20
|
+
full_path = get_path(path)
|
21
|
+
full_path.parent.mkdir(parents=True, exist_ok=True)
|
22
|
+
with full_path.open("wb") as file:
|
23
|
+
pickle.dump(data, file, -1)
|
24
|
+
|
25
|
+
@staticmethod
|
26
|
+
def load(path):
|
27
|
+
full_path = get_path(path)
|
28
|
+
with full_path.open("rb") as file:
|
29
|
+
return pickle.load(file)
|
30
|
+
|
31
|
+
@staticmethod
|
32
|
+
def delete(path):
|
33
|
+
try:
|
34
|
+
get_path(path).unlink()
|
35
|
+
except FileNotFoundError:
|
36
|
+
pass
|
@@ -0,0 +1,19 @@
|
|
1
|
+
from typing import Protocol, Any
|
2
|
+
from pathlib import Path
|
3
|
+
from datetime import datetime
|
4
|
+
|
5
|
+
class Storage(Protocol):
|
6
|
+
@staticmethod
|
7
|
+
def exists(path: Path) -> bool: ...
|
8
|
+
|
9
|
+
@staticmethod
|
10
|
+
def checkpoint_date(path: Path) -> datetime: ...
|
11
|
+
|
12
|
+
@staticmethod
|
13
|
+
def store(path: Path, data: Any) -> None: ...
|
14
|
+
|
15
|
+
@staticmethod
|
16
|
+
def load(path: Path) -> Any: ...
|
17
|
+
|
18
|
+
@staticmethod
|
19
|
+
def delete(path: Path) -> None: ...
|
@@ -1,29 +0,0 @@
|
|
1
|
-
from datetime import datetime
|
2
|
-
from ..types import Storage
|
3
|
-
|
4
|
-
store = {}
|
5
|
-
date_stored = {}
|
6
|
-
|
7
|
-
class MemoryStorage(Storage):
|
8
|
-
@staticmethod
|
9
|
-
def is_expired(path):
|
10
|
-
return path not in store
|
11
|
-
|
12
|
-
@staticmethod
|
13
|
-
def should_expire(path, expire_fn):
|
14
|
-
return expire_fn(date_stored[path])
|
15
|
-
|
16
|
-
@staticmethod
|
17
|
-
def store_data(path, data):
|
18
|
-
store[path] = data
|
19
|
-
date_stored[path] = datetime.now()
|
20
|
-
return data
|
21
|
-
|
22
|
-
@staticmethod
|
23
|
-
def load_data(path):
|
24
|
-
return store[path]
|
25
|
-
|
26
|
-
@staticmethod
|
27
|
-
def delete_data(path):
|
28
|
-
del store[path]
|
29
|
-
del date_stored[path]
|
@@ -1,55 +0,0 @@
|
|
1
|
-
import pickle
|
2
|
-
from pathlib import Path
|
3
|
-
from datetime import datetime
|
4
|
-
from ..types import Storage
|
5
|
-
|
6
|
-
def get_paths(path: Path):
|
7
|
-
meta_full_path = path.with_name(f"{path.name}_meta.pkl")
|
8
|
-
pkl_full_path = path.with_name(f"{path.name}.pkl")
|
9
|
-
return meta_full_path, pkl_full_path
|
10
|
-
|
11
|
-
def get_collection_timestamp(path: Path):
|
12
|
-
meta_full_path, _ = get_paths(path)
|
13
|
-
with meta_full_path.open("rb") as file:
|
14
|
-
meta_data = pickle.load(file)
|
15
|
-
return meta_data["created"]
|
16
|
-
|
17
|
-
class PickleStorage(Storage):
|
18
|
-
@staticmethod
|
19
|
-
def is_expired(path):
|
20
|
-
try:
|
21
|
-
get_collection_timestamp(path)
|
22
|
-
return False
|
23
|
-
except (FileNotFoundError, EOFError):
|
24
|
-
return True
|
25
|
-
|
26
|
-
@staticmethod
|
27
|
-
def should_expire(path, expire_fn):
|
28
|
-
return expire_fn(get_collection_timestamp(path))
|
29
|
-
|
30
|
-
@staticmethod
|
31
|
-
def store_data(path, data):
|
32
|
-
created = datetime.now()
|
33
|
-
meta_data = {"created": created} # TODO: this should just be a JSON or binary dump of the unix timestamp and other metadata - not pickle
|
34
|
-
meta_full_path, pkl_full_path = get_paths(path)
|
35
|
-
pkl_full_path.parent.mkdir(parents=True, exist_ok=True)
|
36
|
-
with pkl_full_path.open("wb") as file:
|
37
|
-
pickle.dump(data, file, -1)
|
38
|
-
with meta_full_path.open("wb") as file:
|
39
|
-
pickle.dump(meta_data, file, -1)
|
40
|
-
return data
|
41
|
-
|
42
|
-
@staticmethod
|
43
|
-
def load_data(path):
|
44
|
-
_, full_path = get_paths(path)
|
45
|
-
with full_path.open("rb") as file:
|
46
|
-
return pickle.load(file)
|
47
|
-
|
48
|
-
@staticmethod
|
49
|
-
def delete_data(path):
|
50
|
-
meta_full_path, pkl_full_path = get_paths(path)
|
51
|
-
try:
|
52
|
-
meta_full_path.unlink()
|
53
|
-
pkl_full_path.unlink()
|
54
|
-
except FileNotFoundError:
|
55
|
-
pass
|
@@ -1,19 +0,0 @@
|
|
1
|
-
from typing import Callable, Protocol, Any
|
2
|
-
from pathlib import Path
|
3
|
-
from datetime import datetime
|
4
|
-
|
5
|
-
class Storage(Protocol):
|
6
|
-
@staticmethod
|
7
|
-
def is_expired(path: Path) -> bool: ...
|
8
|
-
|
9
|
-
@staticmethod
|
10
|
-
def should_expire(path: Path, expire_fn: Callable[[datetime], bool]) -> bool: ...
|
11
|
-
|
12
|
-
@staticmethod
|
13
|
-
def store_data(path: Path, data: Any) -> Any: ...
|
14
|
-
|
15
|
-
@staticmethod
|
16
|
-
def load_data(path: Path) -> Any: ...
|
17
|
-
|
18
|
-
@staticmethod
|
19
|
-
def delete_data(path: Path) -> None: ...
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|