checkpointer 2.8.1__tar.gz → 2.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. checkpointer-2.9.1/PKG-INFO +215 -0
  2. checkpointer-2.9.1/README.md +195 -0
  3. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/checkpoint.py +65 -61
  4. checkpointer-2.9.1/checkpointer/storages/memory_storage.py +36 -0
  5. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/storages/pickle_storage.py +18 -20
  6. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/storages/storage.py +11 -5
  7. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/test_checkpointer.py +17 -6
  8. {checkpointer-2.8.1 → checkpointer-2.9.1}/pyproject.toml +1 -1
  9. {checkpointer-2.8.1 → checkpointer-2.9.1}/uv.lock +1 -1
  10. checkpointer-2.8.1/PKG-INFO +0 -262
  11. checkpointer-2.8.1/README.md +0 -242
  12. checkpointer-2.8.1/checkpointer/storages/memory_storage.py +0 -39
  13. {checkpointer-2.8.1 → checkpointer-2.9.1}/.gitignore +0 -0
  14. {checkpointer-2.8.1 → checkpointer-2.9.1}/.python-version +0 -0
  15. {checkpointer-2.8.1 → checkpointer-2.9.1}/LICENSE +0 -0
  16. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/__init__.py +0 -0
  17. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/fn_ident.py +0 -0
  18. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/object_hash.py +0 -0
  19. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/print_checkpoint.py +0 -0
  20. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/storages/__init__.py +0 -0
  21. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/storages/bcolz_storage.py +0 -0
  22. {checkpointer-2.8.1 → checkpointer-2.9.1}/checkpointer/utils.py +0 -0
@@ -0,0 +1,215 @@
1
+ Metadata-Version: 2.4
2
+ Name: checkpointer
3
+ Version: 2.9.1
4
+ Summary: A Python library for memoizing function results with support for multiple storage backends, async runtimes, and automatic cache invalidation
5
+ Project-URL: Repository, https://github.com/Reddan/checkpointer.git
6
+ Author: Hampus Hallman
7
+ License: Copyright 2018-2025 Hampus Hallman
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
14
+ License-File: LICENSE
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+
21
+ # checkpointer · [![License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [![pypi](https://img.shields.io/pypi/v/checkpointer)](https://pypi.org/project/checkpointer/) [![pypi](https://img.shields.io/pypi/pyversions/checkpointer)](https://pypi.org/project/checkpointer/)
22
+
23
+ `checkpointer` is a Python library providing a decorator-based API for memoizing (caching) function results. It helps you skip redundant, computationally expensive operations, saving execution time and streamlining your workflows.
24
+
25
+ It works with synchronous and asynchronous functions, supports multiple storage backends, and automatically invalidates caches when function code, dependencies, or captured variables change.
26
+
27
+ ## 📦 Installation
28
+
29
+ ```bash
30
+ pip install checkpointer
31
+ ```
32
+
33
+ ## 🚀 Quick Start
34
+
35
+ Apply the `@checkpoint` decorator to any function:
36
+
37
+ ```python
38
+ from checkpointer import checkpoint
39
+
40
+ @checkpoint
41
+ def expensive_function(x: int) -> int:
42
+ print("Computing...")
43
+ return x ** 2
44
+
45
+ result = expensive_function(4) # Computes and stores the result
46
+ result = expensive_function(4) # Loads from the cache
47
+ ```
48
+
49
+ ## 🧠 How It Works
50
+
51
+ When a function decorated with `@checkpoint` is called:
52
+
53
+ 1. `checkpointer` computes a unique identifier (hash) for the function call based on its source code, its dependencies, and the arguments passed.
54
+ 2. It attempts to retrieve a cached result using this identifier.
55
+ 3. If a cached result is found, it's returned immediately.
56
+ 4. If no cached result exists or the cache has expired, the original function is executed, its result is stored, and then returned.
57
+
58
+ ### ♻️ Automatic Cache Invalidation
59
+
60
+ `checkpointer` ensures caches are invalidated automatically when the underlying computation changes. A function's hash, which determines cache validity, updates if:
61
+
62
+ * **Function Code Changes**: The source code of the decorated function itself is modified.
63
+ * **Dependencies Change**: Any user-defined function in its dependency tree (direct or indirect, even across modules or not decorated with `@checkpoint`) is modified.
64
+ * **Captured Variables Change** (with `capture=True`): Global or closure-based variables used within the function are altered.
65
+
66
+ **Example: Dependency Invalidation**
67
+
68
+ ```python
69
+ def multiply(a, b):
70
+ return a * b
71
+
72
+ @checkpoint
73
+ def helper(x):
74
+ # Depends on `multiply`
75
+ return multiply(x + 1, 2)
76
+
77
+ @checkpoint
78
+ def compute(a, b):
79
+ # Depends on `helper`
80
+ return helper(a) + helper(b)
81
+ ```
82
+
83
+ If `multiply` is modified, caches for both `helper` and `compute` will automatically be invalidated and recomputed upon their next call.
84
+
85
+ ## 💡 Usage
86
+
87
+ Once a function is decorated with `@checkpoint`, you can interact with its caching behavior using the following methods:
88
+
89
+ * **`expensive_function(...)`**:
90
+ Call the function normally. This will either compute and cache the result or load it from the cache if available.
91
+
92
+ * **`expensive_function.rerun(...)`**:
93
+ Forces the original function to execute, compute a new result, and overwrite any existing cached value for the given arguments.
94
+
95
+ * **`expensive_function.fn(...)`**:
96
+ Calls the original, undecorated function directly, bypassing the cache entirely. This is particularly useful within recursive functions to prevent caching intermediate steps.
97
+
98
+ * **`expensive_function.get(...)`**:
99
+ Attempts to retrieve the cached result for the given arguments without executing the original function. Raises `CheckpointError` if no valid cached result exists.
100
+
101
+ * **`expensive_function.exists(...)`**:
102
+ Checks if a cached result exists for the given arguments without attempting to compute or load it. Returns `True` if a valid checkpoint exists, `False` otherwise.
103
+
104
+ * **`expensive_function.delete(...)`**:
105
+ Removes the cached entry for the specified arguments.
106
+
107
+ * **`expensive_function.reinit()`**:
108
+ Recalculates the function's internal hash. This is primarily used when `capture=True` and you need to update the cache based on changes to external variables within the same Python session.
109
+
110
+ ## ⚙️ Configuration & Customization
111
+
112
+ The `@checkpoint` decorator accepts the following parameters to customize its behavior:
113
+
114
+ * **`format`** (Type: `str` or `checkpointer.Storage`, Default: `"pickle"`)
115
+ Defines the storage backend to use. Built-in options are `"pickle"` (disk-based, persistent) and `"memory"` (in-memory, non-persistent). You can also provide a custom `Storage` class.
116
+
117
+ * **`root_path`** (Type: `str` or `pathlib.Path` or `None`, Default: `~/.cache/checkpoints`)
118
+ The base directory for storing disk-based checkpoints. This parameter is only relevant when `format` is set to `"pickle"`.
119
+
120
+ * **`when`** (Type: `bool`, Default: `True`)
121
+ A boolean flag to enable or disable checkpointing for the decorated function. This is particularly useful for toggling caching based on environment variables (e.g., `when=os.environ.get("ENABLE_CACHING", "false").lower() == "true"`).
122
+
123
+ * **`capture`** (Type: `bool`, Default: `False`)
124
+ If set to `True`, `checkpointer` includes global or closure-based variables used by the function in its hash calculation. This ensures that changes to these external variables also trigger cache invalidation and recomputation.
125
+
126
+ * **`should_expire`** (Type: `Callable[[datetime.datetime], bool]`, Default: `None`)
127
+ A custom callable that receives the `datetime` timestamp of a cached result. It should return `True` if the cached result is considered expired and needs recomputation, or `False` otherwise.
128
+
129
+ * **`hash_by`** (Type: `Callable[..., Any]`, Default: `None`)
130
+ A custom callable that takes the function's arguments (`*args`, `**kwargs`) and returns a hashable object (or tuple of objects). This allows for custom argument normalization (e.g., sorting lists before hashing) or optimized hashing for complex input types, which can improve cache hit rates or speed up the hashing process.
131
+
132
+ * **`fn_hash`** (Type: `checkpointer.ObjectHash`, Default: `None`)
133
+ An optional parameter that takes an instance of `checkpointer.ObjectHash`. This allows you to override the automatically computed function hash, giving you explicit control over when the function's cache should be invalidated. You can pass any values relevant to your invalidation logic to `ObjectHash` (e.g., `ObjectHash(version_string, config_id, ...)`), as it can consistently hash most Python values.
134
+
135
+ * **`verbosity`** (Type: `int` (`0`, `1`, or `2`), Default: `1`)
136
+ Controls the level of logging output from `checkpointer`.
137
+ * `0`: No output.
138
+ * `1`: Shows when functions are computed and cached.
139
+ * `2`: Also shows when cached results are remembered (loaded from cache).
140
+
141
+ ### 🗄️ Custom Storage Backends
142
+
143
+ For integration with databases, cloud storage, or custom serialization, implement your own storage backend by inheriting from `checkpointer.Storage` and implementing its abstract methods.
144
+
145
+ Within custom storage methods, `call_id` identifies calls by arguments. Use `self.fn_id()` to get the function's unique identity (name + hash/version), crucial for organizing stored checkpoints (e.g., by function version). Access global `Checkpointer` config via `self.checkpointer`.
146
+
147
+ #### Example: Custom Storage Backend
148
+
149
+ ```python
150
+ from checkpointer import checkpoint, Storage
151
+ from datetime import datetime
152
+
153
+ class MyCustomStorage(Storage):
154
+ def exists(self, call_id):
155
+ # Example: Constructing a path based on function ID and call ID
156
+ fn_dir = self.checkpointer.root_path / self.fn_id()
157
+ return (fn_dir / call_id).exists()
158
+
159
+ def checkpoint_date(self, call_id): ...
160
+ def store(self, call_id, data): ...
161
+ def load(self, call_id): ...
162
+ def delete(self, call_id): ...
163
+
164
+ @checkpoint(format=MyCustomStorage)
165
+ def custom_cached_function(x: int):
166
+ return x ** 2
167
+ ```
168
+
169
+ ## 🧱 Layered Caching
170
+
171
+ You can apply multiple `@checkpoint` decorators to a single function to create layered caching strategies. The decorators are applied from bottom to top, meaning the decorator closest to the function definition wraps the function first. When the function is called, the outermost (topmost) layer is checked first and falls through to the inner layers on a cache miss.
172
+
173
+ This is useful for scenarios like combining a fast, ephemeral cache (e.g., in-memory) with a persistent, slower cache (e.g., disk-based).
174
+
175
+ **Example: Memory Cache over Disk Cache**
176
+
177
+ ```python
178
+ from checkpointer import checkpoint
179
+
180
+ @checkpoint(format="memory") # Layer 2: Fast, ephemeral in-memory cache
181
+ @checkpoint(format="pickle") # Layer 1: Persistent disk cache
182
+ def some_expensive_operation():
183
+ print("Performing a time-consuming operation...")
184
+ return sum(i for i in range(10**7))
185
+ ```
186
+
187
+ ## ⚡ Async Support
188
+
189
+ `checkpointer` works seamlessly with Python's `asyncio` and other async runtimes.
190
+
191
+ ```python
192
+ import asyncio
193
+ from checkpointer import checkpoint
194
+
195
+ @checkpoint
196
+ async def async_compute_sum(a: int, b: int) -> int:
197
+ print(f"Asynchronously computing {a} + {b}...")
198
+ await asyncio.sleep(1)
199
+ return a + b
200
+
201
+ async def main():
202
+ # First call computes and caches
203
+ result1 = await async_compute_sum(3, 7)
204
+ print(f"Result 1: {result1}")
205
+
206
+ # Second call loads from cache
207
+ result2 = await async_compute_sum(3, 7)
208
+ print(f"Result 2: {result2}")
209
+
210
+ # Retrieve from cache without re-running the async function
211
+ result3 = async_compute_sum.get(3, 7)
212
+ print(f"Result 3 (from cache): {result3}")
213
+
214
+ asyncio.run(main())
215
+ ```
@@ -0,0 +1,195 @@
1
+ # checkpointer · [![License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/Reddan/checkpointer/blob/master/LICENSE) [![pypi](https://img.shields.io/pypi/v/checkpointer)](https://pypi.org/project/checkpointer/) [![pypi](https://img.shields.io/pypi/pyversions/checkpointer)](https://pypi.org/project/checkpointer/)
2
+
3
+ `checkpointer` is a Python library providing a decorator-based API for memoizing (caching) function results. It helps you skip redundant, computationally expensive operations, saving execution time and streamlining your workflows.
4
+
5
+ It works with synchronous and asynchronous functions, supports multiple storage backends, and automatically invalidates caches when function code, dependencies, or captured variables change.
6
+
7
+ ## 📦 Installation
8
+
9
+ ```bash
10
+ pip install checkpointer
11
+ ```
12
+
13
+ ## 🚀 Quick Start
14
+
15
+ Apply the `@checkpoint` decorator to any function:
16
+
17
+ ```python
18
+ from checkpointer import checkpoint
19
+
20
+ @checkpoint
21
+ def expensive_function(x: int) -> int:
22
+ print("Computing...")
23
+ return x ** 2
24
+
25
+ result = expensive_function(4) # Computes and stores the result
26
+ result = expensive_function(4) # Loads from the cache
27
+ ```
28
+
29
+ ## 🧠 How It Works
30
+
31
+ When a function decorated with `@checkpoint` is called:
32
+
33
+ 1. `checkpointer` computes a unique identifier (hash) for the function call based on its source code, its dependencies, and the arguments passed.
34
+ 2. It attempts to retrieve a cached result using this identifier.
35
+ 3. If a cached result is found, it's returned immediately.
36
+ 4. If no cached result exists or the cache has expired, the original function is executed, its result is stored, and then returned.
37
+
38
+ ### ♻️ Automatic Cache Invalidation
39
+
40
+ `checkpointer` ensures caches are invalidated automatically when the underlying computation changes. A function's hash, which determines cache validity, updates if:
41
+
42
+ * **Function Code Changes**: The source code of the decorated function itself is modified.
43
+ * **Dependencies Change**: Any user-defined function in its dependency tree (direct or indirect, even across modules or not decorated with `@checkpoint`) is modified.
44
+ * **Captured Variables Change** (with `capture=True`): Global or closure-based variables used within the function are altered.
45
+
46
+ **Example: Dependency Invalidation**
47
+
48
+ ```python
49
+ def multiply(a, b):
50
+ return a * b
51
+
52
+ @checkpoint
53
+ def helper(x):
54
+ # Depends on `multiply`
55
+ return multiply(x + 1, 2)
56
+
57
+ @checkpoint
58
+ def compute(a, b):
59
+ # Depends on `helper`
60
+ return helper(a) + helper(b)
61
+ ```
62
+
63
+ If `multiply` is modified, caches for both `helper` and `compute` will automatically be invalidated and recomputed upon their next call.
64
+
65
+ ## 💡 Usage
66
+
67
+ Once a function is decorated with `@checkpoint`, you can interact with its caching behavior using the following methods:
68
+
69
+ * **`expensive_function(...)`**:
70
+ Call the function normally. This will either compute and cache the result or load it from the cache if available.
71
+
72
+ * **`expensive_function.rerun(...)`**:
73
+ Forces the original function to execute, compute a new result, and overwrite any existing cached value for the given arguments.
74
+
75
+ * **`expensive_function.fn(...)`**:
76
+ Calls the original, undecorated function directly, bypassing the cache entirely. This is particularly useful within recursive functions to prevent caching intermediate steps.
77
+
78
+ * **`expensive_function.get(...)`**:
79
+ Attempts to retrieve the cached result for the given arguments without executing the original function. Raises `CheckpointError` if no valid cached result exists.
80
+
81
+ * **`expensive_function.exists(...)`**:
82
+ Checks if a cached result exists for the given arguments without attempting to compute or load it. Returns `True` if a valid checkpoint exists, `False` otherwise.
83
+
84
+ * **`expensive_function.delete(...)`**:
85
+ Removes the cached entry for the specified arguments.
86
+
87
+ * **`expensive_function.reinit()`**:
88
+ Recalculates the function's internal hash. This is primarily used when `capture=True` and you need to update the cache based on changes to external variables within the same Python session.
89
+
90
+ ## ⚙️ Configuration & Customization
91
+
92
+ The `@checkpoint` decorator accepts the following parameters to customize its behavior:
93
+
94
+ * **`format`** (Type: `str` or `checkpointer.Storage`, Default: `"pickle"`)
95
+ Defines the storage backend to use. Built-in options are `"pickle"` (disk-based, persistent) and `"memory"` (in-memory, non-persistent). You can also provide a custom `Storage` class.
96
+
97
+ * **`root_path`** (Type: `str` or `pathlib.Path` or `None`, Default: `~/.cache/checkpoints`)
98
+ The base directory for storing disk-based checkpoints. This parameter is only relevant when `format` is set to `"pickle"`.
99
+
100
+ * **`when`** (Type: `bool`, Default: `True`)
101
+ A boolean flag to enable or disable checkpointing for the decorated function. This is particularly useful for toggling caching based on environment variables (e.g., `when=os.environ.get("ENABLE_CACHING", "false").lower() == "true"`).
102
+
103
+ * **`capture`** (Type: `bool`, Default: `False`)
104
+ If set to `True`, `checkpointer` includes global or closure-based variables used by the function in its hash calculation. This ensures that changes to these external variables also trigger cache invalidation and recomputation.
105
+
106
+ * **`should_expire`** (Type: `Callable[[datetime.datetime], bool]`, Default: `None`)
107
+ A custom callable that receives the `datetime` timestamp of a cached result. It should return `True` if the cached result is considered expired and needs recomputation, or `False` otherwise.
108
+
109
+ * **`hash_by`** (Type: `Callable[..., Any]`, Default: `None`)
110
+ A custom callable that takes the function's arguments (`*args`, `**kwargs`) and returns a hashable object (or tuple of objects). This allows for custom argument normalization (e.g., sorting lists before hashing) or optimized hashing for complex input types, which can improve cache hit rates or speed up the hashing process.
111
+
112
+ * **`fn_hash`** (Type: `checkpointer.ObjectHash`, Default: `None`)
113
+ An optional parameter that takes an instance of `checkpointer.ObjectHash`. This allows you to override the automatically computed function hash, giving you explicit control over when the function's cache should be invalidated. You can pass any values relevant to your invalidation logic to `ObjectHash` (e.g., `ObjectHash(version_string, config_id, ...)`), as it can consistently hash most Python values.
114
+
115
+ * **`verbosity`** (Type: `int` (`0`, `1`, or `2`), Default: `1`)
116
+ Controls the level of logging output from `checkpointer`.
117
+ * `0`: No output.
118
+ * `1`: Shows when functions are computed and cached.
119
+ * `2`: Also shows when cached results are remembered (loaded from cache).
120
+
121
+ ### 🗄️ Custom Storage Backends
122
+
123
+ For integration with databases, cloud storage, or custom serialization, implement your own storage backend by inheriting from `checkpointer.Storage` and implementing its abstract methods.
124
+
125
+ Within custom storage methods, `call_id` identifies calls by arguments. Use `self.fn_id()` to get the function's unique identity (name + hash/version), crucial for organizing stored checkpoints (e.g., by function version). Access global `Checkpointer` config via `self.checkpointer`.
126
+
127
+ #### Example: Custom Storage Backend
128
+
129
+ ```python
130
+ from checkpointer import checkpoint, Storage
131
+ from datetime import datetime
132
+
133
+ class MyCustomStorage(Storage):
134
+ def exists(self, call_id):
135
+ # Example: Constructing a path based on function ID and call ID
136
+ fn_dir = self.checkpointer.root_path / self.fn_id()
137
+ return (fn_dir / call_id).exists()
138
+
139
+ def checkpoint_date(self, call_id): ...
140
+ def store(self, call_id, data): ...
141
+ def load(self, call_id): ...
142
+ def delete(self, call_id): ...
143
+
144
+ @checkpoint(format=MyCustomStorage)
145
+ def custom_cached_function(x: int):
146
+ return x ** 2
147
+ ```
148
+
149
+ ## 🧱 Layered Caching
150
+
151
+ You can apply multiple `@checkpoint` decorators to a single function to create layered caching strategies. The decorators are applied from bottom to top, meaning the decorator closest to the function definition wraps the function first. When the function is called, the outermost (topmost) layer is checked first and falls through to the inner layers on a cache miss.
152
+
153
+ This is useful for scenarios like combining a fast, ephemeral cache (e.g., in-memory) with a persistent, slower cache (e.g., disk-based).
154
+
155
+ **Example: Memory Cache over Disk Cache**
156
+
157
+ ```python
158
+ from checkpointer import checkpoint
159
+
160
+ @checkpoint(format="memory") # Layer 2: Fast, ephemeral in-memory cache
161
+ @checkpoint(format="pickle") # Layer 1: Persistent disk cache
162
+ def some_expensive_operation():
163
+ print("Performing a time-consuming operation...")
164
+ return sum(i for i in range(10**7))
165
+ ```
166
+
167
+ ## ⚡ Async Support
168
+
169
+ `checkpointer` works seamlessly with Python's `asyncio` and other async runtimes.
170
+
171
+ ```python
172
+ import asyncio
173
+ from checkpointer import checkpoint
174
+
175
+ @checkpoint
176
+ async def async_compute_sum(a: int, b: int) -> int:
177
+ print(f"Asynchronously computing {a} + {b}...")
178
+ await asyncio.sleep(1)
179
+ return a + b
180
+
181
+ async def main():
182
+ # First call computes and caches
183
+ result1 = await async_compute_sum(3, 7)
184
+ print(f"Result 1: {result1}")
185
+
186
+ # Second call loads from cache
187
+ result2 = await async_compute_sum(3, 7)
188
+ print(f"Result 2: {result2}")
189
+
190
+ # Retrieve from cache without re-running the async function
191
+ result3 = async_compute_sum.get(3, 7)
192
+ print(f"Result 3 (from cache): {result3}")
193
+
194
+ asyncio.run(main())
195
+ ```
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
  import inspect
3
3
  import re
4
+ from contextlib import suppress
4
5
  from datetime import datetime
5
- from functools import update_wrapper
6
+ from functools import cached_property, update_wrapper
6
7
  from pathlib import Path
7
- from typing import Any, Awaitable, Callable, Generic, Iterable, Literal, ParamSpec, Type, TypedDict, TypeVar, Unpack, cast, overload
8
+ from typing import Awaitable, Callable, Generic, Iterable, Literal, ParamSpec, Type, TypedDict, TypeVar, Unpack, cast, overload
8
9
  from .fn_ident import get_fn_ident
9
10
  from .object_hash import ObjectHash
10
11
  from .print_checkpoint import print_checkpoint
@@ -54,84 +55,83 @@ class Checkpointer:
54
55
 
55
56
  class CheckpointFn(Generic[Fn]):
56
57
  def __init__(self, checkpointer: Checkpointer, fn: Fn):
57
- self.checkpointer = checkpointer
58
- self.fn = fn
59
-
60
- def _set_ident(self, force=False):
61
- if not hasattr(self, "fn_hash_raw") or force:
62
- self.fn_hash_raw, self.depends = get_fn_ident(unwrap_fn(self.fn), self.checkpointer.capture)
63
- return self
64
-
65
- def _lazyinit(self):
66
- params = self.checkpointer
67
- wrapped = unwrap_fn(self.fn)
58
+ wrapped = unwrap_fn(fn)
68
59
  fn_file = Path(wrapped.__code__.co_filename).name
69
60
  fn_name = re.sub(r"[^\w.]", "", wrapped.__qualname__)
61
+ Storage = STORAGE_MAP[checkpointer.format] if isinstance(checkpointer.format, str) else checkpointer.format
70
62
  update_wrapper(cast(Callable, self), wrapped)
71
- Storage = STORAGE_MAP[params.format] if isinstance(params.format, str) else params.format
72
- deep_hashes = [child._set_ident().fn_hash_raw for child in iterate_checkpoint_fns(self)]
73
- self.fn_hash = str(params.fn_hash or ObjectHash(digest_size=16).write_text(self.fn_hash_raw, *deep_hashes))
74
- self.fn_subdir = f"{fn_file}/{fn_name}/{self.fn_hash[:32]}"
63
+ self.checkpointer = checkpointer
64
+ self.fn = fn
65
+ self.fn_dir = f"{fn_file}/{fn_name}"
75
66
  self.storage = Storage(self)
76
67
  self.cleanup = self.storage.cleanup
77
68
 
78
- def __getattribute__(self, name: str) -> Any:
79
- return object.__getattribute__(self, "_getattribute")(name)
69
+ @cached_property
70
+ def ident_tuple(self) -> tuple[str, list[Callable]]:
71
+ return get_fn_ident(unwrap_fn(self.fn), self.checkpointer.capture)
72
+
73
+ @property
74
+ def fn_hash_raw(self) -> str:
75
+ return self.ident_tuple[0]
76
+
77
+ @property
78
+ def depends(self) -> list[Callable]:
79
+ return self.ident_tuple[1]
80
80
 
81
- def _getattribute(self, name: str) -> Any:
82
- setattr(self, "_getattribute", super().__getattribute__)
83
- self._lazyinit()
84
- return self._getattribute(name)
81
+ @cached_property
82
+ def fn_hash(self) -> str:
83
+ fn_hash = self.checkpointer.fn_hash
84
+ deep_hashes = [depend.fn_hash_raw for depend in self.deep_depends()]
85
+ return str(fn_hash or ObjectHash(digest_size=16).write_text(self.fn_hash_raw, *deep_hashes))[:32]
85
86
 
86
87
  def reinit(self, recursive=False) -> CheckpointFn[Fn]:
87
- pointfns = list(iterate_checkpoint_fns(self)) if recursive else [self]
88
- for pointfn in pointfns:
89
- pointfn._set_ident(True)
90
- for pointfn in pointfns:
91
- pointfn._lazyinit()
88
+ depends = list(self.deep_depends()) if recursive else [self]
89
+ for depend in depends:
90
+ with suppress(AttributeError):
91
+ del depend.ident_tuple, depend.fn_hash
92
+ for depend in depends:
93
+ depend.fn_hash
92
94
  return self
93
95
 
94
- def get_checkpoint_id(self, args: tuple, kw: dict) -> str:
96
+ def get_call_id(self, args: tuple, kw: dict) -> str:
95
97
  hash_by = self.checkpointer.hash_by
96
98
  hash_params = hash_by(*args, **kw) if hash_by else (args, kw)
97
- call_hash = ObjectHash(hash_params, digest_size=16)
98
- return f"{self.fn_subdir}/{call_hash}"
99
+ return str(ObjectHash(hash_params, digest_size=16))
99
100
 
100
- async def _resolve_awaitable(self, checkpoint_path: Path, awaitable: Awaitable):
101
+ async def _resolve_awaitable(self, checkpoint_id: str, awaitable: Awaitable):
101
102
  data = await awaitable
102
- self.storage.store(checkpoint_path, AwaitableValue(data))
103
+ self.storage.store(checkpoint_id, AwaitableValue(data))
103
104
  return data
104
105
 
105
- def _store_on_demand(self, args: tuple, kw: dict, rerun: bool):
106
+ def _call(self, args: tuple, kw: dict, rerun=False):
106
107
  params = self.checkpointer
107
- checkpoint_id = self.get_checkpoint_id(args, kw)
108
- checkpoint_path = params.root_path / checkpoint_id
108
+ if not params.when:
109
+ return self.fn(*args, **kw)
110
+
111
+ call_id = self.get_call_id(args, kw)
112
+ call_id_long = f"{self.fn_dir}/{self.fn_hash}/{call_id}"
113
+
109
114
  refresh = rerun \
110
- or not self.storage.exists(checkpoint_path) \
111
- or (params.should_expire and params.should_expire(self.storage.checkpoint_date(checkpoint_path)))
115
+ or not self.storage.exists(call_id) \
116
+ or (params.should_expire and params.should_expire(self.storage.checkpoint_date(call_id)))
112
117
 
113
118
  if refresh:
114
- print_checkpoint(params.verbosity >= 1, "MEMORIZING", checkpoint_id, "blue")
119
+ print_checkpoint(params.verbosity >= 1, "MEMORIZING", call_id_long, "blue")
115
120
  data = self.fn(*args, **kw)
116
121
  if inspect.isawaitable(data):
117
- return self._resolve_awaitable(checkpoint_path, data)
122
+ return self._resolve_awaitable(call_id, data)
118
123
  else:
119
- self.storage.store(checkpoint_path, data)
124
+ self.storage.store(call_id, data)
120
125
  return data
121
126
 
122
127
  try:
123
- data = self.storage.load(checkpoint_path)
124
- print_checkpoint(params.verbosity >= 2, "REMEMBERED", checkpoint_id, "green")
128
+ data = self.storage.load(call_id)
129
+ print_checkpoint(params.verbosity >= 2, "REMEMBERED", call_id_long, "green")
125
130
  return data
126
131
  except (EOFError, FileNotFoundError):
127
132
  pass
128
- print_checkpoint(params.verbosity >= 1, "CORRUPTED", checkpoint_id, "yellow")
129
- return self._store_on_demand(args, kw, True)
130
-
131
- def _call(self, args: tuple, kw: dict, rerun=False):
132
- if not self.checkpointer.when:
133
- return self.fn(*args, **kw)
134
- return self._store_on_demand(args, kw, rerun)
133
+ print_checkpoint(params.verbosity >= 1, "CORRUPTED", call_id_long, "yellow")
134
+ return self._call(args, kw, True)
135
135
 
136
136
  __call__: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw))
137
137
  rerun: Fn = cast(Fn, lambda self, *args, **kw: self._call(args, kw, True))
@@ -142,25 +142,29 @@ class CheckpointFn(Generic[Fn]):
142
142
  def get(self: Callable[P, R], *args: P.args, **kw: P.kwargs) -> R: ...
143
143
 
144
144
  def get(self, *args, **kw):
145
- checkpoint_path = self.checkpointer.root_path / self.get_checkpoint_id(args, kw)
145
+ call_id = self.get_call_id(args, kw)
146
146
  try:
147
- data = self.storage.load(checkpoint_path)
147
+ data = self.storage.load(call_id)
148
148
  return data.value if isinstance(data, AwaitableValue) else data
149
149
  except Exception as ex:
150
150
  raise CheckpointError("Could not load checkpoint") from ex
151
151
 
152
152
  def exists(self: Callable[P, R], *args: P.args, **kw: P.kwargs) -> bool: # type: ignore
153
153
  self = cast(CheckpointFn, self)
154
- return self.storage.exists(self.checkpointer.root_path / self.get_checkpoint_id(args, kw))
154
+ return self.storage.exists(self.get_call_id(args, kw))
155
+
156
+ def delete(self: Callable[P, R], *args: P.args, **kw: P.kwargs): # type: ignore
157
+ self = cast(CheckpointFn, self)
158
+ self.storage.delete(self.get_call_id(args, kw))
155
159
 
156
160
  def __repr__(self) -> str:
157
161
  return f"<CheckpointFn {self.fn.__name__} {self.fn_hash[:6]}>"
158
162
 
159
- def iterate_checkpoint_fns(pointfn: CheckpointFn, visited: set[CheckpointFn] = set()) -> Iterable[CheckpointFn]:
160
- visited = visited or set()
161
- if pointfn not in visited:
162
- yield pointfn
163
- visited.add(pointfn)
164
- for depend in pointfn.depends:
165
- if isinstance(depend, CheckpointFn):
166
- yield from iterate_checkpoint_fns(depend, visited)
163
+ def deep_depends(self, visited: set[CheckpointFn] = set()) -> Iterable[CheckpointFn]:
164
+ if self not in visited:
165
+ yield self
166
+ visited = visited or set()
167
+ visited.add(self)
168
+ for depend in self.depends:
169
+ if isinstance(depend, CheckpointFn):
170
+ yield from depend.deep_depends(visited)
@@ -0,0 +1,36 @@
1
+ from typing import Any
2
+ from pathlib import Path
3
+ from datetime import datetime
4
+ from .storage import Storage
5
+
6
+ item_map: dict[Path, dict[str, tuple[datetime, Any]]] = {}
7
+
8
+ class MemoryStorage(Storage):
9
+ def get_dict(self):
10
+ return item_map.setdefault(self.fn_dir(), {})
11
+
12
+ def store(self, call_id, data):
13
+ self.get_dict()[call_id] = (datetime.now(), data)
14
+
15
+ def exists(self, call_id):
16
+ return call_id in self.get_dict()
17
+
18
+ def checkpoint_date(self, call_id):
19
+ return self.get_dict()[call_id][0]
20
+
21
+ def load(self, call_id):
22
+ return self.get_dict()[call_id][1]
23
+
24
+ def delete(self, call_id):
25
+ self.get_dict().pop(call_id, None)
26
+
27
+ def cleanup(self, invalidated=True, expired=True):
28
+ curr_key = self.fn_dir()
29
+ for key, calldict in list(item_map.items()):
30
+ if key.parent == curr_key.parent:
31
+ if invalidated and key != curr_key:
32
+ del item_map[key]
33
+ elif expired and self.checkpointer.should_expire:
34
+ for call_id, (date, _) in list(calldict.items()):
35
+ if self.checkpointer.should_expire(date):
36
+ del calldict[call_id]