checkpointer 2.14.6__tar.gz → 2.14.8__tar.gz

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {checkpointer-2.14.6 → checkpointer-2.14.8}/PKG-INFO +2 -1
  2. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/checkpoint.py +25 -18
  3. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/fn_ident.py +3 -2
  4. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/object_hash.py +5 -7
  5. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/pickle_storage.py +10 -10
  6. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/storage.py +3 -3
  7. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/types.py +1 -1
  8. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/utils.py +8 -0
  9. {checkpointer-2.14.6 → checkpointer-2.14.8}/pyproject.toml +5 -4
  10. {checkpointer-2.14.6 → checkpointer-2.14.8}/uv.lock +272 -237
  11. {checkpointer-2.14.6 → checkpointer-2.14.8}/.gitignore +0 -0
  12. {checkpointer-2.14.6 → checkpointer-2.14.8}/.python-version +0 -0
  13. {checkpointer-2.14.6 → checkpointer-2.14.8}/ATTRIBUTION.md +0 -0
  14. {checkpointer-2.14.6 → checkpointer-2.14.8}/LICENSE +0 -0
  15. {checkpointer-2.14.6 → checkpointer-2.14.8}/README.md +0 -0
  16. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/__init__.py +0 -0
  17. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/fn_string.py +0 -0
  18. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/import_mappings.py +0 -0
  19. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/print_checkpoint.py +0 -0
  20. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/__init__.py +0 -0
  21. {checkpointer-2.14.6 → checkpointer-2.14.8}/checkpointer/storages/memory_storage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: checkpointer
3
- Version: 2.14.6
3
+ Version: 2.14.8
4
4
  Summary: checkpointer adds code-aware caching to Python functions, maintaining correctness and speeding up execution as your code changes.
5
5
  Project-URL: Repository, https://github.com/Reddan/checkpointer.git
6
6
  Author: Hampus Hallman
@@ -11,6 +11,7 @@ Keywords: async,cache,caching,data analysis,data processing,fast,hashing,invalid
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
13
  Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
14
15
  Requires-Python: >=3.11
15
16
  Description-Content-Type: text/markdown
16
17
 
@@ -1,4 +1,5 @@
1
1
  from __future__ import annotations
2
+ import asyncio
2
3
  import re
3
4
  from datetime import datetime, timedelta
4
5
  from functools import cached_property, update_wrapper
@@ -13,7 +14,7 @@ from .object_hash import ObjectHash
13
14
  from .print_checkpoint import print_checkpoint
14
15
  from .storages import STORAGE_MAP, Storage, StorageType
15
16
  from .types import AwaitableValue, C, Coro, Fn, P, R, T, hash_by_from_annotation
16
- from .utils import flatten, to_coroutine
17
+ from .utils import flatten, is_asyncio, to_coroutine
17
18
 
18
19
  DEFAULT_DIR = Path.home() / ".cache/checkpoints"
19
20
 
@@ -161,7 +162,7 @@ class CachedFunction(Generic[Fn]):
161
162
  def cleanup(self):
162
163
  return self.storage.cleanup
163
164
 
164
- def reinit(self, recursive=True) -> CachedFunction[Fn]:
165
+ def reinit(self, recursive=True) -> Self:
165
166
  depend_idents = list(self.ident.deep_idents()) if recursive else [self.ident]
166
167
  for ident in depend_idents: ident.reset()
167
168
  for ident in depend_idents: ident.fn_hash
@@ -191,28 +192,32 @@ class CachedFunction(Generic[Fn]):
191
192
  return self._get_call_hash(args, kw)
192
193
 
193
194
  async def _store_coroutine(self, call_hash: str, coroutine: Coroutine):
194
- return self.storage.store(call_hash, AwaitableValue(await coroutine)).value
195
+ if is_asyncio():
196
+ data = await asyncio.to_thread(self.storage.store, call_hash, AwaitableValue(await coroutine))
197
+ return data.value
198
+ else:
199
+ return self.storage.store(call_hash, AwaitableValue(await coroutine)).value
195
200
 
196
- def _call(self: CachedFunction[Callable[P, R]], args: tuple, kw: dict, rerun=False) -> R:
201
+ def is_expired(self, call_hash: str) -> bool:
202
+ return not self.storage.exists(call_hash) or self.storage.expired(call_hash)
203
+
204
+ def _call(self: CachedFunction[Callable[P, R]], args: tuple, kw: dict, rerun=False, cached=False) -> R:
197
205
  full_args = self.bound + args
198
206
  params = self.ident.checkpointer
199
- storage = self.storage
200
- if not params.when:
207
+ if not params.when and not cached:
201
208
  return self.fn(*full_args, **kw)
202
-
203
209
  call_hash = self._get_call_hash(args, kw)
204
- call_id = f"{storage.fn_id()}/{call_hash}"
205
- refresh = rerun or not storage.exists(call_hash) or storage.expired(call_hash)
210
+ call_id = f"{self.storage.fn_id()}/{call_hash}"
206
211
 
207
- if refresh:
212
+ if rerun or self.is_expired(call_hash):
208
213
  print_checkpoint(params.verbosity >= 1, "MEMORIZING", call_id, "blue")
209
214
  data = self.fn(*full_args, **kw)
210
215
  if iscoroutine(data):
211
216
  return self._store_coroutine(call_hash, data)
212
- return storage.store(call_hash, data)
217
+ return self.storage.store(call_hash, data)
213
218
 
214
219
  try:
215
- data = storage.load(call_hash)
220
+ data = self.storage.load(call_hash)
216
221
  print_checkpoint(params.verbosity >= 2, "REMEMBERED", call_id, "green")
217
222
  if isinstance(data, AwaitableValue):
218
223
  return to_coroutine(data.value) # type: ignore
@@ -220,18 +225,21 @@ class CachedFunction(Generic[Fn]):
220
225
  except (EOFError, FileNotFoundError):
221
226
  pass
222
227
  print_checkpoint(params.verbosity >= 1, "CORRUPTED", call_id, "yellow")
223
- return self._call(args, kw, True)
228
+ return self._call(args, kw, True, cached)
224
229
 
225
230
  def __call__(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
226
231
  return self._call(args, kw)
227
232
 
233
+ def cached(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
234
+ return self._call(args, kw, False, True)
235
+
228
236
  def rerun(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> R:
229
- return self._call(args, kw, True)
237
+ return self._call(args, kw, True, True)
230
238
 
231
239
  def exists(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> bool:
232
240
  return self.storage.exists(self._get_call_hash(args, kw))
233
241
 
234
- def delete(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs):
242
+ def delete(self: CachedFunction[Callable[P, R]], *args: P.args, **kw: P.kwargs) -> None:
235
243
  self.storage.delete(self._get_call_hash(args, kw))
236
244
 
237
245
  @overload
@@ -267,6 +275,5 @@ class CachedFunction(Generic[Fn]):
267
275
  self.set(AwaitableValue(value), *args, **kw)
268
276
 
269
277
  def __repr__(self) -> str:
270
- initialized = "fn_hash" in self.ident.__dict__
271
- fn_hash = self.ident.fn_hash[:6] if initialized else "- uninitialized"
272
- return f"<CachedFunction {self.fn.__name__} {fn_hash}>"
278
+ ident = self.ident.__dict__.get("fn_hash", "")[:6] or "- uninitialized"
279
+ return f"<CachedFunction {self.fn.__name__} {ident}>"
@@ -69,9 +69,10 @@ def extract_scope_values(code: CodeType, scope_vars: dict) -> Iterable[tuple[Att
69
69
  scope_vars = {**scope_vars, **{k: {**scope_vars[k], **v} for k, v in classvars.items()}}
70
70
  instructs = seekable(dis.get_instructions(code))
71
71
  for instruct in instructs:
72
- if instruct.opname in scope_vars:
72
+ opname = instruct.opname.replace("LOAD_FAST_BORROW", "LOAD_FAST")
73
+ if opname in scope_vars:
73
74
  attrs = takewhile((x.opname in ("LOAD_ATTR", "LOAD_METHOD"), x.argval) for x in instructs)
74
- attr_path = AttrPath((instruct.opname, instruct.argval, *attrs))
75
+ attr_path = AttrPath((opname, instruct.argval, *attrs))
75
76
  parent_path = attr_path[:-1]
76
77
  instructs.step(-1)
77
78
  obj = get_at(scope_vars, *attr_path)
@@ -1,17 +1,15 @@
1
1
  import ctypes
2
2
  import hashlib
3
- import inspect
4
- import io
5
3
  import re
6
4
  import sys
7
- import tokenize
8
5
  import sysconfig
6
+ import tokenize
9
7
  from collections import OrderedDict
10
8
  from collections.abc import Iterable
11
9
  from contextlib import nullcontext, suppress
12
10
  from decimal import Decimal
13
- from io import StringIO
14
- from inspect import getfile
11
+ from inspect import getfile, getsource
12
+ from io import BufferedRandom, BufferedReader, BufferedWriter, FileIO, StringIO, TextIOWrapper
15
13
  from itertools import chain
16
14
  from pathlib import Path
17
15
  from pickle import HIGHEST_PROTOCOL as PICKLE_PROTOCOL
@@ -146,7 +144,7 @@ class ObjectHash:
146
144
  case GeneratorType():
147
145
  self.header("generator", obj.__qualname__)._update_iterator(obj)
148
146
 
149
- case io.TextIOWrapper() | io.FileIO() | io.BufferedRandom() | io.BufferedWriter() | io.BufferedReader():
147
+ case TextIOWrapper() | FileIO() | BufferedRandom() | BufferedWriter() | BufferedReader():
150
148
  self.header("file", encode_type_of(obj)).update(obj.name, obj.mode, obj.tell())
151
149
 
152
150
  case type():
@@ -228,7 +226,7 @@ class ObjectHash:
228
226
 
229
227
  def get_fn_body(fn: Callable) -> str:
230
228
  try:
231
- source = inspect.getsource(fn)
229
+ source = getsource(fn)
232
230
  except OSError:
233
231
  return ""
234
232
  tokens = tokenize.generate_tokens(StringIO(source).readline)
@@ -8,7 +8,7 @@ from .storage import Storage
8
8
 
9
9
  try:
10
10
  import polars as pl
11
- except:
11
+ except Exception:
12
12
  pl = None
13
13
 
14
14
  def filedate(path: Path) -> datetime:
@@ -51,18 +51,18 @@ class PickleStorage(Storage):
51
51
  version_path = self.fn_dir()
52
52
  fn_path = version_path.parent
53
53
  if invalidated and fn_path.exists():
54
- old_dirs = [path for path in fn_path.iterdir() if path.is_dir() and path != version_path]
55
- for path in old_dirs:
56
- for pkl_path in path.glob("**/*.pkl"):
57
- pkl_path.unlink(missing_ok=True)
58
- if old_dirs:
59
- print(f"Removed {len(old_dirs)} invalidated directories for {self.cached_fn.__qualname__}")
54
+ invalidated_dirs = [path for path in fn_path.iterdir() if path.is_dir() and path != version_path]
55
+ pkls = [pkl for path in invalidated_dirs for pkl in path.glob("**/*.pkl")]
56
+ for pkl in pkls:
57
+ pkl.unlink(missing_ok=True)
58
+ if pkls:
59
+ print(f"Removed {len(pkls)} checkpoints from {len(invalidated_dirs)} invalidated directories for {self.cached_fn.__qualname__}")
60
60
  if expired and self.checkpointer.expiry:
61
61
  count = 0
62
- for pkl_path in fn_path.glob("**/*.pkl"):
63
- if self.expired_dt(filedate(pkl_path)):
62
+ for pkl in fn_path.glob("**/*.pkl"):
63
+ if self.expired_dt(filedate(pkl)):
64
64
  count += 1
65
- pkl_path.unlink(missing_ok=True)
65
+ pkl.unlink(missing_ok=True)
66
66
  if count:
67
67
  print(f"Removed {count} expired checkpoints for {self.cached_fn.__qualname__}")
68
68
  clear_directory(fn_path)
@@ -1,14 +1,14 @@
1
1
  from __future__ import annotations
2
2
  from datetime import datetime, timedelta
3
3
  from pathlib import Path
4
- from typing import TYPE_CHECKING, Any
4
+ from typing import TYPE_CHECKING, Any, Protocol
5
5
 
6
6
  if TYPE_CHECKING:
7
7
  from ..checkpoint import CachedFunction, Checkpointer
8
8
 
9
- class Storage:
9
+ class Storage(Protocol):
10
10
  checkpointer: Checkpointer
11
- ident: CachedFunction
11
+ cached_fn: CachedFunction
12
12
 
13
13
  def __init__(self, cached_fn: CachedFunction):
14
14
  self.checkpointer = cached_fn.ident.checkpointer
@@ -45,5 +45,5 @@ class AwaitableValue(Generic[T]):
45
45
  self.value = value
46
46
 
47
47
  def __await__(self):
48
- yield
48
+ yield from ()
49
49
  return self.value
@@ -1,4 +1,5 @@
1
1
  from __future__ import annotations
2
+ import asyncio
2
3
  import inspect
3
4
  from contextlib import contextmanager, suppress
4
5
  from itertools import chain, islice
@@ -10,6 +11,13 @@ from .types import T
10
11
  cwd = Path.cwd().resolve()
11
12
  flatten = chain.from_iterable
12
13
 
14
+ def is_asyncio() -> bool:
15
+ try:
16
+ loop = asyncio.get_running_loop()
17
+ return isinstance(loop, asyncio.AbstractEventLoop)
18
+ except Exception:
19
+ return False
20
+
13
21
  async def to_coroutine(value: T) -> T:
14
22
  return value
15
23
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "checkpointer"
3
- version = "2.14.6"
3
+ version = "2.14.8"
4
4
  requires-python = ">=3.11"
5
5
  dependencies = []
6
6
  authors = [
@@ -14,6 +14,7 @@ classifiers = [
14
14
  "Programming Language :: Python :: 3.11",
15
15
  "Programming Language :: Python :: 3.12",
16
16
  "Programming Language :: Python :: 3.13",
17
+ "Programming Language :: Python :: 3.14",
17
18
  ]
18
19
  keywords = [
19
20
  "data processing",
@@ -39,10 +40,10 @@ dev = [
39
40
  "omg>=1.3.9",
40
41
  "poethepoet>=0.30.0",
41
42
  "polars>=1.31.0",
42
- "pytest>=8.3.5",
43
- "pytest-asyncio>=0.26.0",
43
+ "pytest>=9.0.0",
44
+ "pytest-asyncio>=1.3.0",
44
45
  "rich>=14.0.0",
45
- "torch>=2.6.0",
46
+ "torch>=2.9.1",
46
47
  ]
47
48
 
48
49
  [tool.poe.tasks]