checkpointer-1.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checkpointer-1.2.0/.gitignore +3 -0
- checkpointer-1.2.0/LICENSE +7 -0
- checkpointer-1.2.0/PKG-INFO +16 -0
- checkpointer-1.2.0/README.md +0 -0
- checkpointer-1.2.0/checkpointer/__init__.py +10 -0
- checkpointer-1.2.0/checkpointer/checkpoint.py +68 -0
- checkpointer-1.2.0/checkpointer/function_body.py +49 -0
- checkpointer-1.2.0/checkpointer/storage.py +60 -0
- checkpointer-1.2.0/checkpointer/storages/bcolz_storage.py +92 -0
- checkpointer-1.2.0/checkpointer/storages/memory_storage.py +18 -0
- checkpointer-1.2.0/checkpointer/storages/pickle_storage.py +49 -0
- checkpointer-1.2.0/checkpointer/utils.py +17 -0
- checkpointer-1.2.0/pyproject.toml +24 -0
checkpointer-1.2.0/LICENSE
@@ -0,0 +1,7 @@
+Copyright 2024 Hampus Hallman
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
checkpointer-1.2.0/PKG-INFO
@@ -0,0 +1,16 @@
+Metadata-Version: 2.3
+Name: checkpointer
+Version: 1.2.0
+Project-URL: Repository, https://github.com/Reddan/checkpointer.git
+Author: Hampus Hallman
+License: Copyright 2024 Hampus Hallman
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+License-File: LICENSE
+Requires-Python: >=3.8
+Requires-Dist: relib
+Requires-Dist: termcolor

checkpointer-1.2.0/README.md
File without changes
checkpointer-1.2.0/checkpointer/__init__.py
@@ -0,0 +1,10 @@
+import os
+
+from .checkpoint import create_checkpointer, read_only, default_dir
+from .storage import store_on_demand, read_from_store
+from .function_body import get_function_hash
+
+storage_dir = os.environ.get('CHECKPOINTS_DIR', default_dir)
+verbosity = int(os.environ.get('CHECKPOINTS_VERBOSITY', '1'))
+
+checkpoint = create_checkpointer(root_path=storage_dir, verbosity=verbosity)
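
The module above wires the environment-driven defaults (CHECKPOINTS_DIR, CHECKPOINTS_VERBOSITY) into a ready-to-use checkpoint decorator. A minimal usage sketch under those defaults; the expensive function and its argument are illustrative, not part of the package:

from checkpointer import checkpoint

@checkpoint
def expensive(x):
  return x ** 2

expensive(4)                # first call: computed and written to the pickle store under ~/.checkpoints (or CHECKPOINTS_DIR)
expensive(4)                # later calls load the stored result ("REMEMBERED")
expensive(4, recheck=True)  # 'recheck' is popped by the wrapper and forces recomputation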
checkpointer-1.2.0/checkpointer/checkpoint.py
@@ -0,0 +1,68 @@
+import inspect
+from collections import namedtuple
+from pathlib import Path
+from functools import wraps
+import relib.hashing as hashing
+from . import storage
+from .function_body import get_function_hash
+from .utils import unwrap_func, sync_resolve_coroutine
+
+default_dir = Path.home() / '.checkpoints'
+
+def get_invoke_path(func, function_hash, args, kwargs, path):
+  if type(path) == str:
+    return path
+  elif callable(path):
+    return path(*args, **kwargs)
+  else:
+    hash = hashing.hash([function_hash, args, kwargs or 0])
+    file_name = Path(func.__code__.co_filename).name
+    name = func.__name__
+    return file_name + '/' + name + '/' + hash
+
+def create_checkpointer_from_config(config):
+  def checkpoint(opt_func=None, format=config.format, path=None, should_expire=None, when=True):
+    def receive_func(func):
+      if not (config.when and when):
+        return func
+
+      config_ = config._replace(format=format)
+      is_async = inspect.iscoroutinefunction(func)
+      unwrapped_func = unwrap_func(func)
+      function_hash = get_function_hash(unwrapped_func)
+
+      @wraps(unwrapped_func)
+      def wrapper(*args, **kwargs):
+        compute = lambda: func(*args, **kwargs)
+        recheck = kwargs.pop('recheck', False)
+        invoke_path = get_invoke_path(unwrapped_func, function_hash, args, kwargs, path)
+        coroutine = storage.store_on_demand(compute, invoke_path, config_, recheck, should_expire)
+        if is_async:
+          return coroutine
+        else:
+          return sync_resolve_coroutine(coroutine)
+
+      wrapper.checkpoint_config = config_
+
+      return wrapper
+
+    return receive_func(opt_func) if callable(opt_func) else receive_func
+
+  return checkpoint
+
+def create_checkpointer(format='pickle', root_path=default_dir, when=True, verbosity=1):
+  root_path = None if root_path is None else Path(root_path)
+  opts = locals()
+  CheckpointerConfig = namedtuple('CheckpointerConfig', sorted(opts))
+  config = CheckpointerConfig(**opts)
+  return create_checkpointer_from_config(config)
+
+def read_only(wrapper_func, config, format='pickle', path=None):
+  func = unwrap_func(wrapper_func)
+  function_hash = get_function_hash(func)
+
+  def wrapper(*args, **kwargs):
+    invoke_path = get_invoke_path(func, function_hash, args, kwargs, path)
+    return storage.read_from_store(invoke_path, config, storage=format)
+
+  return wrapper
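
The checkpoint factory above also accepts per-function options (format, path, should_expire, when). A hedged sketch of how they compose; the root path, storage key, and fetch_users function are illustrative, not part of the package:

from datetime import datetime, timedelta
from checkpointer import create_checkpointer

checkpoint = create_checkpointer(root_path='/tmp/my_checkpoints', verbosity=0)

# Keep results in memory under an explicit key and refresh entries older than one day.
@checkpoint(format='memory', path='users/all',
            should_expire=lambda created: created < datetime.now() - timedelta(days=1))
def fetch_users():
  return ['ada', 'grace']

Passing when=False (either here or to create_checkpointer) makes the decorator return the function untouched, which is how checkpointing can be switched off globally.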
checkpointer-1.2.0/checkpointer/function_body.py
@@ -0,0 +1,49 @@
+import inspect
+from types import FunctionType, CodeType
+import relib.hashing as hashing
+from pathlib import Path
+from .utils import unwrap_func
+
+cwd = Path.cwd()
+
+def get_fn_path(fn):
+  return Path(inspect.getfile(fn)).absolute()
+
+def get_function_body(fn):
+  # TODO: Strip comments
+  lines = inspect.getsourcelines(fn)[0]
+  lines = [line.rstrip() for line in lines]
+  lines = [line for line in lines if line]
+  return '\n'.join(lines)
+
+def get_code_children(__code__):
+  consts = [const for const in __code__.co_consts if isinstance(const, CodeType)]
+  children = [child for const in consts for child in get_code_children(const)]
+  return list(__code__.co_names) + children
+
+def is_user_fn(candidate_fn, cleared_fns):
+  return isinstance(candidate_fn, FunctionType) \
+    and candidate_fn not in cleared_fns \
+    and cwd in get_fn_path(candidate_fn).parents
+
+def append_fn_children(fn, cleared_fns):
+  code_children = get_code_children(fn.__code__)
+  fn_children = [unwrap_func(fn.__globals__.get(co_name, None)) for co_name in code_children]
+  fn_children = [child for child in fn_children if is_user_fn(child, cleared_fns)]
+
+  for fn in fn_children:
+    cleared_fns.add(fn)
+
+  for child_fn in fn_children:
+    append_fn_children(child_fn, cleared_fns)
+
+def get_fn_children(fn):
+  cleared_fns = set()
+  append_fn_children(fn, cleared_fns)
+  return sorted(cleared_fns, key=lambda fn: fn.__name__)
+
+def get_function_hash(fn):
+  fns = [fn] + get_fn_children(fn)
+  fn_bodies = list(map(get_function_body, fns))
+  fn_bodies_hash = hashing.hash(fn_bodies)
+  return fn_bodies_hash
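
get_function_hash above hashes the function's source together with every user-defined function it references (discovered recursively through co_names), so editing a helper invalidates the checkpoint. An illustrative sketch, assuming both functions live in a module under the current working directory (my_module.py and its functions are hypothetical):

# my_module.py (hypothetical)
from checkpointer import get_function_hash

def helper(x):
  return x + 1

def main(x):
  return helper(x) * 2

print(get_function_hash(main))
# Editing helper's body changes this hash, so a @checkpoint-wrapped main() would be
# recomputed instead of read back from the previously stored result.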
checkpointer-1.2.0/checkpointer/storage.py
@@ -0,0 +1,60 @@
+import inspect
+from termcolor import colored
+from .storages import memory_storage, pickle_storage, bcolz_storage
+
+storages = {
+  'memory': memory_storage,
+  'pickle': pickle_storage,
+  'bcolz': bcolz_storage,
+}
+
+initialized_storages = set()
+
+def create_logger(should_log):
+  def log(color, title, text):
+    if should_log:
+      title_log = colored(f' {title} ', 'grey', 'on_' + color)
+      rest_log = colored(text, color)
+      print(title_log + ' ' + rest_log)
+  return log
+
+def get_storage(storage):
+  if type(storage) == str:
+    storage = storages[storage]
+  if storage not in initialized_storages:
+    if hasattr(storage, 'initialize'):
+      storage.initialize()
+    initialized_storages.add(storage)
+  return storage
+
+async def store_on_demand(get_data, name, config, force=False, should_expire=None):
+  storage = get_storage(config.format)
+  should_log = storage != memory_storage and config.verbosity != 0
+  log = create_logger(should_log)
+  refresh = force \
+    or storage.get_is_expired(config, name) \
+    or (should_expire and storage.should_expire(config, name, should_expire))
+
+  if refresh:
+    log('blue', 'MEMORIZING', name)
+    data = get_data()
+    if inspect.iscoroutine(data):
+      data = await data
+    return storage.store_data(config, name, data)
+  else:
+    try:
+      data = storage.load_data(config, name)
+      log('green', 'REMEMBERED', name)
+      return data
+    except (EOFError, FileNotFoundError):
+      log('yellow', 'CORRUPTED', name)
+      storage.delete_data(config, name)
+      result = await store_on_demand(get_data, name, config, force, should_expire)
+      return result
+
+def read_from_store(name, config, storage='pickle'):
+  storage = get_storage(storage)
+  try:
+    return storage.load_data(config, name)
+  except:
+    return None
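
get_storage above accepts either a registered name ('memory', 'pickle', 'bcolz') or any object exposing the same functions, so a custom backend can be passed directly as format. A rough sketch under that assumption; DictStorage is hypothetical and not shipped with the package:

from checkpointer import checkpoint

class DictStorage:
  # Minimal in-process backend implementing the calls store_on_demand relies on.
  cache = {}

  @staticmethod
  def get_is_expired(config, name):
    return name not in DictStorage.cache

  @staticmethod
  def store_data(config, name, data):
    DictStorage.cache[name] = data
    return data

  @staticmethod
  def load_data(config, name):
    return DictStorage.cache[name]

  @staticmethod
  def delete_data(config, name):
    DictStorage.cache.pop(name, None)

@checkpoint(format=DictStorage)
def compute():
  return 42

A should_expire(config, name, expire_fn) function would also be needed if the decorator's should_expire option is used with such a backend.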
checkpointer-1.2.0/checkpointer/storages/bcolz_storage.py
@@ -0,0 +1,92 @@
+import shutil
+from pathlib import Path
+from datetime import datetime
+
+def get_data_type_str(x):
+  if isinstance(x, tuple):
+    return 'tuple'
+  elif isinstance(x, dict):
+    return 'dict'
+  elif isinstance(x, list):
+    return 'list'
+  elif isinstance(x, str) or not hasattr(x, '__len__'):
+    return 'other'
+  else:
+    return 'ndarray'
+
+def get_paths(root_path, invoke_path):
+  full_path = Path(invoke_path) if root_path is None else root_path / invoke_path
+  meta_full_path = full_path.with_name(full_path.name + '_meta')
+  return full_path, meta_full_path
+
+def get_collection_timestamp(config, path):
+  import bcolz
+  _, meta_full_path = get_paths(config.root_path, path)
+  meta_data = bcolz.open(meta_full_path)[:][0]
+  return meta_data['created']
+
+def get_is_expired(config, path):
+  try:
+    get_collection_timestamp(config, path)
+    return False
+  except (FileNotFoundError, EOFError):
+    return True
+
+def should_expire(config, path, expire_fn):
+  return expire_fn(get_collection_timestamp(config, path))
+
+def insert_data(path, data):
+  import bcolz
+  c = bcolz.carray(data, rootdir=path, mode='w')
+  c.flush()
+
+def store_data(config, path, data, expire_in=None):
+  full_path, meta_full_path = get_paths(config.root_path, path)
+  full_path.parent.mkdir(parents=True, exist_ok=True)
+  created = datetime.now()
+  data_type_str = get_data_type_str(data)
+  if data_type_str == 'tuple':
+    fields = list(range(len(data)))
+  elif data_type_str == 'dict':
+    fields = sorted(data.keys())
+  else:
+    fields = []
+  meta_data = {'created': created, 'data_type_str': data_type_str, 'fields': fields}
+  insert_data(meta_full_path, meta_data)
+  if data_type_str in ['tuple', 'dict']:
+    for i in range(len(fields)):
+      sub_path = f"{path} ({i})"
+      store_data(config, sub_path, data[fields[i]])
+  else:
+    insert_data(full_path, data)
+  return data
+
+def load_data(config, path):
+  import bcolz
+  full_path, meta_full_path = get_paths(config.root_path, path)
+  meta_data = bcolz.open(meta_full_path)[:][0]
+  data_type_str = meta_data['data_type_str']
+  if data_type_str in ['tuple', 'dict']:
+    fields = meta_data['fields']
+    partitions = range(len(fields))
+    data = [load_data(config, f"{path} ({i})") for i in partitions]
+    if data_type_str == 'tuple':
+      return tuple(data)
+    else:
+      return dict(zip(fields, data))
+  else:
+    data = bcolz.open(full_path)
+    if data_type_str == 'list':
+      return list(data)
+    elif data_type_str == 'other':
+      return data[0]
+    else:
+      return data[:]
+
+def delete_data(config, path):
+  full_path, meta_full_path = get_paths(config.root_path, path)
+  try:
+    shutil.rmtree(meta_full_path)
+    shutil.rmtree(full_path)
+  except FileNotFoundError:
+    pass
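
For tuple and dict results, store_data above recurses: it writes one bcolz rootdir per field plus a meta rootdir, and load_data reassembles the container from those partitions. For example, a dict stored under the key users/all would produce directories roughly like (names illustrative):

users/all_meta    # carray holding {'created': ..., 'data_type_str': 'dict', 'fields': [...]}
users/all (0)     # first field's value (with its own users/all (0)_meta alongside)
users/all (1)     # second field's value (with its own users/all (1)_meta alongside)

Note that bcolz is imported lazily and is not listed among the package dependencies.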
checkpointer-1.2.0/checkpointer/storages/memory_storage.py
@@ -0,0 +1,18 @@
+from datetime import datetime
+
+store = {}
+date_stored = {}
+
+def get_is_expired(config, path):
+  return path not in store
+
+def should_expire(config, path, expire_fn):
+  return expire_fn(date_stored[path])
+
+def store_data(config, path, data):
+  store[path] = data
+  date_stored[path] = datetime.now()
+  return data
+
+def load_data(config, path):
+  return store[path]
checkpointer-1.2.0/checkpointer/storages/pickle_storage.py
@@ -0,0 +1,49 @@
+import pickle
+from pathlib import Path
+from datetime import datetime
+
+def get_paths(root_path, invoke_path):
+  p = Path(invoke_path) if root_path is None else root_path / invoke_path
+  meta_full_path = p.with_name(p.name + '_meta.pkl')
+  pkl_full_path = p.with_name(p.name + '.pkl')
+  return meta_full_path, pkl_full_path
+
+def get_collection_timestamp(config, path):
+  meta_full_path, pkl_full_path = get_paths(config.root_path, path)
+  with meta_full_path.open('rb') as file:
+    meta_data = pickle.load(file)
+    return meta_data['created']
+
+def get_is_expired(config, path):
+  try:
+    get_collection_timestamp(config, path)
+    return False
+  except (FileNotFoundError, EOFError):
+    return True
+
+def should_expire(config, path, expire_fn):
+  return expire_fn(get_collection_timestamp(config, path))
+
+def store_data(config, path, data):
+  created = datetime.now()
+  meta_data = {'created': created}
+  meta_full_path, pkl_full_path = get_paths(config.root_path, path)
+  pkl_full_path.parent.mkdir(parents=True, exist_ok=True)
+  with pkl_full_path.open('wb') as file:
+    pickle.dump(data, file, -1)
+  with meta_full_path.open('wb') as file:
+    pickle.dump(meta_data, file, -1)
+  return data
+
+def load_data(config, path):
+  _, full_path = get_paths(config.root_path, path)
+  with full_path.open('rb') as file:
+    return pickle.load(file)
+
+def delete_data(config, path):
+  meta_full_path, pkl_full_path = get_paths(config.root_path, path)
+  try:
+    meta_full_path.unlink()
+    pkl_full_path.unlink()
+  except FileNotFoundError:
+    pass
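
With the default pickle format, get_invoke_path (checkpoint.py) and get_paths above combine into a layout like the following under the checkpoint root; the module name, function name, and hash are placeholders:

~/.checkpoints/my_module.py/expensive/<hash>.pkl       # pickled return value
~/.checkpoints/my_module.py/expensive/<hash>_meta.pkl  # pickled {'created': <timestamp>}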
checkpointer-1.2.0/checkpointer/utils.py
@@ -0,0 +1,17 @@
+import types
+
+def unwrap_func(func):
+  while hasattr(func, '__wrapped__'):
+    func = func.__wrapped__
+  return func
+
+@types.coroutine
+def coroutine_as_generator(coroutine):
+  val = yield from coroutine
+  return val
+
+def sync_resolve_coroutine(coroutine):
+  try:
+    next(coroutine_as_generator(coroutine))
+  except StopIteration as ex:
+    return ex.value
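
sync_resolve_coroutine above drives a coroutine to completion without an event loop; checkpoint.py uses it so synchronous functions can share the async store_on_demand path. A small sketch (the add coroutine is illustrative; this works because the coroutine never awaits a pending awaitable):

from checkpointer.utils import sync_resolve_coroutine

async def add(a, b):
  return a + b

assert sync_resolve_coroutine(add(1, 2)) == 3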
checkpointer-1.2.0/pyproject.toml
@@ -0,0 +1,24 @@
+[project]
+name = "checkpointer"
+version = "1.2.0"
+requires-python = ">=3.8"
+dependencies = [
+  "relib",
+  "termcolor",
+]
+authors = [
+  {name = "Hampus Hallman"}
+]
+# description = ""
+readme = "README.md"
+license = {file = "LICENSE"}
+
+[project.urls]
+Repository = "https://github.com/Reddan/checkpointer.git"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["checkpointer", "checkpointer.storages"]