checkpointer 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ __pycache__/
2
+ /dist/
3
+ .DS_Store
@@ -0,0 +1,7 @@
1
+ Copyright 2024 Hampus Hallman
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.3
2
+ Name: checkpointer
3
+ Version: 1.2.0
4
+ Project-URL: Repository, https://github.com/Reddan/checkpointer.git
5
+ Author: Hampus Hallman
6
+ License: Copyright 2024 Hampus Hallman
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
13
+ License-File: LICENSE
14
+ Requires-Python: >=3.8
15
+ Requires-Dist: relib
16
+ Requires-Dist: termcolor
File without changes
@@ -0,0 +1,10 @@
1
import os

from .checkpoint import create_checkpointer, read_only, default_dir
from .storage import store_on_demand, read_from_store
from .function_body import get_function_hash

# Environment overrides: CHECKPOINTS_DIR relocates the checkpoint root and
# CHECKPOINTS_VERBOSITY ('0' silences logging) tunes output.
storage_dir = os.environ.get('CHECKPOINTS_DIR', default_dir)
verbosity = int(os.environ.get('CHECKPOINTS_VERBOSITY', '1'))

# Ready-to-use default decorator instance built from the environment settings.
checkpoint = create_checkpointer(root_path=storage_dir, verbosity=verbosity)
@@ -0,0 +1,68 @@
1
+ import inspect
2
+ from collections import namedtuple
3
+ from pathlib import Path
4
+ from functools import wraps
5
+ import relib.hashing as hashing
6
+ from . import storage
7
+ from .function_body import get_function_hash
8
+ from .utils import unwrap_func, sync_resolve_coroutine
9
+
10
+ default_dir = Path.home() / '.checkpoints'
11
+
12
def get_invoke_path(func, function_hash, args, kwargs, path):
  """Resolve the storage path for one invocation of `func`.

  `path` may be a fixed string (used verbatim), a callable deriving the path
  from the call arguments, or None, in which case a hash-based path of the
  form "<file>/<function>/<hash>" is generated.
  """
  # Fixes: isinstance instead of `type(path) == str` (handles str subclasses,
  # idiomatic), and the local previously named `hash` shadowed the builtin.
  if isinstance(path, str):
    return path
  elif callable(path):
    return path(*args, **kwargs)
  else:
    # `kwargs or 0` keeps the hash stable whether kwargs is {} or falsy.
    invoke_hash = hashing.hash([function_hash, args, kwargs or 0])
    file_name = Path(func.__code__.co_filename).name
    return file_name + '/' + func.__name__ + '/' + invoke_hash
22
+
23
def create_checkpointer_from_config(config):
  """Build the `checkpoint` decorator factory bound to a base `config`.

  `config` is a CheckpointerConfig namedtuple carrying at least `format`,
  `root_path`, `when` and `verbosity` (see create_checkpointer).
  """
  def checkpoint(opt_func=None, format=config.format, path=None, should_expire=None, when=True):
    # Usable both bare (@checkpoint) and parameterized (@checkpoint(...)).
    def receive_func(func):
      # Checkpointing can be switched off globally (config.when) or per use (when).
      if not (config.when and when):
        return func

      config_ = config._replace(format=format)
      is_async = inspect.iscoroutinefunction(func)
      unwrapped_func = unwrap_func(func)
      # Hash of the function's source (plus its user-defined dependencies);
      # part of the cache key so edits invalidate old checkpoints.
      function_hash = get_function_hash(unwrapped_func)

      @wraps(unwrapped_func)
      def wrapper(*args, **kwargs):
        compute = lambda: func(*args, **kwargs)
        # 'recheck' is popped BEFORE the invoke path is derived, so forcing a
        # recompute does not alter the cache key (and is not passed to func,
        # since `compute` closes over the now-mutated kwargs).
        recheck = kwargs.pop('recheck', False)
        invoke_path = get_invoke_path(unwrapped_func, function_hash, args, kwargs, path)
        coroutine = storage.store_on_demand(compute, invoke_path, config_, recheck, should_expire)
        if is_async:
          return coroutine
        else:
          # Plain functions drive the storage coroutine to completion here.
          return sync_resolve_coroutine(coroutine)

      # Expose the effective config for introspection (used by read_only callers).
      wrapper.checkpoint_config = config_

      return wrapper

    return receive_func(opt_func) if callable(opt_func) else receive_func

  return checkpoint
52
+
53
def create_checkpointer(format='pickle', root_path=default_dir, when=True, verbosity=1):
  """Create a `checkpoint` decorator with the given defaults baked in."""
  # Build the option mapping explicitly (the original captured locals()).
  opts = {
    'format': format,
    'root_path': None if root_path is None else Path(root_path),
    'when': when,
    'verbosity': verbosity,
  }
  CheckpointerConfig = namedtuple('CheckpointerConfig', sorted(opts))
  return create_checkpointer_from_config(CheckpointerConfig(**opts))
59
+
60
def read_only(wrapper_func, config, format='pickle', path=None):
  """Build a reader that loads previously stored results without computing."""
  target = unwrap_func(wrapper_func)
  target_hash = get_function_hash(target)

  def reader(*args, **kwargs):
    invoke_path = get_invoke_path(target, target_hash, args, kwargs, path)
    return storage.read_from_store(invoke_path, config, storage=format)

  return reader
@@ -0,0 +1,49 @@
1
+ import inspect
2
+ from types import FunctionType, CodeType
3
+ import relib.hashing as hashing
4
+ from pathlib import Path
5
+ from .utils import unwrap_func
6
+
7
# Working directory at import time; used to decide which functions are "user" code.
cwd = Path.cwd()

def get_fn_path(fn):
  """Absolute path of the source file that defines `fn`."""
  source_file = inspect.getfile(fn)
  return Path(source_file).absolute()
11
+
12
def get_function_body(fn):
  """Normalized source of `fn`: trailing whitespace and blank lines removed."""
  # TODO: Strip comments
  raw_lines = inspect.getsourcelines(fn)[0]
  stripped = (line.rstrip() for line in raw_lines)
  return '\n'.join(line for line in stripped if line)
18
+
19
def get_code_children(__code__):
  """Names referenced by a code object, including those of nested code objects."""
  nested_codes = (const for const in __code__.co_consts if isinstance(const, CodeType))
  descendants = []
  for child_code in nested_codes:
    descendants.extend(get_code_children(child_code))
  return list(__code__.co_names) + descendants
23
+
24
def is_user_fn(candidate_fn, cleared_fns):
  """True for plain functions defined under the current working directory
  that have not already been collected into `cleared_fns`."""
  if not isinstance(candidate_fn, FunctionType):
    return False
  if candidate_fn in cleared_fns:
    return False
  return cwd in get_fn_path(candidate_fn).parents
28
+
29
def append_fn_children(fn, cleared_fns):
  """Collect into `cleared_fns` every user-defined function reachable from `fn`.

  Resolves the names referenced by `fn`'s code (and nested code objects)
  through `fn.__globals__`, keeps the user-defined functions not yet seen,
  and recurses into each. Mutates `cleared_fns` in place.

  Fix: the original shadowed the `fn` parameter with its first loop variable;
  the two passes over the children are also merged, marking each child as
  seen before recursing so cycles cannot recurse forever.
  """
  code_children = get_code_children(fn.__code__)
  candidates = [unwrap_func(fn.__globals__.get(co_name, None)) for co_name in code_children]
  fn_children = [child for child in candidates if is_user_fn(child, cleared_fns)]

  # Register the whole batch first: mutually referencing functions must be
  # marked as cleared before any recursive walk revisits them.
  cleared_fns.update(fn_children)
  for child_fn in fn_children:
    append_fn_children(child_fn, cleared_fns)
39
+
40
def get_fn_children(fn):
  """All user-defined functions transitively referenced by `fn`, sorted by name."""
  collected = set()
  append_fn_children(fn, collected)
  return sorted(collected, key=lambda child: child.__name__)
44
+
45
def get_function_hash(fn):
  """Hash `fn`'s source together with every user function it depends on.

  Changing the body of `fn` or of any reachable user-defined helper yields
  a different hash, which is what invalidates stale checkpoints.
  """
  relevant_fns = [fn] + get_fn_children(fn)
  bodies = [get_function_body(relevant) for relevant in relevant_fns]
  return hashing.hash(bodies)
@@ -0,0 +1,60 @@
1
+ import inspect
2
+ from termcolor import colored
3
+ from .storages import memory_storage, pickle_storage, bcolz_storage
4
+
5
# Registry mapping the `format` config value to its storage backend module.
storages = {
  'memory': memory_storage,
  'pickle': pickle_storage,
  'bcolz': bcolz_storage,
}

# Backends whose optional `initialize` hook has already been run.
initialized_storages = set()
12
+
13
def create_logger(should_log):
  """Return a `log(color, title, text)` function; a no-op when `should_log` is false."""
  def log(color, title, text):
    if not should_log:
      return
    badge = colored(f' {title} ', 'grey', 'on_' + color)
    message = colored(text, color)
    print(badge + ' ' + message)
  return log
20
+
21
def get_storage(storage):
  """Resolve `storage` (registry name or module) to a backend, initializing once.

  Accepts either a key of the `storages` registry ('memory', 'pickle',
  'bcolz') or a storage module itself. A backend's optional `initialize`
  hook runs only on its first use.

  Fix: isinstance instead of `type(storage) == str` (handles str subclasses,
  idiomatic type check).
  """
  if isinstance(storage, str):
    storage = storages[storage]
  if storage not in initialized_storages:
    if hasattr(storage, 'initialize'):
      storage.initialize()
    initialized_storages.add(storage)
  return storage
29
+
30
async def store_on_demand(get_data, name, config, force=False, should_expire=None):
  """Return cached data for `name`, computing and storing it when needed.

  get_data: zero-argument callable producing the value; may return a coroutine.
  name: invocation path used as the storage key.
  config: checkpointer config; `config.format` selects the backend.
  force: when true, recompute even if a valid checkpoint exists.
  should_expire: optional predicate over the stored creation timestamp.
  """
  storage = get_storage(config.format)
  # Memory hits stay silent; file-backed stores log unless verbosity is 0.
  should_log = storage != memory_storage and config.verbosity != 0
  log = create_logger(should_log)
  refresh = force \
    or storage.get_is_expired(config, name) \
    or (should_expire and storage.should_expire(config, name, should_expire))

  if refresh:
    log('blue', 'MEMORIZING', name)
    data = get_data()
    if inspect.iscoroutine(data):
      data = await data
    return storage.store_data(config, name, data)
  else:
    try:
      data = storage.load_data(config, name)
      log('green', 'REMEMBERED', name)
      return data
    except (EOFError, FileNotFoundError):
      # Partially written checkpoint: drop it and retry via recursion.
      # NOTE(review): relies on delete_data making the retry take the
      # `refresh` branch; if corruption somehow persists this recurses
      # again — confirm the backends guarantee progress.
      log('yellow', 'CORRUPTED', name)
      storage.delete_data(config, name)
      result = await store_on_demand(get_data, name, config, force, should_expire)
      return result
54
+
55
def read_from_store(name, config, storage='pickle'):
  """Load the checkpoint stored under `name`, or None when unavailable.

  Best-effort by design: any load failure yields None rather than raising.
  """
  storage = get_storage(storage)
  try:
    return storage.load_data(config, name)
  except Exception:
    # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt and
    # SystemExit; narrowed while keeping the deliberate best-effort contract.
    return None
@@ -0,0 +1,92 @@
1
+ import shutil
2
+ from pathlib import Path
3
+ from datetime import datetime
4
+
5
def get_data_type_str(x):
  """Coarse type tag used to pick a serialization strategy.

  Anything sized that is not a str/tuple/dict/list is treated as
  array-like ('ndarray'); unsized values and strings fall under 'other'.
  """
  if isinstance(x, tuple):
    return 'tuple'
  if isinstance(x, dict):
    return 'dict'
  if isinstance(x, list):
    return 'list'
  if isinstance(x, str) or not hasattr(x, '__len__'):
    return 'other'
  return 'ndarray'
16
+
17
def get_paths(root_path, invoke_path):
  """Data directory and its companion '<name>_meta' directory for `invoke_path`."""
  base = Path(invoke_path) if root_path is None else root_path / invoke_path
  meta = base.with_name(base.name + '_meta')
  return base, meta
21
+
22
def get_collection_timestamp(config, path):
  # Read the 'created' timestamp from the checkpoint's bcolz meta table.
  # bcolz is imported lazily so the backend stays optional.
  import bcolz
  _, meta_full_path = get_paths(config.root_path, path)
  # [:][0] materializes the single meta record stored by store_data.
  meta_data = bcolz.open(meta_full_path)[:][0]
  return meta_data['created']
27
+
28
def get_is_expired(config, path):
  """A checkpoint is expired when its metadata is absent or unreadable."""
  try:
    get_collection_timestamp(config, path)
  except (FileNotFoundError, EOFError):
    return True
  return False
34
+
35
def should_expire(config, path, expire_fn):
  """Apply the user-supplied expiry predicate to the stored creation time."""
  created = get_collection_timestamp(config, path)
  return expire_fn(created)
37
+
38
def insert_data(path, data):
  # Persist `data` as a bcolz carray rooted at `path`; mode='w' overwrites
  # any existing array, and flush() forces it to disk.
  import bcolz
  c = bcolz.carray(data, rootdir=path, mode='w')
  c.flush()
42
+
43
def store_data(config, path, data, expire_in=None):
  """Store `data` under `path` with bcolz, returning `data` unchanged.

  Tuples and dicts are decomposed: each field is stored recursively under
  "<path> (<i>)", and the meta record keeps the field order so load_data
  can reassemble the original container.
  """
  full_path, meta_full_path = get_paths(config.root_path, path)
  full_path.parent.mkdir(parents=True, exist_ok=True)
  created = datetime.now()
  data_type_str = get_data_type_str(data)
  if data_type_str == 'tuple':
    fields = list(range(len(data)))
  elif data_type_str == 'dict':
    # Sorted keys give a deterministic sub-path order.
    fields = sorted(data.keys())
  else:
    fields = []
  meta_data = {'created': created, 'data_type_str': data_type_str, 'fields': fields}
  # NOTE(review): meta is written before the payload, so a crash in between
  # leaves meta without data; store_on_demand treats the resulting load
  # failure as corruption and recovers — confirm.
  insert_data(meta_full_path, meta_data)
  if data_type_str in ['tuple', 'dict']:
    for i in range(len(fields)):
      sub_path = f"{path} ({i})"
      store_data(config, sub_path, data[fields[i]])
  else:
    insert_data(full_path, data)
  return data
63
+
64
def load_data(config, path):
  """Inverse of store_data: rebuild the value stored under `path`."""
  import bcolz
  full_path, meta_full_path = get_paths(config.root_path, path)
  meta_data = bcolz.open(meta_full_path)[:][0]
  data_type_str = meta_data['data_type_str']
  if data_type_str in ['tuple', 'dict']:
    # Containers were stored field-by-field; load each sub-path in order.
    fields = meta_data['fields']
    partitions = range(len(fields))
    data = [load_data(config, f"{path} ({i})") for i in partitions]
    if data_type_str == 'tuple':
      return tuple(data)
    else:
      return dict(zip(fields, data))
  else:
    data = bcolz.open(full_path)
    if data_type_str == 'list':
      return list(data)
    elif data_type_str == 'other':
      # Scalars/strings were wrapped in a length-1 carray.
      return data[0]
    else:
      # Array-like: materialize the whole array.
      return data[:]
85
+
86
def delete_data(config, path):
  """Remove both the data and meta directories for `path`, ignoring absence.

  Fix: each directory is removed independently. Previously a missing meta
  directory raised out of the shared try block before the data directory was
  touched, leaving it orphaned on disk.
  """
  full_path, meta_full_path = get_paths(config.root_path, path)
  for target in (meta_full_path, full_path):
    try:
      shutil.rmtree(target)
    except FileNotFoundError:
      pass
@@ -0,0 +1,18 @@
1
+ from datetime import datetime
2
+
3
+ store = {}
4
+ date_stored = {}
5
+
6
+ def get_is_expired(config, path):
7
+ return path not in store
8
+
9
+ def should_expire(config, path, expire_fn):
10
+ return expire_fn(date_stored[path])
11
+
12
+ def store_data(config, path, data):
13
+ store[path] = data
14
+ date_stored[path] = datetime.now()
15
+ return data
16
+
17
+ def load_data(config, path):
18
+ return store[path]
@@ -0,0 +1,49 @@
1
import pickle
from pathlib import Path
from datetime import datetime

def get_paths(root_path, invoke_path):
  """Meta ('<name>_meta.pkl') and payload ('<name>.pkl') paths for `invoke_path`."""
  p = Path(invoke_path) if root_path is None else root_path / invoke_path
  meta_full_path = p.with_name(p.name + '_meta.pkl')
  pkl_full_path = p.with_name(p.name + '.pkl')
  return meta_full_path, pkl_full_path

def get_collection_timestamp(config, path):
  """Read the 'created' timestamp from the checkpoint's meta file."""
  meta_full_path, pkl_full_path = get_paths(config.root_path, path)
  with meta_full_path.open('rb') as file:
    meta_data = pickle.load(file)
    return meta_data['created']

def get_is_expired(config, path):
  """A checkpoint is expired when its meta file is missing or truncated."""
  try:
    get_collection_timestamp(config, path)
    return False
  except (FileNotFoundError, EOFError):
    return True

def should_expire(config, path, expire_fn):
  """Apply the user-supplied expiry predicate to the checkpoint's creation time."""
  return expire_fn(get_collection_timestamp(config, path))

def store_data(config, path, data):
  """Pickle `data` plus a small meta record; returns `data` unchanged."""
  created = datetime.now()
  meta_data = {'created': created}
  meta_full_path, pkl_full_path = get_paths(config.root_path, path)
  pkl_full_path.parent.mkdir(parents=True, exist_ok=True)
  # Protocol -1 = highest available: compact and fast.
  with pkl_full_path.open('wb') as file:
    pickle.dump(data, file, -1)
  with meta_full_path.open('wb') as file:
    pickle.dump(meta_data, file, -1)
  return data

def load_data(config, path):
  """Unpickle and return the payload stored under `path`."""
  _, full_path = get_paths(config.root_path, path)
  with full_path.open('rb') as file:
    return pickle.load(file)

def delete_data(config, path):
  """Delete both checkpoint files, tolerating either being absent.

  Fix: each file is unlinked independently via missing_ok (Python 3.8+,
  matching the package's requires-python). The old shared try/except
  stopped at the first missing file and could leave the other orphaned.
  """
  meta_full_path, pkl_full_path = get_paths(config.root_path, path)
  meta_full_path.unlink(missing_ok=True)
  pkl_full_path.unlink(missing_ok=True)
@@ -0,0 +1,17 @@
1
+ import types
2
+
3
def unwrap_func(func):
  """Follow the functools-style `__wrapped__` chain to the innermost function."""
  target = func
  while hasattr(target, '__wrapped__'):
    target = target.__wrapped__
  return target
7
+
8
@types.coroutine
def coroutine_as_generator(coroutine):
  """Expose `coroutine` as a generator so it can be driven with next()."""
  result = yield from coroutine
  return result

def sync_resolve_coroutine(coroutine):
  """Run `coroutine` to completion synchronously and return its value.

  Suitable only for coroutines that never actually suspend on external
  events: completion surfaces as StopIteration, whose value is the result.
  """
  driver = coroutine_as_generator(coroutine)
  try:
    next(driver)
  except StopIteration as stop:
    return stop.value
@@ -0,0 +1,24 @@
1
+ [project]
2
+ name = "checkpointer"
3
+ version = "1.2.0"
4
+ requires-python = ">=3.8"
5
+ dependencies = [
6
+ "relib",
7
+ "termcolor",
8
+ ]
9
+ authors = [
10
+ {name = "Hampus Hallman"}
11
+ ]
12
+ # description = ""
13
+ readme = "README.md"
14
+ license = {file = "LICENSE"}
15
+
16
+ [project.urls]
17
+ Repository = "https://github.com/Reddan/checkpointer.git"
18
+
19
+ [build-system]
20
+ requires = ["hatchling"]
21
+ build-backend = "hatchling.build"
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["checkpointer", "checkpointer.storages"]