resutil 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
resutil/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ from .main import main
2
+
3
+
4
# ``main`` is a decorator factory (``main(verbose=True)`` returns the actual
# decorator), so it must be called; a bare ``@main`` would bind ``func`` to the
# inner decorator instead of a wrapped function.
@main()
def func(params=None):
    """Placeholder function wrapped by the resutil decorator.

    Args:
        params: Object passed in by the decorator's wrapper as the first
            positional argument; unused here.
    """
    pass
@@ -0,0 +1 @@
1
+ from .cli_main import main
@@ -0,0 +1,4 @@
1
+ from .cli_main import main
2
+
3
# Run the command-line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,256 @@
1
+ import argparse
2
+ import os
3
+ from os.path import join
4
+ from datetime import datetime
5
+
6
+ from rich import print
7
+
8
+
9
+ from ..ex_dir import find_undownloaded_ex_dirs, find_unuploaded_ex_dirs, create_ex_dir
10
+ from ..utils import user_confirm
11
+ from ..config_file import ConfigYaml
12
+ from ..storage import Box
13
+ from ..config_file import create_ex_yaml
14
+
15
+ from ..core import initialize, upload, upload_all, download_all, download
16
+
17
+
18
+ config_file_path = "resutil-conf.yaml"
19
+
20
+
21
def _add_transfer_parser(subparsers, verb, handler):
    """Register a ``pull``/``push`` style sub-command named *verb*.

    Both sub-commands accept ``--no-dependency`` and require exactly one of
    ``-A/--all`` or a positional experiment name.
    """
    sub = subparsers.add_parser(verb, help=f"{verb} experiments")
    sub.add_argument(
        "--no-dependency",
        action="store_true",
        help=f"{verb} experiments without dependencies",
    )
    target = sub.add_mutually_exclusive_group(required=True)
    target.add_argument(
        "-A", "--all", action="store_true", help=f"{verb} all experiments"
    )
    target.add_argument("experiment", nargs="?", help=f"experiment to {verb}")
    sub.set_defaults(handler=handler)
    return sub


def main(argv=None):
    """Parse the command line and dispatch to the selected sub-command.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the console script uses.
    """
    parser = argparse.ArgumentParser(description="")
    subparsers = parser.add_subparsers()

    # init
    parser_init = subparsers.add_parser("init", help="initialize resutil")
    parser_init.set_defaults(handler=command_init)

    # pull / push share the same argument layout
    _add_transfer_parser(subparsers, "pull", command_pull)
    _add_transfer_parser(subparsers, "push", command_push)

    # add
    parser_add = subparsers.add_parser("add", help="add experiments")
    parser_add.add_argument(
        "comment", nargs="?", help="comment for the experiment to add"
    )
    parser_add.add_argument(
        "-d",
        "--dependency",
        nargs="+",
        help="add depending experiments",
    )
    parser_add.set_defaults(handler=command_add)

    args = parser.parse_args(argv)

    # No sub-command given: ``handler`` was never set, so show the usage.
    handler = getattr(args, "handler", None)
    if handler is None:
        parser.print_help()
    else:
        handler(args)
75
+
76
+
77
def get_default_project_name():
    """Return the name of the current working directory.

    The value is offered as the default project name during ``init``.
    """
    return os.path.basename(os.getcwd())
82
+
83
+
84
def search_default_result_dir():
    """Pick a default results directory name for the current directory.

    Returns the first conventional name (in priority order) that exists as a
    directory in the current working directory, or ``"results"`` when none of
    them does.
    """
    preferred_names = (
        "results",
        "result",
        "data",
        "output",
        "outputs",
        "input",
        "inputs",
    )
    # Directories directly inside the current working directory.
    existing_dirs = [entry for entry in os.listdir() if os.path.isdir(entry)]

    return next(
        (name for name in preferred_names if name in existing_dirs),
        "results",
    )
103
+
104
+
105
def _ask(prompt, default=""):
    """Print *prompt* without a newline and return the reply, or *default* if empty."""
    print(prompt, end="")
    reply = input()
    return reply if reply != "" else default


def command_init(args):
    """Interactively create the resutil config file in the current directory.

    Asks for the project name, results directory, storage type and the Box
    settings, optionally writes ``.gitignore`` entries, verifies that the
    storage can be reached and finally saves the config file. Nothing is
    saved when the user declines to overwrite an existing config or when the
    storage connection fails.

    Args:
        args: Parsed command-line arguments (unused).
    """
    # check if already initialized
    if os.path.exists(config_file_path):
        print("⚠️ Already initialized.")
        print(" [yellow]resutil-conf.yaml[/yellow] already exists.")
        print(" If you proceed, the existing file will be overwritten.")
        if not user_confirm(" Do you want to proceed?", default="n"):
            return

    # create config
    config = ConfigYaml()

    # set project name (default is the current directory name)
    default_name = get_default_project_name()
    project_name = _ask(
        f"Input project name [bold]({default_name})[/bold]: ", default_name
    )
    config.set_project_name(project_name)

    # set directory to store results
    default_dir = search_default_result_dir()
    results_dir = _ask(
        f"Input directory name to store results [bold]({default_dir})[/bold]: ",
        default_dir,
    )

    # create results directory if not exist
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    config.set_results_dir(results_dir)

    # add to .gitignore
    if user_confirm(f"Do you want to add .gitignore to {results_dir}?", default="y"):
        with open(
            os.path.join(results_dir, ".gitignore"), "a", encoding="utf-8"
        ) as f:
            f.write("# Resutil result dir\n")
            f.write("# Automatically added by resutil\n")
            f.write("# Files here are not managed by git\n")
            f.write("*\n")
            f.write("!.gitignore\n")

    # set storage type; keep asking until a supported one is entered
    supported_storage_types = ["box"]
    default_storage = "box"
    while True:
        storage_type = _ask(
            f"Input storage_type [bold]({default_storage})[/bold]: ",
            default_storage,
        )
        if storage_type in supported_storage_types:
            break
        # Tell the user why the prompt is repeated instead of looping silently.
        print(
            " Unsupported storage type. Supported: "
            + ", ".join(supported_storage_types)
        )
    config.set_storage_type(storage_type)

    # set box config
    if storage_type == "box":
        default_key = "key.json"
        key_file_path = _ask(
            f"Input key file_path [bold]({default_key})[/bold]: ", default_key
        )

        if user_confirm(
            f"Do you want to add {key_file_path} to .gitignore?", default="y"
        ):
            with open(".gitignore", "a", encoding="utf-8") as f:
                f.write("\n# Resutil config file\n")
                f.write(key_file_path + "\n")

        base_dir_id = _ask("Input folder id of base dir: ")

        storage_config = {"key_file_path": key_file_path, "base_dir_id": base_dir_id}

    config.set_storage_config(storage_config)

    # Verify the credentials/folder before writing the config file.
    try:
        Box(config.storage_config, config.project_name)
    except Exception as e:
        print("❌ Failed to connect to storage.")
        print(f" [red]{e}[/red]")
        return

    # save config
    config.save(config_file_path)

    print("βœ… Initialized.")
190
+
191
+
192
def command_pull(args):
    """Handle ``resutil pull``.

    Downloads the named experiment when one is given; with ``--all`` it lists
    the remote experiments missing locally and downloads them after the user
    confirms.

    Args:
        args: Parsed arguments providing ``experiment``, ``all`` and
            ``no_dependency``.
    """
    config, storage = initialize()

    if args.experiment:
        download(args.experiment, config.results_dir, storage, args.no_dependency)
        print(args.experiment)
        print("βœ… Downloaded")
        return

    if not args.all:
        return

    pending = find_undownloaded_ex_dirs(config.results_dir, storage)
    count = len(pending)

    if count == 0:
        print("βœ… No experiment to download.")
        return

    question = f"ℹ️ There are {count} other experiment directory(s) that have not been downloaded. Do you want to download them?"
    if user_confirm(question, default="y"):
        download_all(pending, config.results_dir, storage)
        print("βœ… Downloaded")
213
+
214
+
215
def command_push(args):
    """Handle ``resutil push``.

    Uploads the named experiment when one is given; with ``--all`` it lists
    the local experiments missing remotely and uploads them after the user
    confirms.

    Args:
        args: Parsed arguments providing ``experiment``, ``all`` and
            ``no_dependency``.
    """
    config, storage = initialize()

    if args.experiment:
        upload(args.experiment, config.results_dir, storage, args.no_dependency)
        print(args.experiment)
        print("βœ… Uploaded")
        return

    if not args.all:
        return

    pending = find_unuploaded_ex_dirs(config.results_dir, storage)
    count = len(pending)

    if count == 0:
        print("βœ… No experiment to upload.")
        return

    question = f"ℹ️ There are {count} other experiment directory(s) that have not been uploaded. Do you want to upload them?"
    if user_confirm(question, default="y"):
        upload_all(pending, config.results_dir, storage)
        print("βœ… Uploaded")
236
+
237
+
238
def command_add(args):
    """Handle ``resutil add``: create an experiment directory without running code.

    The comment is taken from the command line, or asked for interactively
    when it was not supplied. An experiment YAML file recording the given
    dependencies is written into the new directory.

    Args:
        args: Parsed arguments providing ``comment`` and ``dependency``.
    """
    config, _ = initialize()

    comment = args.comment
    if comment is None:
        comment = input("πŸ“ Input comment for this experiment: ")
    print(f"com: {comment}")

    # ``-d`` is optional; treat a missing option as "no dependencies".
    if args.dependency is None:
        dependency = []
    else:
        dependency = args.dependency

    created_at = datetime.now()
    ex_name = create_ex_dir(created_at, comment, config.results_dir)

    create_ex_yaml(join(config.results_dir, ex_name), dependency)
resutil/config_file.py ADDED
@@ -0,0 +1,80 @@
1
+ from os.path import join
2
+ import sys
3
+ from os.path import exists
4
+ from typing import Optional
5
+
6
+ import yaml
7
+
8
+ from .utils import parse_result_dirs
9
+
10
+
11
class ConfigYaml:
    """Settings stored in the resutil config YAML file.

    Attributes are ``None`` until they are loaded from a file or assigned via
    the ``set_*`` methods.
    """

    def __init__(self, config_file_path=None):
        """Create an empty config, or load one from *config_file_path*.

        Args:
            config_file_path: Path of the YAML file to load. ``None`` creates
                an empty config to be filled in with the ``set_*`` methods.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file does not contain a YAML mapping.
            KeyError: If a required key is missing from the file.
        """
        # Always define every attribute so an unloaded config can be validated
        # and saved without raising AttributeError.
        self.project_name = None
        self.results_dir = None
        self.storage_type = None
        self.storage_config = None

        if config_file_path is None:
            return

        try:
            with open(config_file_path, "r") as f:
                conf = yaml.safe_load(f)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"config file {config_file_path} does not exist."
            ) from err

        # An empty or scalar-only file would otherwise fail with an opaque TypeError.
        if not isinstance(conf, dict):
            raise ValueError(
                f"config file {config_file_path} must contain a YAML mapping."
            )

        self.project_name = conf["project_name"]
        self.results_dir = conf["results_dir"]
        self.storage_type = conf["storage_type"]
        self.storage_config = conf["storage_config"]

    def set_project_name(self, project_name: str):
        """Set the project name."""
        self.project_name = project_name

    def set_results_dir(self, results_dir: str):
        """Set the results directory.

        Raises:
            ValueError: If *results_dir* does not exist.
        """
        if not exists(results_dir):
            raise ValueError(f"results_dir {results_dir} does not exist.")
        self.results_dir = results_dir

    def set_storage_type(self, storage_type: str):
        """Set the storage backend type.

        Raises:
            ValueError: If *storage_type* is not ``"box"``, the only type
                accepted by this check.
        """
        if storage_type not in ["box"]:
            raise ValueError("storage_type must be 'box'")
        self.storage_type = storage_type

    def set_storage_config(self, storage_config):
        """Set the backend-specific settings after validating them.

        The storage type must have been set first, because the required keys
        depend on it.

        Raises:
            ValueError: If the storage type is not ``"box"`` or a required
                key is missing from *storage_config*.
        """
        if self.storage_type == "box":
            if "key_file_path" not in storage_config:
                raise ValueError("storage_config must have 'key_file_path' key")
            if "base_dir_id" not in storage_config:
                raise ValueError("storage_config must have 'base_dir_id' key")
        else:
            raise ValueError("storage_type must be 'box'")
        self.storage_config = storage_config

    def save(self, config_file_path):
        """Write the current settings to *config_file_path* as YAML."""
        data = {
            "project_name": self.project_name,
            "results_dir": self.results_dir,
            "storage_type": self.storage_type,
            "storage_config": self.storage_config,
        }
        with open(config_file_path, "w") as stream:
            yaml.dump(data, stream)
61
+
62
+
63
def create_ex_yaml(
    dir: str,
    dependency: Optional[list[str]] = None,
    commit_hash: Optional[str] = None,
    uncommited_files: Optional[list[str]] = None,
):
    """Write ``resutil-exp.yaml`` describing an experiment into *dir*.

    The file records the command line of the current process, the experiment
    directory, its dependencies and the git state.

    Args:
        dir: Experiment directory in which the file is created.
        dependency: Names of experiments this one depends on; ``None`` is
            stored as an empty list.
        commit_hash: Git commit hash, or ``None`` when unavailable.
        uncommited_files: Paths of uncommitted files; ``None`` is stored as
            an empty list.
    """
    # Avoid shared mutable defaults, and normalize None so the YAML always
    # holds lists that readers can iterate over.
    dependency = [] if dependency is None else dependency
    uncommited_files = [] if uncommited_files is None else uncommited_files

    data = {
        "cmd": " ".join(sys.argv),
        "result_dir": dir,
        "dependency": dependency,
        "git": {
            "uncommited_files": uncommited_files,
            "commit_hash": commit_hash,
        },
    }
    with open(join(dir, "resutil-exp.yaml"), "w") as stream:
        yaml.dump(data, stream)
resutil/core.py ADDED
@@ -0,0 +1,95 @@
1
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2
+ from os.path import join, exists
3
+
4
+ from rich import print
5
+
6
+ from .storage import Box
7
+ from .config_file import ConfigYaml
8
+ from .exp_file import ExpFile
9
+
10
+ config_file_name = "resutil-conf.yaml"
11
+ exp_file_name = "resutil-exp.yaml"
12
+
13
+
14
def initialize():
    """Load the config file and connect to the configured storage.

    Returns:
        A ``(config, storage)`` tuple.

    Raises:
        SystemExit: With status 1 when the config file does not exist.
        ValueError: When the configured storage type is not ``"box"``.
    """
    try:
        config = ConfigYaml(config_file_name)
    except FileNotFoundError:
        print(f"⚠️ Config file {config_file_name} does not exist.")
        print("Create a config file by running [bold]resutil init[/bold] and try again")
        # ``exit()`` is injected by the ``site`` module and is not guaranteed
        # to exist; raising SystemExit has the same effect everywhere.
        raise SystemExit(1)

    if config.storage_type != "box":
        raise ValueError(
            f"⛔️ Wrong storage type. Check your [bold]{config_file_name}[/bold] file."
        )

    storage = Box(config.storage_config, config.project_name)
    print("πŸ“¦ Connected to [bold]box[/bold]")
    base_dir_name, project_folder_name = storage.get_info()
    print(f" πŸ“ Base dir: [bold]{base_dir_name}[/bold]")
    print(f" πŸ“ Project dir: [bold]{project_folder_name}[/bold]")

    return config, storage
35
+
36
+
37
def upload(ex_name: str, results_dir: str, storage: Box, no_dependency=False):
    """Upload one experiment directory and, optionally, its dependencies.

    Args:
        ex_name: Name of the experiment directory inside *results_dir*.
        results_dir: Local directory that holds the experiment directories.
        storage: Storage backend providing ``upload_experiment``.
        no_dependency: When true, the experiments listed in the experiment
            file are not uploaded.

    Raises:
        Exception: Whatever a file-upload worker raised; it is re-raised here
            once all workers have finished.
    """
    ex_dir_path = join(results_dir, ex_name)

    def callback(ex_name):
        print(f"πŸ—‚οΈ Uploading: [bold]{ex_name}[/bold]")

    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = storage.upload_experiment(ex_dir_path, callback, executor) or []

    # Leaving the ``with`` block waits for every worker. An exception raised
    # in a worker is stored on its future and only surfaces when the result is
    # requested, so ask for each one instead of silently dropping failures.
    for future in futures:
        future.result()

    if no_dependency:
        return

    ex_file_path = join(ex_dir_path, exp_file_name)
    if exists(ex_file_path):
        exp_file = ExpFile(ex_file_path)
        # A ``null`` dependency entry in the YAML is treated as "none".
        for d in exp_file.dependency or []:
            upload(d, results_dir, storage, no_dependency)
54
+
55
+
56
def upload_all(ex_names_to_upload: list[str], results_dir: str, storage: Box):
    """Upload several experiment directories using one shared worker pool.

    Args:
        ex_names_to_upload: Names of the experiment directories to upload.
        results_dir: Local directory that holds the experiment directories.
        storage: Storage backend providing ``upload_experiment``.

    Raises:
        Exception: Whatever a file-upload worker raised; it is re-raised here
            once all workers have finished.
    """

    def callback(ex_name):
        print(f"πŸ—‚οΈ Uploading: [bold]{ex_name}[/bold]")

    futures = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        for ex_name in ex_names_to_upload:
            ex_dir_path = join(results_dir, ex_name)
            futures.extend(
                storage.upload_experiment(ex_dir_path, callback, executor) or []
            )

    # All workers are finished here; surface the first failure, if any,
    # instead of silently dropping it.
    for future in futures:
        future.result()
65
+
66
+
67
def download(ex_name: str, results_dir: str, storage: Box, no_dependency=False):
    """Download one experiment directory and, optionally, its dependencies.

    Args:
        ex_name: Name of the experiment directory to download.
        results_dir: Local directory that holds the experiment directories.
        storage: Storage backend providing ``download_experiment``.
        no_dependency: When true, the experiments listed in the downloaded
            experiment file are not downloaded.

    Raises:
        Exception: Whatever a file-download worker raised.
    """
    ex_dir_path = join(results_dir, ex_name)

    def callback(ex_name):
        print(f"πŸ—‚οΈ Downloading: [bold]{ex_name}[/bold]")

    has_exp_file = False
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = storage.download_experiment(ex_dir_path, callback, executor)

        # Always consume the results so worker failures are raised even when
        # dependencies are not followed.
        for future in as_completed(futures):
            downloaded_name = future.result()
            if not no_dependency and downloaded_name == exp_file_name:
                has_exp_file = True

    if not has_exp_file:
        return

    # Follow dependencies once, after every file of this experiment is on
    # disk. A ``null`` dependency entry in the YAML is treated as "none".
    exp_file = ExpFile(join(ex_dir_path, exp_file_name))
    for d in exp_file.dependency or []:
        download(d, results_dir, storage, no_dependency)
85
+
86
+
87
def download_all(ex_names_to_download: list[str], results_dir: str, storage: Box):
    """Download several experiment directories using one shared worker pool.

    Args:
        ex_names_to_download: Names of the experiment directories to download.
        results_dir: Local directory that holds the experiment directories.
        storage: Storage backend providing ``download_experiment``.

    Raises:
        Exception: Whatever a file-download worker raised; it is re-raised
            here once all workers have finished.
    """

    def callback(ex_name):
        print(f"πŸ—‚οΈ Downloading: [bold]{ex_name}[/bold]")

    futures = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        for ex_name in ex_names_to_download:
            ex_dir_path = join(results_dir, ex_name)
            futures.extend(
                storage.download_experiment(ex_dir_path, callback, executor) or []
            )

    # All workers are finished here; surface the first failure, if any,
    # instead of silently dropping it.
    for future in futures:
        future.result()
resutil/ex_dir.py ADDED
@@ -0,0 +1,64 @@
1
+ from datetime import datetime
2
+ import os
3
+ import shutil
4
+ from glob import glob
5
+ from os.path import basename, normpath, join
6
+
7
+
8
+ from .utils import to_base26, parse_result_dirs
9
+
10
+
11
def create_ex_dir(now, comment, results_dir):
    """Create a new experiment directory and return its name.

    The name has the form ``<code>_<YYYYmmddTHHMMSS>_<comment>`` where
    ``<code>`` is the base-26 encoding of the whole minutes elapsed between
    2024-01-01 00:00 and *now*.

    Args:
        now: Timestamp (``datetime``) used to build the name.
        comment: Free text appended to the name.
        results_dir: Directory in which the experiment directory is created.

    Returns:
        The name of the created directory (not its full path).

    Raises:
        FileExistsError: If the directory already exists.
    """
    epoch = datetime(2024, 1, 1, 0, 0, 0, 0)
    timestamp = now.strftime("%Y%m%dT%H%M%S")

    minutes_since_epoch = int((now - epoch).total_seconds() / 60)
    prefix = to_base26(minutes_since_epoch)

    ex_name = f"{prefix}_{timestamp}_{comment}"
    ex_dir_path = os.path.join(results_dir, ex_name)

    # Refuse to reuse an existing experiment directory.
    if os.path.isdir(ex_dir_path):
        raise FileExistsError(f"{ex_dir_path} is already exist")

    os.makedirs(ex_dir_path)
    return ex_name
30
+
31
+
32
def get_ex_dir_names(results_dir):
    """Return the names of the directories directly inside *results_dir*.

    Plain files are ignored, and so are names starting with a dot because
    the ``*`` glob pattern does not match them.
    """
    # The trailing slash restricts the glob to directories.
    pattern = results_dir + "/*/"
    return [basename(normpath(path)) for path in glob(pattern)]
36
+
37
+
38
def find_unuploaded_ex_dirs(results_dir_path, storage):
    """Return the local experiment names that are not present in the storage.

    Args:
        results_dir_path: Local directory that holds the experiment directories.
        storage: Storage backend providing ``get_all_experiment_names``.

    Returns:
        Local names missing from the storage, in local listing order.
    """
    remote_names = storage.get_all_experiment_names()
    local_names = get_ex_dir_names(results_dir_path)
    return [name for name in local_names if name not in remote_names]
46
+
47
+
48
def find_undownloaded_ex_dirs(results_dir_path, storage):
    """Return the remote experiment names that are not present locally.

    Args:
        results_dir_path: Local directory that holds the experiment directories.
        storage: Storage backend providing ``get_all_experiment_names``.

    Returns:
        Remote names missing from the local directory, in remote listing order.
    """
    remote_names = storage.get_all_experiment_names()
    local_names = get_ex_dir_names(results_dir_path)
    return [name for name in remote_names if name not in local_names]
56
+
57
+
58
def delete_ex_dir(ex_dir_path):
    """Delete an experiment directory and everything inside it.

    As a safety guard, the path must contain an experiment-style name as
    recognized by ``parse_result_dirs``; otherwise nothing is removed.

    Args:
        ex_dir_path: Path of the experiment directory to delete.

    Returns:
        ``True`` if the directory was removed, ``False`` if it did not exist.

    Raises:
        ValueError: If the path does not look like an experiment directory.
    """
    # Never run rmtree on a path that is not recognizably an experiment dir.
    if not parse_result_dirs(ex_dir_path):
        raise ValueError(f"{ex_dir_path} is not a valid experiment directory")

    if not os.path.exists(ex_dir_path):
        return False

    shutil.rmtree(ex_dir_path)
    return True
resutil/exp_file.py ADDED
@@ -0,0 +1,15 @@
1
+ import yaml
2
+
3
+
4
class ExpFile:
    """Reader for an experiment YAML file; exposes its ``dependency`` list."""

    def __init__(self, exp_file_path=None):
        """Load the dependency list from *exp_file_path*.

        Args:
            exp_file_path: Path of the experiment YAML file. ``None`` creates
                an object with an empty dependency list.

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        # Always defined, so callers can iterate it unconditionally.
        self.dependency = []

        if exp_file_path is None:
            return

        try:
            with open(exp_file_path, "r") as f:
                conf = yaml.safe_load(f)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"experiment file {exp_file_path} does not exist."
            ) from err

        # An empty file, a missing key or an explicit ``null`` all mean
        # "no dependencies".
        if isinstance(conf, dict):
            self.dependency = conf.get("dependency") or []
resutil/git.py ADDED
@@ -0,0 +1,33 @@
1
+ from shutil import copy
2
+ from pathlib import Path
3
+
4
+ from git import Repo, InvalidGitRepositoryError
5
+
6
+
7
def find_git_repo():
    """Return the git repository containing the current directory, if any.

    The search starts at the current directory and walks up through the
    parent directories.

    Returns:
        The ``Repo`` object, or ``None`` when no repository is found.
    """
    try:
        return Repo("./", search_parent_directories=True)
    except InvalidGitRepositoryError:
        return None
14
+
15
+
16
def get_git_info(repo: Repo):
    """Return the HEAD commit hash and the paths that differ from it.

    Args:
        repo: Repository to inspect.

    Returns:
        A ``(commit_hash, files)`` tuple. ``files`` lists the paths reported
        by the index-vs-working-tree diff followed by those reported by the
        index-vs-HEAD diff, each path appearing only once.
    """
    # Get the most recent commit
    latest_commit = repo.head.commit.hexsha

    modified_files = [item.a_path for item in repo.index.diff(None)]
    staged_files = [item.a_path for item in repo.index.diff("HEAD")]

    # A file that is staged and then modified again shows up in both diffs;
    # dict.fromkeys removes the duplicate while keeping first-seen order.
    changed_files = list(dict.fromkeys([*modified_files, *staged_files]))

    return latest_commit, changed_files
26
+
27
+
28
def store_uncomited(uncommited_files, dir_path):
    """Copy uncommitted files into *dir_path*, preserving their relative paths.

    Args:
        uncommited_files: Paths of the files to copy, relative to the current
            working directory.
        dir_path: Destination directory; parent directories are created as
            needed.
    """
    for file in uncommited_files:
        # A tracked file that was deleted from the working tree still appears
        # in the diff, but there is nothing left to copy.
        if not Path(file).is_file():
            continue
        dest = Path(dir_path, file)
        dest.parent.mkdir(parents=True, exist_ok=True)
        copy(file, dest)
resutil/main.py ADDED
@@ -0,0 +1,103 @@
1
+ from functools import wraps
2
+ from datetime import datetime
3
+ from os.path import join
4
+ import sys
5
+
6
+ from rich import print
7
+
8
+ from .utils import user_confirm, parse_result_dirs
9
+ from .config_file import create_ex_yaml
10
+ from .ex_dir import create_ex_dir, delete_ex_dir, find_unuploaded_ex_dirs
11
+ from .git import find_git_repo, get_git_info, store_uncomited
12
+
13
+ from .core import initialize, upload, upload_all
14
+
15
+
16
class resutil_args:
    """Argument object handed to the decorated function as its first argument."""

    def __init__(self, ex_dir):
        # Path of the experiment directory created for the current run.
        self.ex_dir = ex_dir
19
+
20
+
21
+ # Used as a decorator
22
def main(verbose=True):
    """Decorator factory that runs a function as a tracked resutil experiment.

    The wrapped function is called with a ``resutil_args`` object (holding
    the path of a freshly created experiment directory) as its first
    argument, followed by the caller's own arguments. Around the call the
    wrapper connects to the storage, asks for a comment, records git and
    dependency information in the experiment YAML file and uploads the
    results afterwards.

    Args:
        verbose: Currently unused.

    Returns:
        The decorator to apply to the experiment's main function.
    """

    def main_wrapper(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            print("")
            print("✨ Runnning your code with [bold]Resutil[/bold]")
            print("")

            config, storage = initialize()

            print("")

            comment = input("πŸ“ Input comment for this experiment: ")

            print("")

            ex_name = create_ex_dir(
                datetime.now(),
                comment,
                config.results_dir,
            )
            ex_dir_path = join(config.results_dir, ex_name)

            # check uncommited files
            git_repo = find_git_repo()
            if git_repo is None:
                # No repository: record "no commit" and an empty file list.
                commit_hash, unstaged_files = None, []
            else:
                commit_hash, unstaged_files = get_git_info(git_repo)
                if len(unstaged_files) > 0:
                    print("πŸ” Unstaged files will be stored in the result dir:")
                    for file in unstaged_files:
                        print(f" - {file}")
                    store_uncomited(
                        unstaged_files, join(ex_dir_path, "uncommited_files")
                    )

            # Experiment directories named on the command line are dependencies.
            dependency = parse_result_dirs(" ".join(sys.argv))

            create_ex_yaml(
                ex_dir_path,
                dependency,
                commit_hash=commit_hash,
                uncommited_files=unstaged_files,
            )

            # Run the main function
            print("πŸš€ Running the main function...")

            result = None
            deleted = False
            try:
                result = func(resutil_args(ex_dir_path), *args, **kwargs)
            except KeyboardInterrupt:
                print("")
                if user_confirm(
                    "πŸ”” Interrupted by user. Do you want to [bold]delete[/bold] experiment file for trial?",
                    default="n",
                ) and user_confirm(
                    "πŸ”” Are your sure to [bold]DELETE[/bold] it?",
                    default="n",
                ):
                    delete_ex_dir(ex_dir_path)
                    deleted = True
                    print(f"πŸ—‘οΈ Deleted [bold]{ex_dir_path}[/bold]")
                print("⛔️ Aborted")

            print("")

            # A directory the user just deleted must not be uploaded; doing so
            # would only create an empty experiment folder in the storage.
            if not deleted:
                upload(ex_name, config.results_dir, storage)

            ex_names_to_upload = find_unuploaded_ex_dirs(config.results_dir, storage)

            n = len(ex_names_to_upload)
            if n > 0 and user_confirm(
                f"ℹ️ There are {n} other experiment directory(s) that have not been uploaded. Do you want to upload them?",
                default="y",
            ):
                upload_all(ex_names_to_upload, config.results_dir, storage)

            print("βœ… Done")

            # Hand the wrapped function's return value back to the caller.
            return result

        return wrapper

    return main_wrapper
@@ -0,0 +1,3 @@
1
+ from .box.box import Box
2
+
3
+ __all__ = ["Box"]
@@ -0,0 +1,68 @@
1
+ from os.path import basename, normpath
2
+ from os import makedirs
3
+ from concurrent.futures import ThreadPoolExecutor, Future
4
+
5
+ from .box_client import BoxClient
6
+
7
+
8
class Box:
    """Storage backend that keeps experiment directories in a Box project folder."""

    def __init__(self, storage_config: dict, project_name: str):
        """Connect to Box and locate (or create) the project folder.

        Args:
            storage_config: Mapping with ``key_file_path`` and ``base_dir_id``.
            project_name: Name of the project folder inside the base folder.
        """
        self.client = BoxClient(storage_config["key_file_path"])

        self.base_dir = self.client.find_folder(storage_config["base_dir_id"])

        self.project_folder = self.client.find_subfolder_by_name(
            project_name, self.base_dir.id
        )
        # First use of this project: create its folder.
        if self.project_folder is None:
            self.project_folder = self.client.create_folder(
                project_name, self.base_dir.id
            )

    def get_info(self) -> tuple[str, str]:
        """Return the names of the base folder and the project folder."""
        return (
            self.base_dir.name,
            self.project_folder.name,
        )

    def upload_experiment(
        self, local_ex_path: str, callback, executor: ThreadPoolExecutor
    ) -> list[Future]:
        """Upload a local experiment folder and its contents to Box.

        Args:
            local_ex_path (str): path to the folder to be uploaded
            callback: called with the experiment folder name before uploading
            executor: pool on which the file uploads are scheduled

        Returns:
            The futures of the scheduled file uploads.
        """
        ex_dir_name = basename(normpath(local_ex_path))
        callback(ex_dir_name)
        ex_dir = self.client.create_subfolder(ex_dir_name, self.project_folder.id)

        futures: list[Future] = []
        self.client.upload_recursively(local_ex_path, ex_dir, executor, futures)
        return futures

    def download_experiment(
        self, local_ex_path: str, callback, executor: ThreadPoolExecutor
    ) -> list[Future]:
        """Download an experiment folder and its contents from Box.

        Args:
            local_ex_path (str): local path the folder is downloaded to; its
                last component is the experiment name looked up in Box
            callback: called with the experiment folder name before downloading
            executor: pool on which the file downloads are scheduled

        Returns:
            The futures of the scheduled file downloads.

        Raises:
            ValueError: If the experiment does not exist in the project folder.
        """
        ex_dir_name = basename(normpath(local_ex_path))
        callback(ex_dir_name)

        # Look the folder up first so that a missing experiment gives a clear
        # error and does not leave an empty local directory behind.
        ex_dir = self.client.find_subfolder_by_name(ex_dir_name, self.project_folder.id)
        if ex_dir is None:
            raise ValueError(
                f"Experiment {ex_dir_name} not found in the project folder"
            )

        makedirs(local_ex_path, exist_ok=True)

        futures: list[Future] = []
        self.client.download_recursively(ex_dir, local_ex_path, executor, futures)
        return futures

    def get_all_experiment_names(self) -> list[str]:
        """Get all experiment names in the project folder.

        Returns:
            list[str]: List of experiment names
        """
        folders = self.client.get_folders_in(self.project_folder.id)
        return [folder.name for folder in folders]
@@ -0,0 +1,92 @@
1
+ from os.path import basename, normpath, isfile, isdir, join
2
+ from os import makedirs
3
+ from glob import glob
4
+ from concurrent.futures import ThreadPoolExecutor, Future
5
+
6
+ from boxsdk import Client, JWTAuth, BoxAPIException
7
+ from boxsdk.object.folder import Folder
8
+ from boxsdk.object.item import Item
9
+
10
+
11
class BoxClient:
    """Thin wrapper around the Box SDK client used by the ``Box`` storage."""

    def __init__(self, key_file_path: str):
        """Authenticate with the JWT settings file at *key_file_path*.

        Raises:
            ValueError: If the key file does not exist.
        """
        try:
            auth = JWTAuth.from_settings_file(key_file_path)
        except FileNotFoundError as err:
            raise ValueError(f"Key file not found at {key_file_path}") from err

        self.client = Client(auth)

    def create_folder(self, name: str, folder_id: str) -> Folder:
        """Create folder *name* inside the folder *folder_id*."""
        return self.create_subfolder(name, folder_id)

    def find_subfolder_by_name(self, name: str, folder_id: str) -> "Folder | None":
        """Return the sub-folder called *name* of *folder_id*, or ``None``."""
        for folder in self.get_folders_in(folder_id):
            if folder.name == name:
                return folder
        return None

    def get_folders_in(self, folder_id: str) -> list[Folder]:
        """Return the items of folder *folder_id* whose type is ``"folder"``."""
        items = self.client.folder(folder_id).get_items()
        return [item for item in items if item.type == "folder"]

    def find_folder(self, folder_id: str) -> Folder:
        """Fetch the folder with id *folder_id*.

        Raises:
            ValueError: If the Box API reports an error for that id.
        """
        try:
            return self.client.folder(folder_id).get()
        except BoxAPIException as err:
            raise ValueError(f"Folder with id {folder_id} not found") from err

    def upload_recursively(
        self,
        local_dir: str,
        folder: Folder,
        executor: ThreadPoolExecutor,
        futures: "list[Future] | None" = None,
    ):
        """Upload the contents of *local_dir* into the Box folder *folder*.

        Sub-directories are created in Box synchronously; file uploads are
        submitted to *executor*.

        Args:
            local_dir: Local directory whose contents are uploaded.
            folder: Destination Box folder.
            executor: Pool on which the file uploads are scheduled.
            futures: List the upload futures are appended to; a new list is
                created when omitted.

        Returns:
            The list holding the futures of the scheduled uploads.
        """
        # A mutable default would be shared (and keep growing) across calls.
        if futures is None:
            futures = []

        # NOTE: "*" does not match names starting with a dot, so hidden
        # files and directories are not uploaded.
        for local_item in glob(local_dir + "/*"):
            if isdir(local_item):
                subfolder_name = basename(normpath(local_item))
                subfolder = self.create_subfolder(subfolder_name, folder.id)
                self.upload_recursively(local_item, subfolder, executor, futures)
            elif isfile(local_item):
                future = executor.submit(
                    self.upload_file, local_item, folder.id, basename(local_item)
                )
                futures.append(future)
        return futures

    def create_subfolder(self, name: str, folder_id: str) -> Folder:
        """Create sub-folder *name* inside the folder *folder_id*."""
        return self.client.folder(folder_id).create_subfolder(name)

    def upload_file(self, local_file_path: str, folder_id: str, name: str) -> None:
        """Upload *local_file_path* into folder *folder_id* under *name*."""
        self.client.folder(folder_id).upload(local_file_path, name)

    def download_recursively(
        self,
        folder: Folder,
        local_dir: str,
        executor: ThreadPoolExecutor,
        futures: "list[Future] | None" = None,
    ):
        """Download the contents of the Box folder *folder* into *local_dir*.

        Local sub-directories are created synchronously; file downloads are
        submitted to *executor*.

        Args:
            folder: Source Box folder.
            local_dir: Existing local directory that receives the contents.
            executor: Pool on which the file downloads are scheduled.
            futures: List the download futures are appended to; a new list is
                created when omitted.

        Returns:
            The list holding the futures of the scheduled downloads; each
            future resolves to the downloaded file's name.
        """
        # A mutable default would be shared (and keep growing) across calls.
        if futures is None:
            futures = []

        for item in self.client.folder(folder.id).get_items():
            if item.type == "folder":
                subfolder_name = join(local_dir, item.name)
                makedirs(subfolder_name, exist_ok=True)
                self.download_recursively(item, subfolder_name, executor, futures)
            elif item.type == "file":
                future = executor.submit(self.download_file, item, local_dir)
                futures.append(future)
        return futures

    def download_file(self, item: Item, local_dir: str) -> str:
        """Write the content of *item* into *local_dir* and return the file name."""
        with open(join(local_dir, item.name), "wb") as f:
            f.write(item.content())
        return item.name
resutil/utils.py ADDED
@@ -0,0 +1,44 @@
1
+ import re
2
+ from rich import print
3
+
4
+
5
def to_base26(n):
    """Encode *n* as a six-letter, lower-case base-26 string (``a`` = 0).

    Only the six least significant base-26 digits are kept, most significant
    first.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    letters = []

    # Peel off six digits, least significant first.
    for _ in range(6):
        n, digit = divmod(n, 26)
        letters.append(alphabet[digit])

    letters.reverse()
    return "".join(letters)
18
+
19
+
20
def parse_result_dirs(text: str):
    """Return every experiment directory name found in *text*.

    A name looks like ``<letters>_<YYYYmmdd>T<HHMMSS>_<comment>``; the comment
    ends at the first whitespace or path separator.

    Args:
        text: Text to scan, e.g. a command line or a path.

    Returns:
        The matched names in order of appearance (empty list if none).
    """
    # Generated names carry a six-letter prefix (see ``to_base26``). Requiring
    # exactly five letters made the match start at the second letter, which
    # truncated the name. Five-letter prefixes are still accepted.
    pattern = r"[a-zA-Z]{5,6}_\d{8}T\d{6}_[^/\s\\]*"
    return re.findall(pattern, text)
24
+
25
+
26
def user_confirm(question: str, default="") -> bool:
    """Ask a yes/no question on the terminal until it is answered.

    Args:
        question: Text shown to the user.
        default: ``"y"`` or ``"n"`` selects the answer used for an empty
            reply; any other value makes an explicit answer mandatory.

    Returns:
        ``True`` for yes, ``False`` for no.
    """
    if default == "y":
        hint = "(Y/n)"
    elif default == "n":
        hint = "(y/N)"
    else:
        hint = "(y/n)"

    prompt = question
    while True:
        print(f"{prompt} [bold]{hint}[/bold]: ", end="")
        answer = str(input()).lower().strip()

        # Only the first character decides: "yes", "y", "nope", ...
        first = answer[:1]
        if first == "y":
            return True
        if first == "n":
            return False
        if len(answer) == 0 and default == "y":
            return True
        if len(answer) == 0 and default == "n":
            return False

        # Unusable reply: ask again, adding the retry prefix only once.
        if "Please try again - " not in prompt:
            prompt = f"Please try again - {prompt}"
@@ -0,0 +1,187 @@
1
+ Metadata-Version: 2.3
2
+ Name: resutil
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Project-URL: Homepage, https://github.com/KatayamaLab/resutil
6
+ Project-URL: Issues, https://github.com/KatayamaLab/resutil/issues
7
+ Author-email: nobkat <katayama@rs.tus.ac.jp>
8
+ License-File: LICENSE
9
+ Requires-Python: >=3.8
10
+ Requires-Dist: boxsdk[jwt]>=3.9.0
11
+ Requires-Dist: gitpython>=3.1.0
12
+ Requires-Dist: pytest-mock>=3.14.0
13
+ Requires-Dist: pyyaml>=6.0.0
14
+ Requires-Dist: rich>=13.7.0
15
+ Description-Content-Type: text/markdown
16
+
17
+ # Resutil
18
+
19
+ ## What is Resutil
20
+
21
+ **Resutil** is a utility to manage experimental result data obtained from Python projects. It also manages dependency such as codes and input data with result data. Data is synced to the cloud for team sharing and collaboration.
22
+
23
+ ## Why choose Resutil?
24
+
25
+ - **Simple**: Easy to install and can be quickly integrated into your project.
26
+ - **High Reproducibility**: Tracks programs, input data, and experimental results.
27
+ - **Open-source and free**: Join the development and help shape future releases of Resutil.
28
+
29
+
30
+ ## Features
31
+
32
+ - Sync experimental data saved in a specific directory to the cloud (currently only for Box) after the program execution finished.
33
+ - Save information necessary to reproduce the experiment in a YAML file.
34
+ - Execution command
35
+ - Input files given as arguments (only files within folders managed by resutil)
36
+ - Git commit hash
37
+ - Uncommitted files
38
+ - Upload experimental data that hasn't been uploaded yet.
39
+ - Download experimental data from the cloud using commands.
40
+
41
+ ## Installation
42
+
43
+ Open terminal and run
44
+
45
+ ```bash
46
+ $ pip install resutil
47
+ ```
48
+
49
+ Get a JWT (JSON Web Token) key from [Box](https://developer.box.com/guides/authentication/jwt/) and save it as `key.json`:
50
+
51
+ ```JSON
52
+ {
53
+ "boxAppSettings": {
54
+ "clientID": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
55
+ "clientSecret": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
56
+ "appAuth": {
57
+ "publicKeyID": "xxxxxxxx",
58
+ "privateKey": "-----BEGIN ENCRYPTED PRIVATE KEY-----\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n-----END ENCRYPTED PRIVATE KEY-----\n",
59
+ "passphrase": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
60
+ }
61
+ },
62
+ "enterpriseID": "796607301"
63
+ }
64
+ ```
65
+
66
+ Initialize resutil
67
+
68
+ ```bash
69
+ $ cd your-project-root-directory
70
+ $ resutil init
71
+ Input project name (resutil): MyProj
72
+ Input directory name to store results (results): results
73
+ Do you want to add .gitignore to results? (Y/n): Y
74
+ Input storage_type (box): box
75
+ Input key file_path (key.json): key.json
76
+ Do you want to add key.json to .gitignore? (Y/n): Y
77
+ Input folder id of base dir: 123456789012
78
+ ✅ Initialized.
79
+ ```
80
+
81
+ The folder id is the Box folder ID, which is the numeric part of the URL when viewing the folder on Box (e.g., https://xxxx.app.box.com/folder/123456789012).
82
+
83
+
84
+ A file named `resutil-conf.yaml` will be created.
85
+
86
+ Modify main function in your project like:
87
+
88
+
89
+ ```python
90
+
91
+ # Import resutil
92
+ import resutil
93
+
94
+ # Add decorator before the main function
95
+ @resutil.main()
96
+ def main_func(params):
97
+ # A directory is automatically created, and its path is stored in params.ex_dir
98
+ ex_dir = params.ex_dir
99
+
100
+ # Execute the program HERE 🚀
101
+ # Resutil will handle the rest
102
+
103
+ if __name__ == "__main__":
104
+ main_func()
105
+ ```
106
+
107
+ ## Commands
108
+
109
+ ### `resutil init`
110
+
111
+ `resutil init` creates `resutil-conf.yaml` in your project folder such as:
112
+
113
+ ```yaml
114
+ project_name: MyProj
115
+ results_dir: results/
116
+ storage_type: box
117
+ storage_config:
118
+ base_dir_id: xxxx
119
+ key_file: key.json
120
+ ```
121
+
122
+ ### `resutil push`
123
+
124
+ The `resutil push` command is used to upload experimental data to the cloud that does not exist in the cloud result directory.
125
+
126
+ `resutil push [exp_name]` uploads a specific experiment directory to the cloud. Dependent directories listed in `resutil-exp.yaml` are automatically uploaded. The `--no-dependency` option disables the automatic dependency upload.
127
+
128
+ `resutil push --all` will upload all experimental data that is not yet in the cloud.
129
+
130
+ This is useful for keeping the data stored in the cloud up-to-date with your local data, especially when multiple people are working on the same project and updating the experimental data.
131
+
132
+ ### `resutil pull`
133
+
134
+ The `resutil pull` command is used to download a specific experimental data from the cloud that does not exist in the local result directory.
135
+
136
+ `resutil pull [exp_name]` downloads an experiment directory from the cloud. Dependent directories listed in `resutil-exp.yaml` are automatically downloaded. The `--no-dependency` option disables the automatic dependency download.
137
+
138
+ `resutil pull --all` will download all experimental data from the cloud that is not currently in your local result directory.
139
+
140
+ This is useful for keeping your local data up-to-date with the data stored in the cloud, especially when multiple people are working on the same project and updating the experimental data.
141
+
142
+
143
+ ### `resutil add`
144
+
145
+ The resutil add command is used to add an experiment directory without executing any code.
146
+
147
+ You can use it as follows: resutil `add [comment] -d [DEPENDENCY1] [DEPENDENCY2]...`. This command adds an experiment directory named "comment" and sets its dependencies. The dependencies are other experiments that this experiment depends on.
148
+
149
+ For example, if you have two experiments `exp1` and `exp2` and a new experiment depends on them, you can add the new experiment with the following command: `resutil add "new experiment" -d exp1 exp2`. This will create a new experiment directory named "new experiment" and set `exp1` and `exp2` as its dependencies.
150
+
151
+ ## Directory structure in the cloud storage
152
+
153
+ ```plain text
154
+ BaseDir # Base directory specified base_dir_id
155
+ ├── MyProj # Project directory
156
+ │   ├── aakuqj_20240511T174522_ex1 # Experiment directory
157
+ │   │   ├── resutil-exp.yaml # Experiment information
158
+ │   │   └── data.txt # Data (example)
159
+ │   ├── aamxrp_20240606T135747_ex2
160
+ │   │   ├── resutil-exp.yaml
161
+ │   │   ├── data.txt
162
+ │   │   └── uncommited_files # Uncommitted files
163
+ │   │       └── main.py
164
+ │   ...
165
+ │
166
+ ├── OtherProj
167
+ │
168
+ ...
169
+ ```
170
+
171
+ Experiment directories are formatted as `xxxxxx_yyyymmddTHHMMSS_comment`, where xxxxxx is timestamped for easy ordering and tab completion in shells.
172
+
173
+ ## resutil-exp.yaml [WIP]
174
+
175
+ Each experiment directory has `resutil-exp.yaml`, which contains information to reproduce experimental results.
176
+
177
+ ``` yaml
178
+ cmd: Execution command
179
+ params: Options at runtime
180
+ hoo: xxx
181
+ bar: 123
182
+ dependency: Dependencies (automatically extracted from directories in the command)
183
+ - ex1
184
+ - ex2
185
+ ```
186
+
187
+
@@ -0,0 +1,19 @@
1
+ resutil/__init__.py,sha256=0uQCHxlSuLTeX96NpADFumYUFmrEkvaLehb-LyRtAjk,52
2
+ resutil/config_file.py,sha256=e-FBMSOeRZdRK7eMH9QVkl7NEtfN70dVURjwmbH3e98,2559
3
+ resutil/core.py,sha256=l9vKLLiih7cs3xJsKNI1YpK7UFJzNs9W6RNJsAgM38E,3287
4
+ resutil/ex_dir.py,sha256=qANI9HtOX-75qb1CsMpXfHsXadK7TvEajEQ2Px76IsM,1951
5
+ resutil/exp_file.py,sha256=ZBKUR-dNx8e16GX8Et7bisbkvQEVb6dnalISfSqcBGQ,397
6
+ resutil/git.py,sha256=xvLQHeNyejmJp3uBobBA3_IMvFFmFN-ADn-lb75ty9Q,858
7
+ resutil/main.py,sha256=bXdNoDujZPWFuym5wsLfUlGHdd1enp98Z1alfPrWizk,3275
8
+ resutil/utils.py,sha256=zTrG5a2GgWnWn9T6pKNKoxQWriLXp15On5c13B5Jdkc,1134
9
+ resutil/cli/__init__.py,sha256=Hn4N9T8PbbE27oPqzsUQEATeAVp8dcGwBaecIhWIIR8,27
10
+ resutil/cli/__main__.py,sha256=IBG0X9Tace8Mfjy0ADwY6ekuqH8QArM3F10FOlzptpA,66
11
+ resutil/cli/cli_main.py,sha256=Vc0hlth3j7ugX5wRLIlQGuoTAveXpqGCHqDKR8IBmLk,7580
12
+ resutil/storage/__init__.py,sha256=kkBWPOvQvdlREuUeA0PBT9rCB4ozsFSBP6PdUCHCiDE,44
13
+ resutil/storage/box/box.py,sha256=fqZrUeqnQa34Q3fmSeDeb9T3Az1rRsVvCnCIfsBnZYQ,2295
14
+ resutil/storage/box/box_client.py,sha256=5N8RZSxnXDtHaXxxS0pl0HWDjaoZzzHppKdnFhaN5f4,3397
15
+ resutil-0.1.0.dist-info/METADATA,sha256=LZKkj69veXSBK_biGi8mJrBxMn7Uyr91Y_ZKKb0AVuo,6466
16
+ resutil-0.1.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
17
+ resutil-0.1.0.dist-info/entry_points.txt,sha256=LJDQZrcPvwHTDYoMUEugji1Yj6PrmDT2shc4o6cFdnE,45
18
+ resutil-0.1.0.dist-info/licenses/LICENSE,sha256=ZNYNRUtLl_vWP43Rhcoj6rf5PKkTR1mOI6tOrha-n5I,1063
19
+ resutil-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.24.2
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ resutil = resutil.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 nobkat
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.