halib 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +94 -0
- halib/common/__init__.py +0 -0
- halib/common/common.py +326 -0
- halib/common/rich_color.py +285 -0
- halib/common.py +151 -0
- halib/csvfile.py +48 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/exp/__init__.py +0 -0
- halib/exp/core/__init__.py +0 -0
- halib/exp/core/base_config.py +167 -0
- halib/exp/core/base_exp.py +147 -0
- halib/exp/core/param_gen.py +170 -0
- halib/exp/core/wandb_op.py +117 -0
- halib/exp/data/__init__.py +0 -0
- halib/exp/data/dataclass_util.py +41 -0
- halib/exp/data/dataset.py +208 -0
- halib/exp/data/torchloader.py +165 -0
- halib/exp/perf/__init__.py +0 -0
- halib/exp/perf/flop_calc.py +190 -0
- halib/exp/perf/gpu_mon.py +58 -0
- halib/exp/perf/perfcalc.py +470 -0
- halib/exp/perf/perfmetrics.py +137 -0
- halib/exp/perf/perftb.py +778 -0
- halib/exp/perf/profiler.py +507 -0
- halib/exp/viz/__init__.py +0 -0
- halib/exp/viz/plot.py +754 -0
- halib/filesys.py +117 -0
- halib/filetype/__init__.py +0 -0
- halib/filetype/csvfile.py +192 -0
- halib/filetype/ipynb.py +61 -0
- halib/filetype/jsonfile.py +19 -0
- halib/filetype/textfile.py +12 -0
- halib/filetype/videofile.py +266 -0
- halib/filetype/yamlfile.py +87 -0
- halib/gdrive.py +179 -0
- halib/gdrive_mkdir.py +41 -0
- halib/gdrive_test.py +37 -0
- halib/jsonfile.py +22 -0
- halib/listop.py +13 -0
- halib/online/__init__.py +0 -0
- halib/online/gdrive.py +229 -0
- halib/online/gdrive_mkdir.py +53 -0
- halib/online/gdrive_test.py +50 -0
- halib/online/projectmake.py +131 -0
- halib/online/tele_noti.py +165 -0
- halib/plot.py +301 -0
- halib/projectmake.py +115 -0
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +157 -0
- halib/research/benchquery.py +131 -0
- halib/research/core/__init__.py +0 -0
- halib/research/core/base_config.py +144 -0
- halib/research/core/base_exp.py +157 -0
- halib/research/core/param_gen.py +108 -0
- halib/research/core/wandb_op.py +117 -0
- halib/research/data/__init__.py +0 -0
- halib/research/data/dataclass_util.py +41 -0
- halib/research/data/dataset.py +208 -0
- halib/research/data/torchloader.py +165 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +137 -0
- halib/research/mics.py +74 -0
- halib/research/params_gen.py +108 -0
- halib/research/perf/__init__.py +0 -0
- halib/research/perf/flop_calc.py +190 -0
- halib/research/perf/gpu_mon.py +58 -0
- halib/research/perf/perfcalc.py +363 -0
- halib/research/perf/perfmetrics.py +137 -0
- halib/research/perf/perftb.py +778 -0
- halib/research/perf/profiler.py +301 -0
- halib/research/perfcalc.py +361 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/viz/__init__.py +0 -0
- halib/research/viz/plot.py +754 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/__init__.py +0 -0
- halib/sys/cmd.py +8 -0
- halib/sys/filesys.py +124 -0
- halib/system/__init__.py +0 -0
- halib/system/_list_pc.csv +6 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +164 -0
- halib/system/path.py +106 -0
- halib/tele_noti.py +166 -0
- halib/textfile.py +13 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict.py +317 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/list.py +17 -0
- halib/utils/listop.py +13 -0
- halib/utils/slack.py +86 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +139 -0
- halib-0.2.30.dist-info/METADATA +237 -0
- halib-0.2.30.dist-info/RECORD +110 -0
- halib-0.2.30.dist-info/WHEEL +5 -0
- halib-0.2.30.dist-info/licenses/LICENSE.txt +17 -0
- halib-0.2.30.dist-info/top_level.txt +1 -0
halib/common.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import rich
|
|
4
|
+
import arrow
|
|
5
|
+
import pathlib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import urllib.parse
|
|
8
|
+
|
|
9
|
+
from rich import print
|
|
10
|
+
from rich.panel import Panel
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.pretty import pprint, Pretty
|
|
13
|
+
from pathlib import PureWindowsPath
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
console = Console()
|
|
17
|
+
|
|
18
|
+
def seed_everything(seed=42):
    """Seed the random, numpy and (when installed) torch RNGs for reproducibility."""
    import random
    import numpy as np

    random.seed(seed)
    np.random.seed(seed)

    # torch is optional: seed it only when it can be imported
    try:
        import torch
    except ImportError:
        pprint("torch not imported, skipping torch seed_everything")
        return

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def now_str(sep_date_time="."):
    """Current local time formatted as 'YYYYMMDD<sep>HHmmss'.

    Only '.', '_' and '-' are accepted as the date/time separator.
    """
    allowed_seps = (".", "_", "-")
    assert sep_date_time in allowed_seps, "sep_date_time must be one of '.', '_', or '-'"
    return arrow.now().format(f"YYYYMMDD{sep_date_time}HHmmss")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def norm_str(in_str):
    """Normalize a string: drop surrounding whitespace, then collapse each
    internal whitespace run into a single underscore.

    Fix: trimming must happen *before* the substitution. The original code
    stripped afterwards, when the edges had already been turned into
    underscores, so "  a b " came back as "_a_b_" instead of "a_b".
    """
    return re.sub(r"\s+", "_", in_str.strip())
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def pprint_box(obj, title="", border_style="green"):
    """
    Pretty print an object in a box.
    """
    pretty = Pretty(obj, expand_all=True)
    panel = Panel(pretty, title=title, border_style=border_style)
    rich.print(panel)
|
|
63
|
+
|
|
64
|
+
def console_rule(msg, do_norm_msg=True, is_end_tag=False):
    """Draw a console rule labelled <msg> (or </msg> when closing a section)."""
    if do_norm_msg:
        msg = norm_str(msg)
    label = f"</{msg}>" if is_end_tag else f"<{msg}>"
    console.rule(label)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def console_log(func):
    """Decorator: bracket each call of *func* with <name> ... </name> console rules.

    Fix: the wrapper now carries functools.wraps, so the decorated function
    keeps its __name__/__doc__ instead of showing up as 'wrapper'.
    """
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        console_rule(func.__name__)
        result = func(*args, **kwargs)
        console_rule(func.__name__, is_end_tag=True)
        return result

    return wrapper
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ConsoleLog:
    """Context manager that brackets a block with <message> ... </message> rules."""

    def __init__(self, message):
        self.message = message

    def __enter__(self):
        console_rule(self.message)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        console_rule(self.message, is_end_tag=True)
        if exc_type is None:
            return
        # report (but do not suppress) any exception raised inside the block
        print(f"An exception of type {exc_type} occurred.")
        print(f"Exception message: {exc_value}")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def linux_to_wins_path(path: str) -> str:
    """
    Convert a Linux-style WSL path (/mnt/c/... or /mnt/d/...) to a Windows-style path (C:\\...).

    Non-WSL paths are left untouched except for backslash normalization.
    """
    is_wsl_style = (
        path.startswith("/mnt/")
        and len(path) > 6
        and path[5].isalpha()
        and path[6] == "/"
    )
    if is_wsl_style:
        # "/mnt/c/foo" -> "C:/foo"
        drive_letter = path[5].upper()
        candidate = f"{drive_letter}:{path[6:]}"
    else:
        candidate = path
    # PureWindowsPath normalizes the separators to backslashes
    return str(PureWindowsPath(candidate))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def pprint_local_path(
    local_path: str, get_wins_path: bool = False, tag: str = ""
) -> str:
    """
    Pretty-print a local path with emoji and clickable file:// URI.

    Args:
        local_path: Path to file or directory (Linux or Windows style).
        get_wins_path: If True on Linux, convert WSL-style path to Windows style before printing.
        tag: Optional console log tag.

    Returns:
        The file URI string.
    """
    resolved = Path(local_path).resolve()
    if resolved.is_file():
        icon = "📄"
    elif resolved.is_dir():
        icon = "📁"
    else:
        icon = "❓"

    file_uri = resolved.as_uri()
    if get_wins_path and os.name == "posix":
        # Try WSL -> Windows conversion; fall back to the POSIX URI when the
        # path was not a /mnt/<drive>/ style path.
        win_path = linux_to_wins_path(str(resolved))
        if win_path != str(resolved):
            file_uri = str(PureWindowsPath(win_path).as_uri())

    content_str = f"{icon} [link={file_uri}]{file_uri}[/link]"
    if tag:
        with ConsoleLog(tag):
            console.print(content_str)
    else:
        console.print(content_str)
    return file_uri
|
halib/csvfile.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from tabulate import tabulate
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def read(file, separator=","):
    """Read a CSV file (path or buffer) into a DataFrame.

    Fix: the delimiter is now passed as the keyword argument ``sep``.
    Since pandas 2.0 ``read_csv`` accepts only the filepath positionally,
    so the original ``pd.read_csv(file, separator)`` call raises TypeError.
    """
    df = pd.read_csv(file, sep=separator)
    return df
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# for append, mode = 'a'
|
|
11
|
+
def write(df, outfile, mode='w', header=True, index_label=None):
    """Write *df* to *outfile* as CSV (a .csv suffix is appended when missing).

    Pass mode='a' to append. When index_label is given the index is written
    under that label; otherwise the index is omitted.
    """
    if not outfile.endswith('.csv'):
        outfile = f'{outfile}.csv'
    kwargs = {"mode": mode, "header": header}
    if index_label is None:
        kwargs["index"] = False
    else:
        kwargs["index_label"] = index_label
    df.to_csv(outfile, **kwargs)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def make_df_with_columns(columns):
    """Return an empty DataFrame that has exactly *columns* as its columns."""
    return pd.DataFrame(columns=columns)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def insert_row(df, row_dict_or_list):
    """Return a new DataFrame with one row appended to *df*.

    Accepts either a list (positional values matched to df.columns) or a
    dict (keyed by column name); anything else raises ValueError.

    Fix: the dict branch used ``DataFrame.append``, which was removed in
    pandas 2.0 — both branches now go through ``pd.concat``.
    """
    if isinstance(row_dict_or_list, list):
        new_row_df = pd.DataFrame([row_dict_or_list], columns=df.columns)
    elif isinstance(row_dict_or_list, dict):
        new_row_df = pd.DataFrame([row_dict_or_list])
    else:
        raise ValueError('invalid row data')
    return pd.concat([df, new_row_df], ignore_index=True)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def display_df(df):
    """Print *df* on stdout as a psql-style table."""
    rendered = tabulate(df, headers='keys', tablefmt='psql', numalign="right")
    print(rendered)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def config_display_pd(max_rows=None, max_columns=None,
                      display_width=1000, col_header_justify='center',
                      precision=10):
    """Configure global pandas display options in a single call."""
    options = {
        'display.max_rows': max_rows,
        'display.max_columns': max_columns,
        'display.width': display_width,
        'display.colheader_justify': col_header_justify,
        'display.precision': precision,
    }
    for option, value in options.items():
        pd.set_option(option, value)
|
halib/cuda.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
from rich.pretty import pprint
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
|
|
5
|
+
console = Console()
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def tcuda():
    """Report GPU availability for torch and tensorflow.

    Returns a dict mapping library name to "Not Installed" or
    "<count> GPU(s) Available", and pretty-prints it between console rules.

    Fix: ``importlib.util`` is a submodule and is not guaranteed to be bound
    by the module-level bare ``import importlib``; import it explicitly so
    ``find_spec`` cannot raise AttributeError.
    """
    import importlib.util

    NOT_INSTALLED = "Not Installed"
    GPU_AVAILABLE = "GPU(s) Available"
    ls_lib = ["torch", "tensorflow"]
    lib_stats = {lib: NOT_INSTALLED for lib in ls_lib}
    for lib in ls_lib:
        spec = importlib.util.find_spec(lib)
        if spec:
            if lib == "torch":
                import torch

                lib_stats[lib] = str(torch.cuda.device_count()) + " " + GPU_AVAILABLE
            elif lib == "tensorflow":
                import tensorflow as tf

                lib_stats[lib] = (
                    str(len(tf.config.list_physical_devices("GPU")))
                    + " "
                    + GPU_AVAILABLE
                )
    console.rule("<CUDA Library Stats>")
    pprint(lib_stats)
    console.rule("</CUDA Library Stats>")
    return lib_stats
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def main():
    """CLI entry point: print the CUDA library stats."""
    tcuda()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
if __name__ == "__main__":
|
|
39
|
+
main()
|
halib/dataset.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# This script create a test version
|
|
2
|
+
# of the watcam (wc) dataset
|
|
3
|
+
# for testing the tflite model
|
|
4
|
+
|
|
5
|
+
from argparse import ArgumentParser
|
|
6
|
+
|
|
7
|
+
from rich import inspect
|
|
8
|
+
from common import console, seed_everything, ConsoleLog
|
|
9
|
+
from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
import os
|
|
12
|
+
import click
|
|
13
|
+
from torchvision.datasets import ImageFolder
|
|
14
|
+
import shutil
|
|
15
|
+
from rich.pretty import pprint
|
|
16
|
+
from system import filesys as fs
|
|
17
|
+
import glob
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_args():
    """Define and parse the command-line arguments of the split/reverse tool."""
    parser = ArgumentParser(description="desc text")
    parser.add_argument("-indir", "--indir", type=str, help="orignal dataset path")
    # "." is a placeholder; main() replaces it with "<indir>_split" next to indir
    parser.add_argument(
        "-outdir", "--outdir", type=str, help="dataset out path", default="."
    )
    parser.add_argument(
        "-val_size", "--val_size", type=float, help="validation size", default=0.2
    )
    parser.add_argument("-seed", "--seed", type=int, help="random seed", default=42)
    # boolean switches, all defaulting to False
    flag_specs = [
        ("inplace", "inplace operation, will overwrite the outdir if exists"),
        ("stratified", "use StratifiedShuffleSplit instead of ShuffleSplit"),
        ("no_train", "only create test set, no train set"),
        ("reverse", "combine train and val set back to original dataset"),
    ]
    for flag, help_text in flag_specs:
        parser.add_argument(
            f"-{flag}", f"--{flag}", action="store_true", help=help_text
        )
    return parser.parse_args()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def move_images(image_paths, target_set_dir):
    """Move every image into target_set_dir/<name of its parent folder>/.

    The parent folder name is treated as the class label, so class structure
    is preserved under target_set_dir.
    """
    for img_path in tqdm(image_paths):
        class_name = os.path.basename(os.path.dirname(img_path))
        out_cls_dir = os.path.join(target_set_dir, class_name)
        os.makedirs(out_cls_dir, exist_ok=True)
        shutil.move(img_path, out_cls_dir)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def split_dataset_cls(
    indir, outdir, val_size, seed, inplace, stratified_split, no_train
):
    """Split a class-folder image dataset into train/ and val/ subfolders.

    Interactive: prints the effective configuration and asks for confirmation
    (click.confirm aborts on "no") before MOVING files on disk.

    Args:
        indir: source dataset root, one subfolder per class (ImageFolder layout).
        outdir: destination root; ignored and replaced by indir when inplace.
        val_size: fraction of samples assigned to the val split.
        seed: RNG seed passed to seed_everything.
        inplace: split inside indir and delete the original class folders after.
        stratified_split: use StratifiedShuffleSplit instead of ShuffleSplit.
        no_train: keep only the val split; train images stay where they are.
    """
    seed_everything(seed)
    console.rule("Config confirm?")
    pprint(locals())
    click.confirm("Continue?", abort=True)
    assert os.path.exists(indir), f"{indir} does not exist"

    if not inplace:
        # NOTE(review): the `not inplace` term is redundant inside this branch;
        # the assert effectively only checks that outdir does not exist yet.
        assert (not inplace) and (
            not os.path.exists(outdir)
        ), f"{outdir} already exists; SKIP ...."

    if inplace:
        outdir = indir
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    console.rule(f"Creating train/val dataset")

    # choose the split strategy; both yield a single train/val partition
    sss = (
        ShuffleSplit(n_splits=1, test_size=val_size)
        if not stratified_split
        else StratifiedShuffleSplit(n_splits=1, test_size=val_size)
    )

    pprint({"split strategy": sss, "indir": indir, "outdir": outdir})
    dataset = ImageFolder(
        root=indir,
        transform=None,
    )
    train_dataset_indices = None
    val_dataset_indices = None  # val here means test
    # n_splits=1, so this loop runs exactly once
    for train_indices, val_indices in sss.split(dataset.samples, dataset.targets):
        train_dataset_indices = train_indices
        val_dataset_indices = val_indices

    # get image paths for train/val split dataset
    train_image_paths = [dataset.imgs[i][0] for i in train_dataset_indices]
    val_image_paths = [dataset.imgs[i][0] for i in val_dataset_indices]

    # start creating train/val folders then move images
    out_train_dir = os.path.join(outdir, "train")
    out_val_dir = os.path.join(outdir, "val")
    if inplace:
        assert os.path.exists(out_train_dir) == False, f"{out_train_dir} already exists"
        assert os.path.exists(out_val_dir) == False, f"{out_val_dir} already exists"

    os.makedirs(out_train_dir)
    os.makedirs(out_val_dir)

    if not no_train:
        with ConsoleLog(f"Moving train images to {out_train_dir} "):
            move_images(train_image_paths, out_train_dir)
    else:
        pprint("test only, skip moving train images")
        # remove out_train_dir
        shutil.rmtree(out_train_dir)

    with ConsoleLog(f"Moving val images to {out_val_dir} "):
        move_images(val_image_paths, out_val_dir)

    if inplace:
        pprint(f"remove all folders, except train and val")
        # indir == outdir in this branch, so removing from indir is equivalent;
        # NOTE(review): shutil.rmtree fails on plain files in outdir — confirm
        # the dataset root contains only class directories.
        for cls_dir in os.listdir(outdir):
            if cls_dir not in ["train", "val"]:
                shutil.rmtree(os.path.join(indir, cls_dir))
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def reverse_split_ds(indir):
    """Undo a train/val split: move all images back to *indir* and delete the
    train/ and val/ subfolders.

    Requires *indir* to contain exactly the two directories 'train' and 'val'.
    move_images re-creates each image's class folder directly under indir.
    """
    console.rule(f"Reversing split dataset <{indir}>...")
    ls_dirs = os.listdir(indir)
    # make sure there are only two dirs 'train' and 'val'
    assert len(ls_dirs) == 2, f"Found more than 2 dirs: {len(ls_dirs) } dirs"
    assert "train" in ls_dirs, f"train dir not found in {indir}"
    assert "val" in ls_dirs, f"val dir not found in {indir}"
    train_dir = os.path.join(indir, "train")
    val_dir = os.path.join(indir, "val")
    # NOTE(review): fs.filter_files_by_extension presumably returns recursive
    # file paths under the given dir — confirm against system.filesys
    all_train_files = fs.filter_files_by_extension(
        train_dir, ["jpg", "jpeg", "png", "bmp", "gif", "tiff"]
    )
    all_val_files = fs.filter_files_by_extension(
        val_dir, ["jpg", "jpeg", "png", "bmp", "gif", "tiff"]
    )
    # move all files from train to indir
    with ConsoleLog(f"Moving train images to {indir} "):
        move_images(all_train_files, indir)
    with ConsoleLog(f"Moving val images to {indir} "):
        move_images(all_val_files, indir)
    with ConsoleLog(f"Removing train and val dirs"):
        # remove train and val dirs
        shutil.rmtree(train_dir)
        shutil.rmtree(val_dir)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def main():
    """CLI entry: split a class-folder dataset, or reverse a previous split."""
    args = parse_args()
    indir = args.indir
    outdir = args.outdir
    if outdir == ".":
        # default destination: sibling folder "<indir name>_split" next to indir
        indir_parent_dir = os.path.dirname(os.path.normpath(indir))
        indir_name = os.path.basename(indir)
        outdir = os.path.join(indir_parent_dir, f"{indir_name}_split")
    if args.reverse:
        reverse_split_ds(indir)
    else:
        split_dataset_cls(
            indir,
            outdir,
            args.val_size,
            args.seed,
            args.inplace,
            args.stratified,
            args.no_train,
        )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
if __name__ == "__main__":
|
|
209
|
+
main()
|
halib/exp/__init__.py
ADDED
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from rich.pretty import pprint
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import List, Optional, TypeVar, Generic
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from dataclass_wizard import YAMLWizard
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class NamedCfg(ABC):
    """
    Abstract base for configuration objects that expose a name.
    """

    @abstractmethod
    def get_name(self):
        """Return the configuration's name; must be implemented by subclasses."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class AutoNamedCfg(YAMLWizard, NamedCfg):
    """
    Mixin that automatically implements get_name() by returning self.name.
    Classes using this MUST have a 'name' field.
    """

    # Configuration name; may be None immediately after YAML load, but is
    # expected to be populated before the config is used.
    name: Optional[str] = None

    def get_name(self):
        """Return the 'name' field (satisfies the NamedCfg contract)."""
        return self.name

    def __post_init__(self):
        # Enforce the "MUST" rule here
        if self.name is None:
            # We allow None during initial load, but it must be set before usage
            # or handled by the loader.
            pass
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
T = TypeVar("T", bound=AutoNamedCfg)


class BaseSelectorCfg(Generic[T]):
    """
    Base class to handle the logic of selecting an item from a list by name.
    """

    def _resolve_selection(self, items: List[T], selected_name: str, context: str) -> T:
        """Return the first item whose .name equals selected_name.

        Raises ValueError when no name is given or no item matches.
        """
        if selected_name is None:
            raise ValueError(f"No {context} selected in the configuration.")

        chosen = next((item for item in items if item.name == selected_name), None)
        if chosen is None:
            raise ValueError(
                f"{context.capitalize()} '{selected_name}' not found in the configuration list."
            )
        return chosen
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ExpBaseCfg(ABC, YAMLWizard):
    """
    Base class for configuration objects.
    What a cfg class must have:
    1 - a dataset cfg
    2 - a metric cfg
    3 - a method cfg
    """

    # Canonical config name; populated lazily by get_cfg_name().
    cfg_name: Optional[str] = None

    # Save to yaml file
    def save_to_outdir(
        self, filename: str = "__config.yaml", outdir=None, override: bool = False
    ) -> None:
        """
        Save the configuration to the output directory.

        When *outdir* is None the directory comes from get_outdir(). Existing
        files are kept unless override=True.
        """
        if outdir is not None:
            output_dir = outdir
        else:
            output_dir = self.get_outdir()
        os.makedirs(output_dir, exist_ok=True)
        assert (output_dir is not None) and (
            os.path.isdir(output_dir)
        ), f"Output directory '{output_dir}' does not exist or is not a directory."
        file_path = os.path.join(output_dir, filename)
        if os.path.exists(file_path) and not override:
            pprint(
                f"File '{file_path}' already exists. Use 'override=True' to overwrite."
            )
        else:
            # method of YAMLWizard to_yaml_file
            self.to_yaml_file(file_path)

    @classmethod
    @abstractmethod
    # load from a custom YAML file
    def from_custom_yaml_file(cls, yaml_file: str):
        """Load a configuration from a custom YAML file."""
        pass

    def get_cfg_name(self, sep: str = "__", *args, **kwargs) -> str:
        """Build, cache (in self.cfg_name) and return the canonical config name
        '<general><sep>ds_<dataset><sep>mt_<method>[<sep><extra>]'."""
        # auto get the config name from dataset, method, metric
        # 2. Generate the canonical Config Name
        name_parts = []
        general_info = self.get_general_cfg().get_name()
        dataset_info = self.get_dataset_cfg().get_name()
        method_info = self.get_method_cfg().get_name()
        name_parts = [
            general_info,
            f"ds_{dataset_info}",
            f"mt_{method_info}",
        ]
        if "extra" in kwargs:
            extra_info = kwargs["extra"]
            assert isinstance(extra_info, str), "'extra' kwarg must be a string."
            name_parts.append(extra_info)
        self.cfg_name = sep.join(name_parts)
        return self.cfg_name

    @abstractmethod
    def get_outdir(self):
        """
        Get the output directory for the configuration.
        This method should be implemented in subclasses.
        """
        return None

    @abstractmethod
    def get_general_cfg(self) -> NamedCfg:
        """
        Get the general configuration like output directory, log settings, SEED, etc.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_dataset_cfg(self) -> NamedCfg:
        """
        Get the dataset configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_method_cfg(self) -> NamedCfg:
        """
        Get the method configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_metric_cfg(self) -> NamedCfg:
        """
        Get the metric configuration.
        This method should be implemented in subclasses.
        """
        pass
|