halib 0.1.99__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {halib-0.1.99 → halib-0.2.1}/PKG-INFO +4 -1
- {halib-0.1.99 → halib-0.2.1}/README.md +3 -0
- halib-0.2.1/halib/research/base_exp.py +157 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/dataset.py +1 -1
- {halib-0.1.99 → halib-0.2.1}/halib/research/metrics.py +4 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/mics.py +8 -2
- {halib-0.1.99 → halib-0.2.1}/halib/research/perfcalc.py +57 -32
- {halib-0.1.99 → halib-0.2.1}/halib.egg-info/PKG-INFO +4 -1
- {halib-0.1.99 → halib-0.2.1}/halib.egg-info/SOURCES.txt +0 -1
- {halib-0.1.99 → halib-0.2.1}/setup.py +1 -1
- halib-0.1.99/guide_publish_pip.pdf +0 -0
- halib-0.1.99/halib/research/base_exp.py +0 -100
- {halib-0.1.99 → halib-0.2.1}/.gitignore +0 -0
- {halib-0.1.99 → halib-0.2.1}/GDriveFolder.txt +0 -0
- {halib-0.1.99 → halib-0.2.1}/LICENSE.txt +0 -0
- {halib-0.1.99 → halib-0.2.1}/MANIFEST.in +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/__init__.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/common.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/cuda.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/filetype/__init__.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/filetype/csvfile.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/filetype/ipynb.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/filetype/jsonfile.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/filetype/textfile.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/filetype/videofile.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/filetype/yamlfile.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/online/__init__.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/online/gdrive.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/online/gdrive_mkdir.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/online/gdrive_test.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/online/projectmake.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/__init__.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/base_config.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/flop_csv.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/flops.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/params_gen.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/perftb.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/plot.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/profiler.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/torchloader.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/research/wandb_op.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/rich_color.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/system/__init__.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/system/cmd.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/system/filesys.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/utils/__init__.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/utils/dataclass_util.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/utils/dict_op.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/utils/gpu_mon.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/utils/listop.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/utils/tele_noti.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib/utils/video.py +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib.egg-info/dependency_links.txt +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib.egg-info/requires.txt +0 -0
- {halib-0.1.99 → halib-0.2.1}/halib.egg-info/top_level.txt +0 -0
- {halib-0.1.99 → halib-0.2.1}/setup.cfg +0 -0
{halib-0.1.99 → halib-0.2.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: halib
-Version: 0.1
+Version: 0.2.1
 Summary: Small library for common tasks
 Author: Hoang Van Ha
 Author-email: hoangvanhauit@gmail.com
@@ -53,6 +53,9 @@ Dynamic: summary

 # Helper package for coding and automation

+**Version 0.2.01**
++ `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
 **Version 0.1.99**
 + `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)

{halib-0.1.99 → halib-0.2.1}/README.md

@@ -1,5 +1,8 @@
 # Helper package for coding and automation

+**Version 0.2.01**
++ `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
 **Version 0.1.99**
 + `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)

halib-0.2.1/halib/research/base_exp.py (new file)

@@ -0,0 +1,157 @@
+from abc import ABC, abstractmethod
+from typing import Tuple, Any, Optional
+from ..research.base_config import ExpBaseConfig
+from ..research.perfcalc import PerfCalc
+from ..research.metrics import MetricsBackend
+
+# ! SEE https://github.com/hahv/base_exp for sample usage
+class BaseExperiment(PerfCalc, ABC):
+    """
+    Base class for experiments.
+    Orchestrates the experiment pipeline using a pluggable metrics backend.
+    """
+
+    def __init__(self, config: ExpBaseConfig):
+        self.config = config
+        self.metric_backend = None
+        # Flag to track if init_general/prepare_dataset has run
+        self._is_env_ready = False
+
+    # -----------------------
+    # PerfCalc Required Methods
+    # -----------------------
+    def get_dataset_name(self):
+        return self.config.get_dataset_cfg().get_name()
+
+    def get_experiment_name(self):
+        return self.config.get_cfg_name()
+
+    def get_metric_backend(self):
+        if not self.metric_backend:
+            self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
+        return self.metric_backend
+
+    # -----------------------
+    # Abstract Experiment Steps
+    # -----------------------
+    @abstractmethod
+    def init_general(self, general_cfg):
+        """Setup general settings like SEED, logging, env variables."""
+        pass
+
+    @abstractmethod
+    def prepare_dataset(self, dataset_cfg):
+        """Load/prepare dataset."""
+        pass
+
+    @abstractmethod
+    def prepare_metrics(self, metric_cfg) -> MetricsBackend:
+        """
+        Prepare the metrics for the experiment.
+        This method should be implemented in subclasses.
+        """
+        pass
+
+    @abstractmethod
+    def before_exec_exp_once(self, *args, **kwargs):
+        """Optional: any setup before exec_exp. Note this is called once per run_exp."""
+        pass
+
+    @abstractmethod
+    def exec_exp(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
+        """Run experiment process, e.g.: training/evaluation loop.
+        Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
+        """
+        pass
+
+    @abstractmethod
+    def exec_eval(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
+        """Run evaluation process.
+        Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
+        """
+        pass
+
+    # -----------------------
+    # Internal Helpers
+    # -----------------------
+    def _validate_and_unpack(self, results):
+        if results is None:
+            return None
+        if not isinstance(results, (tuple, list)) or len(results) != 2:
+            raise ValueError("exec must return (metrics_data, extra_data)")
+        return results[0], results[1]
+
+    def _prepare_environment(self, force_reload: bool = False):
+        """
+        Common setup. Skips if already initialized, unless force_reload is True.
+        """
+        if self._is_env_ready and not force_reload:
+            # Environment is already prepared, skipping setup.
+            return
+
+        # 1. Run Setup
+        self.init_general(self.config.get_general_cfg())
+        self.prepare_dataset(self.config.get_dataset_cfg())
+
+        # 2. Update metric backend (refresh if needed)
+        self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
+
+        # 3. Mark as ready
+        self._is_env_ready = True
+
+    # -----------------------
+    # Main Experiment Runner
+    # -----------------------
+    def run_exp(self, should_calc_metrics=True, reload_env=False, *args, **kwargs):
+        """
+        Run the whole experiment pipeline.
+        :param reload_env: If True, forces dataset/general init to run again.
+        :param should_calc_metrics: Whether to calculate and save metrics after execution.
+        :kwargs Params:
+            + 'outfile' to save csv file results,
+            + 'outdir' to set output directory for experiment results.
+            + 'return_df' to return a DataFrame of results instead of a dictionary.
+
+        Full pipeline:
+        1. Init
+        2. Prepare Environment (General + Dataset + Metrics)
+        3. Save Config
+        4. Execute
+        5. Calculate & Save Metrics
+        """
+        self._prepare_environment(force_reload=reload_env)
+
+        # Any pre-exec setup (loading models, etc)
+        self.before_exec_exp_once(*args, **kwargs)
+        # Save config before running
+        self.config.save_to_outdir()
+
+        # Execute experiment
+        results = self.exec_exp(*args, **kwargs)
+
+        if should_calc_metrics and results is not None:
+            metrics_data, extra_data = self._validate_and_unpack(results)
+            # Calculate & Save metrics
+            perf_results = self.calc_perfs(
+                raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
+            )
+            return perf_results
+        else:
+            return results
+
+    # -----------------------
+    # Main Experiment Evaluator
+    # -----------------------
+    def eval_exp(self, reload_env=False, *args, **kwargs):
+        """
+        Run evaluation only.
+        :param reload_env: If True, forces dataset/general init to run again.
+        """
+        self._prepare_environment(force_reload=reload_env)
+        results = self.exec_eval(*args, **kwargs)
+        if results is not None:
+            metrics_data, extra_data = self._validate_and_unpack(results)
+            return self.calc_perfs(
+                raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
+            )
+        return None
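The new run_exp/eval_exp split is easiest to see from a subclass. Below is a minimal structural sketch, not part of halib: MyMetricsBackend, the metric payload shapes, and the driver lines are hypothetical placeholders for whatever concrete MetricsBackend, config, and data your experiment uses.

from halib.research.base_exp import BaseExperiment


class MyExperiment(BaseExperiment):
    # Each hook below is abstract in BaseExperiment and must be implemented.
    def init_general(self, general_cfg):
        ...  # e.g. seed RNGs, configure logging

    def prepare_dataset(self, dataset_cfg):
        ...  # e.g. load train/test splits

    def prepare_metrics(self, metric_cfg):
        # Must return a MetricsBackend; MyMetricsBackend is a hypothetical concrete subclass.
        return MyMetricsBackend(metric_cfg)

    def before_exec_exp_once(self, *args, **kwargs):
        ...  # e.g. build or load the model (called once per run_exp)

    def exec_exp(self, *args, **kwargs):
        # Train/run, then return (raw_metrics_data, extra_data) for calc_perfs, or None.
        return {"accuracy": ([0, 1, 1], [0, 1, 0])}, {"epochs": 10}

    def exec_eval(self, *args, **kwargs):
        # Evaluation-only path used by eval_exp() (new in 0.2.1).
        return {"accuracy": ([0, 1, 1], [0, 1, 0])}, {"split": "test"}


# exp = MyExperiment(config)        # config: a concrete ExpBaseConfig instance
# exp.run_exp(outdir="results")     # full pipeline, then metrics are calculated and saved
# exp.eval_exp()                    # evaluation only, reuses the already prepared environment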
{halib-0.1.99 → halib-0.2.1}/halib/research/dataset.py

@@ -13,8 +13,8 @@ from rich.pretty import pprint
 from torchvision.datasets import ImageFolder
 from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit

-from ..system import filesys as fs
 from ..common import console, seed_everything, ConsoleLog
+from ..system import filesys as fs

 def parse_args():
     parser = ArgumentParser(description="desc text")
{halib-0.1.99 → halib-0.2.1}/halib/research/metrics.py

@@ -11,6 +11,10 @@ class MetricsBackend(ABC):
     def __init__(self, metrics_info: Union[List[str], Dict[str, Any]]):
         """
         Initialize the backend with optional metrics_info.
+        `metrics_info` can be either:
+        - A list of metric names (strings). e.g., ["accuracy", "precision"]
+        - A dict mapping metric names with object that defines how to compute them. e.g: {"accuracy": torchmetrics.Accuracy(), "precision": torchmetrics.Precision()}
+
         """
         self.metric_info = metrics_info
         self.validate_metrics_info(self.metric_info)
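The two metrics_info shapes described in the new docstring look like this in practice. This is an illustration only, not code from the package; note that recent torchmetrics releases require the task argument, unlike the bare constructors shown in the docstring.

import torchmetrics

# Shape 1: a plain list of metric names; the backend decides how to compute them.
metrics_as_names = ["accuracy", "precision"]

# Shape 2: a dict mapping each metric name to an object that computes it.
metrics_as_objects = {
    "accuracy": torchmetrics.Accuracy(task="multiclass", num_classes=10),
    "precision": torchmetrics.Precision(task="multiclass", num_classes=10),
}

# Either form is passed to a concrete MetricsBackend subclass, e.g.
# backend = MyTorchMetricsBackend(metrics_as_objects)  # MyTorchMetricsBackend is hypothetical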
{halib-0.1.99 → halib-0.2.1}/halib/research/mics.py

@@ -9,7 +9,7 @@ PC_NAME_TO_ABBR = {
     "DESKTOP-5IRHU87": "MSI_Laptop",
     "DESKTOP-96HQCNO": "4090_SV",
     "DESKTOP-Q2IKLC0": "4GPU_SV",
-    "DESKTOP-QNS3DNF": "1GPU_SV"
+    "DESKTOP-QNS3DNF": "1GPU_SV",
 }

 DEFAULT_ABBR_WORKING_DISK = {
@@ -19,19 +19,25 @@ DEFAULT_ABBR_WORKING_DISK = {
     "4GPU_SV": "D:",
 }

+
 def list_PCs(show=True):
-    df = pd.DataFrame(
+    df = pd.DataFrame(
+        list(PC_NAME_TO_ABBR.items()), columns=["PC Name", "Abbreviation"]
+    )
     if show:
         csvfile.fn_display_df(df)
     return df

+
 def get_PC_name():
     return platform.node()

+
 def get_PC_abbr_name():
     pc_name = get_PC_name()
     return PC_NAME_TO_ABBR.get(pc_name, "Unknown")

+
 # ! This funcction search for full paths in the obj and normalize them according to the current platform and working disk
 # ! E.g: "E:/zdataset/DFire", but working_disk: "D:", current_platform: "windows" => "D:/zdataset/DFire"
 # ! E.g: "E:/zdataset/DFire", but working_disk: "D:", current_platform: "linux" => "/mnt/d/zdataset/DFire"
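For context, the helpers touched here behave roughly as follows; this is illustrative only, and the output depends on the machine's hostname.

from halib.research.mics import get_PC_name, get_PC_abbr_name, list_PCs

print(get_PC_name())       # platform.node(), e.g. "DESKTOP-96HQCNO"
print(get_PC_abbr_name())  # "4090_SV" if the hostname is in PC_NAME_TO_ABBR, else "Unknown"
df = list_PCs(show=False)  # DataFrame with "PC Name" and "Abbreviation" columns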
{halib-0.1.99 → halib-0.2.1}/halib/research/perfcalc.py

@@ -3,12 +3,9 @@ import glob
 from typing import Optional, Tuple
 import pandas as pd

-from rich.pretty import pprint
-
 from abc import ABC, abstractmethod
 from collections import OrderedDict

-from ..filetype import csvfile
 from ..system import filesys as fs
 from ..common import now_str
 from ..research.perftb import PerfTB
@@ -19,6 +16,7 @@ REQUIRED_COLS = ["experiment", "dataset"]
 CSV_FILE_POSTFIX = "__perf"
 METRIC_PREFIX = "metric_"

+
 class PerfCalc(ABC): # Abstract base class for performance calculation
     @abstractmethod
     def get_experiment_name(self) -> str:
@@ -44,29 +42,32 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
         """
         pass

-    def valid_proc_extra_data(
-        self, proc_extra_data
-    ):
+    def valid_proc_extra_data(self, proc_extra_data):
         # make sure that all items in proc_extra_data are dictionaries, with same keys
         if proc_extra_data is None or len(proc_extra_data) == 0:
             return
         if not all(isinstance(item, dict) for item in proc_extra_data):
             raise TypeError("All items in proc_extra_data must be dictionaries")

-        if not all(
-
+        if not all(
+            item.keys() == proc_extra_data[0].keys() for item in proc_extra_data
+        ):
+            raise ValueError(
+                "All dictionaries in proc_extra_data must have the same keys"
+            )

-    def valid_proc_metric_raw_data(
-        self, metric_names, proc_metric_raw_data
-    ):
+    def valid_proc_metric_raw_data(self, metric_names, proc_metric_raw_data):
         # make sure that all items in proc_metric_raw_data are dictionaries, with same keys as metric_names
-        assert
-
+        assert (
+            isinstance(proc_metric_raw_data, list) and len(proc_metric_raw_data) > 0
+        ), "raw_data_for_metrics must be a non-empty list of dictionaries"

         # make sure that all items in proc_metric_raw_data are dictionaries with keys as metric_names
         if not all(isinstance(item, dict) for item in proc_metric_raw_data):
             raise TypeError("All items in raw_data_for_metrics must be dictionaries")
-        if not all(
+        if not all(
+            set(item.keys()) == set(metric_names) for item in proc_metric_raw_data
+        ):
             raise ValueError(
                 "All dictionaries in raw_data_for_metrics must have the same keys as metric_names"
             )
@@ -75,21 +76,30 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
     def calc_exp_perf_metrics(
         self, metric_names, raw_metrics_data, extra_data=None, *args, **kwargs
     ):
-        assert isinstance(raw_metrics_data, dict) or isinstance(
-
+        assert isinstance(raw_metrics_data, dict) or isinstance(
+            raw_metrics_data, list
+        ), "raw_data_for_metrics must be a dictionary or a list"

         if extra_data is not None:
-            assert isinstance(
-
+            assert isinstance(
+                extra_data, type(raw_metrics_data)
+            ), "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
         # prepare raw_metric data for processing
-        proc_metric_raw_data_ls =
+        proc_metric_raw_data_ls = (
+            raw_metrics_data
+            if isinstance(raw_metrics_data, list)
+            else [raw_metrics_data.copy()]
+        )
         self.valid_proc_metric_raw_data(metric_names, proc_metric_raw_data_ls)
         # prepare extra data for processing
         proc_extra_data_ls = []
         if extra_data is not None:
-            proc_extra_data_ls =
-
-
+            proc_extra_data_ls = (
+                extra_data if isinstance(extra_data, list) else [extra_data.copy()]
+            )
+            assert len(proc_extra_data_ls) == len(
+                proc_metric_raw_data_ls
+            ), "extra_data must have the same length as raw_data_for_metrics if it is a list"
         # validate the extra_data
         self.valid_proc_extra_data(proc_extra_data_ls)

@@ -102,7 +112,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                 "experiment": self.get_experiment_name(),
             }
             custom_fields = []
-            if len(proc_extra_data_ls)> 0:
+            if len(proc_extra_data_ls) > 0:
                 # add extra data to the output dictionary
                 extra_data_item = proc_extra_data_ls[idx]
                 out_dict.update(extra_data_item)
@@ -110,7 +120,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
             metric_results = metrics_backend.calc_metrics(
                 metrics_data_dict=raw_metrics_data, *args, **kwargs
             )
-            metric_results_prefix = {
+            metric_results_prefix = {
+                f"metric_{k}": v for k, v in metric_results.items()
+            }
             out_dict.update(metric_results_prefix)
             ordered_cols = (
                 REQUIRED_COLS + custom_fields + list(metric_results_prefix.keys())
@@ -126,7 +138,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
     #! outfile - if provided, will save the output to a CSV file with the given path
     #! outdir - if provided, will save the output to a CSV file in the given directory with a generated filename
     #! return_df - if True, will return a DataFrame instead of a dictionary
-    def
+    def calc_perfs(
         self,
         raw_metrics_data: Union[List[dict], dict],
         extra_data: Optional[Union[List[dict], dict]] = None,
@@ -140,9 +152,11 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
         """
         metric_names = self.get_metric_backend().metric_names
         out_dict_list = self.calc_exp_perf_metrics(
-            metric_names=metric_names,
+            metric_names=metric_names,
+            raw_metrics_data=raw_metrics_data,
             extra_data=extra_data,
-            *args,
+            *args,
+            **kwargs,
         )
         csv_outfile = kwargs.get("outfile", None)
         if csv_outfile is not None:
@@ -176,13 +190,18 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
         return "__perf.csv" in exp_file_name

     @classmethod
-    def
-        cls,
+    def get_perftb_for_multi_exps(
+        cls,
+        indir: str,
+        exp_csv_filter_fn=default_exp_csv_filter_fn,
+        include_file_name=False,
+        csv_sep=";",
     ) -> PerfTB:
         """
         Generate a performance report by scanning experiment subdirectories.
         Must return a dictionary with keys as metric names and values as performance tables.
         """
+
         def get_df_for_all_exp_perf(csv_perf_files, csv_sep=";"):
             """
             Create a single DataFrame from all CSV files.
@@ -194,7 +213,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
             for csv_file in csv_perf_files:
                 temp_df = pd.read_csv(csv_file, sep=csv_sep)
                 if FILE_NAME_COL:
-                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                        csv_file, split_file_ext=False
+                    )
                 # csvfile.fn_display_df(temp_df)
                 temp_df_cols = temp_df.columns.tolist()
                 for col in temp_df_cols:
@@ -205,7 +226,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
             for csv_file in csv_perf_files:
                 temp_df = pd.read_csv(csv_file, sep=csv_sep)
                 if FILE_NAME_COL:
-                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                        csv_file, split_file_ext=False
+                    )
                 # Drop all-NA columns to avoid dtype inconsistency
                 temp_df = temp_df.dropna(axis=1, how="all")
                 # ensure all columns are present in the final DataFrame
@@ -215,7 +238,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                 df = pd.concat([df, temp_df], ignore_index=True)
             # assert that REQUIRED_COLS are present in the DataFrame
             # pprint(df.columns.tolist())
-            sticky_cols = REQUIRED_COLS + (
+            sticky_cols = REQUIRED_COLS + (
+                [FILE_NAME_COL] if include_file_name else []
+            )  # columns that must always be present
             for col in sticky_cols:
                 if col not in df.columns:
                     raise ValueError(
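Based only on the signatures and docstring comments visible in this diff, the renamed calc_perfs and the now fully spelled-out get_perftb_for_multi_exps would be used roughly like the sketch below; exp is assumed to be an instance of a concrete BaseExperiment subclass, and the metric payload shape depends on the chosen MetricsBackend.

from halib.research.perfcalc import PerfCalc


def save_single_run_perf(exp):
    # Per run: compute metrics and save them to a "...__perf.csv" file.
    return exp.calc_perfs(
        raw_metrics_data={"accuracy": ([0, 1, 1], [0, 1, 0])},  # backend-specific shape
        extra_data={"fold": 0},   # extra columns alongside "experiment"/"dataset"
        outdir="results/exp1",    # or outfile="results/exp1/run__perf.csv"
        return_df=True,           # DataFrame instead of a dict
    )


def collect_all_runs():
    # Across runs: merge every per-experiment "*__perf.csv" under indir into one PerfTB.
    return PerfCalc.get_perftb_for_multi_exps(
        indir="results",
        include_file_name=True,
        csv_sep=";",
    )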
{halib-0.1.99 → halib-0.2.1}/halib.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: halib
-Version: 0.1
+Version: 0.2.1
 Summary: Small library for common tasks
 Author: Hoang Van Ha
 Author-email: hoangvanhauit@gmail.com
@@ -53,6 +53,9 @@ Dynamic: summary

 # Helper package for coding and automation

+**Version 0.2.01**
++ `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
 **Version 0.1.99**
 + `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)

halib-0.1.99/guide_publish_pip.pdf

Binary file
halib-0.1.99/halib/research/base_exp.py (deleted)

@@ -1,100 +0,0 @@
-from abc import ABC, abstractmethod
-
-from ..research.base_config import ExpBaseConfig
-from ..research.perfcalc import PerfCalc
-from ..research.metrics import MetricsBackend
-
-# ! SEE https://github.com/hahv/base_exp for sample usage
-class BaseExperiment(PerfCalc, ABC):
-    """
-    Base class for experiments.
-    Orchestrates the experiment pipeline using a pluggable metrics backend.
-    """
-
-    def __init__(self, config: ExpBaseConfig):
-        self.config = config
-        self.metric_backend = None
-
-    # -----------------------
-    # PerfCalc Required Methods
-    # -----------------------
-    def get_dataset_name(self):
-        return self.config.get_dataset_cfg().get_name()
-
-    def get_experiment_name(self):
-        return self.config.get_cfg_name()
-
-    def get_metric_backend(self):
-        if not self.metric_backend:
-            self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
-        return self.metric_backend
-
-    # -----------------------
-    # Abstract Experiment Steps
-    # -----------------------
-    @abstractmethod
-    def init_general(self, general_cfg):
-        """Setup general settings like SEED, logging, env variables."""
-        pass
-
-    @abstractmethod
-    def prepare_dataset(self, dataset_cfg):
-        """Load/prepare dataset."""
-        pass
-
-    @abstractmethod
-    def prepare_metrics(self, metric_cfg) -> MetricsBackend:
-        """
-        Prepare the metrics for the experiment.
-        This method should be implemented in subclasses.
-        """
-        pass
-
-    @abstractmethod
-    def exec_exp(self, *args, **kwargs):
-        """Run experiment process, e.g.: training/evaluation loop.
-        Return: raw_metrics_data, and extra_data as input for calc_and_save_exp_perfs
-        """
-        pass
-
-    def eval_exp(self):
-        """Optional: re-run evaluation from saved results."""
-        pass
-
-    # -----------------------
-    # Main Experiment Runner
-    # -----------------------
-    def run_exp(self, do_calc_metrics=True, *args, **kwargs):
-        """
-        Run the whole experiment pipeline.
-        Params:
-            + 'outfile' to save csv file results,
-            + 'outdir' to set output directory for experiment results.
-            + 'return_df' to return a DataFrame of results instead of a dictionary.
-
-        Full pipeline:
-        1. Init
-        2. Dataset
-        3. Metrics Preparation
-        4. Save Config
-        5. Execute
-        6. Calculate & Save Metrics
-        """
-        self.init_general(self.config.get_general_cfg())
-        self.prepare_dataset(self.config.get_dataset_cfg())
-        self.prepare_metrics(self.config.get_metric_cfg())
-
-        # Save config before running
-        self.config.save_to_outdir()
-
-        # Execute experiment
-        results = self.exec_exp(*args, **kwargs)
-        if do_calc_metrics:
-            metrics_data, extra_data = results
-            # Calculate & Save metrics
-            perf_results = self.calc_and_save_exp_perfs(
-                raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
-            )
-            return perf_results
-        else:
-            return results
All remaining files listed above (+0 -0) contain no changes.