halib 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/exp/core/base_config.py +31 -8
- halib/exp/core/base_exp.py +140 -119
- halib/exp/perf/perfcalc.py +154 -77
- halib/filetype/yamlfile.py +6 -14
- halib/system/path.py +32 -11
- {halib-0.2.5.dist-info → halib-0.2.7.dist-info}/METADATA +3 -2
- {halib-0.2.5.dist-info → halib-0.2.7.dist-info}/RECORD +10 -10
- {halib-0.2.5.dist-info → halib-0.2.7.dist-info}/WHEEL +0 -0
- {halib-0.2.5.dist-info → halib-0.2.7.dist-info}/licenses/LICENSE.txt +0 -0
- {halib-0.2.5.dist-info → halib-0.2.7.dist-info}/top_level.txt +0 -0
halib/exp/core/base_config.py
CHANGED
|
@@ -42,8 +42,10 @@ class AutoNamedCfg(YAMLWizard, NamedCfg):
|
|
|
42
42
|
# or handled by the loader.
|
|
43
43
|
pass
|
|
44
44
|
|
|
45
|
+
|
|
45
46
|
T = TypeVar("T", bound=AutoNamedCfg)
|
|
46
47
|
|
|
48
|
+
|
|
47
49
|
class BaseSelectorCfg(Generic[T]):
|
|
48
50
|
"""
|
|
49
51
|
Base class to handle the logic of selecting an item from a list by name.
|
|
@@ -72,6 +74,8 @@ class ExpBaseCfg(ABC, YAMLWizard):
|
|
|
72
74
|
3 - a method cfg
|
|
73
75
|
"""
|
|
74
76
|
|
|
77
|
+
cfg_name: Optional[str] = None
|
|
78
|
+
|
|
75
79
|
# Save to yaml file
|
|
76
80
|
def save_to_outdir(
|
|
77
81
|
self, filename: str = "__config.yaml", outdir=None, override: bool = False
|
|
@@ -103,13 +107,24 @@ class ExpBaseCfg(ABC, YAMLWizard):
|
|
|
103
107
|
"""Load a configuration from a custom YAML file."""
|
|
104
108
|
pass
|
|
105
109
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
110
|
+
def get_cfg_name(self, sep: str = "__", *args, **kwargs) -> str:
|
|
111
|
+
# auto get the config name from dataset, method, metric
|
|
112
|
+
# 2. Generate the canonical Config Name
|
|
113
|
+
name_parts = []
|
|
114
|
+
general_info = self.get_general_cfg().get_name()
|
|
115
|
+
dataset_info = self.get_dataset_cfg().get_name()
|
|
116
|
+
method_info = self.get_method_cfg().get_name()
|
|
117
|
+
name_parts = [
|
|
118
|
+
general_info,
|
|
119
|
+
f"ds_{dataset_info}",
|
|
120
|
+
f"mt_{method_info}",
|
|
121
|
+
]
|
|
122
|
+
if "extra" in kwargs:
|
|
123
|
+
extra_info = kwargs["extra"]
|
|
124
|
+
assert isinstance(extra_info, str), "'extra' kwarg must be a string."
|
|
125
|
+
name_parts.append(extra_info)
|
|
126
|
+
self.cfg_name = sep.join(name_parts)
|
|
127
|
+
return self.cfg_name
|
|
113
128
|
|
|
114
129
|
@abstractmethod
|
|
115
130
|
def get_outdir(self):
|
|
@@ -120,7 +135,7 @@ class ExpBaseCfg(ABC, YAMLWizard):
|
|
|
120
135
|
return None
|
|
121
136
|
|
|
122
137
|
@abstractmethod
|
|
123
|
-
def get_general_cfg(self):
|
|
138
|
+
def get_general_cfg(self) -> NamedCfg:
|
|
124
139
|
"""
|
|
125
140
|
Get the general configuration like output directory, log settings, SEED, etc.
|
|
126
141
|
This method should be implemented in subclasses.
|
|
@@ -135,6 +150,14 @@ class ExpBaseCfg(ABC, YAMLWizard):
|
|
|
135
150
|
"""
|
|
136
151
|
pass
|
|
137
152
|
|
|
153
|
+
@abstractmethod
|
|
154
|
+
def get_method_cfg(self) -> NamedCfg:
|
|
155
|
+
"""
|
|
156
|
+
Get the method configuration.
|
|
157
|
+
This method should be implemented in subclasses.
|
|
158
|
+
"""
|
|
159
|
+
pass
|
|
160
|
+
|
|
138
161
|
@abstractmethod
|
|
139
162
|
def get_metric_cfg(self) -> NamedCfg:
|
|
140
163
|
"""
|
halib/exp/core/base_exp.py
CHANGED
|
@@ -1,147 +1,168 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from rich.pretty import pprint
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import
|
|
3
|
-
from .base_config import ExpBaseCfg
|
|
4
|
-
from ..perf.perfcalc import PerfCalc
|
|
5
|
-
from ..perf.perfmetrics import MetricsBackend
|
|
4
|
+
from typing import List, Optional, TypeVar, Generic
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def on_before_run(self, exp): pass
|
|
11
|
-
def on_after_run(self, exp, results): pass
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from dataclass_wizard import YAMLWizard
|
|
12
9
|
|
|
13
10
|
|
|
14
|
-
|
|
15
|
-
class BaseExp(PerfCalc, ABC):
|
|
11
|
+
class NamedCfg(ABC):
|
|
16
12
|
"""
|
|
17
|
-
Base class for
|
|
18
|
-
|
|
13
|
+
Base class for named configurations.
|
|
14
|
+
All configurations should have a name.
|
|
19
15
|
"""
|
|
20
16
|
|
|
21
|
-
def __init__(self, config: ExpBaseCfg):
|
|
22
|
-
self.config = config
|
|
23
|
-
self.metric_backend = None
|
|
24
|
-
# Flag to track if init_general/prepare_dataset has run
|
|
25
|
-
self._is_env_ready = False
|
|
26
|
-
self.hooks = []
|
|
27
|
-
|
|
28
|
-
def register_hook(self, hook: ExpHook):
|
|
29
|
-
self.hooks.append(hook)
|
|
30
|
-
|
|
31
|
-
def _trigger_hooks(self, method_name: str, *args, **kwargs):
|
|
32
|
-
for hook in self.hooks:
|
|
33
|
-
method = getattr(hook, method_name, None)
|
|
34
|
-
if callable(method):
|
|
35
|
-
method(*args, **kwargs)
|
|
36
|
-
|
|
37
|
-
# -----------------------
|
|
38
|
-
# PerfCalc Required Methods
|
|
39
|
-
# -----------------------
|
|
40
|
-
def get_dataset_name(self):
|
|
41
|
-
return self.config.get_dataset_cfg().get_name()
|
|
42
|
-
|
|
43
|
-
def get_experiment_name(self):
|
|
44
|
-
return self.config.get_cfg_name()
|
|
45
|
-
|
|
46
|
-
def get_metric_backend(self):
|
|
47
|
-
if not self.metric_backend:
|
|
48
|
-
self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
|
|
49
|
-
return self.metric_backend
|
|
50
|
-
|
|
51
|
-
# -----------------------
|
|
52
|
-
# Abstract Experiment Steps
|
|
53
|
-
# -----------------------
|
|
54
17
|
@abstractmethod
|
|
55
|
-
def
|
|
56
|
-
"""
|
|
18
|
+
def get_name(self):
|
|
19
|
+
"""
|
|
20
|
+
Get the name of the configuration.
|
|
21
|
+
This method should be implemented in subclasses.
|
|
22
|
+
"""
|
|
57
23
|
pass
|
|
58
24
|
|
|
25
|
+
|
|
26
|
+
@dataclass
|
|
27
|
+
class AutoNamedCfg(YAMLWizard, NamedCfg):
|
|
28
|
+
"""
|
|
29
|
+
Mixin that automatically implements get_name() by returning self.name.
|
|
30
|
+
Classes using this MUST have a 'name' field.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
name: Optional[str] = None
|
|
34
|
+
|
|
35
|
+
def get_name(self):
|
|
36
|
+
return self.name
|
|
37
|
+
|
|
38
|
+
def __post_init__(self):
|
|
39
|
+
# Enforce the "MUST" rule here
|
|
40
|
+
if self.name is None:
|
|
41
|
+
# We allow None during initial load, but it must be set before usage
|
|
42
|
+
# or handled by the loader.
|
|
43
|
+
pass
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
T = TypeVar("T", bound=AutoNamedCfg)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BaseSelectorCfg(Generic[T]):
|
|
50
|
+
"""
|
|
51
|
+
Base class to handle the logic of selecting an item from a list by name.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
def _resolve_selection(self, items: List[T], selected_name: str, context: str) -> T:
|
|
55
|
+
if selected_name is None:
|
|
56
|
+
raise ValueError(f"No {context} selected in the configuration.")
|
|
57
|
+
|
|
58
|
+
# Create a lookup dict for O(1) access, or just iterate if list is short
|
|
59
|
+
for item in items:
|
|
60
|
+
if item.name == selected_name:
|
|
61
|
+
return item
|
|
62
|
+
|
|
63
|
+
raise ValueError(
|
|
64
|
+
f"{context.capitalize()} '{selected_name}' not found in the configuration list."
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ExpBaseCfg(ABC, YAMLWizard):
|
|
69
|
+
"""
|
|
70
|
+
Base class for configuration objects.
|
|
71
|
+
What a cfg class must have:
|
|
72
|
+
1 - a dataset cfg
|
|
73
|
+
2 - a metric cfg
|
|
74
|
+
3 - a method cfg
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
cfg_name: Optional[str] = None
|
|
78
|
+
|
|
79
|
+
# Save to yaml file
|
|
80
|
+
def save_to_outdir(
|
|
81
|
+
self, filename: str = "__config.yaml", outdir=None, override: bool = False
|
|
82
|
+
) -> None:
|
|
83
|
+
"""
|
|
84
|
+
Save the configuration to the output directory.
|
|
85
|
+
"""
|
|
86
|
+
if outdir is not None:
|
|
87
|
+
output_dir = outdir
|
|
88
|
+
else:
|
|
89
|
+
output_dir = self.get_outdir()
|
|
90
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
91
|
+
assert (output_dir is not None) and (
|
|
92
|
+
os.path.isdir(output_dir)
|
|
93
|
+
), f"Output directory '{output_dir}' does not exist or is not a directory."
|
|
94
|
+
file_path = os.path.join(output_dir, filename)
|
|
95
|
+
if os.path.exists(file_path) and not override:
|
|
96
|
+
pprint(
|
|
97
|
+
f"File '{file_path}' already exists. Use 'override=True' to overwrite."
|
|
98
|
+
)
|
|
99
|
+
else:
|
|
100
|
+
# method of YAMLWizard to_yaml_file
|
|
101
|
+
self.to_yaml_file(file_path)
|
|
102
|
+
|
|
103
|
+
@classmethod
|
|
59
104
|
@abstractmethod
|
|
60
|
-
|
|
61
|
-
|
|
105
|
+
# load from a custom YAML file
|
|
106
|
+
def from_custom_yaml_file(cls, yaml_file: str):
|
|
107
|
+
"""Load a configuration from a custom YAML file."""
|
|
62
108
|
pass
|
|
63
109
|
|
|
110
|
+
def get_cfg_name(self, sep: str = "__", *args, **kwargs) -> str:
|
|
111
|
+
if self.cfg_name is None:
|
|
112
|
+
# auto get the config name from dataset, method, metric
|
|
113
|
+
# 2. Generate the canonical Config Name
|
|
114
|
+
name_parts = []
|
|
115
|
+
general_info = self.get_general_cfg().get_name()
|
|
116
|
+
dataset_info = self.get_dataset_cfg().get_name()
|
|
117
|
+
method_info = self.get_method_cfg().get_name()
|
|
118
|
+
name_parts = [
|
|
119
|
+
general_info,
|
|
120
|
+
f"ds_{dataset_info}",
|
|
121
|
+
f"mt_{method_info}",
|
|
122
|
+
]
|
|
123
|
+
if "extra" in kwargs:
|
|
124
|
+
extra_info = kwargs["extra"]
|
|
125
|
+
assert isinstance(extra_info, str), "'extra' kwarg must be a string."
|
|
126
|
+
name_parts.append(extra_info)
|
|
127
|
+
self.cfg_name = sep.join(name_parts)
|
|
128
|
+
return self.cfg_name
|
|
129
|
+
|
|
64
130
|
@abstractmethod
|
|
65
|
-
def
|
|
131
|
+
def get_outdir(self):
|
|
66
132
|
"""
|
|
67
|
-
|
|
133
|
+
Get the output directory for the configuration.
|
|
68
134
|
This method should be implemented in subclasses.
|
|
69
135
|
"""
|
|
70
|
-
|
|
136
|
+
return None
|
|
71
137
|
|
|
72
138
|
@abstractmethod
|
|
73
|
-
def
|
|
74
|
-
"""
|
|
75
|
-
|
|
139
|
+
def get_general_cfg(self) -> NamedCfg:
|
|
140
|
+
"""
|
|
141
|
+
Get the general configuration like output directory, log settings, SEED, etc.
|
|
142
|
+
This method should be implemented in subclasses.
|
|
76
143
|
"""
|
|
77
144
|
pass
|
|
78
145
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# -----------------------
|
|
82
|
-
def _validate_and_unpack(self, results):
|
|
83
|
-
if results is None:
|
|
84
|
-
return None
|
|
85
|
-
if not isinstance(results, (tuple, list)) or len(results) != 2:
|
|
86
|
-
raise ValueError("exec must return (metrics_data, extra_data)")
|
|
87
|
-
return results[0], results[1]
|
|
88
|
-
|
|
89
|
-
def _prepare_environment(self, force_reload: bool = False):
|
|
146
|
+
@abstractmethod
|
|
147
|
+
def get_dataset_cfg(self) -> NamedCfg:
|
|
90
148
|
"""
|
|
91
|
-
|
|
149
|
+
Get the dataset configuration.
|
|
150
|
+
This method should be implemented in subclasses.
|
|
92
151
|
"""
|
|
93
|
-
|
|
94
|
-
# Environment is already prepared, skipping setup.
|
|
95
|
-
return
|
|
96
|
-
|
|
97
|
-
# 1. Run Setup
|
|
98
|
-
self.init_general(self.config.get_general_cfg())
|
|
99
|
-
self.prepare_dataset(self.config.get_dataset_cfg())
|
|
100
|
-
|
|
101
|
-
# 2. Update metric backend (refresh if needed)
|
|
102
|
-
self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
|
|
103
|
-
|
|
104
|
-
# 3. Mark as ready
|
|
105
|
-
self._is_env_ready = True
|
|
152
|
+
pass
|
|
106
153
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
# -----------------------
|
|
110
|
-
def run_exp(self, should_calc_metrics=True, reload_env=False, *args, **kwargs):
|
|
154
|
+
@abstractmethod
|
|
155
|
+
def get_method_cfg(self) -> NamedCfg:
|
|
111
156
|
"""
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
:param should_calc_metrics: Whether to calculate and save metrics after execution.
|
|
115
|
-
:kwargs Params:
|
|
116
|
-
+ 'outfile' to save csv file results,
|
|
117
|
-
+ 'outdir' to set output directory for experiment results.
|
|
118
|
-
+ 'return_df' to return a DataFrame of results instead of a dictionary.
|
|
119
|
-
|
|
120
|
-
Full pipeline:
|
|
121
|
-
1. Init
|
|
122
|
-
2. Prepare Environment (General + Dataset + Metrics)
|
|
123
|
-
3. Save Config
|
|
124
|
-
4. Execute
|
|
125
|
-
5. Calculate & Save Metrics
|
|
157
|
+
Get the method configuration.
|
|
158
|
+
This method should be implemented in subclasses.
|
|
126
159
|
"""
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
self._trigger_hooks("before_run", self)
|
|
130
|
-
|
|
131
|
-
# Save config before running
|
|
132
|
-
self.config.save_to_outdir()
|
|
133
|
-
|
|
134
|
-
# Execute experiment
|
|
135
|
-
results = self.exec_exp(*args, **kwargs)
|
|
160
|
+
pass
|
|
136
161
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
return perf_results
|
|
145
|
-
else:
|
|
146
|
-
self._trigger_hooks("after_run", self, results)
|
|
147
|
-
return results
|
|
162
|
+
@abstractmethod
|
|
163
|
+
def get_metric_cfg(self) -> NamedCfg:
|
|
164
|
+
"""
|
|
165
|
+
Get the metric configuration.
|
|
166
|
+
This method should be implemented in subclasses.
|
|
167
|
+
"""
|
|
168
|
+
pass
|
halib/exp/perf/perfcalc.py
CHANGED
|
@@ -74,114 +74,191 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
|
|
|
74
74
|
"All dictionaries in raw_data_for_metrics must have the same keys as metric_names"
|
|
75
75
|
)
|
|
76
76
|
|
|
77
|
-
#
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
proc_extra_data_ls = (
|
|
100
|
-
extra_data if isinstance(extra_data, list) else [extra_data.copy()]
|
|
101
|
-
)
|
|
102
|
-
assert len(proc_extra_data_ls) == len(
|
|
103
|
-
proc_metric_raw_data_ls
|
|
104
|
-
), "extra_data must have the same length as raw_data_for_metrics if it is a list"
|
|
105
|
-
# validate the extra_data
|
|
106
|
-
self.valid_proc_extra_data(proc_extra_data_ls)
|
|
77
|
+
# =========================================================================
|
|
78
|
+
# 1. Formatting Logic (Decoupled)
|
|
79
|
+
# =========================================================================
|
|
80
|
+
def package_metrics(
|
|
81
|
+
self,
|
|
82
|
+
metric_results_list: List[dict],
|
|
83
|
+
extra_data_list: Optional[List[dict]] = None,
|
|
84
|
+
) -> List[OrderedDict]:
|
|
85
|
+
"""
|
|
86
|
+
Pure formatting function.
|
|
87
|
+
Takes ALREADY CALCULATED metrics and formats them
|
|
88
|
+
(adds metadata, prefixes keys, ensures column order).
|
|
89
|
+
"""
|
|
90
|
+
# Normalize extra_data to a list if provided
|
|
91
|
+
if extra_data_list is None:
|
|
92
|
+
extra_data_list = [{} for _ in range(len(metric_results_list))]
|
|
93
|
+
elif isinstance(extra_data_list, dict):
|
|
94
|
+
extra_data_list = [extra_data_list]
|
|
95
|
+
|
|
96
|
+
assert len(extra_data_list) == len(
|
|
97
|
+
metric_results_list
|
|
98
|
+
), "Length mismatch: metrics vs extra_data"
|
|
107
99
|
|
|
108
|
-
# calculate the metrics output results
|
|
109
|
-
metrics_backend = self.get_metric_backend()
|
|
110
100
|
proc_outdict_list = []
|
|
111
|
-
|
|
101
|
+
|
|
102
|
+
for metric_res, extra_item in zip(metric_results_list, extra_data_list):
|
|
103
|
+
# A. Base Metadata
|
|
112
104
|
out_dict = {
|
|
113
105
|
"dataset": self.get_dataset_name(),
|
|
114
106
|
"experiment": self.get_experiment_name(),
|
|
115
107
|
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
out_dict.update(metric_results_prefix)
|
|
129
|
-
ordered_cols = (
|
|
130
|
-
REQUIRED_COLS + custom_fields + list(metric_results_prefix.keys())
|
|
108
|
+
|
|
109
|
+
# B. Attach Extra Data
|
|
110
|
+
out_dict.update(extra_item)
|
|
111
|
+
custom_fields = list(extra_item.keys())
|
|
112
|
+
|
|
113
|
+
# C. Prefix Metric Keys (e.g., 'acc' -> 'metric_acc')
|
|
114
|
+
metric_results_prefixed = {f"metric_{k}": v for k, v in metric_res.items()}
|
|
115
|
+
out_dict.update(metric_results_prefixed)
|
|
116
|
+
|
|
117
|
+
# D. Order Columns
|
|
118
|
+
all_cols = (
|
|
119
|
+
REQUIRED_COLS + custom_fields + list(metric_results_prefixed.keys())
|
|
131
120
|
)
|
|
132
|
-
|
|
133
|
-
(col, out_dict[col]) for col in
|
|
121
|
+
ordered_out = OrderedDict(
|
|
122
|
+
(col, out_dict[col]) for col in all_cols if col in out_dict
|
|
134
123
|
)
|
|
135
|
-
proc_outdict_list.append(
|
|
124
|
+
proc_outdict_list.append(ordered_out)
|
|
136
125
|
|
|
137
126
|
return proc_outdict_list
|
|
138
127
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
def calc_perfs(
|
|
128
|
+
# =========================================================================
|
|
129
|
+
# 2. Calculation Logic (The Coordinator)
|
|
130
|
+
# =========================================================================
|
|
131
|
+
def calc_exp_perf_metrics(
|
|
144
132
|
self,
|
|
133
|
+
metric_names: List[str],
|
|
145
134
|
raw_metrics_data: Union[List[dict], dict],
|
|
146
135
|
extra_data: Optional[Union[List[dict], dict]] = None,
|
|
147
136
|
*args,
|
|
148
137
|
**kwargs,
|
|
149
|
-
) ->
|
|
138
|
+
) -> List[OrderedDict]:
|
|
150
139
|
"""
|
|
151
|
-
|
|
152
|
-
This function should be overridden by the subclass if needed.
|
|
153
|
-
Must return a dictionary with keys as metric names and values as the calculated metrics.
|
|
140
|
+
Full workflow: Validates raw data -> Calculates via Backend -> Packages results.
|
|
154
141
|
"""
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
raw_metrics_data
|
|
159
|
-
|
|
160
|
-
*args,
|
|
161
|
-
**kwargs,
|
|
142
|
+
# Prepare Raw Data
|
|
143
|
+
raw_data_ls = (
|
|
144
|
+
raw_metrics_data
|
|
145
|
+
if isinstance(raw_metrics_data, list)
|
|
146
|
+
else [raw_metrics_data]
|
|
162
147
|
)
|
|
148
|
+
self.valid_proc_metric_raw_data(metric_names, raw_data_ls)
|
|
149
|
+
|
|
150
|
+
# Prepare Extra Data (Validation only)
|
|
151
|
+
extra_data_ls = None
|
|
152
|
+
if extra_data:
|
|
153
|
+
extra_data_ls = extra_data if isinstance(extra_data, list) else [extra_data]
|
|
154
|
+
self.valid_proc_extra_data(extra_data_ls)
|
|
155
|
+
|
|
156
|
+
# Calculate Metrics via Backend
|
|
157
|
+
metrics_backend = self.get_metric_backend()
|
|
158
|
+
calculated_results = []
|
|
159
|
+
|
|
160
|
+
for data_item in raw_data_ls:
|
|
161
|
+
res = metrics_backend.calc_metrics(
|
|
162
|
+
metrics_data_dict=data_item, *args, **kwargs
|
|
163
|
+
)
|
|
164
|
+
calculated_results.append(res)
|
|
165
|
+
|
|
166
|
+
# Delegate to Formatting
|
|
167
|
+
return self.package_metrics(calculated_results, extra_data_ls)
|
|
168
|
+
|
|
169
|
+
# =========================================================================
|
|
170
|
+
# 3. File Saving Logic (Decoupled)
|
|
171
|
+
# =========================================================================
|
|
172
|
+
def save_results_to_csv(
|
|
173
|
+
self, out_dict_list: List[OrderedDict], **kwargs
|
|
174
|
+
) -> Tuple[pd.DataFrame, Optional[str]]:
|
|
175
|
+
"""
|
|
176
|
+
Helper function to convert results to DataFrame and save to CSV.
|
|
177
|
+
"""
|
|
163
178
|
csv_outfile = kwargs.get("outfile", None)
|
|
179
|
+
|
|
180
|
+
# Determine Output Path
|
|
164
181
|
if csv_outfile is not None:
|
|
165
182
|
filePathNoExt, _ = os.path.splitext(csv_outfile)
|
|
166
|
-
# pprint(f"CSV Outfile Path (No Ext): {filePathNoExt}")
|
|
167
183
|
csv_outfile = f"{filePathNoExt}{CSV_FILE_POSTFIX}.csv"
|
|
168
184
|
elif "outdir" in kwargs:
|
|
169
185
|
csvoutdir = kwargs["outdir"]
|
|
170
186
|
csvfilename = f"{now_str()}_{self.get_dataset_name()}_{self.get_experiment_name()}_{CSV_FILE_POSTFIX}.csv"
|
|
171
187
|
csv_outfile = os.path.join(csvoutdir, csvfilename)
|
|
172
188
|
|
|
173
|
-
#
|
|
189
|
+
# Convert to DataFrame
|
|
174
190
|
df = pd.DataFrame(out_dict_list)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
191
|
+
if out_dict_list:
|
|
192
|
+
ordered_cols = list(out_dict_list[0].keys())
|
|
193
|
+
df = df[ordered_cols]
|
|
194
|
+
|
|
195
|
+
# Save to File
|
|
178
196
|
if csv_outfile:
|
|
179
197
|
df.to_csv(csv_outfile, index=False, sep=";", encoding="utf-8")
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
198
|
+
|
|
199
|
+
return df, csv_outfile
|
|
200
|
+
|
|
201
|
+
# =========================================================================
|
|
202
|
+
# 4. Public API: Standard Calculation
|
|
203
|
+
# raw_metrics_data: example: [{"preds": ..., "target": ...}, ...]
|
|
204
|
+
# =========================================================================
|
|
205
|
+
def calc_perfs(
|
|
206
|
+
self,
|
|
207
|
+
raw_metrics_data: Union[List[dict], dict],
|
|
208
|
+
extra_data: Optional[Union[List[dict], dict]] = None,
|
|
209
|
+
*args,
|
|
210
|
+
**kwargs,
|
|
211
|
+
) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
|
|
212
|
+
"""
|
|
213
|
+
Standard use case: Calculate metrics AND save to CSV.
|
|
214
|
+
"""
|
|
215
|
+
metric_names = self.get_metric_backend().metric_names
|
|
216
|
+
|
|
217
|
+
# 1. Calculate & Package
|
|
218
|
+
out_dict_list = self.calc_exp_perf_metrics(
|
|
219
|
+
metric_names=metric_names,
|
|
220
|
+
raw_metrics_data=raw_metrics_data,
|
|
221
|
+
extra_data=extra_data,
|
|
222
|
+
*args,
|
|
223
|
+
**kwargs,
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
# 2. Save
|
|
227
|
+
df, csv_outfile = self.save_results_to_csv(out_dict_list, **kwargs)
|
|
228
|
+
|
|
229
|
+
return (
|
|
230
|
+
(df, csv_outfile)
|
|
231
|
+
if kwargs.get("return_df", False)
|
|
232
|
+
else (out_dict_list, csv_outfile)
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# =========================================================================
|
|
236
|
+
# 5. Public API: Manual / External Metrics (The Shortcut)
|
|
237
|
+
# =========================================================================
|
|
238
|
+
def save_computed_perfs(
|
|
239
|
+
self,
|
|
240
|
+
metrics_data: Union[List[dict], dict],
|
|
241
|
+
extra_data: Optional[Union[List[dict], dict]] = None,
|
|
242
|
+
**kwargs,
|
|
243
|
+
) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
|
|
244
|
+
|
|
245
|
+
# Ensure list format
|
|
246
|
+
if isinstance(metrics_data, dict):
|
|
247
|
+
metrics_data = [metrics_data]
|
|
248
|
+
if isinstance(extra_data, dict):
|
|
249
|
+
extra_data = [extra_data]
|
|
250
|
+
|
|
251
|
+
# 1. Package (Format)
|
|
252
|
+
formatted_list = self.package_metrics(metrics_data, extra_data)
|
|
253
|
+
|
|
254
|
+
# 2. Save
|
|
255
|
+
df, csv_outfile = self.save_results_to_csv(formatted_list, **kwargs)
|
|
256
|
+
|
|
257
|
+
return (
|
|
258
|
+
(df, csv_outfile)
|
|
259
|
+
if kwargs.get("return_df", False)
|
|
260
|
+
else (formatted_list, csv_outfile)
|
|
261
|
+
)
|
|
185
262
|
|
|
186
263
|
@staticmethod
|
|
187
264
|
def default_exp_csv_filter_fn(exp_file_name: str) -> bool:
|
halib/filetype/yamlfile.py
CHANGED
|
@@ -11,6 +11,7 @@ from ..system.path import *
|
|
|
11
11
|
|
|
12
12
|
console = Console()
|
|
13
13
|
|
|
14
|
+
|
|
14
15
|
def _load_yaml_recursively(
|
|
15
16
|
yaml_file, yaml_files=[], share_nx_graph=nx.DiGraph(), log_info=False
|
|
16
17
|
):
|
|
@@ -53,25 +54,16 @@ def load_yaml(yaml_file, to_dict=False, log_info=False):
|
|
|
53
54
|
else:
|
|
54
55
|
return omgconf
|
|
55
56
|
|
|
57
|
+
|
|
56
58
|
def load_yaml_with_PC_abbr(
|
|
57
|
-
yaml_file,
|
|
59
|
+
yaml_file, abbr_disk_map=ABBR_DISK_MAP
|
|
58
60
|
):
|
|
59
|
-
# current PC abbreviation
|
|
60
|
-
pc_abbr = get_PC_abbr_name()
|
|
61
|
-
|
|
62
|
-
# current plaftform: windows or linux
|
|
63
|
-
current_platform = platform.system().lower()
|
|
64
|
-
|
|
65
|
-
assert pc_abbr in pc_abbr_to_working_disk, f"The is no mapping for {pc_abbr} to <working_disk>"
|
|
66
|
-
|
|
67
|
-
# working disk
|
|
68
|
-
working_disk = pc_abbr_to_working_disk.get(pc_abbr)
|
|
69
|
-
|
|
70
61
|
# load yaml file
|
|
71
62
|
data_dict = load_yaml(yaml_file=yaml_file, to_dict=True)
|
|
72
|
-
|
|
73
63
|
# Normalize paths in the loaded data
|
|
74
|
-
data_dict = normalize_paths(
|
|
64
|
+
data_dict = normalize_paths(
|
|
65
|
+
data_dict, get_working_disk(abbr_disk_map), get_os_platform()
|
|
66
|
+
)
|
|
75
67
|
return data_dict
|
|
76
68
|
|
|
77
69
|
|
halib/system/path.py
CHANGED
|
@@ -2,9 +2,9 @@ from ..common.common import *
|
|
|
2
2
|
from ..filetype import csvfile
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import platform
|
|
5
|
+
import re # <--- [FIX 1] Added missing import
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
PC_NAME_TO_ABBR = {
|
|
7
|
+
PC_TO_ABBR = {
|
|
8
8
|
"DESKTOP-JQD9K01": "MainPC",
|
|
9
9
|
"DESKTOP-5IRHU87": "MSI_Laptop",
|
|
10
10
|
"DESKTOP-96HQCNO": "4090_SV",
|
|
@@ -12,17 +12,16 @@ PC_NAME_TO_ABBR = {
|
|
|
12
12
|
"DESKTOP-QNS3DNF": "1GPU_SV",
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
ABBR_DISK_MAP = {
|
|
16
16
|
"MainPC": "E:",
|
|
17
17
|
"MSI_Laptop": "D:",
|
|
18
18
|
"4090_SV": "E:",
|
|
19
19
|
"4GPU_SV": "D:",
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
|
|
22
23
|
def list_PCs(show=True):
|
|
23
|
-
df = pd.DataFrame(
|
|
24
|
-
list(PC_NAME_TO_ABBR.items()), columns=["PC Name", "Abbreviation"]
|
|
25
|
-
)
|
|
24
|
+
df = pd.DataFrame(list(PC_TO_ABBR.items()), columns=["PC Name", "Abbreviation"])
|
|
26
25
|
if show:
|
|
27
26
|
csvfile.fn_display_df(df)
|
|
28
27
|
return df
|
|
@@ -34,13 +33,32 @@ def get_PC_name():
|
|
|
34
33
|
|
|
35
34
|
def get_PC_abbr_name():
|
|
36
35
|
pc_name = get_PC_name()
|
|
37
|
-
return
|
|
36
|
+
return PC_TO_ABBR.get(pc_name, "Unknown")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_os_platform():
|
|
40
|
+
return platform.system().lower()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_working_disk(abbr_disk_map=ABBR_DISK_MAP):
|
|
44
|
+
pc_abbr = get_PC_abbr_name()
|
|
45
|
+
return abbr_disk_map.get(pc_abbr, None)
|
|
38
46
|
|
|
39
47
|
|
|
40
48
|
# ! This function searches for full paths in the obj and normalizes them according to the current platform and working disk
|
|
41
49
|
# ! E.g: "E:/zdataset/DFire", but working_disk: "D:", current_platform: "windows" => "D:/zdataset/DFire"
|
|
42
50
|
# ! E.g: "E:/zdataset/DFire", but working_disk: "D:", current_platform: "linux" => "/mnt/d/zdataset/DFire"
|
|
43
|
-
def normalize_paths(obj, working_disk, current_platform):
|
|
51
|
+
def normalize_paths(obj, working_disk=None, current_platform=None):
|
|
52
|
+
# [FIX 3] Resolve defaults inside function to be safer/cleaner
|
|
53
|
+
if working_disk is None:
|
|
54
|
+
working_disk = get_working_disk()
|
|
55
|
+
if current_platform is None:
|
|
56
|
+
current_platform = get_os_platform()
|
|
57
|
+
|
|
58
|
+
# [FIX 2] If PC is unknown, working_disk is None. Return early to avoid crash.
|
|
59
|
+
if working_disk is None:
|
|
60
|
+
return obj
|
|
61
|
+
|
|
44
62
|
if isinstance(obj, dict):
|
|
45
63
|
for key, value in obj.items():
|
|
46
64
|
obj[key] = normalize_paths(value, working_disk, current_platform)
|
|
@@ -52,22 +70,25 @@ def normalize_paths(obj, working_disk, current_platform):
|
|
|
52
70
|
elif isinstance(obj, str):
|
|
53
71
|
# Normalize backslashes to forward slashes for consistency
|
|
54
72
|
obj = obj.replace("\\", "/")
|
|
73
|
+
|
|
55
74
|
# Regex for Windows-style path: e.g., "E:/zdataset/DFire"
|
|
56
75
|
win_match = re.match(r"^([A-Z]):/(.*)$", obj)
|
|
57
76
|
# Regex for Linux-style path: e.g., "/mnt/e/zdataset/DFire"
|
|
58
77
|
lin_match = re.match(r"^/mnt/([a-z])/(.*)$", obj)
|
|
78
|
+
|
|
59
79
|
if win_match or lin_match:
|
|
60
80
|
rest = win_match.group(2) if win_match else lin_match.group(2)
|
|
81
|
+
|
|
61
82
|
if current_platform == "windows":
|
|
62
83
|
# working_disk is like "D:", so "D:/" + rest
|
|
63
|
-
new_path = working_disk
|
|
84
|
+
new_path = f"{working_disk}/{rest}"
|
|
64
85
|
elif current_platform == "linux":
|
|
65
86
|
# Extract drive letter from working_disk (e.g., "D:" -> "d")
|
|
66
87
|
drive_letter = working_disk[0].lower()
|
|
67
|
-
new_path = "/mnt/
|
|
88
|
+
new_path = f"/mnt/{drive_letter}/{rest}"
|
|
68
89
|
else:
|
|
69
|
-
# Unknown platform, return original
|
|
70
90
|
return obj
|
|
71
91
|
return new_path
|
|
92
|
+
|
|
72
93
|
# For non-strings or non-path strings, return as is
|
|
73
94
|
return obj
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: halib
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: Small library for common tasks
|
|
5
5
|
Author: Hoang Van Ha
|
|
6
6
|
Author-email: hoangvanhauit@gmail.com
|
|
@@ -53,8 +53,9 @@ Dynamic: summary
|
|
|
53
53
|
|
|
54
54
|
# Helper package for coding and automation
|
|
55
55
|
|
|
56
|
-
**Version 0.2.
|
|
56
|
+
**Version 0.2.7**
|
|
57
57
|
+ reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
|
|
58
|
+
+ update `exp/perfcalc.py` to allow saving computed performance to a csv file (without explicitly calling the `calc_perfs` method)
|
|
58
59
|
|
|
59
60
|
**Version 0.2.1**
|
|
60
61
|
+ `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
|
|
@@ -21,8 +21,8 @@ halib/common/common.py,sha256=olkeXdFdojOkySP5aurzxKlehngRwBHdNBw5JfE4_fQ,5038
|
|
|
21
21
|
halib/common/rich_color.py,sha256=tyK5fl3Dtv1tKsfFzt_5Rco4Fj72QliA-w5aGXaVuqQ,6392
|
|
22
22
|
halib/exp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
23
|
halib/exp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
-
halib/exp/core/base_config.py,sha256=
|
|
25
|
-
halib/exp/core/base_exp.py,sha256=
|
|
24
|
+
halib/exp/core/base_config.py,sha256=Js2oVDt7qwT7eV_sOUWw6XXl569G1bX6ls-VYAx2gWY,5032
|
|
25
|
+
halib/exp/core/base_exp.py,sha256=XjRHXbUHE-DCZLRDTteDF5gsxKN3mhGEe2zWL24JP80,5131
|
|
26
26
|
halib/exp/core/param_gen.py,sha256=I9JHrDCaep4CjvApDoX0QzFuw38zMC2PsDFueuA7pjM,4271
|
|
27
27
|
halib/exp/core/wandb_op.py,sha256=powL2QyLBqF-6PUGAOqd60s1npHLLKJxPns3S4hKeNo,4160
|
|
28
28
|
halib/exp/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -32,7 +32,7 @@ halib/exp/data/torchloader.py,sha256=oWUplXlGd1IB6CqdRd-mGe-DfMjjZxz9hQ7SWONb-0s
|
|
|
32
32
|
halib/exp/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
33
|
halib/exp/perf/flop_calc.py,sha256=Kb3Gwqc7QtGALZzfyYXBA_9SioReJpTJdUX84kqj-Aw,6907
|
|
34
34
|
halib/exp/perf/gpu_mon.py,sha256=vD41_ZnmPLKguuq9X44SB_vwd9JrblO4BDzHLXZhhFY,2233
|
|
35
|
-
halib/exp/perf/perfcalc.py,sha256=
|
|
35
|
+
halib/exp/perf/perfcalc.py,sha256=p7rhVShiie7DT_s50lbvbGftVCkrWE0tQGFLUEmTXi0,18326
|
|
36
36
|
halib/exp/perf/perfmetrics.py,sha256=qRiNiCKGUSTLY7gPMVMuVHGAAyeosfGWup2eM4490aw,5485
|
|
37
37
|
halib/exp/perf/perftb.py,sha256=IWElg3OB5dmhfxnY8pMZvkL2y_EnvLmEx3gJlpUR1Fs,31066
|
|
38
38
|
halib/exp/perf/profiler.py,sha256=5ZjES8kAqEsSV1mC3Yr_1ivFLwQDc_yv4HY7dKt_AS0,11782
|
|
@@ -44,7 +44,7 @@ halib/filetype/ipynb.py,sha256=pd2LgmPa7ZbF0YlQJbeQZEsl6jHQUSoyVtkCT7WhU5Q,1657
|
|
|
44
44
|
halib/filetype/jsonfile.py,sha256=2HcBqXYjLNvqFok3PHOgH59vlhDCZLZpt7ezvgx1TFM,474
|
|
45
45
|
halib/filetype/textfile.py,sha256=3koEFyVme1SEHdud7TnjArHndoiqfMGfMdYY3NIFegM,397
|
|
46
46
|
halib/filetype/videofile.py,sha256=wDyZp7Dh0ZuNgQUvt8gLTpy3Flx1jDr-QsO4-jzriGE,8104
|
|
47
|
-
halib/filetype/yamlfile.py,sha256=
|
|
47
|
+
halib/filetype/yamlfile.py,sha256=buvj016n3wmEBDfeDX9lbljqDk5R5OlKHuXDKSfIKK0,2411
|
|
48
48
|
halib/online/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
49
|
halib/online/gdrive.py,sha256=RmF4y6UPxektkKIctmfT-pKWZsBM9FVUeld6zZmJkp0,7787
|
|
50
50
|
halib/online/gdrive_mkdir.py,sha256=wSJkQMJCDuS1gxQ2lHQHq_IrJ4xR_SEoPSo9n_2WNFU,1474
|
|
@@ -91,7 +91,7 @@ halib/sys/filesys.py,sha256=ERpnELLDKJoTIIKf-AajgkY62nID4qmqmX5TkE95APU,2931
|
|
|
91
91
|
halib/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
92
92
|
halib/system/cmd.py,sha256=b2x7JPcNnFjLGheIESVYvqAb-w2UwBM1PAwYxMZ5YjA,228
|
|
93
93
|
halib/system/filesys.py,sha256=102J2fkQhmH1_-HQVy2FQ4NOU8LTjMWV3hToT_APtq8,4401
|
|
94
|
-
halib/system/path.py,sha256=
|
|
94
|
+
halib/system/path.py,sha256=uCXTltd943L_2heU2HLwvXcgYJRd8-ZEzDe435419OQ,3165
|
|
95
95
|
halib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
96
96
|
halib/utils/dataclass_util.py,sha256=rj2IMLlUzbm2OlF5_B2dRTk9njZOaF7tTjYkOsq8uLY,1416
|
|
97
97
|
halib/utils/dict.py,sha256=wYE6Iw-_CnCWdMg9tpJ2Y2-e2ESkW9FxmdBkZkbUh80,299
|
|
@@ -101,8 +101,8 @@ halib/utils/list.py,sha256=BM-8sRhYyqF7bh4p7TQtV7P_gnFruUCA6DTUOombaZg,337
|
|
|
101
101
|
halib/utils/listop.py,sha256=Vpa8_2fI0wySpB2-8sfTBkyi_A4FhoFVVvFiuvW8N64,339
|
|
102
102
|
halib/utils/tele_noti.py,sha256=-4WXZelCA4W9BroapkRyIdUu9cUVrcJJhegnMs_WpGU,5928
|
|
103
103
|
halib/utils/video.py,sha256=zLoj5EHk4SmP9OnoHjO8mLbzPdtq6gQPzTQisOEDdO8,3261
|
|
104
|
-
halib-0.2.
|
|
105
|
-
halib-0.2.
|
|
106
|
-
halib-0.2.
|
|
107
|
-
halib-0.2.
|
|
108
|
-
halib-0.2.
|
|
104
|
+
halib-0.2.7.dist-info/licenses/LICENSE.txt,sha256=qZssdna4aETiR8znYsShUjidu-U4jUT9Q-EWNlZ9yBQ,1100
|
|
105
|
+
halib-0.2.7.dist-info/METADATA,sha256=jSjp5DPZ8A8ohlO-QQ__7mE0Z-fO7sdkZ5Bz6ssKnhU,6836
|
|
106
|
+
halib-0.2.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
107
|
+
halib-0.2.7.dist-info/top_level.txt,sha256=7AD6PLaQTreE0Fn44mdZsoHBe_Zdd7GUmjsWPyQ7I-k,6
|
|
108
|
+
halib-0.2.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|