halib 0.1.99__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {halib-0.1.99 → halib-0.2.1}/PKG-INFO +4 -1
  2. {halib-0.1.99 → halib-0.2.1}/README.md +3 -0
  3. halib-0.2.1/halib/research/base_exp.py +157 -0
  4. {halib-0.1.99 → halib-0.2.1}/halib/research/dataset.py +1 -1
  5. {halib-0.1.99 → halib-0.2.1}/halib/research/metrics.py +4 -0
  6. {halib-0.1.99 → halib-0.2.1}/halib/research/mics.py +8 -2
  7. {halib-0.1.99 → halib-0.2.1}/halib/research/perfcalc.py +57 -32
  8. {halib-0.1.99 → halib-0.2.1}/halib.egg-info/PKG-INFO +4 -1
  9. {halib-0.1.99 → halib-0.2.1}/halib.egg-info/SOURCES.txt +0 -1
  10. {halib-0.1.99 → halib-0.2.1}/setup.py +1 -1
  11. halib-0.1.99/guide_publish_pip.pdf +0 -0
  12. halib-0.1.99/halib/research/base_exp.py +0 -100
  13. {halib-0.1.99 → halib-0.2.1}/.gitignore +0 -0
  14. {halib-0.1.99 → halib-0.2.1}/GDriveFolder.txt +0 -0
  15. {halib-0.1.99 → halib-0.2.1}/LICENSE.txt +0 -0
  16. {halib-0.1.99 → halib-0.2.1}/MANIFEST.in +0 -0
  17. {halib-0.1.99 → halib-0.2.1}/halib/__init__.py +0 -0
  18. {halib-0.1.99 → halib-0.2.1}/halib/common.py +0 -0
  19. {halib-0.1.99 → halib-0.2.1}/halib/cuda.py +0 -0
  20. {halib-0.1.99 → halib-0.2.1}/halib/filetype/__init__.py +0 -0
  21. {halib-0.1.99 → halib-0.2.1}/halib/filetype/csvfile.py +0 -0
  22. {halib-0.1.99 → halib-0.2.1}/halib/filetype/ipynb.py +0 -0
  23. {halib-0.1.99 → halib-0.2.1}/halib/filetype/jsonfile.py +0 -0
  24. {halib-0.1.99 → halib-0.2.1}/halib/filetype/textfile.py +0 -0
  25. {halib-0.1.99 → halib-0.2.1}/halib/filetype/videofile.py +0 -0
  26. {halib-0.1.99 → halib-0.2.1}/halib/filetype/yamlfile.py +0 -0
  27. {halib-0.1.99 → halib-0.2.1}/halib/online/__init__.py +0 -0
  28. {halib-0.1.99 → halib-0.2.1}/halib/online/gdrive.py +0 -0
  29. {halib-0.1.99 → halib-0.2.1}/halib/online/gdrive_mkdir.py +0 -0
  30. {halib-0.1.99 → halib-0.2.1}/halib/online/gdrive_test.py +0 -0
  31. {halib-0.1.99 → halib-0.2.1}/halib/online/projectmake.py +0 -0
  32. {halib-0.1.99 → halib-0.2.1}/halib/research/__init__.py +0 -0
  33. {halib-0.1.99 → halib-0.2.1}/halib/research/base_config.py +0 -0
  34. {halib-0.1.99 → halib-0.2.1}/halib/research/flop_csv.py +0 -0
  35. {halib-0.1.99 → halib-0.2.1}/halib/research/flops.py +0 -0
  36. {halib-0.1.99 → halib-0.2.1}/halib/research/params_gen.py +0 -0
  37. {halib-0.1.99 → halib-0.2.1}/halib/research/perftb.py +0 -0
  38. {halib-0.1.99 → halib-0.2.1}/halib/research/plot.py +0 -0
  39. {halib-0.1.99 → halib-0.2.1}/halib/research/profiler.py +0 -0
  40. {halib-0.1.99 → halib-0.2.1}/halib/research/torchloader.py +0 -0
  41. {halib-0.1.99 → halib-0.2.1}/halib/research/wandb_op.py +0 -0
  42. {halib-0.1.99 → halib-0.2.1}/halib/rich_color.py +0 -0
  43. {halib-0.1.99 → halib-0.2.1}/halib/system/__init__.py +0 -0
  44. {halib-0.1.99 → halib-0.2.1}/halib/system/cmd.py +0 -0
  45. {halib-0.1.99 → halib-0.2.1}/halib/system/filesys.py +0 -0
  46. {halib-0.1.99 → halib-0.2.1}/halib/utils/__init__.py +0 -0
  47. {halib-0.1.99 → halib-0.2.1}/halib/utils/dataclass_util.py +0 -0
  48. {halib-0.1.99 → halib-0.2.1}/halib/utils/dict_op.py +0 -0
  49. {halib-0.1.99 → halib-0.2.1}/halib/utils/gpu_mon.py +0 -0
  50. {halib-0.1.99 → halib-0.2.1}/halib/utils/listop.py +0 -0
  51. {halib-0.1.99 → halib-0.2.1}/halib/utils/tele_noti.py +0 -0
  52. {halib-0.1.99 → halib-0.2.1}/halib/utils/video.py +0 -0
  53. {halib-0.1.99 → halib-0.2.1}/halib.egg-info/dependency_links.txt +0 -0
  54. {halib-0.1.99 → halib-0.2.1}/halib.egg-info/requires.txt +0 -0
  55. {halib-0.1.99 → halib-0.2.1}/halib.egg-info/top_level.txt +0 -0
  56. {halib-0.1.99 → halib-0.2.1}/setup.cfg +0 -0
{halib-0.1.99 → halib-0.2.1}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: halib
- Version: 0.1.99
+ Version: 0.2.1
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -53,6 +53,9 @@ Dynamic: summary

  # Helper package for coding and automation

+ **Version 0.2.01**
+ + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
  **Version 0.1.99**
  + `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)

{halib-0.1.99 → halib-0.2.1}/README.md

@@ -1,5 +1,8 @@
  # Helper package for coding and automation

+ **Version 0.2.01**
+ + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
  **Version 0.1.99**
  + `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)

halib-0.2.1/halib/research/base_exp.py (new file)

@@ -0,0 +1,157 @@
+ from abc import ABC, abstractmethod
+ from typing import Tuple, Any, Optional
+ from ..research.base_config import ExpBaseConfig
+ from ..research.perfcalc import PerfCalc
+ from ..research.metrics import MetricsBackend
+
+ # ! SEE https://github.com/hahv/base_exp for sample usage
+ class BaseExperiment(PerfCalc, ABC):
+     """
+     Base class for experiments.
+     Orchestrates the experiment pipeline using a pluggable metrics backend.
+     """
+
+     def __init__(self, config: ExpBaseConfig):
+         self.config = config
+         self.metric_backend = None
+         # Flag to track if init_general/prepare_dataset has run
+         self._is_env_ready = False
+
+     # -----------------------
+     # PerfCalc Required Methods
+     # -----------------------
+     def get_dataset_name(self):
+         return self.config.get_dataset_cfg().get_name()
+
+     def get_experiment_name(self):
+         return self.config.get_cfg_name()
+
+     def get_metric_backend(self):
+         if not self.metric_backend:
+             self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
+         return self.metric_backend
+
+     # -----------------------
+     # Abstract Experiment Steps
+     # -----------------------
+     @abstractmethod
+     def init_general(self, general_cfg):
+         """Setup general settings like SEED, logging, env variables."""
+         pass
+
+     @abstractmethod
+     def prepare_dataset(self, dataset_cfg):
+         """Load/prepare dataset."""
+         pass
+
+     @abstractmethod
+     def prepare_metrics(self, metric_cfg) -> MetricsBackend:
+         """
+         Prepare the metrics for the experiment.
+         This method should be implemented in subclasses.
+         """
+         pass
+
+     @abstractmethod
+     def before_exec_exp_once(self, *args, **kwargs):
+         """Optional: any setup before exec_exp. Note this is called once per run_exp."""
+         pass
+
+     @abstractmethod
+     def exec_exp(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
+         """Run experiment process, e.g.: training/evaluation loop.
+         Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
+         """
+         pass
+
+     @abstractmethod
+     def exec_eval(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
+         """Run evaluation process.
+         Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
+         """
+         pass
+
+     # -----------------------
+     # Internal Helpers
+     # -----------------------
+     def _validate_and_unpack(self, results):
+         if results is None:
+             return None
+         if not isinstance(results, (tuple, list)) or len(results) != 2:
+             raise ValueError("exec must return (metrics_data, extra_data)")
+         return results[0], results[1]
+
+     def _prepare_environment(self, force_reload: bool = False):
+         """
+         Common setup. Skips if already initialized, unless force_reload is True.
+         """
+         if self._is_env_ready and not force_reload:
+             # Environment is already prepared, skipping setup.
+             return
+
+         # 1. Run Setup
+         self.init_general(self.config.get_general_cfg())
+         self.prepare_dataset(self.config.get_dataset_cfg())
+
+         # 2. Update metric backend (refresh if needed)
+         self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
+
+         # 3. Mark as ready
+         self._is_env_ready = True
+
+     # -----------------------
+     # Main Experiment Runner
+     # -----------------------
+     def run_exp(self, should_calc_metrics=True, reload_env=False, *args, **kwargs):
+         """
+         Run the whole experiment pipeline.
+         :param reload_env: If True, forces dataset/general init to run again.
+         :param should_calc_metrics: Whether to calculate and save metrics after execution.
+         :kwargs Params:
+             + 'outfile' to save csv file results,
+             + 'outdir' to set output directory for experiment results.
+             + 'return_df' to return a DataFrame of results instead of a dictionary.
+
+         Full pipeline:
+             1. Init
+             2. Prepare Environment (General + Dataset + Metrics)
+             3. Save Config
+             4. Execute
+             5. Calculate & Save Metrics
+         """
+         self._prepare_environment(force_reload=reload_env)
+
+         # Any pre-exec setup (loading models, etc)
+         self.before_exec_exp_once(*args, **kwargs)
+         # Save config before running
+         self.config.save_to_outdir()
+
+         # Execute experiment
+         results = self.exec_exp(*args, **kwargs)
+
+         if should_calc_metrics and results is not None:
+             metrics_data, extra_data = self._validate_and_unpack(results)
+             # Calculate & Save metrics
+             perf_results = self.calc_perfs(
+                 raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
+             )
+             return perf_results
+         else:
+             return results
+
+     # -----------------------
+     # Main Experiment Evaluator
+     # -----------------------
+     def eval_exp(self, reload_env=False, *args, **kwargs):
+         """
+         Run evaluation only.
+         :param reload_env: If True, forces dataset/general init to run again.
+         """
+         self._prepare_environment(force_reload=reload_env)
+         results = self.exec_eval(*args, **kwargs)
+         if results is not None:
+             metrics_data, extra_data = self._validate_and_unpack(results)
+             return self.calc_perfs(
+                 raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
+             )
+         return None
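For orientation, here is a hedged sketch of how the new pipeline is meant to be used. `MyExperiment`, `my_metrics_backend`, and `my_config` are illustrative names only (not part of halib); the sketch assumes the class is importable as `halib.research.base_exp.BaseExperiment` and that `prepare_metrics` returns some concrete `MetricsBackend` implementation, as the sample repo linked in the file header suggests.

```python
# Illustrative sketch only -- MyExperiment, my_metrics_backend and my_config are
# hypothetical; only the BaseExperiment API shown in the diff above is assumed.
from halib.research.base_exp import BaseExperiment


class MyExperiment(BaseExperiment):
    def init_general(self, general_cfg):
        pass  # e.g. seed everything, configure logging

    def prepare_dataset(self, dataset_cfg):
        self.test_loader = ...  # build loaders from the dataset config

    def prepare_metrics(self, metric_cfg):
        # must return a concrete MetricsBackend (see halib.research.metrics)
        return my_metrics_backend(metric_cfg)  # hypothetical factory

    def before_exec_exp_once(self, *args, **kwargs):
        self.model = ...  # one-time setup per run_exp, e.g. load the model

    def exec_exp(self, *args, **kwargs):
        # train, then return (raw_metrics_data, extra_data) or None
        raw = {"accuracy": {"preds": [...], "targets": [...]}}
        return raw, {"epochs": 10}

    def exec_eval(self, *args, **kwargs):
        # evaluation-only pass, used by the new eval_exp entry point
        raw = {"accuracy": {"preds": [...], "targets": [...]}}
        return raw, {"split": "test"}


exp = MyExperiment(my_config)  # my_config: a concrete ExpBaseConfig built elsewhere
train_perf = exp.run_exp(outdir="results", return_df=True)  # full pipeline + metrics
eval_perf = exp.eval_exp()  # new in 0.2.x: reuses the already-prepared environment
```

Compared with 0.1.99, `run_exp` now guards environment setup behind `_prepare_environment` (re-run only with `reload_env=True`) and tolerates `exec_exp` returning `None`.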
{halib-0.1.99 → halib-0.2.1}/halib/research/dataset.py

@@ -13,8 +13,8 @@ from rich.pretty import pprint
  from torchvision.datasets import ImageFolder
  from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit

- from ..system import filesys as fs
  from ..common import console, seed_everything, ConsoleLog
+ from ..system import filesys as fs

  def parse_args():
      parser = ArgumentParser(description="desc text")
{halib-0.1.99 → halib-0.2.1}/halib/research/metrics.py

@@ -11,6 +11,10 @@ class MetricsBackend(ABC):
      def __init__(self, metrics_info: Union[List[str], Dict[str, Any]]):
          """
          Initialize the backend with optional metrics_info.
+         `metrics_info` can be either:
+         - A list of metric names (strings). e.g., ["accuracy", "precision"]
+         - A dict mapping metric names with object that defines how to compute them. e.g: {"accuracy": torchmetrics.Accuracy(), "precision": torchmetrics.Precision()}
+
          """
          self.metric_info = metrics_info
          self.validate_metrics_info(self.metric_info)
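To make the two accepted shapes concrete, they would look roughly like this; `torchmetrics` is just the example the docstring itself uses (recent torchmetrics releases require the `task` argument shown here), and how the dict values are consumed is up to the concrete backend.

```python
# The two metrics_info shapes described in the docstring above.
import torchmetrics

# 1) names only: the backend decides how each metric is computed
metrics_info_as_list = ["accuracy", "precision"]

# 2) name -> object that computes the metric (torchmetrics, as in the docstring;
#    task="binary" is required on recent torchmetrics versions)
metrics_info_as_dict = {
    "accuracy": torchmetrics.Accuracy(task="binary"),
    "precision": torchmetrics.Precision(task="binary"),
}
```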
{halib-0.1.99 → halib-0.2.1}/halib/research/mics.py

@@ -9,7 +9,7 @@ PC_NAME_TO_ABBR = {
      "DESKTOP-5IRHU87": "MSI_Laptop",
      "DESKTOP-96HQCNO": "4090_SV",
      "DESKTOP-Q2IKLC0": "4GPU_SV",
-     "DESKTOP-QNS3DNF": "1GPU_SV"
+     "DESKTOP-QNS3DNF": "1GPU_SV",
  }

  DEFAULT_ABBR_WORKING_DISK = {
@@ -19,19 +19,25 @@ DEFAULT_ABBR_WORKING_DISK = {
      "4GPU_SV": "D:",
  }

+
  def list_PCs(show=True):
-     df = pd.DataFrame(list(PC_NAME_TO_ABBR.items()), columns=["PC Name", "Abbreviation"])
+     df = pd.DataFrame(
+         list(PC_NAME_TO_ABBR.items()), columns=["PC Name", "Abbreviation"]
+     )
      if show:
          csvfile.fn_display_df(df)
      return df

+
  def get_PC_name():
      return platform.node()

+
  def get_PC_abbr_name():
      pc_name = get_PC_name()
      return PC_NAME_TO_ABBR.get(pc_name, "Unknown")

+
  # ! This funcction search for full paths in the obj and normalize them according to the current platform and working disk
  # ! E.g: "E:/zdataset/DFire", but working_disk: "D:", current_platform: "windows" => "D:/zdataset/DFire"
  # ! E.g: "E:/zdataset/DFire", but working_disk: "D:", current_platform: "linux" => "/mnt/d/zdataset/DFire"
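The comments above only describe the remapping; the function itself is outside this hunk. A minimal standalone sketch of the idea (not halib's implementation, and `remap_path` is a made-up name) is:

```python
# Standalone sketch of the disk/platform remap described in the comments above;
# this is NOT the halib function (its definition is outside the hunk).
def remap_path(path: str, working_disk: str = "D:", platform_name: str = "windows") -> str:
    _, _, rest = path.partition(":")  # "E:/zdataset/DFire" -> rest = "/zdataset/DFire"
    if platform_name == "windows":
        return f"{working_disk}{rest}"                # "D:/zdataset/DFire"
    return f"/mnt/{working_disk[0].lower()}{rest}"    # WSL-style: "/mnt/d/zdataset/DFire"


print(remap_path("E:/zdataset/DFire", "D:", "windows"))  # D:/zdataset/DFire
print(remap_path("E:/zdataset/DFire", "D:", "linux"))    # /mnt/d/zdataset/DFire
```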
{halib-0.1.99 → halib-0.2.1}/halib/research/perfcalc.py

@@ -3,12 +3,9 @@ import glob
  from typing import Optional, Tuple
  import pandas as pd

- from rich.pretty import pprint
-
  from abc import ABC, abstractmethod
  from collections import OrderedDict

- from ..filetype import csvfile
  from ..system import filesys as fs
  from ..common import now_str
  from ..research.perftb import PerfTB
@@ -19,6 +16,7 @@ REQUIRED_COLS = ["experiment", "dataset"]
  CSV_FILE_POSTFIX = "__perf"
  METRIC_PREFIX = "metric_"

+
  class PerfCalc(ABC): # Abstract base class for performance calculation
      @abstractmethod
      def get_experiment_name(self) -> str:
@@ -44,29 +42,32 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
          """
          pass

-     def valid_proc_extra_data(
-         self, proc_extra_data
-     ):
+     def valid_proc_extra_data(self, proc_extra_data):
          # make sure that all items in proc_extra_data are dictionaries, with same keys
          if proc_extra_data is None or len(proc_extra_data) == 0:
              return
          if not all(isinstance(item, dict) for item in proc_extra_data):
              raise TypeError("All items in proc_extra_data must be dictionaries")

-         if not all(item.keys() == proc_extra_data[0].keys() for item in proc_extra_data):
-             raise ValueError("All dictionaries in proc_extra_data must have the same keys")
+         if not all(
+             item.keys() == proc_extra_data[0].keys() for item in proc_extra_data
+         ):
+             raise ValueError(
+                 "All dictionaries in proc_extra_data must have the same keys"
+             )

-     def valid_proc_metric_raw_data(
-         self, metric_names, proc_metric_raw_data
-     ):
+     def valid_proc_metric_raw_data(self, metric_names, proc_metric_raw_data):
          # make sure that all items in proc_metric_raw_data are dictionaries, with same keys as metric_names
-         assert isinstance(proc_metric_raw_data, list) and len(proc_metric_raw_data) > 0, \
-             "raw_data_for_metrics must be a non-empty list of dictionaries"
+         assert (
+             isinstance(proc_metric_raw_data, list) and len(proc_metric_raw_data) > 0
+         ), "raw_data_for_metrics must be a non-empty list of dictionaries"

          # make sure that all items in proc_metric_raw_data are dictionaries with keys as metric_names
          if not all(isinstance(item, dict) for item in proc_metric_raw_data):
              raise TypeError("All items in raw_data_for_metrics must be dictionaries")
-         if not all( set(item.keys()) == set(metric_names) for item in proc_metric_raw_data):
+         if not all(
+             set(item.keys()) == set(metric_names) for item in proc_metric_raw_data
+         ):
              raise ValueError(
                  "All dictionaries in raw_data_for_metrics must have the same keys as metric_names"
              )
@@ -75,21 +76,30 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
      def calc_exp_perf_metrics(
          self, metric_names, raw_metrics_data, extra_data=None, *args, **kwargs
      ):
-         assert isinstance(raw_metrics_data, dict) or isinstance(raw_metrics_data, list), \
-             "raw_data_for_metrics must be a dictionary or a list"
+         assert isinstance(raw_metrics_data, dict) or isinstance(
+             raw_metrics_data, list
+         ), "raw_data_for_metrics must be a dictionary or a list"

          if extra_data is not None:
-             assert isinstance(extra_data, type(raw_metrics_data)), \
-                 "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
+             assert isinstance(
+                 extra_data, type(raw_metrics_data)
+             ), "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
          # prepare raw_metric data for processing
-         proc_metric_raw_data_ls = raw_metrics_data if isinstance(raw_metrics_data, list) else [raw_metrics_data.copy()]
+         proc_metric_raw_data_ls = (
+             raw_metrics_data
+             if isinstance(raw_metrics_data, list)
+             else [raw_metrics_data.copy()]
+         )
          self.valid_proc_metric_raw_data(metric_names, proc_metric_raw_data_ls)
          # prepare extra data for processing
          proc_extra_data_ls = []
          if extra_data is not None:
-             proc_extra_data_ls = extra_data if isinstance(extra_data, list) else [extra_data.copy()]
-             assert len(proc_extra_data_ls) == len(proc_metric_raw_data_ls), \
-                 "extra_data must have the same length as raw_data_for_metrics if it is a list"
+             proc_extra_data_ls = (
+                 extra_data if isinstance(extra_data, list) else [extra_data.copy()]
+             )
+             assert len(proc_extra_data_ls) == len(
+                 proc_metric_raw_data_ls
+             ), "extra_data must have the same length as raw_data_for_metrics if it is a list"
          # validate the extra_data
          self.valid_proc_extra_data(proc_extra_data_ls)

@@ -102,7 +112,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                  "experiment": self.get_experiment_name(),
              }
              custom_fields = []
-             if len(proc_extra_data_ls)> 0:
+             if len(proc_extra_data_ls) > 0:
                  # add extra data to the output dictionary
                  extra_data_item = proc_extra_data_ls[idx]
                  out_dict.update(extra_data_item)
@@ -110,7 +120,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              metric_results = metrics_backend.calc_metrics(
                  metrics_data_dict=raw_metrics_data, *args, **kwargs
              )
-             metric_results_prefix = {f"metric_{k}": v for k, v in metric_results.items()}
+             metric_results_prefix = {
+                 f"metric_{k}": v for k, v in metric_results.items()
+             }
              out_dict.update(metric_results_prefix)
              ordered_cols = (
                  REQUIRED_COLS + custom_fields + list(metric_results_prefix.keys())
@@ -126,7 +138,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
      #! outfile - if provided, will save the output to a CSV file with the given path
      #! outdir - if provided, will save the output to a CSV file in the given directory with a generated filename
      #! return_df - if True, will return a DataFrame instead of a dictionary
-     def calc_and_save_exp_perfs(
+     def calc_perfs(
          self,
          raw_metrics_data: Union[List[dict], dict],
          extra_data: Optional[Union[List[dict], dict]] = None,
@@ -140,9 +152,11 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
          """
          metric_names = self.get_metric_backend().metric_names
          out_dict_list = self.calc_exp_perf_metrics(
-             metric_names=metric_names, raw_metrics_data=raw_metrics_data,
+             metric_names=metric_names,
+             raw_metrics_data=raw_metrics_data,
              extra_data=extra_data,
-             *args, **kwargs
+             *args,
+             **kwargs,
          )
          csv_outfile = kwargs.get("outfile", None)
          if csv_outfile is not None:
@@ -176,13 +190,18 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
          return "__perf.csv" in exp_file_name

      @classmethod
-     def gen_perf_report_for_multip_exps(
-         cls, indir: str, exp_csv_filter_fn=default_exp_csv_filter_fn, include_file_name=False, csv_sep=";"
+     def get_perftb_for_multi_exps(
+         cls,
+         indir: str,
+         exp_csv_filter_fn=default_exp_csv_filter_fn,
+         include_file_name=False,
+         csv_sep=";",
      ) -> PerfTB:
          """
          Generate a performance report by scanning experiment subdirectories.
          Must return a dictionary with keys as metric names and values as performance tables.
          """
+
          def get_df_for_all_exp_perf(csv_perf_files, csv_sep=";"):
              """
              Create a single DataFrame from all CSV files.
@@ -194,7 +213,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              for csv_file in csv_perf_files:
                  temp_df = pd.read_csv(csv_file, sep=csv_sep)
                  if FILE_NAME_COL:
-                     temp_df[FILE_NAME_COL] = fs.get_file_name(csv_file, split_file_ext=False)
+                     temp_df[FILE_NAME_COL] = fs.get_file_name(
+                         csv_file, split_file_ext=False
+                     )
                  # csvfile.fn_display_df(temp_df)
                  temp_df_cols = temp_df.columns.tolist()
                  for col in temp_df_cols:
@@ -205,7 +226,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              for csv_file in csv_perf_files:
                  temp_df = pd.read_csv(csv_file, sep=csv_sep)
                  if FILE_NAME_COL:
-                     temp_df[FILE_NAME_COL] = fs.get_file_name(csv_file, split_file_ext=False)
+                     temp_df[FILE_NAME_COL] = fs.get_file_name(
+                         csv_file, split_file_ext=False
+                     )
                  # Drop all-NA columns to avoid dtype inconsistency
                  temp_df = temp_df.dropna(axis=1, how="all")
                  # ensure all columns are present in the final DataFrame
@@ -215,7 +238,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                  df = pd.concat([df, temp_df], ignore_index=True)
              # assert that REQUIRED_COLS are present in the DataFrame
              # pprint(df.columns.tolist())
-             sticky_cols = REQUIRED_COLS + ([FILE_NAME_COL] if include_file_name else []) # columns that must always be present
+             sticky_cols = REQUIRED_COLS + (
+                 [FILE_NAME_COL] if include_file_name else []
+             ) # columns that must always be present
              for col in sticky_cols:
                  if col not in df.columns:
                      raise ValueError(
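In short, `calc_and_save_exp_perfs` is now `calc_perfs` and `gen_perf_report_for_multip_exps` is now `get_perftb_for_multi_exps`. A hedged usage sketch (the `results` directory and the `exp`/`MyExperiment` objects from the earlier sketch are hypothetical; only the method names, parameters, and the `outfile`/`outdir`/`return_df` kwargs come from the hunks above):

```python
# Hypothetical calls against the renamed 0.2.x API; `exp` is the illustrative
# BaseExperiment subclass from the earlier sketch, "results" is a made-up directory.
perf = exp.calc_perfs(
    raw_metrics_data={"accuracy": {"preds": [...], "targets": [...]}},
    extra_data={"epochs": 10},
    outdir="results",  # per the comment above: saves a CSV with a generated name here
)

# Aggregate every CSV whose name matches the default "__perf.csv" filter under
# results/ into a single PerfTB.
perftb = MyExperiment.get_perftb_for_multi_exps(indir="results", csv_sep=";")
```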
{halib-0.1.99 → halib-0.2.1}/halib.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: halib
- Version: 0.1.99
+ Version: 0.2.1
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -53,6 +53,9 @@ Dynamic: summary

  # Helper package for coding and automation

+ **Version 0.2.01**
+ + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
  **Version 0.1.99**
  + `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)

{halib-0.1.99 → halib-0.2.1}/halib.egg-info/SOURCES.txt

@@ -3,7 +3,6 @@ GDriveFolder.txt
  LICENSE.txt
  MANIFEST.in
  README.md
- guide_publish_pip.pdf
  setup.py
  halib/__init__.py
  halib/common.py
{halib-0.1.99 → halib-0.2.1}/setup.py

@@ -8,7 +8,7 @@ with open("requirements.txt") as f:

  setuptools.setup(
      name="halib",
-     version="0.1.99",
+     version="0.2.01",
      author="Hoang Van Ha",
      author_email="hoangvanhauit@gmail.com",
      description="Small library for common tasks",
halib-0.1.99/guide_publish_pip.pdf (binary file, removed; no diff shown)
halib-0.1.99/halib/research/base_exp.py (removed)

@@ -1,100 +0,0 @@
- from abc import ABC, abstractmethod
-
- from ..research.base_config import ExpBaseConfig
- from ..research.perfcalc import PerfCalc
- from ..research.metrics import MetricsBackend
-
- # ! SEE https://github.com/hahv/base_exp for sample usage
- class BaseExperiment(PerfCalc, ABC):
-     """
-     Base class for experiments.
-     Orchestrates the experiment pipeline using a pluggable metrics backend.
-     """
-
-     def __init__(self, config: ExpBaseConfig):
-         self.config = config
-         self.metric_backend = None
-
-     # -----------------------
-     # PerfCalc Required Methods
-     # -----------------------
-     def get_dataset_name(self):
-         return self.config.get_dataset_cfg().get_name()
-
-     def get_experiment_name(self):
-         return self.config.get_cfg_name()
-
-     def get_metric_backend(self):
-         if not self.metric_backend:
-             self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
-         return self.metric_backend
-
-     # -----------------------
-     # Abstract Experiment Steps
-     # -----------------------
-     @abstractmethod
-     def init_general(self, general_cfg):
-         """Setup general settings like SEED, logging, env variables."""
-         pass
-
-     @abstractmethod
-     def prepare_dataset(self, dataset_cfg):
-         """Load/prepare dataset."""
-         pass
-
-     @abstractmethod
-     def prepare_metrics(self, metric_cfg) -> MetricsBackend:
-         """
-         Prepare the metrics for the experiment.
-         This method should be implemented in subclasses.
-         """
-         pass
-
-     @abstractmethod
-     def exec_exp(self, *args, **kwargs):
-         """Run experiment process, e.g.: training/evaluation loop.
-         Return: raw_metrics_data, and extra_data as input for calc_and_save_exp_perfs
-         """
-         pass
-
-     def eval_exp(self):
-         """Optional: re-run evaluation from saved results."""
-         pass
-
-     # -----------------------
-     # Main Experiment Runner
-     # -----------------------
-     def run_exp(self, do_calc_metrics=True, *args, **kwargs):
-         """
-         Run the whole experiment pipeline.
-         Params:
-             + 'outfile' to save csv file results,
-             + 'outdir' to set output directory for experiment results.
-             + 'return_df' to return a DataFrame of results instead of a dictionary.
-
-         Full pipeline:
-             1. Init
-             2. Dataset
-             3. Metrics Preparation
-             4. Save Config
-             5. Execute
-             6. Calculate & Save Metrics
-         """
-         self.init_general(self.config.get_general_cfg())
-         self.prepare_dataset(self.config.get_dataset_cfg())
-         self.prepare_metrics(self.config.get_metric_cfg())
-
-         # Save config before running
-         self.config.save_to_outdir()
-
-         # Execute experiment
-         results = self.exec_exp(*args, **kwargs)
-         if do_calc_metrics:
-             metrics_data, extra_data = results
-             # Calculate & Save metrics
-             perf_results = self.calc_and_save_exp_perfs(
-                 raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
-             )
-             return perf_results
-         else:
-             return results