halib 0.2.6__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {halib-0.2.6 → halib-0.2.8}/PKG-INFO +3 -2
  2. {halib-0.2.6 → halib-0.2.8}/README.md +2 -1
  3. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/base_config.py +31 -8
  4. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perfcalc.py +154 -77
  5. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/PKG-INFO +3 -2
  6. {halib-0.2.6 → halib-0.2.8}/setup.py +1 -1
  7. {halib-0.2.6 → halib-0.2.8}/.gitignore +0 -0
  8. {halib-0.2.6 → halib-0.2.8}/GDriveFolder.txt +0 -0
  9. {halib-0.2.6 → halib-0.2.8}/LICENSE.txt +0 -0
  10. {halib-0.2.6 → halib-0.2.8}/MANIFEST.in +0 -0
  11. {halib-0.2.6 → halib-0.2.8}/halib/__init__.py +0 -0
  12. {halib-0.2.6 → halib-0.2.8}/halib/common/__init__.py +0 -0
  13. {halib-0.2.6 → halib-0.2.8}/halib/common/common.py +0 -0
  14. {halib-0.2.6 → halib-0.2.8}/halib/common/rich_color.py +0 -0
  15. {halib-0.2.6 → halib-0.2.8}/halib/exp/__init__.py +0 -0
  16. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/__init__.py +0 -0
  17. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/base_exp.py +0 -0
  18. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/param_gen.py +0 -0
  19. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/wandb_op.py +0 -0
  20. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/__init__.py +0 -0
  21. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/dataclass_util.py +0 -0
  22. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/dataset.py +0 -0
  23. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/torchloader.py +0 -0
  24. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/__init__.py +0 -0
  25. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/flop_calc.py +0 -0
  26. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/gpu_mon.py +0 -0
  27. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perfmetrics.py +0 -0
  28. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perftb.py +0 -0
  29. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/profiler.py +0 -0
  30. {halib-0.2.6 → halib-0.2.8}/halib/exp/viz/__init__.py +0 -0
  31. {halib-0.2.6 → halib-0.2.8}/halib/exp/viz/plot.py +0 -0
  32. {halib-0.2.6 → halib-0.2.8}/halib/filetype/__init__.py +0 -0
  33. {halib-0.2.6 → halib-0.2.8}/halib/filetype/csvfile.py +0 -0
  34. {halib-0.2.6 → halib-0.2.8}/halib/filetype/ipynb.py +0 -0
  35. {halib-0.2.6 → halib-0.2.8}/halib/filetype/jsonfile.py +0 -0
  36. {halib-0.2.6 → halib-0.2.8}/halib/filetype/textfile.py +0 -0
  37. {halib-0.2.6 → halib-0.2.8}/halib/filetype/videofile.py +0 -0
  38. {halib-0.2.6 → halib-0.2.8}/halib/filetype/yamlfile.py +0 -0
  39. {halib-0.2.6 → halib-0.2.8}/halib/online/__init__.py +0 -0
  40. {halib-0.2.6 → halib-0.2.8}/halib/online/gdrive.py +0 -0
  41. {halib-0.2.6 → halib-0.2.8}/halib/online/gdrive_mkdir.py +0 -0
  42. {halib-0.2.6 → halib-0.2.8}/halib/online/projectmake.py +0 -0
  43. {halib-0.2.6 → halib-0.2.8}/halib/online/tele_noti.py +0 -0
  44. {halib-0.2.6 → halib-0.2.8}/halib/system/__init__.py +0 -0
  45. {halib-0.2.6 → halib-0.2.8}/halib/system/cmd.py +0 -0
  46. {halib-0.2.6 → halib-0.2.8}/halib/system/filesys.py +0 -0
  47. {halib-0.2.6 → halib-0.2.8}/halib/system/path.py +0 -0
  48. {halib-0.2.6 → halib-0.2.8}/halib/utils/__init__.py +0 -0
  49. {halib-0.2.6 → halib-0.2.8}/halib/utils/dict.py +0 -0
  50. {halib-0.2.6 → halib-0.2.8}/halib/utils/list.py +0 -0
  51. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/SOURCES.txt +0 -0
  52. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/dependency_links.txt +0 -0
  53. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/requires.txt +0 -0
  54. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/top_level.txt +0 -0
  55. {halib-0.2.6 → halib-0.2.8}/setup.cfg +0 -0
{halib-0.2.6 → halib-0.2.8}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: halib
- Version: 0.2.6
+ Version: 0.2.8
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -53,8 +53,9 @@ Dynamic: summary
  
  # Helper package for coding and automation
  
- **Version 0.2.6**
+ **Version 0.2.8**
  + reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
+ + update `exp/perfcalc.py` to allow saving computed performance to a CSV file (without explicitly calling method `calc_perfs`)
  
  **Version 0.2.1**
  + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
{halib-0.2.6 → halib-0.2.8}/README.md
@@ -1,7 +1,8 @@
  # Helper package for coding and automation
  
- **Version 0.2.6**
+ **Version 0.2.8**
  + reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
+ + update `exp/perfcalc.py` to allow saving computed performance to a CSV file (without explicitly calling method `calc_perfs`)
  
  **Version 0.2.1**
  + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
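The headline 0.2.8 change is the `save_computed_perfs` shortcut in `halib/exp/perf/perfcalc.py` (full diff below). A minimal usage sketch, assuming a hypothetical `PerfCalc` subclass: `MyPerf`, the metric values, and the output directory are illustrative only, and any remaining abstract methods are omitted.

```python
from halib.exp.perf.perfcalc import PerfCalc

class MyPerf(PerfCalc):
    # Hypothetical subclass: only the hooks that save_computed_perfs touches
    # are shown; a real subclass must implement the other abstract methods too.
    def get_dataset_name(self):
        return "cifar10"

    def get_experiment_name(self):
        return "baseline"

# Metrics were computed elsewhere -- no calc_perfs call needed in 0.2.8.
rows, csv_path = MyPerf().save_computed_perfs(
    metrics_data={"acc": 0.91, "f1": 0.89},  # saved as metric_acc, metric_f1
    extra_data={"fold": 1},                  # extra columns, kept before metrics
    outdir="results",                        # CSV filename is generated here
)
```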
{halib-0.2.6 → halib-0.2.8}/halib/exp/core/base_config.py
@@ -42,8 +42,10 @@ class AutoNamedCfg(YAMLWizard, NamedCfg):
      # or handled by the loader.
      pass
  
+
  T = TypeVar("T", bound=AutoNamedCfg)
  
+
  class BaseSelectorCfg(Generic[T]):
      """
      Base class to handle the logic of selecting an item from a list by name.
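For context, the `Generic[T]` bound above means a selector is parameterized over a concrete `AutoNamedCfg` subclass. A purely illustrative sketch of that pattern; `OptimizerCfg` and `OptimizerSelectorCfg` are hypothetical names, not part of halib, and the selector's internals are not shown in this diff:

```python
from dataclasses import dataclass

@dataclass
class OptimizerCfg(AutoNamedCfg):
    # hypothetical concrete config; AutoNamedCfg supplies YAML + naming behavior
    lr: float = 1e-3

class OptimizerSelectorCfg(BaseSelectorCfg[OptimizerCfg]):
    # inherits the select-an-item-by-name logic named in the docstring above
    pass
```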
@@ -72,6 +74,8 @@ class ExpBaseCfg(ABC, YAMLWizard):
      3 - a method cfg
      """
  
+     cfg_name: Optional[str] = None
+
      # Save to yaml file
      def save_to_outdir(
          self, filename: str = "__config.yaml", outdir=None, override: bool = False
@@ -103,13 +107,24 @@ class ExpBaseCfg(ABC, YAMLWizard):
          """Load a configuration from a custom YAML file."""
          pass
  
-     @abstractmethod
-     def get_cfg_name(self):
-         """
-         Get the name of the configuration.
-         This method should be implemented in subclasses.
-         """
-         pass
+     def get_cfg_name(self, sep: str = "__", *args, **kwargs) -> str:
+         # auto get the config name from dataset, method, metric
+         # 2. Generate the canonical Config Name
+         name_parts = []
+         general_info = self.get_general_cfg().get_name()
+         dataset_info = self.get_dataset_cfg().get_name()
+         method_info = self.get_method_cfg().get_name()
+         name_parts = [
+             general_info,
+             f"ds_{dataset_info}",
+             f"mt_{method_info}",
+         ]
+         if "extra" in kwargs:
+             extra_info = kwargs["extra"]
+             assert isinstance(extra_info, str), "'extra' kwarg must be a string."
+             name_parts.append(extra_info)
+         self.cfg_name = sep.join(name_parts)
+         return self.cfg_name
  
      @abstractmethod
      def get_outdir(self):
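The old abstract `get_cfg_name` is thus replaced by a default canonical naming scheme. To illustrate, assuming the three sub-configs report `base`, `cifar10`, and `resnet18` (illustrative names, not from the diff):

```python
# With get_general_cfg().get_name() == "base",
# get_dataset_cfg().get_name() == "cifar10",
# and get_method_cfg().get_name() == "resnet18" (assumed values):
cfg.get_cfg_name()               # -> "base__ds_cifar10__mt_resnet18"
cfg.get_cfg_name(extra="fold1")  # -> "base__ds_cifar10__mt_resnet18__fold1"
cfg.get_cfg_name(sep="-")        # -> "base-ds_cifar10-mt_resnet18"
```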
@@ -120,7 +135,7 @@ class ExpBaseCfg(ABC, YAMLWizard):
          return None
  
      @abstractmethod
-     def get_general_cfg(self):
+     def get_general_cfg(self) -> NamedCfg:
          """
          Get the general configuration like output directory, log settings, SEED, etc.
          This method should be implemented in subclasses.
@@ -135,6 +150,14 @@ class ExpBaseCfg(ABC, YAMLWizard):
          """
          pass
  
+     @abstractmethod
+     def get_method_cfg(self) -> NamedCfg:
+         """
+         Get the method configuration.
+         This method should be implemented in subclasses.
+         """
+         pass
+
      @abstractmethod
      def get_metric_cfg(self) -> NamedCfg:
          """
{halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perfcalc.py
@@ -74,114 +74,191 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              "All dictionaries in raw_data_for_metrics must have the same keys as metric_names"
          )
  
-     # ! only need to override this method if torchmetrics are not used
-     def calc_exp_perf_metrics(
-         self, metric_names, raw_metrics_data, extra_data=None, *args, **kwargs
-     ):
-         assert isinstance(raw_metrics_data, dict) or isinstance(
-             raw_metrics_data, list
-         ), "raw_data_for_metrics must be a dictionary or a list"
-
-         if extra_data is not None:
-             assert isinstance(
-                 extra_data, type(raw_metrics_data)
-             ), "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
-         # prepare raw_metric data for processing
-         proc_metric_raw_data_ls = (
-             raw_metrics_data
-             if isinstance(raw_metrics_data, list)
-             else [raw_metrics_data.copy()]
-         )
-         self.valid_proc_metric_raw_data(metric_names, proc_metric_raw_data_ls)
-         # prepare extra data for processing
-         proc_extra_data_ls = []
-         if extra_data is not None:
-             proc_extra_data_ls = (
-                 extra_data if isinstance(extra_data, list) else [extra_data.copy()]
-             )
-             assert len(proc_extra_data_ls) == len(
-                 proc_metric_raw_data_ls
-             ), "extra_data must have the same length as raw_data_for_metrics if it is a list"
-         # validate the extra_data
-         self.valid_proc_extra_data(proc_extra_data_ls)
+     # =========================================================================
+     # 1. Formatting Logic (Decoupled)
+     # =========================================================================
+     def package_metrics(
+         self,
+         metric_results_list: List[dict],
+         extra_data_list: Optional[List[dict]] = None,
+     ) -> List[OrderedDict]:
+         """
+         Pure formatting function.
+         Takes ALREADY CALCULATED metrics and formats them
+         (adds metadata, prefixes keys, ensures column order).
+         """
+         # Normalize extra_data to a list if provided
+         if extra_data_list is None:
+             extra_data_list = [{} for _ in range(len(metric_results_list))]
+         elif isinstance(extra_data_list, dict):
+             extra_data_list = [extra_data_list]
+
+         assert len(extra_data_list) == len(
+             metric_results_list
+         ), "Length mismatch: metrics vs extra_data"
  
-         # calculate the metrics output results
-         metrics_backend = self.get_metric_backend()
          proc_outdict_list = []
-         for idx, raw_metrics_data in enumerate(proc_metric_raw_data_ls):
+
+         for metric_res, extra_item in zip(metric_results_list, extra_data_list):
+             # A. Base Metadata
              out_dict = {
                  "dataset": self.get_dataset_name(),
                  "experiment": self.get_experiment_name(),
              }
-             custom_fields = []
-             if len(proc_extra_data_ls) > 0:
-                 # add extra data to the output dictionary
-                 extra_data_item = proc_extra_data_ls[idx]
-                 out_dict.update(extra_data_item)
-                 custom_fields = list(extra_data_item.keys())
-             metric_results = metrics_backend.calc_metrics(
-                 metrics_data_dict=raw_metrics_data, *args, **kwargs
-             )
-             metric_results_prefix = {
-                 f"metric_{k}": v for k, v in metric_results.items()
-             }
-             out_dict.update(metric_results_prefix)
-             ordered_cols = (
-                 REQUIRED_COLS + custom_fields + list(metric_results_prefix.keys())
+
+             # B. Attach Extra Data
+             out_dict.update(extra_item)
+             custom_fields = list(extra_item.keys())
+
+             # C. Prefix Metric Keys (e.g., 'acc' -> 'metric_acc')
+             metric_results_prefixed = {f"metric_{k}": v for k, v in metric_res.items()}
+             out_dict.update(metric_results_prefixed)
+
+             # D. Order Columns
+             all_cols = (
+                 REQUIRED_COLS + custom_fields + list(metric_results_prefixed.keys())
              )
-             out_dict = OrderedDict(
-                 (col, out_dict[col]) for col in ordered_cols if col in out_dict
+             ordered_out = OrderedDict(
+                 (col, out_dict[col]) for col in all_cols if col in out_dict
              )
-             proc_outdict_list.append(out_dict)
+             proc_outdict_list.append(ordered_out)
  
          return proc_outdict_list
  
-     #! custom kwargs:
-     #! outfile - if provided, will save the output to a CSV file with the given path
-     #! outdir - if provided, will save the output to a CSV file in the given directory with a generated filename
-     #! return_df - if True, will return a DataFrame instead of a dictionary
-     def calc_perfs(
+     # =========================================================================
+     # 2. Calculation Logic (The Coordinator)
+     # =========================================================================
+     def calc_exp_perf_metrics(
          self,
+         metric_names: List[str],
          raw_metrics_data: Union[List[dict], dict],
          extra_data: Optional[Union[List[dict], dict]] = None,
          *args,
         **kwargs,
-     ) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
+     ) -> List[OrderedDict]:
          """
-         Calculate the metrics.
-         This function should be overridden by the subclass if needed.
-         Must return a dictionary with keys as metric names and values as the calculated metrics.
+         Full workflow: Validates raw data -> Calculates via Backend -> Packages results.
          """
-         metric_names = self.get_metric_backend().metric_names
-         out_dict_list = self.calc_exp_perf_metrics(
-             metric_names=metric_names,
-             raw_metrics_data=raw_metrics_data,
-             extra_data=extra_data,
-             *args,
-             **kwargs,
+         # Prepare Raw Data
+         raw_data_ls = (
+             raw_metrics_data
+             if isinstance(raw_metrics_data, list)
+             else [raw_metrics_data]
          )
+         self.valid_proc_metric_raw_data(metric_names, raw_data_ls)
+
+         # Prepare Extra Data (Validation only)
+         extra_data_ls = None
+         if extra_data:
+             extra_data_ls = extra_data if isinstance(extra_data, list) else [extra_data]
+             self.valid_proc_extra_data(extra_data_ls)
+
+         # Calculate Metrics via Backend
+         metrics_backend = self.get_metric_backend()
+         calculated_results = []
+
+         for data_item in raw_data_ls:
+             res = metrics_backend.calc_metrics(
+                 metrics_data_dict=data_item, *args, **kwargs
+             )
+             calculated_results.append(res)
+
+         # Delegate to Formatting
+         return self.package_metrics(calculated_results, extra_data_ls)
+
+     # =========================================================================
+     # 3. File Saving Logic (Decoupled)
+     # =========================================================================
+     def save_results_to_csv(
+         self, out_dict_list: List[OrderedDict], **kwargs
+     ) -> Tuple[pd.DataFrame, Optional[str]]:
+         """
+         Helper function to convert results to DataFrame and save to CSV.
+         """
          csv_outfile = kwargs.get("outfile", None)
+
+         # Determine Output Path
          if csv_outfile is not None:
              filePathNoExt, _ = os.path.splitext(csv_outfile)
-             # pprint(f"CSV Outfile Path (No Ext): {filePathNoExt}")
              csv_outfile = f"{filePathNoExt}{CSV_FILE_POSTFIX}.csv"
          elif "outdir" in kwargs:
              csvoutdir = kwargs["outdir"]
              csvfilename = f"{now_str()}_{self.get_dataset_name()}_{self.get_experiment_name()}_{CSV_FILE_POSTFIX}.csv"
              csv_outfile = os.path.join(csvoutdir, csvfilename)
  
-         # convert out_dict to a DataFrame
+         # Convert to DataFrame
          df = pd.DataFrame(out_dict_list)
-         # get the orders of the columns as the orders or the keys in out_dict
-         ordered_cols = list(out_dict_list[0].keys())
-         df = df[ordered_cols] # reorder columns
+         if out_dict_list:
+             ordered_cols = list(out_dict_list[0].keys())
+             df = df[ordered_cols]
+
+         # Save to File
          if csv_outfile:
              df.to_csv(csv_outfile, index=False, sep=";", encoding="utf-8")
-         return_df = kwargs.get("return_df", False)
-         if return_df: # return DataFrame instead of dict if requested
-             return df, csv_outfile
-         else:
-             return out_dict_list, csv_outfile
+
+         return df, csv_outfile
+
+     # =========================================================================
+     # 4. Public API: Standard Calculation
+     # raw_metrics_data: example: [{"preds": ..., "target": ...}, ...]
+     # =========================================================================
+     def calc_perfs(
+         self,
+         raw_metrics_data: Union[List[dict], dict],
+         extra_data: Optional[Union[List[dict], dict]] = None,
+         *args,
+         **kwargs,
+     ) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
+         """
+         Standard use case: Calculate metrics AND save to CSV.
+         """
+         metric_names = self.get_metric_backend().metric_names
+
+         # 1. Calculate & Package
+         out_dict_list = self.calc_exp_perf_metrics(
+             metric_names=metric_names,
+             raw_metrics_data=raw_metrics_data,
+             extra_data=extra_data,
+             *args,
+             **kwargs,
+         )
+
+         # 2. Save
+         df, csv_outfile = self.save_results_to_csv(out_dict_list, **kwargs)
+
+         return (
+             (df, csv_outfile)
+             if kwargs.get("return_df", False)
+             else (out_dict_list, csv_outfile)
+         )
+
+     # =========================================================================
+     # 5. Public API: Manual / External Metrics (The Shortcut)
+     # =========================================================================
+     def save_computed_perfs(
+         self,
+         metrics_data: Union[List[dict], dict],
+         extra_data: Optional[Union[List[dict], dict]] = None,
+         **kwargs,
+     ) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
+
+         # Ensure list format
+         if isinstance(metrics_data, dict):
+             metrics_data = [metrics_data]
+         if isinstance(extra_data, dict):
+             extra_data = [extra_data]
+
+         # 1. Package (Format)
+         formatted_list = self.package_metrics(metrics_data, extra_data)
+
+         # 2. Save
+         df, csv_outfile = self.save_results_to_csv(formatted_list, **kwargs)
+
+         return (
+             (df, csv_outfile)
+             if kwargs.get("return_df", False)
+             else (formatted_list, csv_outfile)
+         )
  
      @staticmethod
      def default_exp_csv_filter_fn(exp_file_name: str) -> bool:
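Taken together, the refactor splits the old monolithic method into `package_metrics` (format), `calc_exp_perf_metrics` (coordinate), and `save_results_to_csv` (persist), with `calc_perfs` as the standard entry point. A hedged end-to-end sketch; `perf`, `preds`, and `target` are placeholders for a concrete subclass and its data:

```python
# Standard path: raw inputs go through the metric backend, then to CSV.
df, csv_path = perf.calc_perfs(
    raw_metrics_data=[{"preds": preds, "target": target}],  # one dict per run
    extra_data=[{"fold": 1}],
    outfile="results/exp.csv",  # or outdir=... for a generated filename
    return_df=True,             # (DataFrame, path) instead of (rows, path)
)

# Shortcut path (new in 0.2.8): metrics already computed, just package and save.
rows, csv_path = perf.save_computed_perfs(
    metrics_data={"acc": 0.91},
    outdir="results",
)
```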
{halib-0.2.6 → halib-0.2.8}/halib.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: halib
- Version: 0.2.6
+ Version: 0.2.8
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -53,8 +53,9 @@ Dynamic: summary
  
  # Helper package for coding and automation
  
- **Version 0.2.6**
+ **Version 0.2.8**
  + reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
+ + update `exp/perfcalc.py` to allow saving computed performance to a CSV file (without explicitly calling method `calc_perfs`)
  
  **Version 0.2.1**
  + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
{halib-0.2.6 → halib-0.2.8}/setup.py
@@ -8,7 +8,7 @@ with open("requirements.txt") as f:
  
  setuptools.setup(
      name="halib",
-     version="0.2.6",
+     version="0.2.8",
      author="Hoang Van Ha",
      author_email="hoangvanhauit@gmail.com",
      description="Small library for common tasks",