halib 0.1.55__tar.gz → 0.1.57__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {halib-0.1.55 → halib-0.1.57}/PKG-INFO +5 -1
  2. {halib-0.1.55 → halib-0.1.57}/README.md +4 -0
  3. halib-0.1.57/halib/research/perfcalc.py +314 -0
  4. {halib-0.1.55 → halib-0.1.57}/halib.egg-info/PKG-INFO +5 -1
  5. {halib-0.1.55 → halib-0.1.57}/halib.egg-info/SOURCES.txt +1 -0
  6. {halib-0.1.55 → halib-0.1.57}/setup.py +1 -1
  7. {halib-0.1.55 → halib-0.1.57}/.gitignore +0 -0
  8. {halib-0.1.55 → halib-0.1.57}/GDriveFolder.txt +0 -0
  9. {halib-0.1.55 → halib-0.1.57}/LICENSE.txt +0 -0
  10. {halib-0.1.55 → halib-0.1.57}/MANIFEST.in +0 -0
  11. {halib-0.1.55 → halib-0.1.57}/guide_publish_pip.pdf +0 -0
  12. {halib-0.1.55 → halib-0.1.57}/halib/__init__.py +0 -0
  13. {halib-0.1.55 → halib-0.1.57}/halib/common.py +0 -0
  14. {halib-0.1.55 → halib-0.1.57}/halib/cuda.py +0 -0
  15. {halib-0.1.55 → halib-0.1.57}/halib/filetype/__init__.py +0 -0
  16. {halib-0.1.55 → halib-0.1.57}/halib/filetype/csvfile.py +0 -0
  17. {halib-0.1.55 → halib-0.1.57}/halib/filetype/jsonfile.py +0 -0
  18. {halib-0.1.55 → halib-0.1.57}/halib/filetype/textfile.py +0 -0
  19. {halib-0.1.55 → halib-0.1.57}/halib/filetype/videofile.py +0 -0
  20. {halib-0.1.55 → halib-0.1.57}/halib/filetype/yamlfile.py +0 -0
  21. {halib-0.1.55 → halib-0.1.57}/halib/online/__init__.py +0 -0
  22. {halib-0.1.55 → halib-0.1.57}/halib/online/gdrive.py +0 -0
  23. {halib-0.1.55 → halib-0.1.57}/halib/online/gdrive_mkdir.py +0 -0
  24. {halib-0.1.55 → halib-0.1.57}/halib/online/gdrive_test.py +0 -0
  25. {halib-0.1.55 → halib-0.1.57}/halib/online/projectmake.py +0 -0
  26. {halib-0.1.55 → halib-0.1.57}/halib/research/__init__.py +0 -0
  27. {halib-0.1.55 → halib-0.1.57}/halib/research/dataset.py +0 -0
  28. {halib-0.1.55 → halib-0.1.57}/halib/research/perftb.py +0 -0
  29. {halib-0.1.55 → halib-0.1.57}/halib/research/plot.py +0 -0
  30. {halib-0.1.55 → halib-0.1.57}/halib/research/torchloader.py +0 -0
  31. {halib-0.1.55 → halib-0.1.57}/halib/research/wandb_op.py +0 -0
  32. {halib-0.1.55 → halib-0.1.57}/halib/rich_color.py +0 -0
  33. {halib-0.1.55 → halib-0.1.57}/halib/system/__init__.py +0 -0
  34. {halib-0.1.55 → halib-0.1.57}/halib/system/cmd.py +0 -0
  35. {halib-0.1.55 → halib-0.1.57}/halib/system/filesys.py +0 -0
  36. {halib-0.1.55 → halib-0.1.57}/halib/utils/__init__.py +0 -0
  37. {halib-0.1.55 → halib-0.1.57}/halib/utils/dataclass_util.py +0 -0
  38. {halib-0.1.55 → halib-0.1.57}/halib/utils/listop.py +0 -0
  39. {halib-0.1.55 → halib-0.1.57}/halib/utils/tele_noti.py +0 -0
  40. {halib-0.1.55 → halib-0.1.57}/halib.egg-info/dependency_links.txt +0 -0
  41. {halib-0.1.55 → halib-0.1.57}/halib.egg-info/requires.txt +0 -0
  42. {halib-0.1.55 → halib-0.1.57}/halib.egg-info/top_level.txt +0 -0
  43. {halib-0.1.55 → halib-0.1.57}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: halib
- Version: 0.1.55
+ Version: 0.1.57
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -15,6 +15,10 @@ License-File: LICENSE.txt
 
  Helper package for coding and automation
 
+ **Version 0.1.57**
+
+ + add `research/perfcalc`: an abstract base class for performance calculation. This class needs to be inherited and implemented with the specific performance calculation logic.
+
  **Version 0.1.55**
 
  + add `util/dataclass_util` to help dynamically create `dataclass` classes from a dictionary or YAML file, including support for nested dataclasses. From there, we can use `dataclass_wizard` to create a list of `dataclass` classes with help from ChatGPT.
@@ -1,5 +1,9 @@
  Helper package for coding and automation
 
+ **Version 0.1.57**
+
+ + add `research/perfcalc`: an abstract base class for performance calculation. This class needs to be inherited and implemented with the specific performance calculation logic.
+
  **Version 0.1.55**
 
  + add `util/dataclass_util` to help dynamically create `dataclass` classes from a dictionary or YAML file, including support for nested dataclasses. From there, we can use `dataclass_wizard` to create a list of `dataclass` classes with help from ChatGPT.
@@ -0,0 +1,314 @@
+ import os
+ import glob
+ import inspect
+ import pandas as pd
+
+ from typing import Dict
+ from functools import wraps
+ from rich.pretty import pprint
+
+ from abc import ABC, abstractmethod
+
+ from ..filetype import csvfile
+ from ..common import now_str
+ from ..research.perftb import PerfTB
+
+ # try to import torch and torchmetrics
+ try:
+     import torch
+     import torchmetrics
+     from torchmetrics import Metric
+ except ImportError:
+     raise ImportError("Please install torch and torchmetrics to use this module.")
+
+
+ def validate_torch_metrics(fn):
+     @wraps(fn)
+     def wrapper(self, *args, **kwargs):
+         result = fn(self, *args, **kwargs)
+
+         if not isinstance(result, dict):
+             raise TypeError("torch_metrics() must return a dictionary")
+
+         for k, v in result.items():
+             if not isinstance(k, str):
+                 raise TypeError(f"Key '{k}' is not a string")
+             if not isinstance(v, Metric):
+                 raise TypeError(
+                     f"Value for key '{k}' is not a torchmetrics.Metric (got {type(v).__name__})"
+                 )
+
+         return result
+
+     return wrapper
+
+
+ def valid_custom_fields(fn):
+     @wraps(fn)
+     def wrapper(self, *args, **kwargs):
+         rs = fn(self, *args, **kwargs)
+         if not isinstance(rs, tuple) or len(rs) != 2:
+             raise ValueError("Function must return a tuple (outdict, custom_fields)")
+         outdict, custom_fields = rs
+         if not isinstance(outdict, dict):
+             raise TypeError("Output must be a dictionary")
+         if not isinstance(custom_fields, list):
+             raise TypeError("Custom fields must be a list")
+         for field in custom_fields:
+             if not isinstance(field, str):
+                 raise TypeError(f"Custom field '{field}' is not a string")
+         return outdict, custom_fields
+
+     return wrapper
+
+
+ REQUIRED_COLS = ["experiment", "dataset"]
+ CSV_FILE_POSTFIX = "__perf.csv"
+
+
+ class PerfCalc(ABC):  # Abstract base class for performance calculation
+     @abstractmethod
+     def get_exp_torch_metrics(self):
+         """
+         Return a dictionary of torchmetrics to be used for performance calculation.
+         Example: {"accuracy": Accuracy(), "precision": Precision()}
+         """
+         pass
+
+     @abstractmethod
+     def get_dataset_name(self):
+         """
+         Return the name of the dataset.
+         This function should be overridden by the subclass if needed.
+         """
+         pass
+
+     @abstractmethod
+     def prepare_exp_data_for_metrics(self, metric_names, *args, **kwargs):
+         """
+         Prepare the data for the metrics.
+         This function should be overridden by the subclass if needed.
+         Must return a dictionary with keys as metric names and values as the data to be used for those metrics.
+         NOTE: the data for each metric must be in the format expected by the torchmetrics instance for that metric,
+         e.g. {"accuracy": {"preds": [...], "target": [...]}, ...}, since torchmetrics expects the data in a specific format.
+         """
+         pass
+
+     @abstractmethod
+     def get_experiment_name(self):
+         """
+         Return the name of the experiment.
+         This function should be overridden by the subclass if needed.
+         """
+         pass
+
+     def calc_exp_outdict_custom_fields(self, outdict, *args, **kwargs):
+         """Can be overridden by the subclass to add custom fields to the output dictionary.
+         ! Must return the modified outdict and an ordered list of the custom fields added to it.
+         """
+         return outdict, []
+
+     #! custom kwargs:
+     #! outfile   - if provided, save the output to a CSV file at the given path
+     #! outdir    - if provided, save the output to a CSV file in the given directory with a generated filename
+     #! return_df - if True, return a DataFrame instead of a dictionary
+     def calculate_exp_perf_metrics(self, *args, **kwargs):
+         """
+         Calculate the performance metrics for this experiment.
+         Returns (out_dict, csv_outfile), or (df, csv_outfile) if return_df=True.
+         """
+         metric_names = list(self.get_exp_torch_metrics().keys())
+         out_dict = {metric: None for metric in metric_names}
+         out_dict["dataset"] = self.get_dataset_name()
+         out_dict["experiment"] = self.get_experiment_name()
+         out_dict, custom_fields = self.calc_exp_outdict_custom_fields(
+             outdict=out_dict, *args, **kwargs
+         )
+         torch_metrics_dict = self.get_exp_torch_metrics()
+         all_metric_data = self.prepare_exp_data_for_metrics(
+             metric_names, *args, **kwargs
+         )
+         for metric in metric_names:
+             if metric not in all_metric_data:
+                 raise ValueError(f"Metric '{metric}' not found in provided data.")
+             tmetric = torch_metrics_dict[metric]  # torchmetrics instance
+             metric_data = all_metric_data[metric]  # should be a dict of args/kwargs
+             # Inspect the expected parameters of the metric's update() method
+             sig = inspect.signature(tmetric.update)
+             expected_args = list(sig.parameters.values())
+             # Prepare the arguments in the correct order
+             if isinstance(metric_data, dict):
+                 # Match dict keys to parameter names
+                 metric_args = [metric_data[param.name] for param in expected_args]
+             elif isinstance(metric_data, (list, tuple)):
+                 metric_args = list(metric_data)
+             else:
+                 raise TypeError(f"Unsupported data format for metric '{metric}'")
+             # Call update and compute
+             tmetric.update(*metric_args)
+             computed_value = tmetric.compute()
+             # Ensure the computed value is converted to a scalar or a plain list
+             if isinstance(computed_value, torch.Tensor):
+                 if computed_value.numel() == 1:
+                     computed_value = computed_value.item()
+                 else:
+                     computed_value = computed_value.tolist()
+             out_dict[metric] = computed_value
+
+         # check whether a kwarg named "outfile" or "outdir" was provided
+         csv_outfile = kwargs.get("outfile", None)
+         if csv_outfile is not None:
+             # get the file name without the extension
+             filepath_no_ext, _ = os.path.splitext(csv_outfile)
+             # add the postfix to the file name (CSV_FILE_POSTFIX already ends with ".csv")
+             csvfilename = f"{now_str()}_{os.path.basename(filepath_no_ext)}{CSV_FILE_POSTFIX}"
+             csv_outfile = os.path.join(os.path.dirname(csv_outfile), csvfilename)
+         elif "outdir" in kwargs:
+             csvoutdir = kwargs["outdir"]
+             csvfilename = f"{now_str()}_{self.get_dataset_name()}_{self.get_experiment_name()}{CSV_FILE_POSTFIX}"
+             csv_outfile = os.path.join(csvoutdir, csvfilename)
+
+         # convert out_dict to a DataFrame
+         df = pd.DataFrame([out_dict])
+         ordered_cols = REQUIRED_COLS + custom_fields + metric_names
+         df = df[ordered_cols]  # reorder columns
+
+         if csv_outfile:
+             df.to_csv(csv_outfile, index=False, sep=";", encoding="utf-8")
+         return_df = kwargs.get("return_df", False)
+         if return_df:  # return DataFrame instead of dict if requested
+             return df, csv_outfile
+         else:
+             return out_dict, csv_outfile
+
+     @classmethod
+     def gen_perf_report_for_multip_exps(
+         cls, indir: str, exp_perf_csv_pattern="__perf", csv_sep=";"
+     ) -> PerfTB:
+         """
+         Generate a performance report by scanning experiment subdirectories.
+         Returns a PerfTB performance table aggregating all experiments.
+         """
+         def get_df_for_all_exp_perf(csv_perf_files, csv_sep=";"):
+             """
+             Create a single DataFrame from all CSV files.
+             Assumes the CSV files may have different metric columns.
+             """
+             cols = []
+             for csv_file in csv_perf_files:
+                 temp_df = pd.read_csv(csv_file, sep=csv_sep)
+                 temp_df_cols = temp_df.columns.tolist()
+                 for col in temp_df_cols:
+                     if col not in cols:
+                         cols.append(col)
+             df = pd.DataFrame(columns=cols)
+             for csv_file in csv_perf_files:
+                 temp_df = pd.read_csv(csv_file, sep=csv_sep)
+                 # Drop all-NA columns to avoid dtype inconsistency
+                 temp_df = temp_df.dropna(axis=1, how="all")
+                 # ensure all columns are present in the final DataFrame
+                 for col in cols:
+                     if col not in temp_df.columns:
+                         temp_df[col] = None  # fill missing columns with None
+                 df = pd.concat([df, temp_df], ignore_index=True)
+             # assert that REQUIRED_COLS are present in the DataFrame
+             pprint(df.columns.tolist())
+             for col in REQUIRED_COLS:
+                 if col not in df.columns:
+                     raise ValueError(
+                         f"Required column '{col}' is missing from the DataFrame. REQUIRED_COLS = {REQUIRED_COLS}"
+                     )
+             metric_cols = [col for col in df.columns if col.startswith("metric_")]
+             assert len(metric_cols) > 0, (
+                 "No metric columns found in the DataFrame. "
+                 "Ensure that the CSV files contain metric columns starting with 'metric_'."
+             )
+             final_cols = REQUIRED_COLS + metric_cols
+             df = df[final_cols]
+             # ! validate all rows in df before returning:
+             # make sure every row has values for REQUIRED_COLS and at least one metric column
+             for index, row in df.iterrows():
+                 if not all(col in row and pd.notna(row[col]) for col in REQUIRED_COLS):
+                     raise ValueError(
+                         f"Row {index} is missing required columns or has NaN values in required columns: {row}"
+                     )
+                 if not any(pd.notna(row[col]) for col in metric_cols):
+                     raise ValueError(f"Row {index} has no metric values: {row}")
+             # make sure there is no duplicated (experiment, dataset) pair
+             duplicates = df.duplicated(subset=["experiment", "dataset"], keep=False)
+             if duplicates.any():
+                 raise ValueError(
+                     "Duplicate (experiment, dataset) pairs found in the DataFrame. "
+                     "Please ensure that each experiment-dataset combination is unique."
+                 )
+             return df
+
+         def mk_perftb_report(df):
+             """
+             Create a performance report table from the DataFrame.
+             This function should be customized based on the specific requirements of the report.
+             """
+             perftb = PerfTB()
+             # find all unique "dataset" values
+             dataset_names = list(df["dataset"].unique())
+             # find all columns that start with "metric_"
+             metric_cols = [col for col in df.columns if col.startswith("metric_")]
+
+             # Determine which metrics are associated with each dataset.
+             # A dataset may appear in multiple rows, and each row may not include all metrics,
+             # so pick the row for that dataset with the most non-NaN metric values; its non-NaN
+             # metrics define the metric set for the dataset.
+             dataset_metrics = {}
+             for dataset_name in dataset_names:
+                 dataset_rows = df[df["dataset"] == dataset_name]
+                 # Find the row with the most non-NaN metric values
+                 max_non_nan_row = dataset_rows[metric_cols].count(axis=1).idxmax()
+                 metrics_for_dataset = (
+                     dataset_rows.loc[max_non_nan_row, metric_cols].dropna().index.tolist()
+                 )
+                 dataset_metrics[dataset_name] = metrics_for_dataset
+
+             for dataset_name, metrics in dataset_metrics.items():
+                 # Register the dataset and its metric columns in the performance table
+                 perftb.add_dataset(dataset_name, metrics)
+
+             for _, row in df.iterrows():
+                 dataset_name = row["dataset"]
+                 ds_metrics = dataset_metrics.get(dataset_name)
+                 if dataset_name in dataset_metrics:
+                     # Add this row's metric values to the performance table
+                     exp_name = row.get("experiment")
+                     exp_metric_values = {}
+                     for metric in ds_metrics:
+                         if metric in row and pd.notna(row[metric]):
+                             exp_metric_values[metric] = row[metric]
+                     perftb.add_experiment(
+                         experiment_name=exp_name,
+                         dataset_name=dataset_name,
+                         metrics=exp_metric_values,
+                     )
+
+             return perftb
+
+         assert os.path.exists(indir), f"Input directory {indir} does not exist."
+
+         # Find experiment subdirectories
+         exp_dirs = [
+             os.path.join(indir, d)
+             for d in os.listdir(indir)
+             if os.path.isdir(os.path.join(indir, d))
+         ]
+         assert exp_dirs, f"No experiment directories found in {indir}."
+
+         # Collect all matching CSV files in those subdirectories
+         csv_perf_files = []
+         for exp_dir in exp_dirs:
+             pprint(f"Searching in experiment directory: {exp_dir}")
+             matched = glob.glob(os.path.join(exp_dir, f"*{exp_perf_csv_pattern}*.csv"))
+             csv_perf_files.extend(matched)
+         assert len(csv_perf_files) > 0, (
+             f"No CSV files matching pattern '{exp_perf_csv_pattern}' found in the experiment directories."
+         )
+
+         all_exp_perf_df = get_df_for_all_exp_perf(csv_perf_files, csv_sep=csv_sep)
+         csvfile.fn_display_df(all_exp_perf_df)
+         perf_tb = mk_perftb_report(all_exp_perf_df)
+         return perf_tb
+
+
+ def main():
+     indir = "./zreport/test"
+     report_outfile = "./zreport/all.csv"
+     exp_perf_csv_pattern = "__perf"
+     csv_sep = ";"
+     perftb = PerfCalc.gen_perf_report_for_multip_exps(
+         indir, exp_perf_csv_pattern, csv_sep
+     )
+     perftb.to_csv(report_outfile, sep=csv_sep)
+     pprint(perftb)
+     perftb.plot(save_path="./zreport/test_csv.svg", open_plot=True)
+
+
+ if __name__ == "__main__":
+     main()
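
For orientation, here is a minimal sketch (not part of the released package) of how the new PerfCalc class might be subclassed, assuming torchmetrics' MulticlassAccuracy and toy prediction/target tensors; the subclass name, dataset/experiment names, and data are hypothetical. The metric key is prefixed with `metric_` because `gen_perf_report_for_multip_exps` only treats CSV columns starting with `metric_` as metric columns when building the aggregated report.

import torch
from torchmetrics.classification import MulticlassAccuracy

from halib.research.perfcalc import PerfCalc


class ToyExpPerf(PerfCalc):
    """Hypothetical subclass used only to illustrate the PerfCalc contract."""

    def get_exp_torch_metrics(self):
        # keys become column names; the "metric_" prefix lets the report step find them
        return {"metric_accuracy": MulticlassAccuracy(num_classes=3)}

    def get_dataset_name(self):
        return "toy_dataset"

    def get_experiment_name(self):
        return "exp_baseline"

    def prepare_exp_data_for_metrics(self, metric_names, *args, **kwargs):
        # each metric maps to the kwargs expected by its torchmetrics update() method
        preds = torch.tensor([0, 2, 1, 2])
        target = torch.tensor([0, 1, 1, 2])
        return {"metric_accuracy": {"preds": preds, "target": target}}


if __name__ == "__main__":
    # no outfile/outdir given, so nothing is written; the metrics dict and None are returned
    out_dict, csv_path = ToyExpPerf().calculate_exp_perf_metrics()
    print(out_dict, csv_path)

Passing `outdir="some/experiment/dir"` instead would write a `*__perf.csv` file there (its name prefixed with `now_str()`), which `PerfCalc.gen_perf_report_for_multip_exps` can later aggregate into a PerfTB when scanning experiment subdirectories.
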
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: halib
- Version: 0.1.55
+ Version: 0.1.57
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -15,6 +15,10 @@ License-File: LICENSE.txt
 
  Helper package for coding and automation
 
+ **Version 0.1.57**
+
+ + add `research/perfcalc`: an abstract base class for performance calculation. This class needs to be inherited and implemented with the specific performance calculation logic.
+
  **Version 0.1.55**
 
  + add `util/dataclass_util` to help dynamically create `dataclass` classes from a dictionary or YAML file, including support for nested dataclasses. From there, we can use `dataclass_wizard` to create a list of `dataclass` classes with help from ChatGPT.
@@ -27,6 +27,7 @@ halib/online/gdrive_test.py
  halib/online/projectmake.py
  halib/research/__init__.py
  halib/research/dataset.py
+ halib/research/perfcalc.py
  halib/research/perftb.py
  halib/research/plot.py
  halib/research/torchloader.py
@@ -8,7 +8,7 @@ with open("requirements.txt") as f:
 
  setuptools.setup(
      name="halib",
-     version="0.1.55",
+     version="0.1.57",
      author="Hoang Van Ha",
      author_email="hoangvanhauit@gmail.com",
      description="Small library for common tasks",