halib 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. halib/__init__.py +94 -0
  2. halib/common/__init__.py +0 -0
  3. halib/common/common.py +326 -0
  4. halib/common/rich_color.py +285 -0
  5. halib/common.py +151 -0
  6. halib/csvfile.py +48 -0
  7. halib/cuda.py +39 -0
  8. halib/dataset.py +209 -0
  9. halib/exp/__init__.py +0 -0
  10. halib/exp/core/__init__.py +0 -0
  11. halib/exp/core/base_config.py +167 -0
  12. halib/exp/core/base_exp.py +147 -0
  13. halib/exp/core/param_gen.py +170 -0
  14. halib/exp/core/wandb_op.py +117 -0
  15. halib/exp/data/__init__.py +0 -0
  16. halib/exp/data/dataclass_util.py +41 -0
  17. halib/exp/data/dataset.py +208 -0
  18. halib/exp/data/torchloader.py +165 -0
  19. halib/exp/perf/__init__.py +0 -0
  20. halib/exp/perf/flop_calc.py +190 -0
  21. halib/exp/perf/gpu_mon.py +58 -0
  22. halib/exp/perf/perfcalc.py +470 -0
  23. halib/exp/perf/perfmetrics.py +137 -0
  24. halib/exp/perf/perftb.py +778 -0
  25. halib/exp/perf/profiler.py +507 -0
  26. halib/exp/viz/__init__.py +0 -0
  27. halib/exp/viz/plot.py +754 -0
  28. halib/filesys.py +117 -0
  29. halib/filetype/__init__.py +0 -0
  30. halib/filetype/csvfile.py +192 -0
  31. halib/filetype/ipynb.py +61 -0
  32. halib/filetype/jsonfile.py +19 -0
  33. halib/filetype/textfile.py +12 -0
  34. halib/filetype/videofile.py +266 -0
  35. halib/filetype/yamlfile.py +87 -0
  36. halib/gdrive.py +179 -0
  37. halib/gdrive_mkdir.py +41 -0
  38. halib/gdrive_test.py +37 -0
  39. halib/jsonfile.py +22 -0
  40. halib/listop.py +13 -0
  41. halib/online/__init__.py +0 -0
  42. halib/online/gdrive.py +229 -0
  43. halib/online/gdrive_mkdir.py +53 -0
  44. halib/online/gdrive_test.py +50 -0
  45. halib/online/projectmake.py +131 -0
  46. halib/online/tele_noti.py +165 -0
  47. halib/plot.py +301 -0
  48. halib/projectmake.py +115 -0
  49. halib/research/__init__.py +0 -0
  50. halib/research/base_config.py +100 -0
  51. halib/research/base_exp.py +157 -0
  52. halib/research/benchquery.py +131 -0
  53. halib/research/core/__init__.py +0 -0
  54. halib/research/core/base_config.py +144 -0
  55. halib/research/core/base_exp.py +157 -0
  56. halib/research/core/param_gen.py +108 -0
  57. halib/research/core/wandb_op.py +117 -0
  58. halib/research/data/__init__.py +0 -0
  59. halib/research/data/dataclass_util.py +41 -0
  60. halib/research/data/dataset.py +208 -0
  61. halib/research/data/torchloader.py +165 -0
  62. halib/research/dataset.py +208 -0
  63. halib/research/flop_csv.py +34 -0
  64. halib/research/flops.py +156 -0
  65. halib/research/metrics.py +137 -0
  66. halib/research/mics.py +74 -0
  67. halib/research/params_gen.py +108 -0
  68. halib/research/perf/__init__.py +0 -0
  69. halib/research/perf/flop_calc.py +190 -0
  70. halib/research/perf/gpu_mon.py +58 -0
  71. halib/research/perf/perfcalc.py +363 -0
  72. halib/research/perf/perfmetrics.py +137 -0
  73. halib/research/perf/perftb.py +778 -0
  74. halib/research/perf/profiler.py +301 -0
  75. halib/research/perfcalc.py +361 -0
  76. halib/research/perftb.py +780 -0
  77. halib/research/plot.py +758 -0
  78. halib/research/profiler.py +300 -0
  79. halib/research/torchloader.py +162 -0
  80. halib/research/viz/__init__.py +0 -0
  81. halib/research/viz/plot.py +754 -0
  82. halib/research/wandb_op.py +116 -0
  83. halib/rich_color.py +285 -0
  84. halib/sys/__init__.py +0 -0
  85. halib/sys/cmd.py +8 -0
  86. halib/sys/filesys.py +124 -0
  87. halib/system/__init__.py +0 -0
  88. halib/system/_list_pc.csv +6 -0
  89. halib/system/cmd.py +8 -0
  90. halib/system/filesys.py +164 -0
  91. halib/system/path.py +106 -0
  92. halib/tele_noti.py +166 -0
  93. halib/textfile.py +13 -0
  94. halib/torchloader.py +162 -0
  95. halib/utils/__init__.py +0 -0
  96. halib/utils/dataclass_util.py +40 -0
  97. halib/utils/dict.py +317 -0
  98. halib/utils/dict_op.py +9 -0
  99. halib/utils/gpu_mon.py +58 -0
  100. halib/utils/list.py +17 -0
  101. halib/utils/listop.py +13 -0
  102. halib/utils/slack.py +86 -0
  103. halib/utils/tele_noti.py +166 -0
  104. halib/utils/video.py +82 -0
  105. halib/videofile.py +139 -0
  106. halib-0.2.30.dist-info/METADATA +237 -0
  107. halib-0.2.30.dist-info/RECORD +110 -0
  108. halib-0.2.30.dist-info/WHEEL +5 -0
  109. halib-0.2.30.dist-info/licenses/LICENSE.txt +17 -0
  110. halib-0.2.30.dist-info/top_level.txt +1 -0
@@ -0,0 +1,165 @@
+ """
+ * @author Hoang Van-Ha
+ * @email hoangvanhauit@gmail.com
+ * @create date 2024-03-27 15:40:22
+ * @modify date 2024-03-27 15:40:22
+ * @desc This module provides utilities for finding the dataloader configuration (num_workers, batch_size, pin_memory, etc.) that best fits your hardware.
+ """
+ from argparse import ArgumentParser
+
+ import os
+ import time
+ import traceback
+
+ from tqdm import tqdm
+ from rich import inspect
+ from typing import Union
+ import itertools as it  # for cartesian product
+
+ from torch.utils.data import DataLoader
+ from torchvision import datasets, transforms
+
+ from ...common.common import *
+ from ...filetype import csvfile
+ from ...filetype.yamlfile import load_yaml
+
+ def parse_args():
+     parser = ArgumentParser(description="Search for the best dataloader configuration")
+     parser.add_argument("-cfg", "--cfg", type=str, help="cfg file for searching")
+     return parser.parse_args()
+
+
+ def get_test_range(cfg: dict, search_item="num_workers"):
+     item_search_cfg = cfg["search_space"].get(search_item, None)
+     if item_search_cfg is None:
+         raise ValueError(f"search_item: {search_item} not found in cfg")
+     if isinstance(item_search_cfg, list):
+         return item_search_cfg
+     elif isinstance(item_search_cfg, dict):
+         if "mode" in item_search_cfg:
+             mode = item_search_cfg["mode"]
+             assert mode in ["range", "list"], f"mode: {mode} not supported"
+             value_in_mode = item_search_cfg.get(mode, None)
+             if value_in_mode is None:
+                 raise ValueError(f"mode<{mode}>: data not found in <{search_item}>")
+             if mode == "range":
+                 assert len(value_in_mode) == 3, "range must have 3 values: start, stop, step"
+                 start = value_in_mode[0]
+                 stop = value_in_mode[1]
+                 step = value_in_mode[2]
+                 return list(range(start, stop, step))
+             elif mode == "list":
+                 return item_search_cfg["list"]
+     else:
+         return [item_search_cfg]  # for int, float, str, bool, etc.
+
+
+ def load_an_batch(loader_iter):
+     start = time.time()
+     next(loader_iter)
+     end = time.time()
+     return end - start
+
+
+ def test_dataloader_with_cfg(origin_dataloader: DataLoader, cfg: Union[dict, str]):
+     try:
+         if isinstance(cfg, str):
+             cfg = load_yaml(cfg, to_dict=True)
+         dfmk = csvfile.DFCreator()
+         search_items = ["batch_size", "num_workers", "persistent_workers", "pin_memory"]
+         batch_limit = cfg["general"]["batch_limit"]
+         csv_cfg = cfg["general"]["to_csv"]
+         log_batch_info = cfg["general"]["log_batch_info"]
+
+         save_to_csv = csv_cfg["enabled"]
+         log_dir = csv_cfg["log_dir"]
+         filename = csv_cfg["filename"]
+         filename = f"{now_str()}_{filename}.csv"
+         outfile = os.path.join(log_dir, filename)
+
+         dfmk.create_table(
+             "cfg_search",
+             (search_items + ["avg_time_taken"]),
+         )
+         ls_range_test = []
+         for item in search_items:
+             range_test = get_test_range(cfg, search_item=item)
+             range_test = [(item, i) for i in range_test]
+             ls_range_test.append(range_test)
+
+         all_combinations = list(it.product(*ls_range_test))
+
+         rows = []
+         for cfg_idx, combine in enumerate(all_combinations):
+             console.rule(f"Testing cfg {cfg_idx+1}/{len(all_combinations)}")
+             inspect(combine)
+             batch_size = combine[search_items.index("batch_size")][1]
+             num_workers = combine[search_items.index("num_workers")][1]
+             persistent_workers = combine[search_items.index("persistent_workers")][1]
+             pin_memory = combine[search_items.index("pin_memory")][1]
+
+             test_dataloader = DataLoader(origin_dataloader.dataset, batch_size=batch_size, num_workers=num_workers, persistent_workers=persistent_workers, pin_memory=pin_memory, shuffle=True)
+             row = [
+                 batch_size,
+                 num_workers,
+                 persistent_workers,
+                 pin_memory,
+                 0.0,
+             ]
+
+             # calculate the avg time taken to load <batch_limit> batches
+             trainiter = iter(test_dataloader)
+             time_elapsed = 0
+             pprint("Start testing...")
+             for i in tqdm(range(batch_limit)):
+                 single_batch_time = load_an_batch(trainiter)
+                 if log_batch_info:
+                     pprint(f"Batch {i+1} took {single_batch_time:.4f} seconds to load")
+                 time_elapsed += single_batch_time
+             row[-1] = time_elapsed / batch_limit
+             rows.append(row)
+         dfmk.insert_rows("cfg_search", rows)
+         dfmk.fill_table_from_row_pool("cfg_search")
+         with ConsoleLog("results"):
+             csvfile.fn_display_df(dfmk["cfg_search"])
+         if save_to_csv:
+             dfmk["cfg_search"].to_csv(outfile, index=False)
+             console.print(f"[red] Data saved to <{outfile}> [/red]")
+
+     except Exception as e:
+         traceback.print_exc()
+         print(e)
+         # get the directory of this python file
+         current_dir = os.path.dirname(os.path.realpath(__file__))
+         standard_cfg_path = os.path.join(current_dir, "torchloader_search.yaml")
+         pprint(
+             f"Make sure you pass the right <cfg.yaml> file. An example <cfg.yaml> can be found at: {standard_cfg_path}"
+         )
+         return
+
+ def main():
+     args = parse_args()
+     cfg_yaml = args.cfg
+     cfg_dict = load_yaml(cfg_yaml, to_dict=True)
+
+     # Define transforms for data augmentation and normalization
+     transform = transforms.Compose(
+         [
+             transforms.RandomHorizontalFlip(),  # randomly flip images horizontally
+             transforms.RandomRotation(10),  # randomly rotate images by up to 10 degrees
+             transforms.ToTensor(),  # convert images to PyTorch tensors
+             transforms.Normalize(
+                 (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
+             ),  # normalize pixel values to [-1, 1]
+         ]
+     )
+     test_dataset = datasets.CIFAR10(
+         root="./data", train=False, download=True, transform=transform
+     )
+     batch_size = 64
+     train_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
+     test_dataloader_with_cfg(train_loader, cfg_dict)
+
+
+ if __name__ == "__main__":
+     main()
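
Editor's note: the config schema is only implied by the key accesses above (general.batch_limit, general.to_csv, search_space.*). Below is a minimal, self-contained sketch of a dict matching that shape, with illustrative values (not defaults shipped with the package); each search_space entry may be a plain list, a dict with mode range/list, or a scalar, exactly as get_test_range handles them.

    # Hypothetical config mirroring torchloader_search.yaml; values are examples only.
    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms

    cfg = {
        "general": {
            "batch_limit": 10,        # number of batches timed per combination
            "log_batch_info": False,  # print per-batch load times
            "to_csv": {"enabled": False, "log_dir": "zout", "filename": "loader_search"},
        },
        "search_space": {
            "batch_size": {"mode": "list", "list": [32, 64]},
            "num_workers": {"mode": "range", "range": [2, 9, 2]},  # -> [2, 4, 6, 8]
            "persistent_workers": [True],  # a plain list is also accepted
            "pin_memory": True,            # a scalar becomes a single-value range
        },
    }

    # FakeData stands in for a real dataset so the sketch is self-contained;
    # size=4096 guarantees enough batches for batch_limit at every batch size.
    loader = DataLoader(datasets.FakeData(size=4096, transform=transforms.ToTensor()), batch_size=64)
    test_dataloader_with_cfg(loader, cfg)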
File without changes
@@ -0,0 +1,190 @@
+ import os
+ import sys
+ import torch
+ import timm
+ from argparse import ArgumentParser
+ from fvcore.nn import FlopCountAnalysis
+ from halib import *
+ from halib.filetype import csvfile
+ from curriculum.utils.config import *
+ from curriculum.utils.model_helper import *
+
+
+ # ---------------------------------------------------------------------
+ # Argument Parser
+ # ---------------------------------------------------------------------
+ def parse_args():
+     parser = ArgumentParser(description="Calculate FLOPs for TIMM or trained models")
+
+     # Option 1: direct TIMM model
+     parser.add_argument(
+         "--model_name", type=str, help="TIMM model name (e.g., efficientnet_b0)"
+     )
+     parser.add_argument(
+         "--num_classes", type=int, default=1000, help="Number of output classes"
+     )
+
+     # Option 2: experiment directory
+     parser.add_argument(
+         "--indir",
+         type=str,
+         default=None,
+         help="Directory containing a trained experiment (with .yaml and .pth)",
+     )
+     parser.add_argument(
+         "-o", "--o", action="store_true", help="Open the output CSV after saving"
+     )
+     return parser.parse_args()
+
+
+ # ---------------------------------------------------------------------
+ # Helper Functions
+ # ---------------------------------------------------------------------
+ def _get_list_of_proc_dirs(indir):
+     assert os.path.exists(indir), f"Input directory {indir} does not exist."
+     pth_files = [f for f in os.listdir(indir) if f.endswith(".pth")]
+     if len(pth_files) > 0:
+         return [indir]
+     return [
+         os.path.join(indir, f)
+         for f in os.listdir(indir)
+         if os.path.isdir(os.path.join(indir, f))
+     ]
+
+
+ def _calculate_flops_for_model(model_name, num_classes):
+     """Calculate FLOPs for a plain TIMM model."""
+     try:
+         model = timm.create_model(model_name, pretrained=False, num_classes=num_classes)
+         input_size = timm.data.resolve_data_config(model.default_cfg)["input_size"]
+         dummy_input = torch.randn(1, *input_size)
+         model.eval()  # set to eval mode to avoid warnings or errors
+         flops = FlopCountAnalysis(model, dummy_input)
+         gflops = flops.total() / 1e9
+         mflops = flops.total() / 1e6
+         print(f"\nModel: **{model_name}**, Classes: {num_classes}")
+         print(f"Input size: {input_size}, FLOPs: **{gflops:.3f} GFLOPs**, **{mflops:.3f} MFLOPs**\n")
+         return model_name, gflops, mflops
+     except Exception as e:
+         print(f"[Error] Could not calculate FLOPs for {model_name}: {e}")
+         return model_name, -1, -1
+
+
+ def _calculate_flops_for_experiment(exp_dir):
+     """Calculate FLOPs for a trained experiment directory."""
+     yaml_files = [f for f in os.listdir(exp_dir) if f.endswith(".yaml")]
+     pth_files = [f for f in os.listdir(exp_dir) if f.endswith(".pth")]
+
+     assert (
+         len(yaml_files) == 1
+     ), f"Expected 1 YAML file in {exp_dir}, found {len(yaml_files)}"
+     assert (
+         len(pth_files) == 1
+     ), f"Expected 1 PTH file in {exp_dir}, found {len(pth_files)}"
+
+     exp_cfg_yaml = os.path.join(exp_dir, yaml_files[0])
+     cfg = ExpConfig.from_yaml(exp_cfg_yaml)
+     ds_label_list = cfg.dataset.get_label_list()
+
+     try:
+         model = build_model(
+             cfg.model.name, num_classes=len(ds_label_list), pretrained=True
+         )
+         model_weights_path = os.path.join(exp_dir, pth_files[0])
+         model.load_state_dict(torch.load(model_weights_path, map_location="cpu"))
+         model.eval()
+
+         input_size = timm.data.resolve_data_config(model.default_cfg)["input_size"]
+         dummy_input = torch.randn(1, *input_size)
+         flops = FlopCountAnalysis(model, dummy_input)
+         gflops = flops.total() / 1e9
+         mflops = flops.total() / 1e6
+
+         return str(cfg), cfg.model.name, gflops, mflops
+     except Exception as e:
+         console.print(f"[red] Error processing {exp_dir}: {e}[/red]")
+         return str(cfg), cfg.model.name, -1, -1
+
+
+ # ---------------------------------------------------------------------
+ # Main Entry
+ # ---------------------------------------------------------------------
+ def main():
+     args = parse_args()
+
+     # Case 1: direct TIMM model input
+     if args.model_name:
+         _calculate_flops_for_model(args.model_name, args.num_classes)
+         return
+
+     # Case 2: experiment directory input
+     if args.indir is None:
+         print("[Error] Either --model_name or --indir must be specified.")
+         return
+
+     proc_dirs = _get_list_of_proc_dirs(args.indir)
+     pprint(proc_dirs)
+
+     dfmk = csvfile.DFCreator()
+     TABLE_NAME = "model_flops_results"
+     dfmk.create_table(TABLE_NAME, ["exp_name", "model_name", "gflops", "mflops"])
+
+     console.rule(f"Calculating FLOPs for models in {len(proc_dirs)} dir(s)...")
+     rows = []
+     for exp_dir in tqdm(proc_dirs):
+         dir_name = os.path.basename(exp_dir)
+         console.rule(f"{dir_name}")
+         exp_name, model_name, gflops, mflops = _calculate_flops_for_experiment(exp_dir)
+         rows.append([exp_name, model_name, gflops, mflops])
+
+     dfmk.insert_rows(TABLE_NAME, rows)
+     dfmk.fill_table_from_row_pool(TABLE_NAME)
+
+     outfile = f"zout/zreport/{now_str()}_model_flops_results.csv"
+     dfmk[TABLE_NAME].to_csv(outfile, sep=";", index=False)
+     csvfile.fn_display_df(dfmk[TABLE_NAME])
+
+     if args.o:
+         os.system(f"start {outfile}")  # Windows: open with the default application
+
+
+ # ---------------------------------------------------------------------
+ # Script Entry
+ # ---------------------------------------------------------------------
+ # flop_csv.py
+ # if __name__ == "__main__":
+ #     sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+ #     main()
+
+
+ # def main():
+ #     csv_file = "./results-imagenet.csv"
+ #     df = pd.read_csv(csv_file)
+ #     # make param_count column as float
+ #     # df['param_count'] = df['param_count'].astype(float)
+ #     df["param_count"] = (
+ #         pd.to_numeric(df["param_count"], errors="coerce").fillna(99999).astype(float)
+ #     )
+ #     df = df[df["param_count"] < 5.0]  # filter models with param_count < 5M
+
+ #     dict_ls = []
+
+ #     for index, row in tqdm(df.iterrows()):
+ #         console.rule(f"Row {index+1}/{len(df)}")
+ #         model = row["model"]
+ #         num_class = 2
+ #         _, _, mflops = _calculate_flops_for_model(model, num_class)
+ #         dict_ls.append(
+ #             {"model": model, "param_count": row["param_count"], "mflops": mflops}
+ #         )
+
+ #     # Create a DataFrame from the list of dictionaries
+ #     result_df = pd.DataFrame(dict_ls)
+
+ #     final_df = pd.merge(df, result_df, on=["model", "param_count"])
+ #     final_df.sort_values(by="mflops", inplace=True, ascending=True)
+ #     csvfile.fn_display_df(final_df)
+
+
+ # if __name__ == "__main__":
+ #     main()
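
Editor's note: the helper above can also be called directly, bypassing the CLI. A minimal sketch; the model names are just examples, any valid TIMM model name works:

    # Direct use of _calculate_flops_for_model (no parse_args needed).
    for name in ["efficientnet_b0", "mobilenetv3_small_100"]:
        model_name, gflops, mflops = _calculate_flops_for_model(name, num_classes=10)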
@@ -0,0 +1,58 @@
+ # install the `pynvml_utils` package first
+ # see this repo: https://github.com/gpuopenanalytics/pynvml
+ from pynvml_utils import nvidia_smi
+ import time
+ import threading
+ from rich.pretty import pprint
+
+ class GPUMonitor:
+     def __init__(self, gpu_index=0, interval=0.01):
+         self.nvsmi = nvidia_smi.getInstance()
+         self.gpu_index = gpu_index
+         self.interval = interval
+         self.gpu_stats = []
+         self._running = False
+         self._thread = None
+
+     def _monitor(self):
+         while self._running:
+             stats = self.nvsmi.DeviceQuery("power.draw, memory.used")["gpu"][
+                 self.gpu_index
+             ]
+             # pprint(stats)
+             self.gpu_stats.append(
+                 {
+                     "power": stats["power_readings"]["power_draw"],
+                     "power_unit": stats["power_readings"]["unit"],
+                     "memory": stats["fb_memory_usage"]["used"],
+                     "memory_unit": stats["fb_memory_usage"]["unit"],
+                 }
+             )
+             time.sleep(self.interval)
+
+     def start(self):
+         if not self._running:
+             self._running = True
+             # clear previous stats
+             self.gpu_stats.clear()
+             self._thread = threading.Thread(target=self._monitor)
+             self._thread.start()
+
+     def stop(self):
+         if self._running:
+             self._running = False
+             self._thread.join()
+             # clear the thread reference
+             self._thread = None
+
+     def get_stats(self):
+         ## return self.gpu_stats
+         assert self._running is False, "GPU monitor is still running. Stop it first."
+
+         powers = [s["power"] for s in self.gpu_stats if s["power"] is not None]
+         memories = [s["memory"] for s in self.gpu_stats if s["memory"] is not None]
+         avg_power = sum(powers) / len(powers) if powers else 0
+         max_memory = max(memories) if memories else 0
+         # power_unit = self.gpu_stats[0]["power_unit"] if self.gpu_stats else "W"
+         # memory_unit = self.gpu_stats[0]["memory_unit"] if self.gpu_stats else "MiB"
+         return {"gpu_avg_power": avg_power, "gpu_avg_max_memory": max_memory}
+ return {"gpu_avg_power": avg_power, "gpu_avg_max_memory": max_memory}