halib 0.1.91__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +12 -6
- halib/common/__init__.py +0 -0
- halib/common/common.py +207 -0
- halib/common/rich_color.py +285 -0
- halib/common.py +53 -10
- halib/exp/__init__.py +0 -0
- halib/exp/core/__init__.py +0 -0
- halib/exp/core/base_config.py +167 -0
- halib/exp/core/base_exp.py +147 -0
- halib/exp/core/param_gen.py +189 -0
- halib/exp/core/wandb_op.py +117 -0
- halib/exp/data/__init__.py +0 -0
- halib/exp/data/dataclass_util.py +41 -0
- halib/exp/data/dataset.py +208 -0
- halib/exp/data/torchloader.py +165 -0
- halib/exp/perf/__init__.py +0 -0
- halib/exp/perf/flop_calc.py +190 -0
- halib/exp/perf/gpu_mon.py +58 -0
- halib/exp/perf/perfcalc.py +440 -0
- halib/exp/perf/perfmetrics.py +137 -0
- halib/exp/perf/perftb.py +778 -0
- halib/exp/perf/profiler.py +507 -0
- halib/exp/viz/__init__.py +0 -0
- halib/exp/viz/plot.py +754 -0
- halib/filetype/csvfile.py +3 -9
- halib/filetype/ipynb.py +61 -0
- halib/filetype/jsonfile.py +0 -3
- halib/filetype/textfile.py +0 -1
- halib/filetype/videofile.py +119 -3
- halib/filetype/yamlfile.py +16 -1
- halib/online/projectmake.py +7 -6
- halib/online/tele_noti.py +165 -0
- halib/research/base_exp.py +75 -18
- halib/research/core/__init__.py +0 -0
- halib/research/core/base_config.py +144 -0
- halib/research/core/base_exp.py +157 -0
- halib/research/core/param_gen.py +108 -0
- halib/research/core/wandb_op.py +117 -0
- halib/research/data/__init__.py +0 -0
- halib/research/data/dataclass_util.py +41 -0
- halib/research/data/dataset.py +208 -0
- halib/research/data/torchloader.py +165 -0
- halib/research/dataset.py +6 -7
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +4 -0
- halib/research/mics.py +59 -1
- halib/research/perf/__init__.py +0 -0
- halib/research/perf/flop_calc.py +190 -0
- halib/research/perf/gpu_mon.py +58 -0
- halib/research/perf/perfcalc.py +363 -0
- halib/research/perf/perfmetrics.py +137 -0
- halib/research/perf/perftb.py +778 -0
- halib/research/perf/profiler.py +301 -0
- halib/research/perfcalc.py +60 -35
- halib/research/perftb.py +2 -1
- halib/research/plot.py +480 -218
- halib/research/viz/__init__.py +0 -0
- halib/research/viz/plot.py +754 -0
- halib/system/_list_pc.csv +6 -0
- halib/system/filesys.py +60 -20
- halib/system/path.py +106 -0
- halib/utils/dict.py +9 -0
- halib/utils/list.py +12 -0
- halib/utils/video.py +6 -0
- halib-0.2.21.dist-info/METADATA +192 -0
- halib-0.2.21.dist-info/RECORD +109 -0
- halib-0.1.91.dist-info/METADATA +0 -201
- halib-0.1.91.dist-info/RECORD +0 -61
- {halib-0.1.91.dist-info → halib-0.2.21.dist-info}/WHEEL +0 -0
- {halib-0.1.91.dist-info → halib-0.2.21.dist-info}/licenses/LICENSE.txt +0 -0
- {halib-0.1.91.dist-info → halib-0.2.21.dist-info}/top_level.txt +0 -0
halib/research/perf/profiler.py
ADDED
@@ -0,0 +1,301 @@
+import os
+import time
+import json
+
+from pathlib import Path
+from pprint import pprint
+from threading import Lock
+from loguru import logger
+
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+import plotly.express as px  # for dynamic color scales
+
+from ...common.common import ConsoleLog
+
+
+class zProfiler:
+    """A singleton profiler to measure execution time of contexts and steps.
+
+    Args:
+        interval_report (int): Frequency of periodic reports (0 to disable).
+        stop_to_view (bool): Pause execution to view reports if True (only in debug mode).
+        output_file (str): Path to save the profiling report.
+        report_format (str): Output format for reports ("json" or "csv").
+
+    Example:
+        prof = zProfiler()
+        prof.ctx_start("my_context")
+        prof.step_start("my_context", "step1")
+        time.sleep(0.1)
+        prof.step_end("my_context", "step1")
+        prof.ctx_end("my_context")
+    """
+
+    _instance = None
+    _lock = Lock()
+
+    def __new__(cls, *args, **kwargs):
+        with cls._lock:
+            if cls._instance is None:
+                cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(
+        self,
+    ):
+        if not hasattr(self, "_initialized"):
+            self.time_dict = {}
+            self._initialized = True
+
+    def ctx_start(self, ctx_name="ctx_default"):
+        if not isinstance(ctx_name, str) or not ctx_name:
+            raise ValueError("ctx_name must be a non-empty string")
+        if ctx_name not in self.time_dict:
+            self.time_dict[ctx_name] = {
+                "start": time.perf_counter(),
+                "step_dict": {},
+                "report_count": 0,
+            }
+        self.time_dict[ctx_name]["report_count"] += 1
+
+    def ctx_end(self, ctx_name="ctx_default", report_func=None):
+        if ctx_name not in self.time_dict:
+            return
+        self.time_dict[ctx_name]["end"] = time.perf_counter()
+        self.time_dict[ctx_name]["duration"] = (
+            self.time_dict[ctx_name]["end"] - self.time_dict[ctx_name]["start"]
+        )
+
+    def step_start(self, ctx_name, step_name):
+        if not isinstance(step_name, str) or not step_name:
+            raise ValueError("step_name must be a non-empty string")
+        if ctx_name not in self.time_dict:
+            return
+        if step_name not in self.time_dict[ctx_name]["step_dict"]:
+            self.time_dict[ctx_name]["step_dict"][step_name] = []
+        self.time_dict[ctx_name]["step_dict"][step_name].append([time.perf_counter()])
+
+    def step_end(self, ctx_name, step_name):
+        if (
+            ctx_name not in self.time_dict
+            or step_name not in self.time_dict[ctx_name]["step_dict"]
+        ):
+            return
+        self.time_dict[ctx_name]["step_dict"][step_name][-1].append(time.perf_counter())
+
+    def _step_dict_to_detail(self, ctx_step_dict):
+        """
+        'ctx_step_dict': {
+        │ │ 'preprocess': [
+        │ │ │ [278090.947465806, 278090.960484853],
+        │ │ │ [278091.178424035, 278091.230944486],
+        │ │ 'infer': [
+        │ │ │ [278090.960490534, 278091.178424035],
+        │ │ │ [278091.230944486, 278091.251378469],
+        │ }
+        """
+        assert (
+            len(ctx_step_dict.keys()) > 0
+        ), "step_dict must have only one key (step_name) for detail."
+        normed_ctx_step_dict = {}
+        for step_name, time_list in ctx_step_dict.items():
+            if not isinstance(ctx_step_dict[step_name], list):
+                raise ValueError(f"Step data for {step_name} must be a list")
+            # step_name = list(ctx_step_dict.keys())[0] # ! debug
+            normed_time_ls = []
+            for idx, time_data in enumerate(time_list):
+                elapsed_time = -1
+                if len(time_data) == 2:
+                    start, end = time_data[0], time_data[1]
+                    elapsed_time = end - start
+                normed_time_ls.append((idx, elapsed_time))  # including step
+            normed_ctx_step_dict[step_name] = normed_time_ls
+        return normed_ctx_step_dict
+
+    def get_report_dict(self, with_detail=False):
+        report_dict = {}
+        for ctx_name, ctx_dict in self.time_dict.items():
+            report_dict[ctx_name] = {
+                "duration": ctx_dict.get("duration", 0.0),
+                "step_dict": {
+                    "summary": {"avg_time": {}, "percent_time": {}},
+                    "detail": {},
+                },
+            }
+
+            if with_detail:
+                report_dict[ctx_name]["step_dict"]["detail"] = (
+                    self._step_dict_to_detail(ctx_dict["step_dict"])
+                )
+            avg_time_list = []
+            epsilon = 1e-5
+            for step_name, step_list in ctx_dict["step_dict"].items():
+                durations = []
+                try:
+                    for time_data in step_list:
+                        if len(time_data) != 2:
+                            continue
+                        start, end = time_data
+                        durations.append(end - start)
+                except Exception as e:
+                    logger.error(
+                        f"Error processing step {step_name} in context {ctx_name}: {e}"
+                    )
+                    continue
+                if not durations:
+                    continue
+                avg_time = sum(durations) / len(durations)
+                if avg_time < epsilon:
+                    continue
+                avg_time_list.append((step_name, avg_time))
+            total_avg_time = (
+                sum(time for _, time in avg_time_list) or 1e-10
+            )  # Avoid division by zero
+            for step_name, avg_time in avg_time_list:
+                report_dict[ctx_name]["step_dict"]["summary"]["percent_time"][
+                    f"per_{step_name}"
+                ] = (avg_time / total_avg_time) * 100.0
+                report_dict[ctx_name]["step_dict"]["summary"]["avg_time"][
+                    f"avg_{step_name}"
+                ] = avg_time
+            report_dict[ctx_name]["step_dict"]["summary"][
+                "total_avg_time"
+            ] = total_avg_time
+            report_dict[ctx_name]["step_dict"]["summary"] = dict(
+                sorted(report_dict[ctx_name]["step_dict"]["summary"].items())
+            )
+        return report_dict
+
+    @classmethod
+    @classmethod
+    def plot_formatted_data(
+        cls, profiler_data, outdir=None, file_format="png", do_show=False, tag=""
+    ):
+        """
+        Plot each context in a separate figure with bar + pie charts.
+        Save each figure in the specified format (png or svg).
+        """
+
+        if outdir is not None:
+            os.makedirs(outdir, exist_ok=True)
+
+        if file_format.lower() not in ["png", "svg"]:
+            raise ValueError("file_format must be 'png' or 'svg'")
+
+        results = {}  # {context: fig}
+
+        for ctx, ctx_data in profiler_data.items():
+            summary = ctx_data["step_dict"]["summary"]
+            avg_times = summary["avg_time"]
+            percent_times = summary["percent_time"]
+
+            step_names = [s.replace("avg_", "") for s in avg_times.keys()]
+            # pprint(f'{step_names=}')
+            n_steps = len(step_names)
+
+            assert n_steps > 0, "No steps found for context: {}".format(ctx)
+            # Generate dynamic colors
+            colors = px.colors.sample_colorscale(
+                "Viridis", [i / (n_steps - 1) for i in range(n_steps)]
+            ) if n_steps > 1 else [px.colors.sample_colorscale("Viridis", [0])[0]]
+            # pprint(f'{len(colors)} colors generated for {n_steps} steps')
+            color_map = dict(zip(step_names, colors))
+
+            # Create figure
+            fig = make_subplots(
+                rows=1,
+                cols=2,
+                subplot_titles=[f"Avg Time", f"% Time"],
+                specs=[[{"type": "bar"}, {"type": "pie"}]],
+            )
+
+            # Bar chart
+            fig.add_trace(
+                go.Bar(
+                    x=step_names,
+                    y=list(avg_times.values()),
+                    text=[f"{v*1000:.2f} ms" for v in avg_times.values()],
+                    textposition="outside",
+                    marker=dict(color=[color_map[s] for s in step_names]),
+                    name="",  # unified legend
+                    showlegend=False,
+                ),
+                row=1,
+                col=1,
+            )
+
+            # Pie chart (colors match bar)
+            fig.add_trace(
+                go.Pie(
+                    labels=step_names,
+                    values=list(percent_times.values()),
+                    marker=dict(colors=[color_map[s] for s in step_names]),
+                    hole=0.4,
+                    name="",
+                    showlegend=True,
+                ),
+                row=1,
+                col=2,
+            )
+            tag_str = tag if tag and len(tag) > 0 else ""
+            # Layout
+            fig.update_layout(
+                title_text=f"[{tag_str}] Context Profiler: {ctx}",
+                width=1000,
+                height=400,
+                showlegend=True,
+                legend=dict(title="Steps", x=1.05, y=0.5, traceorder="normal"),
+                hovermode="x unified",
+            )
+
+            fig.update_xaxes(title_text="Steps", row=1, col=1)
+            fig.update_yaxes(title_text="Avg Time (ms)", row=1, col=1)
+
+            # Show figure
+            if do_show:
+                fig.show()
+
+            # Save figure
+            if outdir is not None:
+                file_prefix = ctx if len(tag_str) == 0 else f"{tag_str}_{ctx}"
+                file_path = os.path.join(outdir, f"{file_prefix}_summary.{file_format.lower()}")
+                fig.write_image(file_path)
+                print(f"Saved figure: {file_path}")
+
+            results[ctx] = fig
+
+        return results
+
+    def report_and_plot(self, outdir=None, file_format="png", do_show=False, tag=""):
+        """
+        Generate the profiling report and plot the formatted data.
+
+        Args:
+            outdir (str): Directory to save figures. If None, figures are only shown.
+            file_format (str): Target file format, "png" or "svg". Default is "png".
+            do_show (bool): Whether to display the plots. Default is False.
+        """
+        report = self.get_report_dict()
+        self.get_report_dict(with_detail=False)
+        return self.plot_formatted_data(
+            report, outdir=outdir, file_format=file_format, do_show=do_show, tag=tag
+        )
+    def meta_info(self):
+        """
+        Print the structure of the profiler's time dictionary.
+        Useful for debugging and understanding the profiler's internal state.
+        """
+        for ctx_name, ctx_dict in self.time_dict.items():
+            with ConsoleLog(f"Context: {ctx_name}"):
+                step_names = list(ctx_dict['step_dict'].keys())
+                for step_name in step_names:
+                    pprint(f"Step: {step_name}")
+
+    def save_report_dict(self, output_file, with_detail=False):
+        try:
+            report = self.get_report_dict(with_detail=with_detail)
+            with open(output_file, "w") as f:
+                json.dump(report, f, indent=4)
+        except Exception as e:
+            logger.error(f"Failed to save report to {output_file}: {e}")
halib/research/perfcalc.py
CHANGED
@@ -3,12 +3,9 @@ import glob
 from typing import Optional, Tuple
 import pandas as pd

-from rich.pretty import pprint
-
 from abc import ABC, abstractmethod
 from collections import OrderedDict

-from ..filetype import csvfile
 from ..system import filesys as fs
 from ..common import now_str
 from ..research.perftb import PerfTB
@@ -19,6 +16,7 @@ REQUIRED_COLS = ["experiment", "dataset"]
 CSV_FILE_POSTFIX = "__perf"
 METRIC_PREFIX = "metric_"

+
 class PerfCalc(ABC): # Abstract base class for performance calculation
     @abstractmethod
     def get_experiment_name(self) -> str:
@@ -44,29 +42,32 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
         """
         pass

-    def valid_proc_extra_data(
-        self, proc_extra_data
-    ):
+    def valid_proc_extra_data(self, proc_extra_data):
         # make sure that all items in proc_extra_data are dictionaries, with same keys
         if proc_extra_data is None or len(proc_extra_data) == 0:
             return
         if not all(isinstance(item, dict) for item in proc_extra_data):
             raise TypeError("All items in proc_extra_data must be dictionaries")

-        if not all(
-
+        if not all(
+            item.keys() == proc_extra_data[0].keys() for item in proc_extra_data
+        ):
+            raise ValueError(
+                "All dictionaries in proc_extra_data must have the same keys"
+            )

-    def valid_proc_metric_raw_data(
-        self, metric_names, proc_metric_raw_data
-    ):
+    def valid_proc_metric_raw_data(self, metric_names, proc_metric_raw_data):
         # make sure that all items in proc_metric_raw_data are dictionaries, with same keys as metric_names
-        assert
-
+        assert (
+            isinstance(proc_metric_raw_data, list) and len(proc_metric_raw_data) > 0
+        ), "raw_data_for_metrics must be a non-empty list of dictionaries"

         # make sure that all items in proc_metric_raw_data are dictionaries with keys as metric_names
         if not all(isinstance(item, dict) for item in proc_metric_raw_data):
             raise TypeError("All items in raw_data_for_metrics must be dictionaries")
-        if not all(
+        if not all(
+            set(item.keys()) == set(metric_names) for item in proc_metric_raw_data
+        ):
             raise ValueError(
                 "All dictionaries in raw_data_for_metrics must have the same keys as metric_names"
             )
@@ -75,21 +76,30 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
     def calc_exp_perf_metrics(
         self, metric_names, raw_metrics_data, extra_data=None, *args, **kwargs
     ):
-        assert isinstance(raw_metrics_data, dict) or isinstance(
-
+        assert isinstance(raw_metrics_data, dict) or isinstance(
+            raw_metrics_data, list
+        ), "raw_data_for_metrics must be a dictionary or a list"

         if extra_data is not None:
-            assert isinstance(
-
+            assert isinstance(
+                extra_data, type(raw_metrics_data)
+            ), "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
         # prepare raw_metric data for processing
-        proc_metric_raw_data_ls =
+        proc_metric_raw_data_ls = (
+            raw_metrics_data
+            if isinstance(raw_metrics_data, list)
+            else [raw_metrics_data.copy()]
+        )
         self.valid_proc_metric_raw_data(metric_names, proc_metric_raw_data_ls)
         # prepare extra data for processing
         proc_extra_data_ls = []
         if extra_data is not None:
-            proc_extra_data_ls =
-
-
+            proc_extra_data_ls = (
+                extra_data if isinstance(extra_data, list) else [extra_data.copy()]
+            )
+            assert len(proc_extra_data_ls) == len(
+                proc_metric_raw_data_ls
+            ), "extra_data must have the same length as raw_data_for_metrics if it is a list"
         # validate the extra_data
         self.valid_proc_extra_data(proc_extra_data_ls)

@@ -102,7 +112,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                 "experiment": self.get_experiment_name(),
             }
             custom_fields = []
-            if len(proc_extra_data_ls)> 0:
+            if len(proc_extra_data_ls) > 0:
                 # add extra data to the output dictionary
                 extra_data_item = proc_extra_data_ls[idx]
                 out_dict.update(extra_data_item)
@@ -110,7 +120,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
             metric_results = metrics_backend.calc_metrics(
                 metrics_data_dict=raw_metrics_data, *args, **kwargs
             )
-            metric_results_prefix = {
+            metric_results_prefix = {
+                f"metric_{k}": v for k, v in metric_results.items()
+            }
             out_dict.update(metric_results_prefix)
             ordered_cols = (
                 REQUIRED_COLS + custom_fields + list(metric_results_prefix.keys())
@@ -126,7 +138,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
     #! outfile - if provided, will save the output to a CSV file with the given path
     #! outdir - if provided, will save the output to a CSV file in the given directory with a generated filename
     #! return_df - if True, will return a DataFrame instead of a dictionary
-    def
+    def calc_perfs(
         self,
         raw_metrics_data: Union[List[dict], dict],
         extra_data: Optional[Union[List[dict], dict]] = None,
@@ -140,9 +152,11 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
         """
         metric_names = self.get_metric_backend().metric_names
         out_dict_list = self.calc_exp_perf_metrics(
-            metric_names=metric_names,
+            metric_names=metric_names,
+            raw_metrics_data=raw_metrics_data,
             extra_data=extra_data,
-            *args,
+            *args,
+            **kwargs,
         )
         csv_outfile = kwargs.get("outfile", None)
         if csv_outfile is not None:
@@ -176,13 +190,18 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
         return "__perf.csv" in exp_file_name

     @classmethod
-    def
-        cls,
+    def get_perftb_for_multi_exps(
+        cls,
+        indir: str,
+        exp_csv_filter_fn=default_exp_csv_filter_fn,
+        include_file_name=False,
+        csv_sep=";",
     ) -> PerfTB:
         """
         Generate a performance report by scanning experiment subdirectories.
         Must return a dictionary with keys as metric names and values as performance tables.
         """
+
         def get_df_for_all_exp_perf(csv_perf_files, csv_sep=";"):
             """
             Create a single DataFrame from all CSV files.
@@ -194,7 +213,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
             for csv_file in csv_perf_files:
                 temp_df = pd.read_csv(csv_file, sep=csv_sep)
                 if FILE_NAME_COL:
-                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                        csv_file, split_file_ext=False
+                    )
                 # csvfile.fn_display_df(temp_df)
                 temp_df_cols = temp_df.columns.tolist()
                 for col in temp_df_cols:
@@ -205,7 +226,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
             for csv_file in csv_perf_files:
                 temp_df = pd.read_csv(csv_file, sep=csv_sep)
                 if FILE_NAME_COL:
-                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                    temp_df[FILE_NAME_COL] = fs.get_file_name(
+                        csv_file, split_file_ext=False
+                    )
                 # Drop all-NA columns to avoid dtype inconsistency
                 temp_df = temp_df.dropna(axis=1, how="all")
                 # ensure all columns are present in the final DataFrame
@@ -215,7 +238,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                 df = pd.concat([df, temp_df], ignore_index=True)
             # assert that REQUIRED_COLS are present in the DataFrame
             # pprint(df.columns.tolist())
-            sticky_cols = REQUIRED_COLS + (
+            sticky_cols = REQUIRED_COLS + (
+                [FILE_NAME_COL] if include_file_name else []
+            )  # columns that must always be present
             for col in sticky_cols:
                 if col not in df.columns:
                     raise ValueError(
@@ -227,9 +252,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
             ), "No metric columns found in the DataFrame. Ensure that the CSV files contain metric columns starting with 'metric_'."
             final_cols = sticky_cols + metric_cols
             df = df[final_cols]
-            # !hahv debug
-            pprint("------ Final DataFrame Columns ------")
-            csvfile.fn_display_df(df)
+            # # !hahv debug
+            # pprint("------ Final DataFrame Columns ------")
+            # csvfile.fn_display_df(df)
             # ! validate all rows in df before returning
             # make sure all rows will have at least values for REQUIRED_COLS and at least one metric column
             for index, row in df.iterrows():
halib/research/perftb.py
CHANGED
@@ -308,7 +308,8 @@ class PerfTB:
         if save_path:
             export_success = False
             try:
-                fig.write_image(save_path, engine="kaleido")
+                # fig.write_image(save_path, engine="kaleido")
+                fig.write_image(save_path, engine="kaleido", width=width, height=height * len(metric_list))
                 export_success = True
                 # pprint(f"Saved: {os.path.abspath(save_path)}")
             except Exception as e: