carm-paraver 1.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- carm_paraver/GUI_utils.py +845 -0
- carm_paraver/Paraver_CARM.py +4050 -0
- carm_paraver/__init__.py +5 -0
- carm_paraver/__main__.py +14 -0
- carm_paraver/analysis_helpers.py +894 -0
- carm_paraver/assets/CARM_icon3.svg +4 -0
- carm_paraver/assets/CHAMP_logo.svg +44 -0
- carm_paraver/assets/__init__.py +0 -0
- carm_paraver/assets/bsc.svg +4 -0
- carm_paraver/assets/carm_bsc.png +0 -0
- carm_paraver/assets/carm_bsc.svg +4 -0
- carm_paraver/assets/menu_icon.png +0 -0
- carm_paraver/assets/style.css +45 -0
- carm_paraver/carm_results/__init__.py +0 -0
- carm_paraver/carm_results/roofline/MN5_roofline.csv +43 -0
- carm_paraver/carm_results/roofline/__init__.py +0 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX2_DP.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX2_SP.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX512_DP.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX512_SP.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_SSE_DP.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_SSE_SP.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_Scalar_DP.cfg +50 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_FP_Scalar_SP.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_Loads.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/Intel_Stores.cfg +51 -0
- carm_paraver/paraver_carm_configs/Intel/__init__.py +0 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX2_DP.cfg +51 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX2_SP.cfg +50 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX512_DP.cfg +51 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX512_SP.cfg +50 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_SSE_DP.cfg +51 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_SSE_SP.cfg +51 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_Scalar_DP.cfg +50 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_Scalar_SP.cfg +50 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_Loads.cfg +51 -0
- carm_paraver/paraver_carm_configs/IntelV2/Intel_Stores.cfg +50 -0
- carm_paraver/paraver_carm_configs/IntelV2/__init__.py +0 -0
- carm_paraver/paraver_carm_configs/Intel_CARM_DP.cfg +2388 -0
- carm_paraver/paraver_carm_configs/Intel_CARM_DPV2.cfg +2388 -0
- carm_paraver/paraver_carm_configs/Intel_CARM_DP_Extrae.xml +110 -0
- carm_paraver/paraver_carm_configs/Intel_CARM_SPV2.cfg +2388 -0
- carm_paraver/paraver_carm_configs/Intel_CARM_SP_Extrae.xml +110 -0
- carm_paraver/paraver_carm_configs/__init__.py +0 -0
- carm_paraver-1.0.0.dev0.dist-info/METADATA +140 -0
- carm_paraver-1.0.0.dev0.dist-info/RECORD +50 -0
- carm_paraver-1.0.0.dev0.dist-info/WHEEL +5 -0
- carm_paraver-1.0.0.dev0.dist-info/entry_points.txt +3 -0
- carm_paraver-1.0.0.dev0.dist-info/licenses/LICENSE +504 -0
- carm_paraver-1.0.0.dev0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,845 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import hashlib
|
|
3
|
+
import math
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import plotly.graph_objects as go
|
|
12
|
+
|
|
13
|
+
# Path of the "tag=value" settings file used by read_library_path and
# write_library_path; it is relative to the current working directory.
CONFIG_FILE = "./config/auto_config/config.txt"


# Color name assigned to each Intel ISA extension key.
intel_ISA_colors = {
    "avx512": "blue",
    "avx2": "green",
    "sse": "purple",
    "scalar": "black",
}
# RGB triple (0-255 per channel) for every color name used above.
color_map = {
    "blue": (0, 0, 255),
    "green": (0, 255, 0),
    "purple": (128, 0, 128),
    "black": (0, 0, 0),
}

# Colors for SP/DP and Load/Store
precision_color_map = {
    "sp": (0, 0, 255),  # pure blue
    "dp": (0, 255, 0),  # pure green
}

# Loads are pure blue, stores are pure red.
loadstore_color_map = {"load": (0, 0, 255), "store": (255, 0, 0)}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def find_and_run(command):
    """Resolve ``command`` on PATH and execute it once with no arguments.

    The executable is located with ``shutil.which`` and run with its output
    captured as text. Diagnostics are printed to standard error.

    Returns:
        ``True`` if the command was found and exited with status 0.
        The tuple ``(False, None)`` if it was not found or exited non-zero.

    NOTE(review): success and failure results have different shapes, and the
    failure tuple is truthy, so ``if find_and_run(...)`` cannot detect a
    failure. Confirm how callers consume the result before unifying it.
    """
    executable = shutil.which(command)
    if executable is None:
        print(f"ERROR: '{command}' not found on PATH.", file=sys.stderr)
        return False, None

    try:
        subprocess.run(executable, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        print(f"'{command}' failed with exit {err.returncode}", file=sys.stderr)
        print("STDERR:", err.stderr, file=sys.stderr)
        return False, None
    return True
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def carm_eq(ai, bw, fp):
    """Return the element-wise minimum of ``ai * bw`` and ``fp``.

    This is the roofline equation: attainable performance is the smaller of
    the bandwidth-bound value (``ai * bw``) and the compute peak ``fp``.
    Inputs may be scalars or NumPy arrays.
    """
    bandwidth_bound = ai * bw
    return np.minimum(bandwidth_bound, fp)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def custom_round(value, digits=4):
    """Round ``value`` keeping ``digits`` digits after its leading zeros.

    Values with magnitude >= 1 (and non-finite values) are rounded to
    ``digits`` decimal places. For magnitudes strictly between 0 and 1 the
    number of zeros between the decimal point and the first significant digit
    is added to ``digits``, so small values are not rounded away.

    The leading-zero count is derived from ``log10`` rather than from the
    string form of the number: floats below 1e-4 print in exponent notation,
    which previously sent them down the plain-rounding path and turned them
    into 0.0.

    Args:
        value: Number to round.
        digits: Digits to keep (after any leading zeros for |value| < 1).

    Returns:
        The rounded value; the integer 0 when ``value`` equals 0.
    """
    if value == 0:
        return 0  # Directly return 0 if the value is 0

    magnitude = abs(value)
    if not 0 < magnitude < 1:
        # Covers |value| >= 1 as well as NaN / infinity.
        return round(value, digits)

    # floor(log10(m)) is the exponent of the first significant digit, e.g.
    # 0.00012 -> -4, which means three zeros after the decimal point.
    leading_zeros = -math.floor(math.log10(magnitude)) - 1
    return round(value, digits + leading_zeros)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def is_valid_paraver_value(val):
    """Decide whether ``val`` counts as a usable Paraver value.

    The value is converted to a string and stripped. An empty result is
    invalid. A numeric string is valid only when it is greater than zero.
    Any non-numeric, non-empty string is treated as valid.
    """
    text = str(val).strip()
    if not text:
        return False
    try:
        number = float(text)
    except ValueError:
        # Not a number at all: accepted as-is.
        return True
    return number > 0
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def find_nearest_positive(df, index, lower_filter, duration_filter, use_paraver_mask, min_bound=0):
    """Return the row label nearest to ``index`` that passes the filters.

    A row is valid when its ``GFLOPS`` and ``Arithmetic_Intensity`` are at
    least ``lower_filter`` and its ``Duration`` is at least
    ``duration_filter``. With ``use_paraver_mask`` set, its ``Paraver_Label``
    must additionally satisfy ``is_valid_paraver_value``.

    The search starts at ``index`` and widens one step at a time, trying the
    left neighbour before the right one. Every candidate is checked against
    both ``min_bound`` and the last row, so a starting index outside that
    window no longer causes a lookup error and rows below ``min_bound`` are
    never returned.

    Rows are addressed with ``df.loc`` using integers from 0 to
    ``len(df) - 1``.
    NOTE(review): this assumes ``df`` carries a default 0..n-1 index —
    confirm against callers.

    Raises:
        ValueError: If no row in ``[min_bound, len(df) - 1]`` is valid.
    """
    max_index = len(df) - 1

    def row_is_valid(i):
        passes = (
            df.loc[i, "GFLOPS"] >= lower_filter
            and df.loc[i, "Arithmetic_Intensity"] >= lower_filter
            and df.loc[i, "Duration"] >= duration_filter
        )
        if passes and use_paraver_mask:
            return is_valid_paraver_value(df.loc[i, "Paraver_Label"])
        return passes

    def is_candidate(i):
        # Bounds are tested first so df.loc is never asked for a missing row.
        return min_bound <= i <= max_index and row_is_valid(i)

    if is_candidate(index):
        return index

    distance = 1
    while (index - distance >= min_bound) or (index + distance <= max_index):
        for candidate in (index - distance, index + distance):
            if is_candidate(candidate):
                return candidate
        distance += 1

    raise ValueError(
        f"No row with valid GFLOPS, Arithmetic_Intensity, and Duration found from index {index} with lower bound {min_bound}."
    )
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def read_library_path(tag):
    """Look up the value stored for ``tag`` in the config file.

    The file at ``CONFIG_FILE`` holds one ``tag=value`` entry per line.
    Blank lines and lines without ``=`` are ignored. Each line is split on
    the first ``=`` only, so values that themselves contain ``=`` are
    returned intact instead of being skipped.

    Returns:
        The value of the first entry whose key equals ``tag``, or ``None``
        when the file does not exist or has no such entry.
    """
    if not os.path.exists(CONFIG_FILE):
        return None

    with open(CONFIG_FILE) as file:
        for line in file:
            entry = line.strip()
            if not entry:
                continue
            key, separator, value = entry.partition("=")
            if separator and key == tag:
                return value
    return None
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def write_library_path(tag, path):
    """Append a ``tag=path`` entry to the config file.

    The directory that holds ``CONFIG_FILE`` is created first when it is
    missing, so the first write in a fresh working directory does not fail
    with ``FileNotFoundError``. Existing entries are never rewritten; the
    new entry is added on its own line at the end of the file.
    """
    config_dir = os.path.dirname(CONFIG_FILE)
    if config_dir:
        os.makedirs(config_dir, exist_ok=True)

    with open(CONFIG_FILE, "a") as file:
        file.write(f"{tag}={path}\n")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def read_csv_file(file_path):
    """Parse a roofline results CSV.

    The first row carries the machine name in column 1 and the L1/L2/L3
    sizes in columns 3, 5 and 7. The second row is skipped. Every following
    non-blank row becomes one dictionary; the bandwidth and peak figures are
    read from every other column starting at column 9.

    Returns:
        ``(machine_name, l1_size, l2_size, l3_size, data_list)`` where
        ``data_list`` holds one dict per data row.
    """
    with open(file_path, newline="") as csvfile:
        rows = csv.reader(csvfile)

        machine_header = next(rows)
        machine_name = machine_header[1]
        l1_size, l2_size, l3_size = (int(machine_header[col]) for col in (3, 5, 7))

        next(rows)  # second header row is not used

        data_list = [
            {
                "Date": row[0],
                "ISA": row[1],
                "Precision": row[2],
                "Threads": int(row[3]),
                "Loads": int(row[4]),
                "Stores": int(row[5]),
                "Interleaved": row[6],
                "DRAMBytes": int(row[7]),
                "FPInst": row[8],
                "L1": float(row[9]),
                "L2": float(row[11]),
                "L3": float(row[13]),
                "DRAM": float(row[15]),
                "FP": float(row[17]),
                "FP_FMA": float(row[19]),
            }
            for row in rows
            # Skip empty lines and rows made only of empty cells.
            if row and "".join(row).strip()
        ]

    return machine_name, l1_size, l2_size, l3_size, data_list
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def natural_key_for_dotstring(val):
    """Build a natural-sort key from a dotted string such as ``'1.10.2'``.

    Strings are split on ``.``. When every piece parses as an integer the
    key is a tuple of ints. Otherwise the key is the tuple of the raw string
    pieces. Non-string values are returned unchanged.
    """
    if not isinstance(val, str):
        return val

    pieces = val.split(".")
    try:
        return tuple(map(int, pieces))
    except ValueError:
        # At least one piece is not an integer: keep the text pieces.
        return tuple(pieces)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def natural_sort_series(series):
    """Map a Series to natural-sort keys for use as ``key`` in ``sort_values``.

    Each element is passed through :func:`natural_key_for_dotstring`.
    """
    sort_keys = series.map(natural_key_for_dotstring)
    return sort_keys
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def read_application_csv_file(file_path):
    """Load application results from a CSV file.

    The first row is treated as a header and skipped. Each later non-empty
    row becomes a dict with the keys Date, Method, Name, ISA, Precision,
    Threads, AI, GFLOPS, Bandwidth and Time. The last four are converted to
    float.

    Returns:
        The list of row dicts, or ``False`` when the file is missing, empty,
        unreadable, malformed, or contains no data rows. A message is
        printed for the missing, empty and failure cases.
    """
    if not os.path.exists(file_path):
        print("Application file does not exist:", file_path)
        return False

    try:
        with open(file_path, newline="") as csvfile:
            rows = csv.reader(csvfile)

            if next(rows, None) is None:
                print("File is empty:", file_path)
                return False

            data_list = [
                {
                    "Date": row[0],
                    "Method": row[1],
                    "Name": row[2],
                    "ISA": row[3],
                    "Precision": row[4],
                    "Threads": row[5],
                    "AI": float(row[6]),
                    "GFLOPS": float(row[7]),
                    "Bandwidth": float(row[8]),
                    "Time": float(row[9]),
                }
                for row in rows
                if row
            ]
    except Exception as e:
        print("Failed to read the file:", file_path, "Error:", e)
        return False

    if not data_list:
        return False
    return data_list
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def ensure_list(marker_dict, attr_name, default_value, n_points):
    """Return ``marker_dict[attr_name]`` as a list.

    A missing attribute yields ``default_value`` repeated ``n_points`` times.
    A value that is already a list is returned as-is (same object). Any
    other value is repeated ``n_points`` times.
    """
    if attr_name in marker_dict:
        current = marker_dict[attr_name]
        return current if isinstance(current, list) else [current] * n_points
    return [default_value] * n_points
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def make_power_of_two_ticks(min_val, max_val):
    """Build power-of-two tick positions and labels covering a value range.

    Both bounds are clamped to at least 1e-10 so the logarithm is defined.
    Ticks run from the power of two at or below ``min_val`` to the one at or
    above ``max_val``.

    Returns:
        ``(tickvals, ticktext)``: the values ``2**i`` and matching labels of
        the form ``2<sup>i</sup>``.
    """
    smallest_allowed = 0.0000000001
    lowest_exp = math.floor(math.log2(max(min_val, smallest_allowed)))
    highest_exp = math.ceil(math.log2(max(max_val, smallest_allowed)))
    exponents = range(lowest_exp, highest_exp + 1)

    tickvals = [2**i for i in exponents]
    ticktext = [f"2<sup>{i}</sup>" for i in exponents]
    return tickvals, ticktext
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def extract_last_segment(s):
    """Return the text after the last underscore, or ``s`` if it has none."""
    if "_" not in s:
        return s
    return s.split("_")[-1]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def extract_prefix(s):
    """Return the text before the last underscore, or ``s`` if it has none."""
    return s.rsplit("_", 1)[0] if "_" in s else s
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def interpolate_color(start_color, end_color, factor):
    """Linearly interpolate between two RGB colors.

    Each channel is ``start + factor * (end - start)`` truncated with
    ``int``. The result is formatted as ``rgb(r,g,b)``.
    """
    r, g, b = (
        int(start_color[channel] + factor * (end_color[channel] - start_color[channel]))
        for channel in range(3)
    )
    return f"rgb({r},{g},{b})"
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def construct_query(filters):
    """Build an ``and``-joined equality query string from ``filters``.

    Fields are considered in a fixed order. A field contributes a
    ``name == value`` clause only when its filter value is truthy. String
    fields have their value wrapped in single quotes; numeric fields are
    inserted as-is.

    Returns:
        The query string, or ``None`` when no field contributes a clause.
    """
    ordered_fields = (
        "ISA",
        "Precision",
        "Threads",
        "Loads",
        "Stores",
        "Interleaved",
        "DRAMBytes",
        "FPInst",
        "Date",
    )
    # Fields whose values are strings and therefore need quoting.
    quoted_fields = {"ISA", "Precision", "Interleaved", "FPInst", "Date"}

    clauses = []
    for field_name in ordered_fields:
        value = filters.get(field_name)
        if not value:
            continue
        rendered = f"'{value}'" if field_name in quoted_fields else value
        clauses.append(f"{field_name} == {rendered}")

    if not clauses:
        return None
    return " and ".join(clauses)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def construct_query_timestamp(df, ISA_list, Precision_list, Threads_list):
    """Filter ``df`` by ISA/precision activity and thread.

    Candidate columns are named ``Intel_FP_<isa>_<precision>`` for every
    ISA/precision combination; only those present in ``df`` are used. A row
    is kept when at least one selected column is greater than 1 and its
    ``ThreadID`` is in ``Threads_list``.

    Returns:
        The filtered rows, or an empty DataFrame (after printing a message)
        when none of the candidate columns exist.
    """
    candidate_names = (
        f"Intel_FP_{isa}_{precision}" for isa in ISA_list for precision in Precision_list
    )
    selected_columns = [name for name in candidate_names if name in df.columns]

    if not selected_columns:
        print("No matching columns found for the selected ISA and Precision.")
        return pd.DataFrame()

    has_activity = (df[selected_columns] > 1).any(axis=1)
    on_selected_thread = df["ThreadID"].isin(Threads_list)
    return df[has_activity & on_selected_thread]
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def roof_value_at_x(roof, x):
    """Evaluate a two-segment roof at ``x`` by linear interpolation.

    ``roof`` provides ``start``, ``ridge`` and ``end`` points as ``[x, y]``
    pairs. For ``x`` at or before the ridge the start-ridge segment is used;
    otherwise the ridge-end segment. A segment whose two points share the
    same x returns the y of its first point.
    """
    start, ridge, end = roof["start"], roof["ridge"], roof["end"]

    # Pick the segment that applies to x.
    first, second = (start, ridge) if x <= ridge[0] else (ridge, end)

    if second[0] == first[0]:
        return first[1]
    slope = (second[1] - first[1]) / (second[0] - first[0])
    return first[1] + slope * (x - first[0])
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def label_cache_level(row, roofs):
    """
    Classify a performance point (Arithmetic Intensity, GFLOPS) by the first
    roof it lies strictly below.

    Roofs are tested in the order DRAM, L3, L2, L1, and only levels present
    in ``roofs`` are considered. The result is 4, 3, 2 or 1 for those levels
    respectively, 0 when either coordinate is not positive, and 6 when the
    point is below none of the available roofs.
    """
    x = row["Arithmetic_Intensity"]
    y = row["GFLOPS"]

    if x <= 0 or y <= 0:
        return 0

    for level, code in (("DRAM", 4), ("L3", 3), ("L2", 2), ("L1", 1)):
        if level in roofs and y < roof_value_at_x(roofs[level], x):
            return code

    return 6  # Not below any roof
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def calculate_roofline(values, min_ai):
    """Compute the corner points of every roof of a roofline plot.

    Args:
        values: Sequence indexed as ``[L1, L2, L3, DRAM, FP, FP_FMA, name]``.
            Entries 0-3 are cache/DRAM bandwidths, 4 and 5 are the compute
            peaks used for the FP roof and for the cache roofs respectively,
            and 6 is the key under which the FP roof is stored.
        min_ai: Smallest arithmetic intensity to cover. The sampled range
            starts at ``min(0.00390625, min_ai)`` and ends at 256.

    Returns:
        A dict keyed by cache level (only levels with bandwidth > 0) holding
        ``start``, ``mid``, ``ridge`` and ``end`` points as ``[ai, gflops]``,
        plus ``values[6]`` holding the ``ridge`` and ``end`` of the FP roof.
        The FP roof uses the bandwidth of the first level with bandwidth > 0.

    Raises:
        ValueError: If none of the four bandwidths is positive, because the
            FP roof cannot be built without one.

    Changes from the previous implementation:
        * The ridge is located with a vectorized comparison instead of a
          200000-step Python loop.
        * When a roof never flattens inside the sampled range, its ridge is
          placed on the last sample (x and y from the same sample).
        * A missing positive bandwidth raises ValueError instead of failing
          on an unbound local variable.
        * The last sample is addressed as ``[-1]`` rather than 199999.
    """
    cache_levels = ["L1", "L2", "L3", "DRAM"]
    start_ai = min(0.00390625, min_ai)
    ai = np.linspace(start_ai, 256, num=200000)

    def ridge_index(y_values):
        # Index of the first sample that equals its successor, i.e. where the
        # roof stops rising. Falls back to the last sample when the roof is
        # still rising at the end of the range.
        flat = np.flatnonzero(y_values[1:] == y_values[:-1])
        return int(flat[0]) if flat.size else len(y_values) - 1

    dots = {}
    top_roof = None

    for level_index, cache_level in enumerate(cache_levels):
        bandwidth = values[level_index]
        if not bandwidth > 0:
            continue
        if top_roof is None:
            top_roof = bandwidth

        y_values = carm_eq(ai, bandwidth, values[5])
        ridge = ridge_index(y_values)
        ridge_ai = float(ai[ridge])

        dots[cache_level] = {
            "start": [start_ai, y_values[0]],
            # Geometric means put the label point mid-slope on log axes.
            "mid": [np.sqrt(ridge_ai * start_ai), np.sqrt(y_values[0] * y_values[ridge])],
            "ridge": [ridge_ai, y_values[ridge]],
            "end": [ai[-1], y_values[-1]],
        }

    if top_roof is None:
        raise ValueError("calculate_roofline needs at least one positive bandwidth in values[0:4].")

    y_values = carm_eq(ai, top_roof, values[4])
    ridge = ridge_index(y_values)

    dots[values[6]] = {
        "ridge": [float(ai[ridge]), y_values[ridge]],
        "end": [ai[-1], y_values[-1]],
    }

    return dots
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def plot_roofline(values, dots, name_suffix, ISA, line_legend, line_size):
    """Create the Plotly line traces for one roofline.

    One trace is produced per cache level present in ``dots`` (start, ridge
    and end points), followed by one trace for the FP roof stored under
    ``values[6]`` (ridge and end points). All lines are black when
    ``name_suffix`` is the empty string and red otherwise. Cache roofs use
    solid/solid/dash/dot strokes for L1/L2/L3/DRAM; the FP roof uses dashdot.

    Args:
        values: Same layout as used to build ``dots``: bandwidths at 0-3,
            compute peaks at 4 and 5, FP roof key at 6.
        dots: Roof corner points keyed by level name.
        name_suffix: Selects the line color, see above.
        ISA: ISA name; shown upper-cased in trace names and hover text.
        line_legend: Passed to ``showlegend`` of every trace.
        line_size: Line width of every trace.

    Returns:
        List of ``go.Scatter`` traces.
    """
    cache_levels = ["L1", "L2", "L3", "DRAM"]
    dash_styles = ["solid", "solid", "dash", "dot"]
    roof_color = "black" if name_suffix == "" else "red"
    corners = ("start", "ridge", "end")

    traces = []
    for level_index, (cache_level, dash_style) in enumerate(
        zip(cache_levels, dash_styles, strict=True)
    ):
        cache_dots = dots.get(cache_level)
        if not cache_dots:
            continue
        traces.append(
            go.Scatter(
                x=[cache_dots[corner][0] for corner in corners],
                y=[cache_dots[corner][1] for corner in corners],
                mode="lines",
                text=[
                    "",
                    f"{cache_level} {ISA.upper()} Peak Bandwidth: {values[level_index]} GB/s",
                    f"FP FMA {ISA.upper()} Peak: {values[5]} GFLOP/s",
                ],
                hovertemplate="<b>%{text}</b><br>(%{x}, %{y})<br><extra></extra>",
                line={"color": roof_color, "dash": dash_style, "width": line_size},
                name=f"{cache_level} {ISA.upper()}",
                showlegend=line_legend,
            )
        )

    fp_dots = dots[values[6]]
    fp_hover = f"FP {ISA.upper()} {values[6].upper()} Peak: {values[4]} GFLOP/s"
    traces.append(
        go.Scatter(
            x=[fp_dots["ridge"][0], fp_dots["end"][0]],
            y=[fp_dots["ridge"][1], fp_dots["end"][1]],
            mode="lines",
            text=[fp_hover, fp_hover],
            hovertemplate="<b>%{text}</b><br>(%{x}, %{y})<br><extra></extra>",
            line={"color": roof_color, "dash": "dashdot", "width": line_size},
            name=f"{values[6].upper()} {ISA.upper()}",
            showlegend=line_legend,
        )
    )

    return traces
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def draw_annotation(
    values,
    lines,
    name_suffix,
    ISA,
    cache_level,
    graph_width,
    graph_height,
    x_range=None,
    y_range=None,
):
    """Build the text annotation for one roof of a roofline plot.

    Args:
        values: Bandwidths at indices 0-3 (L1, L2, L3, DRAM), FP peak at 4
            and FP FMA peak at 5.
        lines: Roof corner points keyed by level name, each providing
            ``start``, ``mid``, ``ridge`` and ``end`` as ``[ai, gflops]``.
        name_suffix: ``"1"`` selects black text and a 1.3 offset factor; any
            other value selects red text and a 0.7 offset factor.
        ISA: ISA name shown in the annotation text.
        cache_level: ``"L1"``, ``"L2"``, ``"L3"``, ``"DRAM"``, ``"FMA"`` or
            ``"FP"``.
        graph_width: Plot width in pixels, used to compute the text angle.
        graph_height: Plot height in pixels, used to compute the text angle.
        x_range: ``[min, max]`` of the x axis in log10 units. Required when
            ``cache_level`` is a cache/DRAM level with positive bandwidth.
        y_range: Same as ``x_range`` for the y axis.

    Returns:
        A ``go.layout.Annotation`` placed in log10 coordinates, or an empty
        dict when ``cache_level`` is unknown or its peak/bandwidth is not
        positive.

    Changes from the previous implementation: the text angle is only
    computed when an annotation is actually produced, and a roofline
    recomputation whose results were never used has been removed.
    """
    cache_levels = ["L1", "L2", "L3", "DRAM"]

    if name_suffix == "1":
        color = "black"
        factor = 1.3
    else:
        color = "red"
        factor = 0.7

    def boxed_label(x, y, text, textangle, name):
        # All annotations share the same boxed styling.
        return go.layout.Annotation(
            x=x,
            y=y,
            text=text,
            showarrow=False,
            font={
                "color": color,
                "size": 12,
            },
            align="center",
            bgcolor="white",
            bordercolor=color,
            borderwidth=1,
            textangle=textangle,
            name=name,
        )

    if cache_level in cache_levels:
        bandwidth = values[cache_levels.index(cache_level)]
        if not bandwidth > 0:
            return {}

        roof = lines[cache_level]
        log_x1, log_x2 = math.log10(roof["start"][0]), math.log10(roof["ridge"][0])
        log_y1, log_y2 = math.log10(roof["start"][1]), math.log10(roof["ridge"][1])

        log_xmin, log_xmax = x_range[0], x_range[1]
        log_ymin, log_ymax = y_range[0], y_range[1]

        # Project the sloped segment to pixel space so the text follows the
        # line as drawn. Screen y grows downwards, hence the subtraction.
        x1_pixel = ((log_x1 - log_xmin) / (log_xmax - log_xmin)) * graph_width
        x2_pixel = ((log_x2 - log_xmin) / (log_xmax - log_xmin)) * graph_width
        y1_pixel = graph_height - ((log_y1 - log_ymin) / (log_ymax - log_ymin)) * graph_height
        y2_pixel = graph_height - ((log_y2 - log_ymin) / (log_ymax - log_ymin)) * graph_height

        pixel_slope = (y2_pixel - y1_pixel) / (x2_pixel - x1_pixel)
        angle = math.degrees(math.atan(pixel_slope))

        return boxed_label(
            math.log10(roof["mid"][0] * factor),
            math.log10(roof["mid"][1] * factor),
            f"{cache_level} {ISA} Bandwidth: {bandwidth:.3f} GB/s",
            angle,
            f"{cache_level}_{name_suffix}",
        )

    if cache_level == "FMA" and values[5] > 0:
        # Centered (geometric mean on the log axis) along the flat L1 roof.
        mid_ai = np.sqrt(lines["L1"]["ridge"][0] * lines["L1"]["end"][0])
        mid_gflops = lines["L1"]["ridge"][1]
        return boxed_label(
            math.log10(mid_ai),
            math.log10(mid_gflops),
            f"FP FMA {ISA} Peak: {values[5]:.3f} GFLOP/s",
            0,
            f"FP_FMA_{name_suffix}",
        )

    if cache_level == "FP" and values[4] > 0:
        mid_ai = np.sqrt(lines["L1"]["ridge"][0] * lines["L1"]["end"][0])
        mid_gflops = values[4]
        return boxed_label(
            math.log10(mid_ai),
            math.log10(mid_gflops),
            f"FP {ISA} Peak: {values[4]:.3f} GFLOP/s",
            0,
            f"FP_{name_suffix}",
        )

    return {}
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def build_total_tooltip_text(name_app, threads_app, totals, total_FP_inst, total_mem_inst):
    """Compose the HTML hover text for an application's total point.

    ``totals`` is indexed by counter name: ``Intel_FP_<ISA>_<SP|DP>`` for the
    ISAs Scalar, SSE, AVX2 and AVX512, plus ``Intel_Loads`` and
    ``Intel_Stores``. Totals are shown per ISA, per precision, and for loads
    and stores. Zero totals are omitted. Flop totals are shown with their
    share of ``total_FP_inst``; load/store totals with their share of
    ``total_mem_inst``.
    """
    isas = ("Scalar", "SSE", "AVX2", "AVX512")
    counts = {
        (isa, precision): totals[f"Intel_FP_{isa}_{precision}"]
        for isa in isas
        for precision in ("SP", "DP")
    }

    def precision_total(precision):
        total = counts[(isas[0], precision)]
        for isa in isas[1:]:
            total = total + counts[(isa, precision)]
        return total

    metrics = {f"{isa} Flops": counts[(isa, "SP")] + counts[(isa, "DP")] for isa in isas}
    metrics["SP Flops"] = precision_total("SP")
    metrics["DP Flops"] = precision_total("DP")
    metrics["Loads"] = totals["Intel_Loads"]
    metrics["Stores"] = totals["Intel_Stores"]

    lines = [f"{name_app} Total</b><br>Extra Details</b><br> Threads: {threads_app}"]
    for label, value in metrics.items():
        if value == 0:
            continue
        # Every label is either a flop metric or Loads/Stores.
        denominator = total_FP_inst if "Flops" in label else total_mem_inst
        percentage = custom_round((value / denominator) * 100, 1)
        value_formatted = f"{value:.2e}"
        lines.append(f"</b><br> {label}: {value_formatted} ({percentage}%)")

    return "</b>".join(lines)
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def build_timestamp_tooltip_text(
    scalar,
    sse,
    avx2,
    avx512,
    dp,
    load,
    timestamp_label,
    thread_ID,
    duration,
    paraver_value,
    paraver_label,
    window_name=None,
):
    """Compose the HTML hover text for a single timestamp point.

    The text lists the timestamp, thread and duration, followed by every
    percentage above 0.1 among the ISA shares, SP/DP shares (SP is
    ``100 - dp``) and load/store shares (stores are ``100 - load``). When
    ``window_name`` is truthy a Paraver section with the window, value and
    label is appended.
    """
    parts = [
        f"Timestamp: {timestamp_label}",
        f"</b><br> Thread: {thread_ID}</b><br> Duration(us): {duration}",
        "</b><br><b>Extra Details</b>",
    ]

    shares = (
        ("Scalar Flops", scalar),
        ("SSE Flops", sse),
        ("AVX2 Flops", avx2),
        ("AVX512 Flops", avx512),
        ("SP Flops", 100 - dp),
        ("DP Flops", dp),
        ("Loads", load),
        ("Stores", 100 - load),
    )
    for label, value in shares:
        if value > 0.1:
            rounded_value = custom_round(value, 1)
            parts.append(f"</b><br> {label}: {rounded_value}%")

    if window_name:
        parts += [
            "</b><br><b>Paraver Data</b>",
            f"</b><br> Window: {window_name}",
            f"</b><br> Value: {paraver_value}",
            f"</b><br> Label: {paraver_label}",
        ]

    return "".join(parts)
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def hsv_to_rgb(h, s, v):
    """Convert an HSV color to an 8-bit RGB tuple.

    ``h`` is scaled by 6 to select one of six hue sectors (wrapping with
    modulo 6); ``s`` and ``v`` are used as fractions. Each resulting channel
    is multiplied by 255 and truncated with ``int``.
    """
    sector = int(h * 6)
    f = h * 6 - sector  # position inside the sector
    p = v * (1 - s)
    q = v * (1 - f * s)
    t = v * (1 - (1 - f) * s)

    # Channel layout for sectors 0..5.
    layouts = (
        (v, t, p),
        (q, v, p),
        (p, v, t),
        (p, q, v),
        (t, p, v),
        (v, p, q),
    )
    r, g, b = layouts[sector % 6]

    return int(r * 255), int(g * 255), int(b * 255)
|
|
707
|
+
|
|
708
|
+
|
|
709
|
+
def hash_to_color(name):
    """Derive a deterministic RGB color from ``name``.

    The SHA-256 digest of the UTF-8 encoded name, taken modulo 361, gives a
    hue in degrees. That hue is converted with saturation 0.8 and value 0.9.
    """
    digest = hashlib.sha256(name.encode("utf-8")).hexdigest()
    hue_degrees = int(digest, 16) % 361
    return hsv_to_rgb(hue_degrees / 360.0, 0.8, 0.9)
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def blend_rgb(weights, color_dict, return_rgb):
    """Blend colors as a weighted average of their RGB channels.

    Args:
        weights: Mapping of key to weight. Weights are normalized by their
            sum, so they do not need to add up to any particular total.
        color_dict: Mapping of the same keys to ``(r, g, b)`` triples.
        return_rgb: When true return an ``(r, g, b)`` tuple of ints,
            otherwise a ``#rrggbb`` hex string.

    Returns:
        The blended color in the requested form. Black is returned, also in
        the requested form, when ``weights`` is empty or its values sum to
        zero (previously the empty case ignored ``return_rgb`` and the
        zero-sum case divided by zero).
    """
    total = sum(weights.values()) if weights else 0
    if not total:
        return (0, 0, 0) if return_rgb else "#000000"

    r = g = b = 0
    for key, weight in weights.items():
        share = weight / total
        cr, cg, cb = color_dict[key]
        r += cr * share
        g += cg * share
        b += cb * share

    r, g, b = round(r), round(g), round(b)
    if return_rgb:
        return r, g, b
    return f"#{r:02x}{g:02x}{b:02x}"
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
def blend_colors(scalar, sse, avx2, avx512, dp, load, thread_ID, color_radio, return_rgb):
    """Pick the color of a point according to the selected coloring mode.

    Args:
        scalar, sse, avx2, avx512: Per-ISA weights, used in ``"ISA"`` mode.
            Only weights greater than zero take part in the blend.
        dp: Double-precision percentage, used in ``"Precision"`` mode. The
            single-precision weight is ``100 - dp``.
        load: Load percentage, used in ``"LD/ST Percentage"`` mode. The
            store weight is ``100 - load``.
        thread_ID: Hashed to a color in ``"Thread ID"`` mode.
        color_radio: One of ``"ISA"``, ``"Precision"``,
            ``"LD/ST Percentage"`` or ``"Thread ID"``.
        return_rgb: When true return an ``(r, g, b)`` tuple, otherwise a
            ``#rrggbb`` hex string.

    Returns:
        The color in the requested form. Black is returned for an unknown
        mode, when no ISA weight is positive, or when ``dp`` / ``load`` is
        not positive in its mode.
        NOTE(review): ``dp == 0`` and ``load == 0`` therefore map to black
        rather than to the pure SP / store color — confirm this is intended.

    The ``"Thread ID"`` mode now honors ``return_rgb``; it previously
    returned a hex string regardless of the flag.
    """
    black = (0, 0, 0) if return_rgb else "#000000"

    if color_radio == "ISA":
        weights = {"scalar": scalar, "sse": sse, "avx2": avx2, "avx512": avx512}
        active = {isa: share for isa, share in weights.items() if share > 0}
        if not active:
            return black
        isa_colors = {isa: color_map[intel_ISA_colors[isa]] for isa in active}
        return blend_rgb(active, isa_colors, return_rgb)

    if color_radio == "Precision":
        if dp > 0:
            return blend_rgb({"dp": dp, "sp": 100 - dp}, precision_color_map, return_rgb)
        return black

    if color_radio == "LD/ST Percentage":
        if load > 0:
            return blend_rgb({"load": load, "store": 100 - load}, loadstore_color_map, return_rgb)
        return black

    if color_radio == "Thread ID":
        r, g, b = hash_to_color(str(thread_ID))
        if return_rgb:
            return r, g, b
        return f"#{r:02x}{g:02x}{b:02x}"

    return black
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def group_consecutive_by_rgb(color_map_df):
    """Collapse runs of consecutive rows that share the same RGB color.

    Rows are visited in positional order. Adjacent rows with equal
    ``r``/``g``/``b`` values form a run, and each run is summarized into one
    record by ``process_group``.

    Returns:
        A DataFrame with one row per run.
    """

    def rgb_of(row):
        return (row["r"], row["g"], row["b"])

    grouped_data = []
    current_group = []

    for position in range(len(color_map_df)):
        row = color_map_df.iloc[position]
        # A color change closes the current run before the row starts a new one.
        if current_group and rgb_of(row) != rgb_of(current_group[-1]):
            grouped_data.append(process_group(current_group))
            current_group = []
        current_group.append(row)

    if current_group:
        grouped_data.append(process_group(current_group))

    return pd.DataFrame(grouped_data)
|
|
815
|
+
|
|
816
|
+
|
|
817
|
+
def process_group(group_rows):
    """Summarize a run of same-colored rows into a single record.

    A single row keeps its own ``Load/Store_ratio`` and
    ``Load/Store_ratio_string``. Several rows are reported as a
    ``min-max`` range with six decimals and a label prefixed by ``LD/ST: ``.
    The color is taken from the first row.
    """
    ratios = [member["Load/Store_ratio"] for member in group_rows]

    if len(ratios) == 1:
        ratio = ratios[0]
        ratio_str = group_rows[0]["Load/Store_ratio_string"]
    else:
        ratio = f"{min(ratios):.6f}-{max(ratios):.6f}"
        ratio_str = f"LD/ST: {ratio}"

    first = group_rows[0]
    return {
        "Load/Store_ratio": ratio,
        "Load/Store_ratio_string": ratio_str,
        "r": first["r"],
        "g": first["g"],
        "b": first["b"],
    }
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
def format_ld_st_csv(color_map_df, output_path):
    """Write a color map to ``output_path``, one entry per row.

    Each line has the form ``<percentage> "<percentage_string>",<r>,<g>,<b>``
    using the columns of the same names in ``color_map_df``. The file is
    overwritten.
    """

    def render(row):
        ratio_str = f"{row['percentage']}"
        label = row["percentage_string"]
        r, g, b = row["r"], row["g"], row["b"]
        return f'{ratio_str} "{label}",{r},{g},{b}\n'

    with open(output_path, "w") as f:
        for _, row in color_map_df.iterrows():
            f.write(render(row))
|