carm-paraver 1.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. carm_paraver/GUI_utils.py +845 -0
  2. carm_paraver/Paraver_CARM.py +4050 -0
  3. carm_paraver/__init__.py +5 -0
  4. carm_paraver/__main__.py +14 -0
  5. carm_paraver/analysis_helpers.py +894 -0
  6. carm_paraver/assets/CARM_icon3.svg +4 -0
  7. carm_paraver/assets/CHAMP_logo.svg +44 -0
  8. carm_paraver/assets/__init__.py +0 -0
  9. carm_paraver/assets/bsc.svg +4 -0
  10. carm_paraver/assets/carm_bsc.png +0 -0
  11. carm_paraver/assets/carm_bsc.svg +4 -0
  12. carm_paraver/assets/menu_icon.png +0 -0
  13. carm_paraver/assets/style.css +45 -0
  14. carm_paraver/carm_results/__init__.py +0 -0
  15. carm_paraver/carm_results/roofline/MN5_roofline.csv +43 -0
  16. carm_paraver/carm_results/roofline/__init__.py +0 -0
  17. carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX2_DP.cfg +51 -0
  18. carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX2_SP.cfg +51 -0
  19. carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX512_DP.cfg +51 -0
  20. carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX512_SP.cfg +51 -0
  21. carm_paraver/paraver_carm_configs/Intel/Intel_FP_SSE_DP.cfg +51 -0
  22. carm_paraver/paraver_carm_configs/Intel/Intel_FP_SSE_SP.cfg +51 -0
  23. carm_paraver/paraver_carm_configs/Intel/Intel_FP_Scalar_DP.cfg +50 -0
  24. carm_paraver/paraver_carm_configs/Intel/Intel_FP_Scalar_SP.cfg +51 -0
  25. carm_paraver/paraver_carm_configs/Intel/Intel_Loads.cfg +51 -0
  26. carm_paraver/paraver_carm_configs/Intel/Intel_Stores.cfg +51 -0
  27. carm_paraver/paraver_carm_configs/Intel/__init__.py +0 -0
  28. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX2_DP.cfg +51 -0
  29. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX2_SP.cfg +50 -0
  30. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX512_DP.cfg +51 -0
  31. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX512_SP.cfg +50 -0
  32. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_SSE_DP.cfg +51 -0
  33. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_SSE_SP.cfg +51 -0
  34. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_Scalar_DP.cfg +50 -0
  35. carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_Scalar_SP.cfg +50 -0
  36. carm_paraver/paraver_carm_configs/IntelV2/Intel_Loads.cfg +51 -0
  37. carm_paraver/paraver_carm_configs/IntelV2/Intel_Stores.cfg +50 -0
  38. carm_paraver/paraver_carm_configs/IntelV2/__init__.py +0 -0
  39. carm_paraver/paraver_carm_configs/Intel_CARM_DP.cfg +2388 -0
  40. carm_paraver/paraver_carm_configs/Intel_CARM_DPV2.cfg +2388 -0
  41. carm_paraver/paraver_carm_configs/Intel_CARM_DP_Extrae.xml +110 -0
  42. carm_paraver/paraver_carm_configs/Intel_CARM_SPV2.cfg +2388 -0
  43. carm_paraver/paraver_carm_configs/Intel_CARM_SP_Extrae.xml +110 -0
  44. carm_paraver/paraver_carm_configs/__init__.py +0 -0
  45. carm_paraver-1.0.0.dev0.dist-info/METADATA +140 -0
  46. carm_paraver-1.0.0.dev0.dist-info/RECORD +50 -0
  47. carm_paraver-1.0.0.dev0.dist-info/WHEEL +5 -0
  48. carm_paraver-1.0.0.dev0.dist-info/entry_points.txt +3 -0
  49. carm_paraver-1.0.0.dev0.dist-info/licenses/LICENSE +504 -0
  50. carm_paraver-1.0.0.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,845 @@
1
+ import csv
2
+ import hashlib
3
+ import math
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ import plotly.graph_objects as go
12
+
13
# Path of the key=value settings file used by read_library_path and
# write_library_path. It is relative, so it resolves against the current
# working directory.
CONFIG_FILE = "./config/auto_config/config.txt"


# Color name associated with each Intel ISA extension.
intel_ISA_colors = {
    "avx512": "blue",
    "avx2": "green",
    "sse": "purple",
    "scalar": "black",
}
# RGB triples for the color names used in intel_ISA_colors.
color_map = {
    "blue": (0, 0, 255),
    "green": (0, 255, 0),
    "purple": (128, 0, 128),
    "black": (0, 0, 0),
}

# Colors for SP/DP and Load/Store
precision_color_map = {
    "sp": (0, 0, 255),  # blue
    "dp": (0, 255, 0),  # green
}

# load -> blue, store -> red
loadstore_color_map = {"load": (0, 0, 255), "store": (255, 0, 0)}
36
+
37
+
38
def find_and_run(command):
    """Locate ``command`` on PATH and run it without arguments.

    Returns ``True`` when the command exits with status 0. When the command
    cannot be found, or exits with a non-zero status, a diagnostic is printed
    to stderr and the tuple ``(False, None)`` is returned.

    NOTE(review): the failure value is a non-empty tuple and therefore truthy,
    so ``if find_and_run(...)`` cannot tell success from failure. Confirm the
    intended return shape against the callers before relying on truthiness.
    """
    executable = shutil.which(command)
    if executable is None:
        print(f"ERROR: '{command}' not found on PATH.", file=sys.stderr)
        return False, None

    try:
        # Output is captured so it does not leak onto the console.
        subprocess.run(executable, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as exc:
        print(f"'{command}' failed with exit {exc.returncode}", file=sys.stderr)
        print("STDERR:", exc.stderr, file=sys.stderr)
        return False, None
    return True
52
+
53
+
54
def carm_eq(ai, bw, fp):
    """Return the roofline value ``min(ai * bw, fp)``, element-wise.

    ``ai`` may be a scalar or an array; the product with ``bw`` is capped at
    ``fp``.
    """
    bandwidth_bound = ai * bw
    return np.minimum(bandwidth_bound, fp)
56
+
57
+
58
def custom_round(value, digits=4):
    """Round ``value`` while keeping ``digits`` significant decimals for small numbers.

    Zero returns the integer ``0``. Values with magnitude >= 1, and values
    whose ``str()`` form has no plain decimal part (exponent notation or no
    "."), are rounded to ``digits`` decimals. For other values the zeros that
    directly follow the decimal point are counted and added to ``digits``, so
    that leading zeros do not consume the precision.
    """
    if value == 0:
        return 0

    text = str(value)
    has_plain_fraction = "." in text and "e" not in text and "E" not in text
    if abs(value) >= 1 or not has_plain_fraction:
        return round(value, digits)

    fraction = text.split(".")[1]
    # Number of zeros before the first significant decimal digit.
    leading_zeros = len(fraction) - len(fraction.lstrip("0"))
    return round(value, digits + leading_zeros)
77
+
78
+
79
def is_valid_paraver_value(val):
    """Tell whether ``val`` counts as a usable Paraver value.

    The value is converted with ``str`` and stripped. An empty result is
    invalid; text that parses as a number is valid only when strictly
    positive; any other non-numeric text is valid.
    """
    text = str(val).strip()
    if not text:
        return False
    try:
        number = float(text)
    except ValueError:
        # Non-numeric text is accepted as-is.
        return True
    return number > 0
87
+
88
+
89
def find_nearest_positive(df, index, lower_filter, duration_filter, use_paraver_mask, min_bound=0):
    """Return the row label closest to ``index`` that passes the filters.

    A row passes when its ``GFLOPS`` and ``Arithmetic_Intensity`` are both
    >= ``lower_filter`` and its ``Duration`` is >= ``duration_filter``. When
    ``use_paraver_mask`` is true its ``Paraver_Label`` must also satisfy
    ``is_valid_paraver_value``.

    The search starts at ``index`` and widens one step at a time, checking the
    left neighbour before the right one at each distance. Only labels in
    ``[min_bound, len(df) - 1]`` are inspected, so a start position outside
    that window is tolerated instead of raising ``KeyError``.

    Rows are addressed with ``df.loc``, i.e. by label.
    NOTE(review): this assumes ``df`` has a default 0..n-1 integer index -
    confirm against the callers.

    Raises:
        ValueError: if no row inside the window passes the filters.
    """
    max_index = len(df) - 1

    def in_window(i):
        return min_bound <= i <= max_index

    def row_is_valid(i):
        passes_numeric = (
            df.loc[i, "GFLOPS"] >= lower_filter
            and df.loc[i, "Arithmetic_Intensity"] >= lower_filter
            and df.loc[i, "Duration"] >= duration_filter
        )
        if passes_numeric and use_paraver_mask:
            return is_valid_paraver_value(df.loc[i, "Paraver_Label"])
        return passes_numeric

    if in_window(index) and row_is_valid(index):
        return index

    distance = 1
    while (index - distance >= min_bound) or (index + distance <= max_index):
        # Left neighbour first, so ties resolve to the earlier row.
        for candidate in (index - distance, index + distance):
            if in_window(candidate) and row_is_valid(candidate):
                return candidate
        distance += 1

    raise ValueError(
        f"No row with valid GFLOPS, Arithmetic_Intensity, and Duration found from index {index} with lower bound {min_bound}."
    )
124
+
125
+
126
def read_library_path(tag):
    """Look up ``tag`` in the ``key=value`` file at ``CONFIG_FILE``.

    Returns the value of the first line whose key equals ``tag``, or ``None``
    when the file does not exist or holds no such key. Each line is split on
    its first ``=`` only, so values that themselves contain ``=`` are
    returned intact instead of being skipped.
    """
    try:
        config = open(CONFIG_FILE)
    except FileNotFoundError:
        return None

    with config:
        for line in config:
            key, separator, value = line.strip().partition("=")
            # Blank lines and lines without "=" carry no setting.
            if separator and key == tag:
                return value
    return None
138
+
139
+
140
def write_library_path(tag, path):
    """Append a ``tag=path`` line to the file at ``CONFIG_FILE``.

    The parent directory is created first when it is missing, so the first
    write on a fresh checkout does not fail with ``FileNotFoundError``.
    Existing lines are left untouched; the new entry is added at the end.
    """
    config_dir = os.path.dirname(CONFIG_FILE)
    if config_dir:
        os.makedirs(config_dir, exist_ok=True)
    with open(CONFIG_FILE, "a") as file:
        file.write(f"{tag}={path}\n")
143
+
144
+
145
def read_csv_file(file_path):
    """Parse a roofline results CSV.

    The first row supplies the machine name (column 1) and the L1/L2/L3 sizes
    (columns 3, 5 and 7, parsed as integers). The second row is skipped. Every
    following non-blank row becomes one dictionary; the numeric results are
    read from columns 9, 11, 13, 15, 17 and 19.

    Returns:
        ``(machine_name, l1_size, l2_size, l3_size, data_list)``.
    """
    records = []
    with open(file_path, newline="") as handle:
        rows = csv.reader(handle)

        header = next(rows)
        machine_name = header[1]
        l1_size, l2_size, l3_size = (int(header[col]) for col in (3, 5, 7))

        next(rows)  # second header row carries no data

        for row in rows:
            if not row or not "".join(row).strip():
                continue
            records.append(
                {
                    "Date": row[0],
                    "ISA": row[1],
                    "Precision": row[2],
                    "Threads": int(row[3]),
                    "Loads": int(row[4]),
                    "Stores": int(row[5]),
                    "Interleaved": row[6],
                    "DRAMBytes": int(row[7]),
                    "FPInst": row[8],
                    "L1": float(row[9]),
                    "L2": float(row[11]),
                    "L3": float(row[13]),
                    "DRAM": float(row[15]),
                    "FP": float(row[17]),
                    "FP_FMA": float(row[19]),
                }
            )

    return machine_name, l1_size, l2_size, l3_size, records
178
+
179
+
180
def natural_key_for_dotstring(val):
    """Build a sort key from a dotted string such as ``'1.10.2'``.

    Strings are split on "." and returned as a tuple of ints. If any piece is
    not an integer, the tuple of string pieces is returned instead. Values
    that are not strings are returned unchanged.
    """
    if not isinstance(val, str):
        return val

    pieces = val.split(".")
    try:
        return tuple(map(int, pieces))
    except ValueError:
        # At least one piece is non-numeric; keep the raw strings.
        return tuple(pieces)
195
+
196
+
197
def natural_sort_series(series):
    """Map every element of ``series`` through :func:`natural_key_for_dotstring`.

    The mapped Series is meant to be passed as the ``key`` of
    :func:`sort_values`.
    """
    sort_keys = series.map(natural_key_for_dotstring)
    return sort_keys
203
+
204
+
205
def read_application_csv_file(file_path):
    """Load application measurements from a CSV file.

    The first row is treated as a header and skipped. Every non-empty row
    after it becomes a dictionary; columns 6-9 are parsed as floats and the
    other fields are kept as strings.

    Returns:
        The list of row dictionaries, or ``False`` when the file is missing,
        empty, has no data rows, or cannot be parsed (a message is printed in
        the missing, empty and unparsable cases).
    """
    if not os.path.exists(file_path):
        print("Application file does not exist:", file_path)
        return False

    try:
        with open(file_path, newline="") as handle:
            rows = csv.reader(handle)

            if next(rows, None) is None:
                print("File is empty:", file_path)
                return False

            records = [
                {
                    "Date": row[0],
                    "Method": row[1],
                    "Name": row[2],
                    "ISA": row[3],
                    "Precision": row[4],
                    "Threads": row[5],
                    "AI": float(row[6]),
                    "GFLOPS": float(row[7]),
                    "Bandwidth": float(row[8]),
                    "Time": float(row[9]),
                }
                for row in rows
                if row
            ]
    except Exception as exc:
        # Best-effort loader: any read or parse problem is reported, not raised.
        print("Failed to read the file:", file_path, "Error:", exc)
        return False

    return records or False
240
+
241
+
242
def ensure_list(marker_dict, attr_name, default_value, n_points):
    """Return ``marker_dict[attr_name]`` as a list.

    A missing attribute yields ``default_value`` repeated ``n_points`` times.
    An existing list is returned as-is (the same object). Any other value is
    repeated ``n_points`` times.
    """
    if attr_name not in marker_dict:
        return [default_value] * n_points

    current = marker_dict[attr_name]
    return current if isinstance(current, list) else [current] * n_points
252
+
253
+
254
def make_power_of_two_ticks(min_val, max_val):
    """Build power-of-two axis ticks covering ``[min_val, max_val]``.

    Both bounds are clamped to at least 1e-10 before taking ``log2``. Ticks
    run from the power of two at or below the lower bound to the one at or
    above the upper bound.

    Returns:
        ``(tickvals, ticktext)`` where ``ticktext`` labels each tick as
        ``2<sup>k</sup>``.
    """
    smallest_allowed = 0.0000000001
    low = max(min_val, smallest_allowed)
    high = max(max_val, smallest_allowed)

    exponents = range(math.floor(math.log2(low)), math.ceil(math.log2(high)) + 1)
    tickvals = [2**exponent for exponent in exponents]
    ticktext = [f"2<sup>{exponent}</sup>" for exponent in exponents]
    return tickvals, ticktext
262
+
263
+
264
def extract_last_segment(s):
    """Return the text after the last underscore in ``s`` (``s`` itself if none)."""
    _, _, tail = s.rpartition("_")
    return tail
266
+
267
+
268
def extract_prefix(s):
    """Return the text before the last underscore in ``s`` (``s`` itself if none)."""
    head, separator, _ = s.rpartition("_")
    return head if separator else s
272
+
273
+
274
def interpolate_color(start_color, end_color, factor):
    """Linearly blend two RGB triples and format the result as ``rgb(r,g,b)``.

    ``factor`` 0 gives ``start_color`` and 1 gives ``end_color``. Each channel
    is truncated to an integer with ``int``.
    """
    r, g, b = (
        int(start_color[channel] + factor * (end_color[channel] - start_color[channel]))
        for channel in range(3)
    )
    return f"rgb({r},{g},{b})"
279
+
280
+
281
def construct_query(filters):
    """Turn a filter mapping into an ``and``-joined query string.

    Only the known fields are considered, in a fixed order, and fields whose
    filter value is missing or falsy are left out. String-valued fields are
    wrapped in single quotes; numeric fields are inserted bare.

    Returns:
        The query string, or ``None`` when no field contributes a clause.
    """
    field_order = (
        "ISA",
        "Precision",
        "Threads",
        "Loads",
        "Stores",
        "Interleaved",
        "DRAMBytes",
        "FPInst",
        "Date",
    )
    # Fields whose values are strings and must be quoted in the query.
    quoted_fields = {"ISA", "Precision", "Interleaved", "FPInst", "Date"}

    clauses = []
    for field_name in field_order:
        value = filters.get(field_name)
        if not value:
            continue
        rendered = f"'{value}'" if field_name in quoted_fields else value
        clauses.append(f"{field_name} == {rendered}")

    if not clauses:
        return None
    return " and ".join(clauses)
301
+
302
+
303
def construct_query_timestamp(df, ISA_list, Precision_list, Threads_list):
    """Filter ``df`` to rows with FP activity in the selected ISA/precision columns.

    Column names are built as ``Intel_FP_<isa>_<precision>``; names absent
    from ``df`` are ignored. A row is kept when any selected column is greater
    than 1 and its ``ThreadID`` is in ``Threads_list``.

    Returns:
        The filtered DataFrame, or an empty DataFrame (after printing a
        message) when none of the requested columns exist.
    """
    candidate_names = (
        f"Intel_FP_{isa}_{precision}" for isa in ISA_list for precision in Precision_list
    )
    selected_columns = [name for name in candidate_names if name in df.columns]

    if not selected_columns:
        print("No matching columns found for the selected ISA and Precision.")
        return pd.DataFrame()

    has_fp_activity = (df[selected_columns] > 1).any(axis=1)
    on_selected_thread = df["ThreadID"].isin(Threads_list)
    return df[has_fp_activity & on_selected_thread]
321
+
322
+
323
def roof_value_at_x(roof, x):
    """Evaluate a two-segment roof at ``x`` by linear interpolation.

    ``roof`` holds ``start``, ``ridge`` and ``end`` points as ``[x, y]``
    pairs. Positions up to and including the ridge use the start-ridge
    segment; positions beyond it use the ridge-end segment. A segment with
    zero width returns the y value of its first point.
    """
    start, ridge, end = roof["start"], roof["ridge"], roof["end"]

    if x <= ridge[0]:
        first, second = start, ridge
    else:
        first, second = ridge, end

    if second[0] == first[0]:
        return first[1]
    slope = (second[1] - first[1]) / (second[0] - first[0])
    return first[1] + slope * (x - first[0])
335
+
336
+
337
def label_cache_level(row, roofs):
    """
    Classify a performance point (Arithmetic Intensity, GFLOPS) by the first
    roof it lies strictly below.

    Roofs are checked in the order DRAM, L3, L2, L1 and the matching level is
    returned as 4, 3, 2 or 1 respectively. Levels absent from ``roofs`` are
    skipped. A point with a non-positive coordinate returns 0, and a point
    that is not below any available roof returns 6.
    """
    x = row["Arithmetic_Intensity"]
    y = row["GFLOPS"]

    if x <= 0 or y <= 0:
        return 0

    for level, code in (("DRAM", 4), ("L3", 3), ("L2", 2), ("L1", 1)):
        if level in roofs and y < roof_value_at_x(roofs[level], x):
            return code

    return 6  # Not below any roof
358
+
359
+
360
def calculate_roofline(values, min_ai):
    """Compute the characteristic points of each roof for plotting.

    ``values`` is indexed positionally: entries 0-3 are the bandwidths used
    for the L1, L2, L3 and DRAM roofs, entry 5 is the compute cap applied to
    those roofs, entry 4 is the compute cap of the extra roof built at the
    end, and entry 6 is the dictionary key under which that extra roof is
    stored.

    Arithmetic intensity is sampled on 200000 evenly spaced points from
    ``min(0.00390625, min_ai)`` to 256.

    Returns:
        A dict. Each cache level with a positive bandwidth maps to its
        ``start``, ``mid``, ``ridge`` and ``end`` points (``[ai, gflops]``
        pairs); ``values[6]`` maps to the ``ridge`` and ``end`` of the extra
        roof.
    """
    # NOTE(review): this initial value is overwritten inside the loop before use.
    aidots = [0] * 3
    FPaidots = [0] * 2
    FPgflopdots = [0] * 2

    ai = np.linspace(min(0.00390625, min_ai), 256, num=200000)
    cache_levels = ["L1", "L2", "L3", "DRAM"]

    dots = {}

    for cache_level in cache_levels:
        if values[cache_levels.index(cache_level)] > 0:
            aidots = [0, 0, 0]
            # Roof samples for this level: min(ai * bandwidth, values[5])
            y_values = carm_eq(ai, values[cache_levels.index(cache_level)], values[5])

            # Find the point where y_values stops increasing or reaches a plateau
            for i in range(1, len(y_values)):
                if y_values[i - 1] == y_values[i]:
                    aidots[1] = float(ai[i - 1])
                    break
            else:
                # No plateau inside the sampled range: use the last sample as
                # the ridge x. ``i`` is reassigned because it is read below.
                # NOTE(review): the offset 12 is unexplained - confirm intent.
                aidots[1] = float(ai[-1])
                i = len(y_values) - 12

            # Geometric means, i.e. midpoints on log-scaled axes.
            mid_ai = np.sqrt(aidots[1] * min(0.00390625, min_ai))
            mid_gflops = np.sqrt(y_values[0] * y_values[i - 1])

            dots[cache_level] = {
                "start": [min(0.00390625, min_ai), y_values[0]],
                "mid": [mid_ai, mid_gflops],
                "ridge": [aidots[1], y_values[i - 1]],
                "end": [ai[-1], y_values[-1]],
            }

    # The first non-zero bandwidth (in L1, L2, L3, DRAM order) drives the extra roof.
    # NOTE(review): ``top_roof`` stays unbound if all four bandwidths are falsy.
    for i in range(4):
        if values[i]:
            top_roof = values[i]
            break

    y_values = carm_eq(ai, top_roof, values[4])

    # NOTE(review): unlike the loop above there is no fallback when no plateau
    # is found; FPaidots[0] then keeps its initial 0.
    for i in range(1, len(y_values)):
        if y_values[i - 1] == y_values[i]:
            FPaidots[0] = float(ai[i - 1])
            break
    FPgflopdots[0] = y_values[i - 1]

    # Index 199999 is the last sample of the 200000-point sweep.
    FPaidots[1] = ai[199999]
    FPgflopdots[1] = y_values[199999]

    dots[values[6]] = {
        "ridge": [FPaidots[0], FPgflopdots[0]],
        "end": [FPaidots[1], FPgflopdots[1]],
    }

    return dots
417
+
418
+
419
def plot_roofline(values, dots, name_suffix, ISA, line_legend, line_size):
    """Build the Plotly line traces for a set of roofs.

    One trace is produced for every cache level (L1, L2, L3, DRAM) that has an
    entry in ``dots``, drawn through its start, ridge and end points, followed
    by one trace for the roof stored under ``dots[values[6]]``. Lines are
    black when ``name_suffix`` is the empty string and red otherwise.

    Returns:
        The list of ``go.Scatter`` traces, cache levels first.
    """
    roof_color = "black" if name_suffix == "" else "red"
    level_dashes = (("L1", "solid"), ("L2", "solid"), ("L3", "dash"), ("DRAM", "dot"))

    traces = []
    for position, (cache_level, dash_style) in enumerate(level_dashes):
        level_points = dots.get(cache_level)
        if not level_points:
            continue

        corners = (level_points["start"], level_points["ridge"], level_points["end"])
        traces.append(
            go.Scatter(
                x=[corner[0] for corner in corners],
                y=[corner[1] for corner in corners],
                mode="lines",
                text=[
                    "",
                    f"{cache_level} {ISA.upper()} Peak Bandwidth: {values[position]} GB/s",
                    f"FP FMA {ISA.upper()} Peak: {values[5]} GFLOP/s",
                ],
                hovertemplate="<b>%{text}</b><br>(%{x}, %{y})<br><extra></extra>",
                line={"color": roof_color, "dash": dash_style, "width": line_size},
                name=f"{cache_level} {ISA.upper()}",
                showlegend=line_legend,
            )
        )

    # Extra roof, keyed by the instruction name held in values[6].
    inst_roof = dots[values[6]]
    inst_label = f"FP {ISA.upper()} {values[6].upper()} Peak: {values[4]} GFLOP/s"
    traces.append(
        go.Scatter(
            x=[inst_roof["ridge"][0], inst_roof["end"][0]],
            y=[inst_roof["ridge"][1], inst_roof["end"][1]],
            mode="lines",
            text=[inst_label, inst_label],
            hovertemplate="<b>%{text}</b><br>(%{x}, %{y})<br><extra></extra>",
            line={"color": roof_color, "dash": "dashdot", "width": line_size},
            name=f"{values[6].upper()} {ISA.upper()}",
            showlegend=line_legend,
        )
    )

    return traces
481
+
482
+
483
def draw_annotation(
    values,
    lines,
    name_suffix,
    ISA,
    cache_level,
    graph_width,
    graph_height,
    x_range=None,
    y_range=None,
):
    """Build the text annotation for one roof of the plot.

    ``cache_level`` selects the roof:

    * ``"L1"``, ``"L2"``, ``"L3"`` or ``"DRAM"`` - a bandwidth label placed at
      the roof's ``mid`` point and rotated to follow the sloped segment as it
      appears on screen. ``x_range`` and ``y_range`` are required here and are
      used directly alongside ``log10`` coordinates, i.e. they are axis ranges
      in log10 units.
    * ``"FMA"`` - a horizontal label for ``values[5]``.
    * ``"FP"`` - a horizontal label for ``values[4]``.

    Labels are black and shifted up (factor 1.3) when ``name_suffix`` is
    ``"1"``; otherwise they are red and shifted down (factor 0.7).

    Returns:
        A ``go.layout.Annotation``, or an empty dict when the requested roof
        has no positive value or ``cache_level`` is not recognised. A cache
        level with a non-positive bandwidth no longer touches ``lines``, so a
        level missing from ``lines`` yields ``{}`` instead of ``KeyError``.
    """
    cache_levels = ["L1", "L2", "L3", "DRAM"]
    annotation = {}

    if name_suffix == "1":
        color = "black"
        factor = 1.3
    else:
        color = "red"
        factor = 0.7

    # Styling shared by all three annotation variants.
    box_style = {
        "showarrow": False,
        "font": {"color": color, "size": 12},
        "align": "center",
        "bgcolor": "white",
        "bordercolor": color,
        "borderwidth": 1,
    }

    if cache_level in cache_levels and values[cache_levels.index(cache_level)] > 0:
        bandwidth = values[cache_levels.index(cache_level)]
        roof = lines[cache_level]

        # Project the sloped segment (start -> ridge) from log-log data space
        # into pixel space, so the label angle matches the drawn line.
        log_x1, log_x2 = math.log10(roof["start"][0]), math.log10(roof["ridge"][0])
        log_y1, log_y2 = math.log10(roof["start"][1]), math.log10(roof["ridge"][1])

        log_xmin, log_xmax = x_range[0], x_range[1]
        log_ymin, log_ymax = y_range[0], y_range[1]

        x1_pixel = ((log_x1 - log_xmin) / (log_xmax - log_xmin)) * graph_width
        x2_pixel = ((log_x2 - log_xmin) / (log_xmax - log_xmin)) * graph_width

        # Pixel y grows downwards, hence the subtraction from graph_height.
        y1_pixel = graph_height - ((log_y1 - log_ymin) / (log_ymax - log_ymin)) * graph_height
        y2_pixel = graph_height - ((log_y2 - log_ymin) / (log_ymax - log_ymin)) * graph_height

        pixel_slope = (y2_pixel - y1_pixel) / (x2_pixel - x1_pixel)
        angle_degrees = math.degrees(math.atan(pixel_slope))

        annotation = go.layout.Annotation(
            x=math.log10(roof["mid"][0] * factor),
            y=math.log10(roof["mid"][1] * factor),
            text=f"{cache_level} {ISA} Bandwidth: {bandwidth:.3f} GB/s",
            textangle=angle_degrees,
            name=f"{cache_level}_{name_suffix}",
            **box_style,
        )

    if cache_level == "FMA" and values[5] > 0:
        # Geometric mean of ridge and end x: the middle of the flat part on a log axis.
        mid_ai = np.sqrt(lines["L1"]["ridge"][0] * lines["L1"]["end"][0])
        mid_gflops = lines["L1"]["ridge"][1]
        annotation = go.layout.Annotation(
            x=math.log10(mid_ai),
            y=math.log10(mid_gflops),
            text=f"FP FMA {ISA} Peak: {values[5]:.3f} GFLOP/s",
            textangle=0,
            name=f"FP_FMA_{name_suffix}",
            **box_style,
        )

    if cache_level == "FP" and values[4] > 0:
        mid_ai = np.sqrt(lines["L1"]["ridge"][0] * lines["L1"]["end"][0])
        mid_gflops = values[4]
        annotation = go.layout.Annotation(
            x=math.log10(mid_ai),
            y=math.log10(mid_gflops),
            text=f"FP {ISA} Peak: {values[4]:.3f} GFLOP/s",
            textangle=0,
            name=f"FP_{name_suffix}",
            **box_style,
        )

    return annotation
600
+
601
+
602
def build_total_tooltip_text(name_app, threads_app, totals, total_FP_inst, total_mem_inst):
    """Compose the HTML hover text for an application's aggregate point.

    The text starts with the application name and thread count. A line is
    added for every non-zero metric: FLOP counts per ISA and per precision
    (as a percentage of ``total_FP_inst``) and load/store counts (as a
    percentage of ``total_mem_inst``). Counts are shown in scientific
    notation with two decimals.
    """
    fp_counts = {
        isa: (totals[f"Intel_FP_{isa}_SP"], totals[f"Intel_FP_{isa}_DP"])
        for isa in ("Scalar", "SSE", "AVX2", "AVX512")
    }

    # (label, value, denominator used for the percentage)
    entries = [
        (f"{isa} Flops", single + double, total_FP_inst)
        for isa, (single, double) in fp_counts.items()
    ]
    entries.append(
        (
            "SP Flops",
            fp_counts["Scalar"][0] + fp_counts["SSE"][0] + fp_counts["AVX2"][0] + fp_counts["AVX512"][0],
            total_FP_inst,
        )
    )
    entries.append(
        (
            "DP Flops",
            fp_counts["Scalar"][1] + fp_counts["SSE"][1] + fp_counts["AVX2"][1] + fp_counts["AVX512"][1],
            total_FP_inst,
        )
    )
    entries.append(("Loads", totals["Intel_Loads"], total_mem_inst))
    entries.append(("Stores", totals["Intel_Stores"], total_mem_inst))

    pieces = [f"{name_app} Total</b><br>Extra Details</b><br> Threads: {threads_app}"]
    for label, value, denominator in entries:
        if value == 0:
            continue
        percentage = custom_round((value / denominator) * 100, 1)
        pieces.append(f"</b><br> {label}: {value:.2e} ({percentage}%)")

    return "</b>".join(pieces)
639
+
640
+
641
def build_timestamp_tooltip_text(
    scalar,
    sse,
    avx2,
    avx512,
    dp,
    load,
    timestamp_label,
    thread_ID,
    duration,
    paraver_value,
    paraver_label,
    window_name=None,
):
    """Compose the HTML hover text for a single timestamp point.

    The ISA shares, ``dp`` and ``load`` are percentages; the SP and store
    shares are derived as ``100 - dp`` and ``100 - load``. Only shares above
    0.1 are listed. The Paraver section (window, value, label) is appended
    only when ``window_name`` is truthy.
    """
    shares = (
        ("Scalar Flops", scalar),
        ("SSE Flops", sse),
        ("AVX2 Flops", avx2),
        ("AVX512 Flops", avx512),
        ("SP Flops", 100 - dp),
        ("DP Flops", dp),
        ("Loads", load),
        ("Stores", 100 - load),
    )

    pieces = [
        f"Timestamp: {timestamp_label}",
        f"</b><br> Thread: {thread_ID}</b><br> Duration(us): {duration}",
        "</b><br><b>Extra Details</b>",
    ]
    pieces.extend(
        f"</b><br> {label}: {custom_round(share, 1)}%" for label, share in shares if share > 0.1
    )

    if window_name:
        pieces += [
            "</b><br><b>Paraver Data</b>",
            f"</b><br> Window: {window_name}",
            f"</b><br> Value: {paraver_value}",
            f"</b><br> Label: {paraver_label}",
        ]

    return "".join(pieces)
682
+
683
+
684
def hsv_to_rgb(h, s, v):
    """Convert an HSV color to integer RGB channels.

    ``h * 6`` is split into a sector number and a fractional position within
    the sector; the sector (taken modulo 6) selects how ``v`` and the three
    intermediate levels map to red, green and blue. Each channel is scaled by
    255 and truncated with ``int``.
    """
    sector = int(h * 6)
    fraction = h * 6 - sector
    low = v * (1 - s)
    falling = v * (1 - fraction * s)
    rising = v * (1 - (1 - fraction) * s)

    channel_layouts = (
        (v, rising, low),
        (falling, v, low),
        (low, v, rising),
        (low, falling, v),
        (rising, low, v),
        (v, low, falling),
    )
    r, g, b = channel_layouts[sector % 6]

    return int(r * 255), int(g * 255), int(b * 255)
707
+
708
+
709
def hash_to_color(name):
    """Derive a stable RGB color from ``name``.

    The SHA-256 digest of the UTF-8 encoded name is reduced modulo 361 to a
    hue in degrees, which is converted with saturation 0.8 and value 0.9.
    """
    digest = hashlib.sha256(name.encode("utf-8")).hexdigest()
    hue_degrees = int(digest, 16) % 361
    return hsv_to_rgb(hue_degrees / 360.0, 0.8, 0.9)
715
+
716
+
717
def blend_rgb(weights, color_dict, return_rgb):
    """Blend colors by weight.

    Args:
        weights: mapping of key -> weight. Weights are normalized by their
            sum, so they need not add up to 1 or 100.
        color_dict: mapping of the same keys -> ``(r, g, b)`` triples.
        return_rgb: when true return an ``(r, g, b)`` tuple, otherwise a
            ``#rrggbb`` hex string.

    Returns:
        The weighted average color with channels rounded to integers. Black
        is returned - in the format selected by ``return_rgb`` - when
        ``weights`` is empty or its values sum to zero.
    """
    black = (0, 0, 0) if return_rgb else "#000000"
    if not weights:
        return black

    total = sum(weights.values())
    if total == 0:
        # Nothing to normalize by; avoid dividing by zero.
        return black

    r = g = b = 0
    for key, weight in weights.items():
        share = weight / total
        cr, cg, cb = color_dict[key]
        r += cr * share
        g += cg * share
        b += cb * share

    r, g, b = round(r), round(g), round(b)
    if return_rgb:
        return r, g, b
    return f"#{r:02x}{g:02x}{b:02x}"
737
+
738
+
739
def blend_colors(scalar, sse, avx2, avx512, dp, load, thread_ID, color_radio, return_rgb):
    """Pick the display color of a point for the selected coloring mode.

    ``color_radio`` selects the mode:

    * ``"ISA"`` - blend of the ISA colors weighted by the positive values
      among ``scalar``, ``sse``, ``avx2`` and ``avx512``.
    * ``"Precision"`` - blend of the DP and SP colors weighted by ``dp`` and
      ``100 - dp``; applied only when ``dp`` is positive.
    * ``"LD/ST Percentage"`` - blend of the load and store colors weighted by
      ``load`` and ``100 - load``; applied only when ``load`` is positive.
    * ``"Thread ID"`` - a color derived from the hash of ``thread_ID``.

    Returns:
        An ``(r, g, b)`` tuple when ``return_rgb`` is true, otherwise a
        ``#rrggbb`` string. Black is returned when the mode is unknown or has
        nothing to blend. The "Thread ID" mode honors ``return_rgb`` like
        every other mode.
    """
    black = (0, 0, 0) if return_rgb else "#000000"

    if color_radio == "ISA":
        weights = {"scalar": scalar, "sse": sse, "avx2": avx2, "avx512": avx512}
        active = {isa: share for isa, share in weights.items() if share > 0}
        if not active:
            return black
        isa_colors = {isa: color_map[intel_ISA_colors[isa]] for isa in active}
        return blend_rgb(active, isa_colors, return_rgb)

    if color_radio == "Precision":
        if not dp > 0:
            return black
        return blend_rgb({"dp": dp, "sp": 100 - dp}, precision_color_map, return_rgb)

    if color_radio == "LD/ST Percentage":
        if not load > 0:
            return black
        return blend_rgb({"load": load, "store": 100 - load}, loadstore_color_map, return_rgb)

    if color_radio == "Thread ID":
        r, g, b = hash_to_color(str(thread_ID))
        if return_rgb:
            return r, g, b
        return f"#{r:02x}{g:02x}{b:02x}"

    return black
788
+
789
+
790
def group_consecutive_by_rgb(color_map_df):
    """Collapse runs of adjacent rows that share the same ``r``, ``g``, ``b``.

    Rows are visited in positional order. Each run of consecutive rows with
    equal color is summarized by ``process_group``, and the summaries are
    returned as a new DataFrame.
    """
    summaries = []
    run = []

    for position in range(len(color_map_df)):
        row = color_map_df.iloc[position]

        if run:
            previous = run[-1]
            same_color = (row["r"], row["g"], row["b"]) == (
                previous["r"],
                previous["g"],
                previous["b"],
            )
            if not same_color:
                # Color changed: close the current run and start a new one.
                summaries.append(process_group(run))
                run = []

        run.append(row)

    if run:
        summaries.append(process_group(run))

    return pd.DataFrame(summaries)
815
+
816
+
817
def process_group(group_rows):
    """Summarize a run of same-colored rows into a single record.

    A one-row group keeps that row's ratio and ratio string. A larger group
    reports the range ``"<min>-<max>"`` (six decimals each) and a string
    prefixed with ``LD/ST: ``. The color is taken from the first row.
    """
    if len(group_rows) == 1:
        (only_row,) = group_rows
        ratio = only_row["Load/Store_ratio"]
        ratio_str = only_row["Load/Store_ratio_string"]
    else:
        ratios = [row["Load/Store_ratio"] for row in group_rows]
        ratio = f"{min(ratios):.6f}-{max(ratios):.6f}"
        ratio_str = f"LD/ST: {ratio}"

    first_row = group_rows[0]
    return {
        "Load/Store_ratio": ratio,
        "Load/Store_ratio_string": ratio_str,
        "r": first_row["r"],
        "g": first_row["g"],
        "b": first_row["b"],
    }
834
+
835
+
836
def format_ld_st_csv(color_map_df, output_path):
    """Write the color map to ``output_path``, one row per line.

    Each line has the form ``<percentage> "<percentage_string>",<r>,<g>,<b>``.
    An existing file is overwritten.
    """
    columns = ("percentage", "percentage_string", "r", "g", "b")
    with open(output_path, "w") as out:
        for _, entry in color_map_df.iterrows():
            ratio, label, r, g, b = (entry[column] for column in columns)
            out.write(f'{ratio} "{label}",{r},{g},{b}\n')