carm-paraver 1.0.0.dev0__tar.gz → 1.0.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. carm_paraver-1.0.0.dev1/PKG-INFO +168 -0
  2. carm_paraver-1.0.0.dev1/README.md +151 -0
  3. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/GUI_utils.py +24 -2
  4. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/Paraver_CARM.py +76 -19
  5. carm_paraver-1.0.0.dev1/carm_paraver.egg-info/PKG-INFO +168 -0
  6. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver.egg-info/SOURCES.txt +1 -3
  7. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver.egg-info/requires.txt +1 -0
  8. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver.egg-info/top_level.txt +1 -0
  9. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/pyproject.toml +3 -2
  10. carm_paraver-1.0.0.dev0/PKG-INFO +0 -140
  11. carm_paraver-1.0.0.dev0/README.md +0 -124
  12. carm_paraver-1.0.0.dev0/carm_paraver/paraver_carm_configs/IntelV2/__init__.py +0 -0
  13. carm_paraver-1.0.0.dev0/carm_paraver/paraver_carm_configs/__init__.py +0 -0
  14. carm_paraver-1.0.0.dev0/carm_paraver.egg-info/PKG-INFO +0 -140
  15. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/LICENSE +0 -0
  16. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/__init__.py +0 -0
  17. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/__main__.py +0 -0
  18. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/analysis_helpers.py +0 -0
  19. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/CARM_icon3.svg +0 -0
  20. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/CHAMP_logo.svg +0 -0
  21. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/__init__.py +0 -0
  22. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/bsc.svg +0 -0
  23. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/carm_bsc.png +0 -0
  24. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/carm_bsc.svg +0 -0
  25. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/menu_icon.png +0 -0
  26. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/assets/style.css +0 -0
  27. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX2_DP.cfg +0 -0
  28. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX2_SP.cfg +0 -0
  29. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX512_DP.cfg +0 -0
  30. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_AVX512_SP.cfg +0 -0
  31. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_SSE_DP.cfg +0 -0
  32. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_SSE_SP.cfg +0 -0
  33. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_Scalar_DP.cfg +0 -0
  34. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_FP_Scalar_SP.cfg +0 -0
  35. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_Loads.cfg +0 -0
  36. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel/Intel_Stores.cfg +0 -0
  37. {carm_paraver-1.0.0.dev0/carm_paraver/carm_results → carm_paraver-1.0.0.dev1/carm_paraver/paraver_carm_configs/Intel}/__init__.py +0 -0
  38. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX2_DP.cfg +0 -0
  39. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX2_SP.cfg +0 -0
  40. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX512_DP.cfg +0 -0
  41. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_AVX512_SP.cfg +0 -0
  42. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_SSE_DP.cfg +0 -0
  43. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_SSE_SP.cfg +0 -0
  44. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_Scalar_DP.cfg +0 -0
  45. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_FP_Scalar_SP.cfg +0 -0
  46. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_Loads.cfg +0 -0
  47. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/IntelV2/Intel_Stores.cfg +0 -0
  48. {carm_paraver-1.0.0.dev0/carm_paraver/carm_results/roofline → carm_paraver-1.0.0.dev1/carm_paraver/paraver_carm_configs/IntelV2}/__init__.py +0 -0
  49. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel_CARM_DP.cfg +0 -0
  50. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel_CARM_DPV2.cfg +0 -0
  51. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel_CARM_DP_Extrae.xml +0 -0
  52. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel_CARM_SPV2.cfg +0 -0
  53. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver/paraver_carm_configs/Intel_CARM_SP_Extrae.xml +0 -0
  54. {carm_paraver-1.0.0.dev0/carm_paraver/paraver_carm_configs/Intel → carm_paraver-1.0.0.dev1/carm_paraver/paraver_carm_configs}/__init__.py +0 -0
  55. {carm_paraver-1.0.0.dev0/carm_paraver/carm_results → carm_paraver-1.0.0.dev1/carm_paraver/sample_data}/roofline/MN5_roofline.csv +0 -0
  56. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver.egg-info/dependency_links.txt +0 -0
  57. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/carm_paraver.egg-info/entry_points.txt +0 -0
  58. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/setup.cfg +0 -0
  59. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/tests/test_analysis_helpers.py +0 -0
  60. {carm_paraver-1.0.0.dev0 → carm_paraver-1.0.0.dev1}/tools/aggregate_profiles.py +0 -0
@@ -0,0 +1,168 @@
1
+ Metadata-Version: 2.4
2
+ Name: carm-paraver
3
+ Version: 1.0.0.dev1
4
+ Summary: Dash-based CARM analysis for Paraver traces
5
+ Author: CARM Contributors
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: dash>=4.1.0
10
+ Requires-Dist: dash-bootstrap-components>=2.0.4
11
+ Requires-Dist: dash-daq>=0.6.0
12
+ Requires-Dist: numpy>=2.0.0
13
+ Requires-Dist: pandas>=2.3.3
14
+ Requires-Dist: platformdirs>=4.2.2
15
+ Requires-Dist: plotly>=6.0.0
16
+ Dynamic: license-file
17
+
18
+ # CARM-Paraver GUI
19
+
20
+ This GUI allows the analysis of [Paraver](https://tools.bsc.es/paraver) traces on the Cache-Aware Roofline Model (CARM) for floating-point operations. It can be launched from the Paraver interface and send labeled events back to Paraver for visualization.
21
+
22
+ # Requirements
23
+ - Python (tested with 3.9.25, 3.10.12, 3.12.3)
24
+ - [Paraver, Extrae](https://tools.bsc.es/downloads)
25
+
26
+ # How to use
27
+
28
+ ## Installation
29
+ **The recommended way to install the package** is via `pip`:
30
+ ```bash
31
+ pip install carm-paraver
32
+ ```
33
+ Alternatively, you can install it from source by cloning this repository and running:
34
+ ```bash
35
+ pip install .
36
+ ```
37
+ If the install fails due to dependency conflicts, you can use a Python virtual environment to install the package and its dependencies in an isolated environment. To do this, you can run:
38
+ ```bash
39
+ python -m venv .venv
40
+ source .venv/bin/activate
41
+ pip install carm-paraver
42
+ ```
43
+ If you install in a virtual environment, make sure to run Paraver from the same environment:
44
+ ```bash
45
+ source .venv/bin/activate
46
+ wxparaver
47
+ ```
48
+
49
+ ## First-time Setup
50
+ CARM-Paraver needs `paramedir` to be in your PATH in order to run. To add it, add Paraver's bin directory to your PATH. You can make this permanent by appending it to your `.bashrc` or `.bash_profile` (change the path accordingly):
51
+
52
+ ```bash
53
+ export PATH=/path/to/paraver/bin:$PATH
54
+ ```
55
+
56
+ ## Running
57
+ The GUI is launched via the Paraver interface like so:
58
+ 1. Use [Extrae](https://github.com/bsc-performance-tools/extrae) to generate a trace with the required counters ([see how to configure Extrae below](#paraver-trace-requirements)).
59
+ 2. Load the trace in Paraver, and zoom into a section of interest.
60
+ 3. Right click the timeline and select the option to launch the CARM GUI.
61
+ 4. Configure the options in Paraver to your liking (see [Launch Configuration](#launch-configuration)), and click "Run".
62
+ 5. Click the link printed in the Paraver console to open the GUI in your browser.
63
+
64
+ You will now have the CARM GUI open, showing the architecture's roofline, and the events from the Paraver trace represented as points on the plot. Their position on the roofline, which is determined by their performance and arithmetic intensity, can be used to identify bottlenecks and optimization opportunities for the respective code section. Check the [CARM GUI Features](#carm-gui-features) section for more details about the GUI, and how you can label events and send them back to Paraver for visualization.
65
+
66
+ If you get any errors, be sure to consult the [First-time Setup](#first-time-setup) and [Paraver Trace Requirements](#paraver-trace-requirements) sections.
67
+
68
+ ## Paraver Trace Requirements
69
+
70
+ To enable CARM analysis, your Paraver trace needs to include information on the floating-point and memory operations performed by the application. To do this, [configure Extrae](https://tools.bsc.es/doc/html/extrae/xml.html#xml-section-performance-counters) to include the counters in the tables below.
71
+
72
+ #### Which counters to include?
73
+ Include only the necessary counters for your analysis, so they fit in a single counter set. If too many counters are active, accuracy may be reduced.
74
+
75
+ Take the application examples below. For each case, the tables below indicate which counters you should include in your Extrae configuration:
76
+ - **App 1**: The application only uses double precision, but you don't know which vector ISAs it uses.
77
+ - **App 2**: The application is vectorized with AVX2, using both precisions.
78
+
79
+ If you are unsure, include all counters and prune them later as you learn more about the application. Using separate load and store counters is recommended, as it allows for a more detailed analysis.
80
+
81
+ #### Intel CPUs
82
+ | FP/Mem Operation | Intel Counter | App 1 | App 2 |
83
+ | ---------------- | ------------------------------------------ | ------- | ------- |
84
+ | Scalar DP Insts | `FP_ARITH_INST_RETIRED:SCALAR_DOUBLE` | ✓ | ✓ |
85
+ | Scalar SP Insts | `FP_ARITH_INST_RETIRED:SCALAR_SINGLE` | | ✓ |
86
+ | SSE DP Insts | `FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE` | ✓ | |
87
+ | SSE SP Insts | `FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE` | | |
88
+ | AVX2 DP Insts | `FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE` | ✓ | ✓ |
89
+ | AVX2 SP Insts | `FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE` | | ✓ |
90
+ | AVX512 DP Insts | `FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE` | ✓ | |
91
+ | AVX512 SP Insts | `FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE` | | |
92
+ | Loads | `MEM_INST_RETIRED:ALL_LOADS` | ✓ | ✓ |
93
+ | Stores | `MEM_INST_RETIRED:ALL_STORES` | ✓ | ✓ |
94
+ | Loads and Stores | `MEM_INST_RETIRED:ALL` | | |
95
+
96
+ #### AMD CPUs
97
+ | FP/Mem Operation | AMD Counter | App 1 | App 2 |
98
+ | ---------------- | ---------------------------------------------- | ------- | ------- |
99
+ | Mul/Add DP Flops | `retired_sse_avx_operations:dp_mult_add_flops` | ✓ | ✓ |
100
+ | Mul/Add SP Flops | `retired_sse_avx_operations:sp_mult_add_flops` | | ✓ |
101
+ | Add/Sub DP Flops | `retired_sse_avx_operations:dp_add_sub_flops` | ✓ | ✓ |
102
+ | Add/Sub SP Flops | `retired_sse_avx_operations:sp_add_sub_flops` | | ✓ |
103
+ | Mul DP Flops | `retired_sse_avx_operations:dp_mult_flops` | ✓ | ✓ |
104
+ | Mul SP Flops | `retired_sse_avx_operations:sp_mult_flops` | | ✓ |
105
+ | Div DP Flops | `retired_sse_avx_operations:dp_div_flops` | ✓ | ✓ |
106
+ | Div SP Flops | `retired_sse_avx_operations:sp_div_flops` | | ✓ |
107
+ | Loads | `ls_dispatch:ld_dispatch` | ✓ | ✓ |
108
+ | Stores | `ls_dispatch:store_dispatch` | ✓ | ✓ |
109
+
110
+ #### Additional recommendations
111
+ For best results, when labeling your code with [Extrae events](https://tools.bsc.es/doc/html/extrae/api.html), e.g. with `Extrae_eventandcounters` calls, **avoid labeling regions that include MPI calls**. Focus on labeling regions of pure computation, as MPI calls will cause the region and hardware counter timestamps to not match, preventing them from being shown on the CARM GUI.
112
+
113
+ ## CARM Benchmarking
114
+
115
+ To benchmark your architecture and display its roofline in the CARM GUI, use the [CARM Tool](https://github.com/champ-hub/carm-roofline). **Note: for compatibility, use the [latest version of the CARM Tool](https://pypi.org/project/carm-roofline/)**
116
+
117
+ This tool ships a series of sample rooflines from a MareNostrum 5 GPP node.
118
+
119
+ ## CARM GUI Features
120
+
121
+ ### Launch Configuration
122
+ **Use window colors:**
123
+ Controls which coloring scheme is used in the CARM GUI: the same colors as the Paraver timeline (if enabled) or the selected CARM GUI coloring scheme (see right sidebar options).
124
+
125
+ **Use Semantic Window:**
126
+ Controls whether the Paraver semantic window is used: if enabled, the GUI displays only the timestamps that are within the semantic window of the Paraver timeline. If disabled, all timestamps in the trace are displayed.
127
+
128
+ **Accumulate values:**
129
+ Controls whether timestamps (with the same underlying Paraver value) are averaged. Allows for similar timestamps to be grouped into a single, per-thread point, or to plot all timestamps individually.
130
+
131
+ ### Left Sidebar
132
+
133
+ **Use Paraver/CARM Colors:**
134
+ Same as above's "Use window colors"
135
+
136
+ **Use Semantic Window / All Timestamps:**
137
+ Same as above's "Use Semantic Window"
138
+
139
+ **Plot Raw/Accumulated Values:**
140
+ Same as above's "Accumulate values"
141
+
142
+ **Re-Sync Timeline With Paraver:**
143
+ Re-syncs the plotted timestamps in the CARM GUI with the timestamps being viewed in the Paraver timeline from which the CARM GUI was launched. This first requires the **Time Sync** button to be clicked on the Paraver side; the CARM GUI will usually keep itself synced to the Paraver timeline whenever the **Time Sync** button is clicked in the Paraver interface. If the user changes the displayed timestamps in the CARM GUI and wishes to return to the same interval shown in the Paraver timeline, they can use the **Re-Sync Timeline With Paraver** button.
144
+
145
+ **Send Timestamps Roof Labels:**
146
+ Labels the timestamps based on which roof they are under, for viewing in Paraver. The path of the generated trace will be printed in the Paraver console, and can be clicked to open the trace in Paraver. You can then select the trace and click *New single timeline window* to view the timestamps with the new labels.
147
+
148
+ **Send Timestamps LD/ST Percentage Colors:**
149
+ Same as above, but labels the timestamps based on the percentage of loads to stores.
150
+
151
+ **Send Timestamps SP/DP Percentage Colors:**
152
+ Same as above, but labels the timestamps based on the percentage of single to double precision operations.
153
+
154
+ ### Right Sidebar
155
+ The right sidebar controls the CARM GUI specific features, which include various filtering and coloring options as well as graphical annotations.
156
+
157
+ Useful options include:
158
+ - **Filter points** by vector ISA or precision
159
+ - **Color points** based on thread ID, precision, vector ISA or load/store ratio
160
+ - Note that this requires the left sidebar option to be set to "Use CARM GUI Colors".
161
+
162
+ The plot can be configured to normalize the performance roof to the number of threads. The normalized roofs represent the performance per thread, which matches the Paraver timestamps (also per thread). This mode is recommended when relating application performance to the underlying hardware. The non-normalized roofs represent the overall performance of the architecture, and are best for understanding the hardware capabilities.
163
+
164
+ ## GUI Performance
165
+ The GUI may become slow when plotting a very large number of events. To improve performance, you can:
166
+ - Enable the "Accumulate values" option to group similar events into a single point.
167
+ - Enable the "Use Semantic Window" option to only plot events visible in Paraver.
168
+ - Focus your analysis on a smaller time window in the Paraver timeline.
@@ -0,0 +1,151 @@
1
+ # CARM-Paraver GUI
2
+
3
+ This GUI allows the analysis of [Paraver](https://tools.bsc.es/paraver) traces on the Cache-Aware Roofline Model (CARM) for floating-point operations. It can be launched from the Paraver interface and send labeled events back to Paraver for visualization.
4
+
5
+ # Requirements
6
+ - Python (tested with 3.9.25, 3.10.12, 3.12.3)
7
+ - [Paraver, Extrae](https://tools.bsc.es/downloads)
8
+
9
+ # How to use
10
+
11
+ ## Installation
12
+ **The recommended way to install the package** is via `pip`:
13
+ ```bash
14
+ pip install carm-paraver
15
+ ```
16
+ Alternatively, you can install it from source by cloning this repository and running:
17
+ ```bash
18
+ pip install .
19
+ ```
20
+ If the install fails due to dependency conflicts, you can use a Python virtual environment to install the package and its dependencies in an isolated environment. To do this, you can run:
21
+ ```bash
22
+ python -m venv .venv
23
+ source .venv/bin/activate
24
+ pip install carm-paraver
25
+ ```
26
+ If you install in a virtual environment, make sure to run Paraver from the same environment:
27
+ ```bash
28
+ source .venv/bin/activate
29
+ wxparaver
30
+ ```
31
+
32
+ ## First-time Setup
33
+ CARM-Paraver needs `paramedir` to be in your PATH in order to run. To add it, add Paraver's bin directory to your PATH. You can make this permanent by appending it to your `.bashrc` or `.bash_profile` (change the path accordingly):
34
+
35
+ ```bash
36
+ export PATH=/path/to/paraver/bin:$PATH
37
+ ```
38
+
39
+ ## Running
40
+ The GUI is launched via the Paraver interface like so:
41
+ 1. Use [Extrae](https://github.com/bsc-performance-tools/extrae) to generate a trace with the required counters ([see how to configure Extrae below](#paraver-trace-requirements)).
42
+ 2. Load the trace in Paraver, and zoom into a section of interest.
43
+ 3. Right click the timeline and select the option to launch the CARM GUI.
44
+ 4. Configure the options in Paraver to your liking (see [Launch Configuration](#launch-configuration)), and click "Run".
45
+ 5. Click the link printed in the Paraver console to open the GUI in your browser.
46
+
47
+ You will now have the CARM GUI open, showing the architecture's roofline, and the events from the Paraver trace represented as points on the plot. Their position on the roofline, which is determined by their performance and arithmetic intensity, can be used to identify bottlenecks and optimization opportunities for the respective code section. Check the [CARM GUI Features](#carm-gui-features) section for more details about the GUI, and how you can label events and send them back to Paraver for visualization.
48
+
49
+ If you get any errors, be sure to consult the [First-time Setup](#first-time-setup) and [Paraver Trace Requirements](#paraver-trace-requirements) sections.
50
+
51
+ ## Paraver Trace Requirements
52
+
53
+ To enable CARM analysis, your Paraver trace needs to include information on the floating-point and memory operations performed by the application. To do this, [configure Extrae](https://tools.bsc.es/doc/html/extrae/xml.html#xml-section-performance-counters) to include the counters in the tables below.
54
+
55
+ #### Which counters to include?
56
+ Include only the necessary counters for your analysis, so they fit in a single counter set. If too many counters are active, accuracy may be reduced.
57
+
58
+ Take the application examples below. For each case, the tables below indicate which counters you should include in your Extrae configuration:
59
+ - **App 1**: The application only uses double precision, but you don't know which vector ISAs it uses.
60
+ - **App 2**: The application is vectorized with AVX2, using both precisions.
61
+
62
+ If you are unsure, include all counters and prune them later as you learn more about the application. Using separate load and store counters is recommended, as it allows for a more detailed analysis.
63
+
64
+ #### Intel CPUs
65
+ | FP/Mem Operation | Intel Counter | App 1 | App 2 |
66
+ | ---------------- | ------------------------------------------ | ------- | ------- |
67
+ | Scalar DP Insts | `FP_ARITH_INST_RETIRED:SCALAR_DOUBLE` | ✓ | ✓ |
68
+ | Scalar SP Insts | `FP_ARITH_INST_RETIRED:SCALAR_SINGLE` | | ✓ |
69
+ | SSE DP Insts | `FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE` | ✓ | |
70
+ | SSE SP Insts | `FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE` | | |
71
+ | AVX2 DP Insts | `FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE` | ✓ | ✓ |
72
+ | AVX2 SP Insts | `FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE` | | ✓ |
73
+ | AVX512 DP Insts | `FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE` | ✓ | |
74
+ | AVX512 SP Insts | `FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE` | | |
75
+ | Loads | `MEM_INST_RETIRED:ALL_LOADS` | ✓ | ✓ |
76
+ | Stores | `MEM_INST_RETIRED:ALL_STORES` | ✓ | ✓ |
77
+ | Loads and Stores | `MEM_INST_RETIRED:ALL` | | |
78
+
79
+ #### AMD CPUs
80
+ | FP/Mem Operation | AMD Counter | App 1 | App 2 |
81
+ | ---------------- | ---------------------------------------------- | ------- | ------- |
82
+ | Mul/Add DP Flops | `retired_sse_avx_operations:dp_mult_add_flops` | ✓ | ✓ |
83
+ | Mul/Add SP Flops | `retired_sse_avx_operations:sp_mult_add_flops` | | ✓ |
84
+ | Add/Sub DP Flops | `retired_sse_avx_operations:dp_add_sub_flops` | ✓ | ✓ |
85
+ | Add/Sub SP Flops | `retired_sse_avx_operations:sp_add_sub_flops` | | ✓ |
86
+ | Mul DP Flops | `retired_sse_avx_operations:dp_mult_flops` | ✓ | ✓ |
87
+ | Mul SP Flops | `retired_sse_avx_operations:sp_mult_flops` | | ✓ |
88
+ | Div DP Flops | `retired_sse_avx_operations:dp_div_flops` | ✓ | ✓ |
89
+ | Div SP Flops | `retired_sse_avx_operations:sp_div_flops` | | ✓ |
90
+ | Loads | `ls_dispatch:ld_dispatch` | ✓ | ✓ |
91
+ | Stores | `ls_dispatch:store_dispatch` | ✓ | ✓ |
92
+
93
+ #### Additional recommendations
94
+ For best results, when labeling your code with [Extrae events](https://tools.bsc.es/doc/html/extrae/api.html), e.g. with `Extrae_eventandcounters` calls, **avoid labeling regions that include MPI calls**. Focus on labeling regions of pure computation, as MPI calls will cause the region and hardware counter timestamps to not match, preventing them from being shown on the CARM GUI.
95
+
96
+ ## CARM Benchmarking
97
+
98
+ To benchmark your architecture and display its roofline in the CARM GUI, use the [CARM Tool](https://github.com/champ-hub/carm-roofline). **Note: for compatibility, use the [latest version of the CARM Tool](https://pypi.org/project/carm-roofline/)**
99
+
100
+ This tool ships a series of sample rooflines from a MareNostrum 5 GPP node.
101
+
102
+ ## CARM GUI Features
103
+
104
+ ### Launch Configuration
105
+ **Use window colors:**
106
+ Controls which coloring scheme is used in the CARM GUI: the same colors as the Paraver timeline (if enabled) or the selected CARM GUI coloring scheme (see right sidebar options).
107
+
108
+ **Use Semantic Window:**
109
+ Controls whether the Paraver semantic window is used: if enabled, the GUI displays only the timestamps that are within the semantic window of the Paraver timeline. If disabled, all timestamps in the trace are displayed.
110
+
111
+ **Accumulate values:**
112
+ Controls whether timestamps (with the same underlying Paraver value) are averaged. Allows for similar timestamps to be grouped into a single, per-thread point, or to plot all timestamps individually.
113
+
114
+ ### Left Sidebar
115
+
116
+ **Use Paraver/CARM Colors:**
117
+ Same as above's "Use window colors"
118
+
119
+ **Use Semantic Window / All Timestamps:**
120
+ Same as above's "Use Semantic Window"
121
+
122
+ **Plot Raw/Accumulated Values:**
123
+ Same as above's "Accumulate values"
124
+
125
+ **Re-Sync Timeline With Paraver:**
126
+ Re-syncs the plotted timestamps in the CARM GUI with the timestamps being viewed in the Paraver timeline from which the CARM GUI was launched. This first requires the **Time Sync** button to be clicked on the Paraver side; the CARM GUI will usually keep itself synced to the Paraver timeline whenever the **Time Sync** button is clicked in the Paraver interface. If the user changes the displayed timestamps in the CARM GUI and wishes to return to the same interval shown in the Paraver timeline, they can use the **Re-Sync Timeline With Paraver** button.
127
+
128
+ **Send Timestamps Roof Labels:**
129
+ Labels the timestamps based on which roof they are under, for viewing in Paraver. The path of the generated trace will be printed in the Paraver console, and can be clicked to open the trace in Paraver. You can then select the trace and click *New single timeline window* to view the timestamps with the new labels.
130
+
131
+ **Send Timestamps LD/ST Percentage Colors:**
132
+ Same as above, but labels the timestamps based on the percentage of loads to stores.
133
+
134
+ **Send Timestamps SP/DP Percentage Colors:**
135
+ Same as above, but labels the timestamps based on the percentage of single to double precision operations.
136
+
137
+ ### Right Sidebar
138
+ The right sidebar controls the CARM GUI specific features, which include various filtering and coloring options as well as graphical annotations.
139
+
140
+ Useful options include:
141
+ - **Filter points** by vector ISA or precision
142
+ - **Color points** based on thread ID, precision, vector ISA or load/store ratio
143
+ - Note that this requires the left sidebar option to be set to "Use CARM GUI Colors".
144
+
145
+ The plot can be configured to normalize the performance roof to the number of threads. The normalized roofs represent the performance per thread, which matches the Paraver timestamps (also per thread). This mode is recommended when relating application performance to the underlying hardware. The non-normalized roofs represent the overall performance of the architecture, and are best for understanding the hardware capabilities.
146
+
147
+ ## GUI Performance
148
+ The GUI may become slow when plotting a very large number of events. To improve performance, you can:
149
+ - Enable the "Accumulate values" option to group similar events into a single point.
150
+ - Enable the "Use Semantic Window" option to only plot events visible in Paraver.
151
+ - Focus your analysis on a smaller time window in the Paraver timeline.
@@ -362,6 +362,18 @@ def calculate_roofline(values, min_ai):
362
362
  FPaidots = [0] * 2
363
363
  FPgflopdots = [0] * 2
364
364
 
365
+ try:
366
+ fp_fma = float(values[5])
367
+ except (TypeError, ValueError):
368
+ fp_fma = 0.0
369
+ try:
370
+ fp_base = float(values[4])
371
+ except (TypeError, ValueError):
372
+ fp_base = 0.0
373
+
374
+ # Fall back to non-FMA peak when FP_FMA is missing/zero.
375
+ fp_peak = fp_fma if fp_fma > 0 else fp_base
376
+
365
377
  ai = np.linspace(min(0.00390625, min_ai), 256, num=200000)
366
378
  cache_levels = ["L1", "L2", "L3", "DRAM"]
367
379
 
@@ -371,7 +383,7 @@ def calculate_roofline(values, min_ai):
371
383
  if values[cache_levels.index(cache_level)] > 0:
372
384
  aidots = [0, 0, 0]
373
385
  # Compute the first point
374
- y_values = carm_eq(ai, values[cache_levels.index(cache_level)], values[5])
386
+ y_values = carm_eq(ai, values[cache_levels.index(cache_level)], fp_peak)
375
387
 
376
388
  # Find the point where y_values stops increasing or reaches a plateau
377
389
  for i in range(1, len(y_values)):
@@ -531,7 +543,17 @@ def draw_annotation(
531
543
 
532
544
  if cache_level in cache_levels and values[cache_levels.index(cache_level)] > 0:
533
545
  aidots[0] = 0.00390625
534
- y_values = carm_eq(ai, values[cache_levels.index(cache_level)], values[5])
546
+ try:
547
+ fp_fma = float(values[5])
548
+ except (TypeError, ValueError):
549
+ fp_fma = 0.0
550
+ try:
551
+ fp_base = float(values[4])
552
+ except (TypeError, ValueError):
553
+ fp_base = 0.0
554
+ fp_peak = fp_fma if fp_fma > 0 else fp_base
555
+
556
+ y_values = carm_eq(ai, values[cache_levels.index(cache_level)], fp_peak)
535
557
  gflopdots[0] = y_values[0]
536
558
  for i in range(1, len(y_values)):
537
559
  if y_values[i - 1] == y_values[i]:
@@ -10,12 +10,14 @@ import logging
10
10
  import math
11
11
  import os
12
12
  import re
13
+ import shutil
13
14
  import signal
14
15
  import socket
15
16
  import subprocess
16
17
  import sys
17
18
  import tempfile
18
19
  import time
20
+ from importlib import resources
19
21
  from typing import Any
20
22
 
21
23
  import dash
@@ -26,6 +28,7 @@ import dash_daq as daq
26
28
  # Run: pip install dash dash-bootstrap-components dash-daq numpy pandas plotly
27
29
  # To get all of the Libraries in case requirements.txt method fails
28
30
  import pandas as pd
31
+ import platformdirs
29
32
  import plotly.graph_objects as go
30
33
  from dash import ALL, Input, Output, State, callback_context, dcc, html
31
34
  from dash.exceptions import PreventUpdate
@@ -106,7 +109,45 @@ if SELECTED_PORT is None:
106
109
 
107
110
  script_dir = os.path.dirname(os.path.abspath(__file__))
108
111
  assets_dir = os.path.join(script_dir, "assets")
109
- carm_results_path = os.path.join(script_dir, "carm_results", "roofline")
112
+
113
+
114
+ def _resolve_roofline_data_dir() -> str:
115
+ data_dir = platformdirs.user_data_dir("carm", appauthor=False)
116
+ roofline_dir = os.path.join(data_dir, "roofline")
117
+ os.makedirs(roofline_dir, exist_ok=True)
118
+ return roofline_dir
119
+
120
+
121
+ def _seed_roofline_data(roofline_dir: str) -> None:
122
+ if any(name.endswith(".csv") for name in os.listdir(roofline_dir)):
123
+ return
124
+
125
+ sample_ref = resources.files("carm_paraver").joinpath(
126
+ "sample_data",
127
+ "roofline",
128
+ "MN5_roofline.csv",
129
+ )
130
+ try:
131
+ with resources.as_file(sample_ref) as sample_path:
132
+ shutil.copy2(sample_path, os.path.join(roofline_dir, sample_path.name))
133
+ except FileNotFoundError:
134
+ print(
135
+ "ERROR: bundled MN5 roofline sample is missing; unable to seed data directory.",
136
+ file=sys.stderr,
137
+ flush=True,
138
+ )
139
+ sys.exit(1)
140
+ except OSError as exc:
141
+ print(
142
+ f"ERROR: unable to seed roofline data in {roofline_dir}: {exc}",
143
+ file=sys.stderr,
144
+ flush=True,
145
+ )
146
+ sys.exit(1)
147
+
148
+
149
+ carm_results_path = _resolve_roofline_data_dir()
150
+ _seed_roofline_data(carm_results_path)
110
151
 
111
152
  # Global Variables
112
153
  n_segments = 0
@@ -278,9 +319,15 @@ parser.add_argument("--mask_csv", action="store_true", help="Use mask CSV")
278
319
  parser.add_argument("-ac", action="store_true", help="Optional flag for accumulate values mode")
279
320
  parser.add_argument("--csv", type=str, required=True, help="Path to the mask CSV")
280
321
  parser.add_argument("trace_path", type=str, help="Path to the .prv file")
322
+ parser.add_argument("--debug", "-d", action="store_true", help="Enable debug logging")
281
323
 
282
324
  args = parser.parse_args()
283
325
 
326
+ if args.debug:
327
+ logging.basicConfig(level=logging.DEBUG)
328
+
329
+ logging.debug(f"Parsed arguments: {args}")
330
+
284
331
  min_dur = args.min_dur
285
332
  use_paraver_coloring = args.color_csv
286
333
  use_mask_csv = args.mask_csv
@@ -434,10 +481,13 @@ if prv_trace_path.endswith(".prv") or prv_trace_path.endswith(".gz"):
434
481
  print("Paramedir execution finished, calculating CARM metrics.", flush=True)
435
482
 
436
483
  # Get CARM results
437
- if os.path.exists(carm_results_path):
438
- csv_files = [f for f in os.listdir(carm_results_path) if f.endswith("_roofline.csv")]
439
- else:
440
- print("ERROR: No CARM results found. Please add them to the ./carm-results/roofline folder.")
484
+ csv_files = sorted(f for f in os.listdir(carm_results_path) if f.endswith("_roofline.csv"))
485
+ if not csv_files:
486
+ print(
487
+ f"ERROR: No CARM roofline results found in {carm_results_path}. Add files named *_roofline.csv.",
488
+ file=sys.stderr,
489
+ flush=True,
490
+ )
441
491
  sys.exit(1)
442
492
 
443
493
  # Extract machine names from filenames
@@ -728,6 +778,7 @@ for row in counter_data_df.itertuples(index=False):
728
778
  processed += 1
729
779
  duration = row.Duration * scaling_unit
730
780
  timestamp = row.Timestamp
781
+ # if FLOP counters are all zero or NaN, skip calculations and set metrics to zero/defaults
731
782
  if all(pd.isnull(getattr(row, col)) or getattr(row, col) == 0 for col in columns_to_check):
732
783
  no_flops += 1
733
784
  full_base_statistics["ThreadID"].append(row.ThreadID)
@@ -2035,12 +2086,16 @@ def update_slider_from_csv(
2035
2086
  current_values,
2036
2087
  selected_file,
2037
2088
  ):
2089
+ def prevent_update_for_reason(reason: str):
2090
+ logging.debug(f"Preventing update on update_slider_from_csv: {reason}")
2091
+ raise PreventUpdate
2092
+
2038
2093
  global sync_csv_path
2039
2094
  global current_file_timestamps
2040
2095
  if mask_button_offset == -1:
2041
- raise PreventUpdate
2096
+ prevent_update_for_reason("Mask button offset is -1.")
2042
2097
  if not selected_file:
2043
- raise PreventUpdate
2098
+ prevent_update_for_reason("No file selected.")
2044
2099
  else:
2045
2100
  global no_sync
2046
2101
  global first_load
@@ -2049,7 +2104,7 @@ def update_slider_from_csv(
2049
2104
  new_timestamps = [float(csv_df.iloc[0, 0]), float(csv_df.iloc[1, 0])]
2050
2105
  except Exception:
2051
2106
  first_load += 1
2052
- raise PreventUpdate from None
2107
+ new_timestamps = current_file_timestamps
2053
2108
 
2054
2109
  ctx = callback_context
2055
2110
  if not ctx.triggered:
@@ -2057,13 +2112,13 @@ def update_slider_from_csv(
2057
2112
  trigger_id = ctx.triggered[0]["prop_id"].split(".")[0]
2058
2113
 
2059
2114
  if new_timestamps == current_file_timestamps and trigger_id != "button-paraver-sync":
2060
- raise PreventUpdate
2115
+ prevent_update_for_reason("Timestamps in CSV have not changed and trigger is not sync button.")
2061
2116
 
2062
2117
  first_load += 1
2063
2118
  current_file_timestamps = new_timestamps
2064
2119
 
2065
2120
  if first_load <= 1:
2066
- raise PreventUpdate
2121
+ prevent_update_for_reason("First load.")
2067
2122
 
2068
2123
  try:
2069
2124
  start_index = (full_base_statistics_df["Timestamp"] - new_timestamps[0]).abs().idxmin()
@@ -2114,8 +2169,11 @@ def update_slider_from_csv(
2114
2169
 
2115
2170
  new_slider_indices = [int(new_start_index), int(new_end_index)]
2116
2171
 
2172
+ def print_separator():
2173
+ print("-" * 50, flush=True)
2174
+
2117
2175
  if trigger_id == "button-paraver-sync":
2118
- print("----------------------------------------------", flush=True)
2176
+ print_separator()
2119
2177
  print(
2120
2178
  "Sync Button Clicked, updating slider to timestamp range {} - {}".format(
2121
2179
  filtered_base.loc[new_start_index, "Timestamp"],
@@ -2141,12 +2199,12 @@ def update_slider_from_csv(
2141
2199
  flush=True,
2142
2200
  )
2143
2201
 
2144
- print("----------------------------------------------", flush=True)
2202
+ print_separator()
2145
2203
  no_sync = True
2146
2204
  return new_slider_indices, new_slider_indices, new_timestamps
2147
2205
 
2148
2206
  if new_slider_indices != current_values:
2149
- print("----------------------------------------------", flush=True)
2207
+ print_separator()
2150
2208
  print(
2151
2209
  "Sync CSV values changed, updating slider to timestamp range {} - {}".format(
2152
2210
  filtered_base.loc[new_start_index, "Timestamp"],
@@ -2172,7 +2230,7 @@ def update_slider_from_csv(
2172
2230
  flush=True,
2173
2231
  )
2174
2232
 
2175
- print("----------------------------------------------", flush=True)
2233
+ print_separator()
2176
2234
  no_sync = True
2177
2235
  return new_slider_indices, new_slider_indices, new_timestamps
2178
2236
 
@@ -3946,12 +4004,11 @@ def update_slider_marks(
3946
4004
  triggered_id = ctx.triggered[0]["prop_id"].split(".")[0]
3947
4005
  reset_view = current_values is None or triggered_id in SLIDER_MARKS_CONFIG["value"]["reset_triggers"]
3948
4006
 
3949
- grouped_count = len(_group_slider_segments(selected_segments, group_value)) if selected_segments else 0
3950
- max_index = max(grouped_count - 1, 0)
3951
- if grouped_count < max_dots_auto:
3952
- initial_range = [0, max_index] if max_index > 0 else [0, 0]
4007
+ if selected_segments:
4008
+ grouped_count = len(_group_slider_segments(selected_segments, group_value))
4009
+ initial_range = [0, max(grouped_count - 1, 0)]
3953
4010
  else:
3954
- initial_range = [0, min(max_index, 1)] if max_index > 0 else [0, 0]
4011
+ initial_range = [0, 0]
3955
4012
 
3956
4013
  return _resolve_slider_marks_result(
3957
4014
  selected_segments,