nsight-python 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nsight/exceptions.py ADDED
@@ -0,0 +1,51 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from dataclasses import dataclass
5
+
6
+ """
7
+ Exceptions specific to Nsight Python profiling and analysis.
8
+ """
9
+
10
+
11
class ProfilerException(Exception):
    """
    Raised for errors that are specific to the Profiler.

    Attributes:
        message: Explanation of the error.
    """
20
+
21
+
22
class NCUNotAvailableError(Exception):
    """
    Raised when the NVIDIA Nsight Compute CLI (NCU) cannot be used.

    Typical causes:
    - NCU is not installed on the system
    - NCU is not in the system PATH
    - Required permissions are missing
    """
33
+
34
+
35
# User-facing guidance emitted when cuda-core is missing; the concatenated
# pieces below form the exact same message string as before.
CUDA_CORE_UNAVAILABLE_MSG = (
    "cuda-core is required for ignore_failures functionality.\n"
    " Install it with:\n"
    " - pip install nsight-python[cu12] (if you have CUDA 12.x)\n"
    " - pip install nsight-python[cu13] (if you have CUDA 13.x)"
)
36
+
37
+
38
@dataclass
class NCUErrorContext:
    """
    Bundles the information needed to report and diagnose an NCU failure.

    Attributes:
        errors: The error logs from NCU
        log_file_path: Path to the NCU log file
        metric: The metric that was being collected
    """

    # Raw error lines captured from the NCU run.
    errors: list[str]
    # Location of the full NCU log on disk.
    log_file_path: str
    # Metric that was being collected when the failure occurred.
    metric: str
nsight/extraction.py ADDED
@@ -0,0 +1,224 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """
5
+ Extraction utilities for analyzing NVIDIA Nsight Compute profiling data.
6
+
7
+ This module provides functionality to load `.ncu-rep` reports, extract performance data,
8
+ and transform it into structured pandas DataFrames for further analysis.
9
+
10
+ Functions:
11
+ extract_ncu_action_data(action, metric):
12
+ Extracts performance data for a specific kernel action from an NVIDIA Nsight Compute report.
13
+
14
+ extract_df_from_report(report_path, metric, configs, iterations, func, derive_metric, ignore_kernel_list, output_progress, combine_kernel_metrics=None):
15
+ Processes the full NVIDIA Nsight Compute report and returns a pandas DataFrame containing performance metrics.
16
+ """
17
+
18
+ import functools
19
+ import inspect
20
+ import socket
21
+ from collections.abc import Callable
22
+ from typing import Any, List, Tuple
23
+
24
+ import ncu_report
25
+ import pandas as pd
26
+
27
+ from nsight import exceptions, utils
28
+
29
+
30
def extract_ncu_action_data(action: Any, metric: str) -> utils.NCUActionData:
    """
    Extracts performance data from an NVIDIA Nsight Compute kernel action.

    Args:
        action: The NVIDIA Nsight Compute action object.
        metric: The metric name to extract from the action.

    Returns:
        A data container with extracted metric, clock rates, and GPU name.
    """
    kernel_name = action.name()
    # Failure placeholder kernels ("dummy_kernel_failure") carry no usable
    # metric, so the value is left as None for those actions.
    if "dummy_kernel_failure" in kernel_name:
        metric_value = None
    else:
        metric_value = action[metric].value()

    return utils.NCUActionData(
        name=kernel_name,
        value=metric_value,
        compute_clock=action["device__attribute_clock_rate"].value(),
        memory_clock=action["device__attribute_memory_clock_rate"].value(),
        gpu=action["device__attribute_display_name"].value(),
    )
50
+
51
+
52
def extract_df_from_report(
    report_path: str,
    metric: str,
    configs: List[Tuple[Any, ...]],
    iterations: int,
    func: Callable[..., Any],
    derive_metric: Callable[..., Any] | None,
    ignore_kernel_list: List[str] | None,
    output_progress: bool,
    combine_kernel_metrics: Callable[[float, float], float] | None = None,
) -> pd.DataFrame:
    """
    Extracts and aggregates profiling results from an NVIDIA Nsight Compute report.

    Args:
        report_path: Path to the report file.
        metric: The NVIDIA Nsight Compute metric to extract.
        configs: Configuration settings used during profiling runs.
        iterations: Number of times each configuration was run.
        func: Function representing the kernel launch with parameter signature.
        derive_metric: Function to transform the raw metric value with config values.
        ignore_kernel_list: Kernel names to ignore in the analysis.
        output_progress: Toggles the printing of extraction progress.
        combine_kernel_metrics: Function to merge multiple kernel metrics.

    Returns:
        A DataFrame containing the extracted and transformed performance data.

    Raises:
        RuntimeError: If multiple kernels are detected per config without a combining function.
        exceptions.ProfilerException: If profiling results are missing or incomplete.
    """
    if output_progress:
        print("[NSIGHT-PYTHON] Loading profiled data")
    try:
        report = ncu_report.load_report(report_path)
    except FileNotFoundError:
        raise exceptions.ProfilerException(
            "No NVIDIA Nsight Compute report found. Please run nsight-python with `@nsight.analyze.kernel(output='verbose')` "
            "to identify the issue."
        )

    annotations: List[str] = []
    values: List[float | None] = []
    kernel_names: List[str] = []
    gpus: List[str] = []
    compute_clocks: List[int] = []
    memory_clocks: List[int] = []
    metrics: List[str] = []
    transformed_metrics: List[str | bool] = []
    hostnames: List[str] = []

    sig = inspect.signature(func)

    # Create a new array for each argument in the signature
    arg_arrays: dict[str, list[Any]] = {name: [] for name in sig.parameters.keys()}

    # Extract all profiling data, grouped by annotation
    if output_progress:
        print("Extracting profiling data")
    profiling_data: dict[str, list[utils.NCUActionData]] = {}
    for range_idx in range(report.num_ranges()):
        current_range = report.range_by_idx(range_idx)
        for action_idx in range(current_range.num_actions()):
            action = current_range.action_by_idx(action_idx)
            state = action.nvtx_state()

            for domain_idx in state.domains():
                domain = state.domain_by_id(domain_idx)

                # ignore actions not in the nsight-python nvtx domain
                if domain.name() != utils.NVTX_DOMAIN:
                    continue
                # ignore kernels in ignore_kernel_list
                if ignore_kernel_list and action.name() in ignore_kernel_list:
                    continue

                annotation = domain.push_pop_ranges()[0]
                data = extract_ncu_action_data(action, metric)

                if annotation not in profiling_data:
                    profiling_data[annotation] = []
                profiling_data[annotation].append(data)

    for annotation, annotation_data in profiling_data.items():
        if output_progress:
            print(f"Extracting {annotation} profiling data")

        configs_repeated = [config for config in configs for _ in range(iterations)]

        if len(annotation_data) == 0:
            raise RuntimeError("No kernels were profiled")
        if len(annotation_data) % len(configs_repeated) != 0:
            raise RuntimeError(
                "Expect same number of kernels per run. "
                f"Got average of {len(annotation_data) / len(configs_repeated)} per run"
            )
        num_kernels = len(annotation_data) // len(configs_repeated)

        if num_kernels > 1:
            if combine_kernel_metrics is None:
                raise RuntimeError(
                    (
                        f"More than one (total={num_kernels}) kernel is launched within the {annotation} annotation.\n"
                        "We expect one kernel per annotation.\n"
                        "Try `combine_kernel_metrics = lambda x, y: ...` to combine the metrics of multiple kernels\n"
                        "or add some of the kernels to the ignore_kernel_list .\n"
                        "Kernels are:\n"
                        + "\n".join(sorted(set(x.name for x in annotation_data)))
                    )
                )

            assert (
                callable(combine_kernel_metrics)
                and combine_kernel_metrics.__code__.co_argcount == 2
            ), "Profiler error: combine_kernel_metrics must be a binary function"

        # rewrite annotation_data to combine the kernels of each run into
        # a single NCUActionData (a batch of size 1 reduces to itself)
        action_data: list[utils.NCUActionData] = []
        for data_tuple in utils.batched(annotation_data, num_kernels):
            # Convert tuple to list for functools.reduce
            batch_list: list[utils.NCUActionData] = list(data_tuple)
            action_data.append(
                functools.reduce(
                    utils.NCUActionData.combine(combine_kernel_metrics), batch_list
                )
            )

        for conf, data in zip(configs_repeated, action_data):
            compute_clocks.append(data.compute_clock)
            memory_clocks.append(data.memory_clock)
            gpus.append(data.gpu)
            kernel_names.append(data.name)

            # evaluate the measured metric
            value = data.value
            if derive_metric is not None:
                derived_metric = None if value is None else derive_metric(value, *conf)
                value = derived_metric
                derive_metric_name = derive_metric.__name__
                transformed_metrics.append(derive_metric_name)
            else:
                transformed_metrics.append(False)

            values.append(value)

            # gather remaining required data
            annotations.append(annotation)
            metrics.append(metric)
            hostnames.append(socket.gethostname())
            # Add a field for every config argument
            bound_args = sig.bind(*conf)
            for name, val in bound_args.arguments.items():
                arg_arrays[name].append(val)

    # Create the DataFrame with the initial columns
    df_data = {
        "Annotation": annotations,
        "Value": values,
        "Metric": metrics,
        "Transformed": transformed_metrics,
        "Kernel": kernel_names,
        "GPU": gpus,
        "Host": hostnames,
        "ComputeClock": compute_clocks,
        "MemoryClock": memory_clocks,
    }

    # Add each array in arg_arrays to the DataFrame
    for arg_name, arg_values in arg_arrays.items():
        df_data[arg_name] = arg_values

    return pd.DataFrame(df_data)
nsight/thermovision.py ADDED
@@ -0,0 +1,115 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import time
5
+ from typing import Any
6
+
7
+ """
8
+ This module provides GPU thermal monitoring and throttling prevention using NVIDIA's NVML library.
9
+
10
+ It monitors GPU temperature and T.limit, and delays execution when the GPU
11
+ is too hot to avoid thermal throttling. Initialization is done lazily when needed.
12
+ """
13
+
14
# Guard NVML imports: pynvml (provided by nvidia-ml-py) may be absent in a
# broken install, so the module degrades gracefully instead of failing at
# import time.
try:
    from pynvml import (
        NVML_TEMPERATURE_GPU,
        NVMLError_NotSupported,
        nvmlDeviceGetHandleByIndex,
        nvmlDeviceGetMarginTemperature,
        nvmlDeviceGetTemperature,
        nvmlInit,
    )

    # Consulted by init() before any NVML call is attempted.
    PYNVML_AVAILABLE = True
except ImportError:
    PYNVML_AVAILABLE = False
    print(
        "Warning: Cannot import pynvml (provided by nvidia-ml-py). Ensure nsight-python was installed properly with all dependencies."
    )

# NVML handle for GPU index 0; populated lazily by init().
HANDLE: Any = None  # Will be initialized lazily
33
+
34
+
35
def init() -> bool:
    """
    Lazily sets up NVML and the GPU handle, then checks whether temperature
    retrieval is supported on this machine.

    Returns:
        True if temperature retrieval is supported, False otherwise.

    Notes:
        - Uses NVML (NVIDIA Management Library) via pynvml.
        - The GPU handle is cached in the module-level global ``HANDLE`` so
          repeated calls skip re-initialization.
    """
    global HANDLE

    # Without pynvml there is nothing to initialize or query.
    if not PYNVML_AVAILABLE:
        return False

    if HANDLE is None:
        nvmlInit()
        HANDLE = nvmlDeviceGetHandleByIndex(0)

    return is_temp_retrieval_supported()
58
+
59
+
60
def throttle_guard(wait_threshold: int = 10, continue_threshold: int = 40) -> None:
    """
    Delays execution if the GPU T.limit is below a specified threshold.

    Polls the GPU T.limit using NVML; if it is at or below ``wait_threshold``,
    waits in half-second intervals until it reaches ``continue_threshold``.

    Args:
        wait_threshold: The T.limit value below which execution is paused.
            Default: ``10``
        continue_threshold: The T.limit value at or above which execution resumes.
            Default: ``40``
    """
    tlimit = get_gpu_tlimit(HANDLE)

    # T.limit unavailable on this GPU: nothing to guard against.
    if tlimit is None:
        return
    # Plenty of thermal headroom: no need to wait.
    if tlimit > wait_threshold:
        return

    while tlimit is not None and tlimit < continue_threshold:
        temperature = get_gpu_temp(HANDLE)
        tlimit = get_gpu_tlimit(HANDLE)
        print(
            f"Waiting for GPU to cool down. Current temperature: {temperature}°C, T.limit: {tlimit}"
        )
        time.sleep(0.5)
86
+
87
+
88
def is_temp_retrieval_supported() -> bool:
    """
    Checks if the GPU supports margin-temperature (T.limit) retrieval by
    probing NVML once with the cached device handle.
    """
    try:
        nvmlDeviceGetMarginTemperature(HANDLE)
    except Exception:
        print("Warning: Nsight Python Thermovision is not supported on this machine")
        return False
    return True
98
+
99
+
100
def get_gpu_tlimit(handle: Any) -> int | None:
    """
    Returns the GPU T.Limit temperature for the given device handle.

    Args:
        handle: NVML device handle obtained from nvmlDeviceGetHandleByIndex.

    Returns:
        The margin temperature (T.limit), or None if the GPU does not
        support this query. Any other NVML error propagates unchanged.
    """
    try:
        return nvmlDeviceGetMarginTemperature(handle)  # type: ignore[no-any-return]
    except NVMLError_NotSupported as e:
        # Handle the case where the GPU does not support this feature
        print("Error: GPU does not support temperature limit retrieval:", e)
        return None
112
+
113
+
114
def get_gpu_temp(handle: Any) -> int:
    """Returns the current GPU core temperature (°C) for the given NVML device handle."""
    return nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)  # type: ignore[no-any-return]
@@ -0,0 +1,167 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """
5
+ Data transformation utilities for Nsight Python profiling output.
6
+
7
+ This module contains functions that process raw profiling results, aggregate metrics,
8
+ normalize them, and prepare the data for visualization or further statistical analysis.
9
+ """
10
+
11
+ import inspect
12
+ from collections.abc import Callable
13
+ from typing import Any
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+
19
+ def aggregate_data(
20
+ df: pd.DataFrame,
21
+ func: Callable[..., Any],
22
+ normalize_against: str | None,
23
+ output_progress: bool,
24
+ ) -> pd.DataFrame:
25
+ """
26
+ Groups and aggregates profiling data by configuration and annotation.
27
+
28
+ Args:
29
+ df: The raw profiling results.
30
+ func: Function representing kernel configuration parameters.
31
+ normalize_against: Name of the annotation to normalize against.
32
+ output_progress: Toggles the display of data processing logs
33
+
34
+ Returns:
35
+ Aggregated DataFrame and the (possibly normalized) metric name.
36
+ """
37
+ if output_progress:
38
+ print("[NSIGHT-PYTHON] Processing profiled data")
39
+
40
+ # Get the number of arguments in the signature of func
41
+ num_args = len(inspect.signature(func).parameters)
42
+
43
+ # Get the last N fields of the dataframe where N is the number of arguments
44
+ func_fields = df.columns[-num_args:].tolist()
45
+
46
+ # Function to convert non-sortable columns to strings
47
+ def convert_non_sortable_columns(dframe: pd.DataFrame) -> pd.DataFrame:
48
+ for col in dframe.columns:
49
+ # Try sorting the column to check if it's sortable
50
+ try:
51
+ sorted(dframe[col].dropna())
52
+ except TypeError:
53
+ # If sorting fails, convert the column to string
54
+ dframe[col] = dframe[col].astype(str)
55
+ return dframe
56
+
57
+ # Convert non-sortable columns before grouping
58
+ df = convert_non_sortable_columns(df)
59
+
60
+ # Preserve original order by adding an index column
61
+ df = df.reset_index(drop=True)
62
+ df["_original_order"] = df.index
63
+
64
+ # Build named aggregation dict for static fields
65
+ named_aggs = {
66
+ "AvgValue": ("Value", "mean"),
67
+ "StdDev": ("Value", "std"),
68
+ "MinValue": ("Value", "min"),
69
+ "MaxValue": ("Value", "max"),
70
+ "NumRuns": ("Value", "count"),
71
+ "_original_order": (
72
+ "_original_order",
73
+ "min",
74
+ ), # Use min to preserve first occurrence
75
+ }
76
+
77
+ # Add assertion-based unique selection for remaining fields
78
+ remaining_fields = [
79
+ col
80
+ for col in df.columns
81
+ if col not in ["Value", "Annotation", "_original_order"] + func_fields
82
+ ]
83
+
84
+ for col in remaining_fields:
85
+ if col == "Kernel":
86
+ named_aggs[col] = (col, "first")
87
+ else:
88
+ named_aggs[col] = ( # type: ignore[assignment]
89
+ col,
90
+ (
91
+ lambda colname: lambda x: (
92
+ x.unique()[0]
93
+ if len(x.unique()) == 1
94
+ else (_ for _ in ()).throw(
95
+ AssertionError(
96
+ f"Column '{colname}' has multiple values in group: {x.unique()}"
97
+ )
98
+ )
99
+ )
100
+ )(col),
101
+ )
102
+
103
+ # Apply aggregation with named aggregation
104
+ agg_df = df.groupby(["Annotation"] + func_fields).agg(**named_aggs).reset_index()
105
+
106
+ # Compute 95% confidence intervals
107
+ agg_df["CI95_Lower"] = agg_df["AvgValue"] - 1.96 * (
108
+ agg_df["StdDev"] / np.sqrt(agg_df["NumRuns"])
109
+ )
110
+ agg_df["CI95_Upper"] = agg_df["AvgValue"] + 1.96 * (
111
+ agg_df["StdDev"] / np.sqrt(agg_df["NumRuns"])
112
+ )
113
+
114
+ # Compute relative standard deviation as a percentage
115
+ agg_df["RelativeStdDevPct"] = (agg_df["StdDev"] / agg_df["AvgValue"]) * 100
116
+
117
+ # Flag measurements as stable if relative stddev is less than 2%
118
+ agg_df["StableMeasurement"] = agg_df["RelativeStdDevPct"] < 2.0
119
+
120
+ # Flatten the multi-index columns
121
+ agg_df.columns = [col if isinstance(col, str) else col[0] for col in agg_df.columns]
122
+
123
+ # Sort by original order to preserve user-provided configuration order
124
+ agg_df = agg_df.sort_values("_original_order").reset_index(drop=True)
125
+ agg_df = agg_df.drop("_original_order", axis=1) # Remove the helper column
126
+
127
+ do_normalize = normalize_against is not None
128
+ if do_normalize:
129
+
130
+ assert (
131
+ normalize_against in agg_df["Annotation"].values
132
+ ), f"Annotation '{normalize_against}' not found in data."
133
+
134
+ # Create a DataFrame to hold the normalization values
135
+ normalization_df = agg_df[agg_df["Annotation"] == normalize_against][
136
+ func_fields + ["AvgValue"]
137
+ ]
138
+ normalization_df = normalization_df.rename(
139
+ columns={"AvgValue": "NormalizationValue"}
140
+ )
141
+
142
+ # Merge with the original DataFrame to apply normalization
143
+ agg_df = pd.merge(agg_df, normalization_df, on=func_fields)
144
+
145
+ # Normalize the AvgValue by the values of the normalization annotation
146
+ agg_df["AvgValue"] = agg_df["NormalizationValue"] / agg_df["AvgValue"]
147
+
148
+ # Update the metric name to reflect the normalization
149
+ agg_df["Metric"] = (
150
+ agg_df["Metric"].astype(str) + f" relative to {normalize_against}"
151
+ )
152
+
153
+ # Calculate geometric mean for each annotation
154
+ geomean_values = {}
155
+ for annotation in agg_df["Annotation"].unique():
156
+ annotation_data = agg_df[agg_df["Annotation"] == annotation]
157
+ valid_values = annotation_data["AvgValue"].dropna()
158
+ if not valid_values.empty:
159
+ geomean = np.exp(np.mean(np.log(valid_values)))
160
+ geomean_values[annotation] = geomean
161
+ else:
162
+ geomean_values[annotation] = np.nan
163
+
164
+ # Add geomean values to the DataFrame
165
+ agg_df["Geomean"] = agg_df["Annotation"].map(geomean_values)
166
+
167
+ return agg_df