nsight-python 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nsight/exceptions.py ADDED
@@ -0,0 +1,51 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from dataclasses import dataclass
5
+
6
+ """
7
+ Exceptions specific to Nsight Python profiling and analysis.
8
+ """
9
+
10
+
11
class ProfilerException(Exception):
    """
    Raised for errors that are specific to the Profiler.

    Attributes:
        message: Explanation of the error.
    """
20
+
21
+
22
class NCUNotAvailableError(Exception):
    """
    Raised when the NVIDIA Nsight Compute CLI (NCU) cannot be used.

    Typical causes:
    - NCU is not installed on the system
    - NCU is not in the system PATH
    - Required permissions are missing
    """
33
+
34
+
35
# User-facing guidance emitted when cuda-core is missing; the concatenated
# pieces below form the exact same message string as before.
CUDA_CORE_UNAVAILABLE_MSG = (
    "cuda-core is required for ignore_failures functionality.\n"
    " Install it with:\n"
    " - pip install nsight-python[cu12] (if you have CUDA 12.x)\n"
    " - pip install nsight-python[cu13] (if you have CUDA 13.x)"
)
36
+
37
+
38
@dataclass
class NCUErrorContext:
    """
    Bundles the information needed to report and diagnose an NCU failure.

    Attributes:
        errors: The error logs from NCU
        log_file_path: Path to the NCU log file
        metric: The metric that was being collected
    """

    # Raw error lines captured from the NCU run.
    errors: list[str]
    # Location of the full NCU log on disk.
    log_file_path: str
    # Metric that was being collected when the failure occurred.
    metric: str
nsight/extraction.py ADDED
@@ -0,0 +1,224 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """
5
+ Extraction utilities for analyzing NVIDIA Nsight Compute profiling data.
6
+
7
+ This module provides functionality to load `.ncu-rep` reports, extract performance data,
8
+ and transform it into structured pandas DataFrames for further analysis.
9
+
10
+ Functions:
11
+ extract_ncu_action_data(action, metric):
12
+ Extracts performance data for a specific kernel action from an NVIDIA Nsight Compute report.
13
+
14
+ extract_df_from_report(report_path, metric, configs, iterations, func, derive_metric, ignore_kernel_list, output_progress, combine_kernel_metrics=None):
15
+ Processes the full NVIDIA Nsight Compute report and returns a pandas DataFrame containing performance metrics.
16
+ """
17
+
18
+ import functools
19
+ import inspect
20
+ import socket
21
+ from collections.abc import Callable
22
+ from typing import Any, List, Tuple
23
+
24
+ import ncu_report
25
+ import pandas as pd
26
+
27
+ from nsight import exceptions, utils
28
+
29
+
30
def extract_ncu_action_data(action: Any, metric: str) -> utils.NCUActionData:
    """
    Extracts performance data from an NVIDIA Nsight Compute kernel action.

    Args:
        action: The NVIDIA Nsight Compute action object.
        metric: The metric name to extract from the action.

    Returns:
        A data container with extracted metric, clock rates, and GPU name.
    """
    kernel_name = action.name()
    # Failure placeholder kernels ("dummy_kernel_failure") carry no usable
    # metric, so the value is left as None for those actions.
    if "dummy_kernel_failure" in kernel_name:
        metric_value = None
    else:
        metric_value = action[metric].value()

    return utils.NCUActionData(
        name=kernel_name,
        value=metric_value,
        compute_clock=action["device__attribute_clock_rate"].value(),
        memory_clock=action["device__attribute_memory_clock_rate"].value(),
        gpu=action["device__attribute_display_name"].value(),
    )
50
+
51
+
52
def extract_df_from_report(
    report_path: str,
    metric: str,
    configs: List[Tuple[Any, ...]],
    iterations: int,
    func: Callable[..., Any],
    derive_metric: Callable[..., Any] | None,
    ignore_kernel_list: List[str] | None,
    output_progress: bool,
    combine_kernel_metrics: Callable[[float, float], float] | None = None,
) -> pd.DataFrame:
    """
    Extracts and aggregates profiling results from an NVIDIA Nsight Compute report.

    Args:
        report_path: Path to the report file.
        metric: The NVIDIA Nsight Compute metric to extract.
        configs: Configuration settings used during profiling runs.
        iterations: Number of times each configuration was run.
        func: Function representing the kernel launch with parameter signature.
        derive_metric: Function to transform the raw metric value with config values.
        ignore_kernel_list: Kernel names to ignore in the analysis.
        output_progress: Toggles the printing of extraction progress.
        combine_kernel_metrics: Function to merge multiple kernel metrics.

    Returns:
        A DataFrame containing the extracted and transformed performance data.

    Raises:
        RuntimeError: If multiple kernels are detected per config without a combining function.
        exceptions.ProfilerException: If profiling results are missing or incomplete.
    """
    if output_progress:
        print("[NSIGHT-PYTHON] Loading profiled data")
    try:
        report = ncu_report.load_report(report_path)
    except FileNotFoundError:
        raise exceptions.ProfilerException(
            "No NVIDIA Nsight Compute report found. Please run nsight-python with `@nsight.analyze.kernel(output='verbose')` "
            "to identify the issue."
        )

    annotations: List[str] = []
    values: List[float | None] = []
    kernel_names: List[str] = []
    gpus: List[str] = []
    compute_clocks: List[int] = []
    memory_clocks: List[int] = []
    metrics: List[str] = []
    transformed_metrics: List[str | bool] = []
    hostnames: List[str] = []

    sig = inspect.signature(func)

    # Create a new array for each argument in the signature
    arg_arrays: dict[str, list[Any]] = {name: [] for name in sig.parameters.keys()}

    # Extract all profiling data, grouped by annotation
    if output_progress:
        print("Extracting profiling data")
    profiling_data: dict[str, list[utils.NCUActionData]] = {}
    for range_idx in range(report.num_ranges()):
        current_range = report.range_by_idx(range_idx)
        for action_idx in range(current_range.num_actions()):
            action = current_range.action_by_idx(action_idx)
            state = action.nvtx_state()

            for domain_idx in state.domains():
                domain = state.domain_by_id(domain_idx)

                # ignore actions not in the nsight-python nvtx domain
                if domain.name() != utils.NVTX_DOMAIN:
                    continue
                # ignore kernels in ignore_kernel_list
                if ignore_kernel_list and action.name() in ignore_kernel_list:
                    continue

                annotation = domain.push_pop_ranges()[0]
                data = extract_ncu_action_data(action, metric)

                if annotation not in profiling_data:
                    profiling_data[annotation] = []
                profiling_data[annotation].append(data)

    for annotation, annotation_data in profiling_data.items():
        if output_progress:
            print(f"Extracting {annotation} profiling data")

        configs_repeated = [config for config in configs for _ in range(iterations)]

        if len(annotation_data) == 0:
            raise RuntimeError("No kernels were profiled")
        if len(annotation_data) % len(configs_repeated) != 0:
            raise RuntimeError(
                "Expect same number of kernels per run. "
                f"Got average of {len(annotation_data) / len(configs_repeated)} per run"
            )
        num_kernels = len(annotation_data) // len(configs_repeated)

        if num_kernels > 1:
            if combine_kernel_metrics is None:
                raise RuntimeError(
                    (
                        f"More than one (total={num_kernels}) kernel is launched within the {annotation} annotation.\n"
                        "We expect one kernel per annotation.\n"
                        "Try `combine_kernel_metrics = lambda x, y: ...` to combine the metrics of multiple kernels\n"
                        "or add some of the kernels to the ignore_kernel_list .\n"
                        "Kernels are:\n"
                        + "\n".join(sorted(set(x.name for x in annotation_data)))
                    )
                )

            assert (
                callable(combine_kernel_metrics)
                and combine_kernel_metrics.__code__.co_argcount == 2
            ), "Profiler error: combine_kernel_metrics must be a binary function"

        # rewrite annotation_data to combine the kernels of each run into
        # a single NCUActionData (a batch of size 1 reduces to itself)
        action_data: list[utils.NCUActionData] = []
        for data_tuple in utils.batched(annotation_data, num_kernels):
            # Convert tuple to list for functools.reduce
            batch_list: list[utils.NCUActionData] = list(data_tuple)
            action_data.append(
                functools.reduce(
                    utils.NCUActionData.combine(combine_kernel_metrics), batch_list
                )
            )

        for conf, data in zip(configs_repeated, action_data):
            compute_clocks.append(data.compute_clock)
            memory_clocks.append(data.memory_clock)
            gpus.append(data.gpu)
            kernel_names.append(data.name)

            # evaluate the measured metric
            value = data.value
            if derive_metric is not None:
                derived_metric = None if value is None else derive_metric(value, *conf)
                value = derived_metric
                derive_metric_name = derive_metric.__name__
                transformed_metrics.append(derive_metric_name)
            else:
                transformed_metrics.append(False)

            values.append(value)

            # gather remaining required data
            annotations.append(annotation)
            metrics.append(metric)
            hostnames.append(socket.gethostname())
            # Add a field for every config argument
            bound_args = sig.bind(*conf)
            for name, val in bound_args.arguments.items():
                arg_arrays[name].append(val)

    # Create the DataFrame with the initial columns
    df_data = {
        "Annotation": annotations,
        "Value": values,
        "Metric": metrics,
        "Transformed": transformed_metrics,
        "Kernel": kernel_names,
        "GPU": gpus,
        "Host": hostnames,
        "ComputeClock": compute_clocks,
        "MemoryClock": memory_clocks,
    }

    # Add each array in arg_arrays to the DataFrame
    for arg_name, arg_values in arg_arrays.items():
        df_data[arg_name] = arg_values

    return pd.DataFrame(df_data)
nsight/thermovision.py ADDED
@@ -0,0 +1,115 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import time
5
+ from typing import Any
6
+
7
+ """
8
+ This module provides GPU thermal monitoring and throttling prevention using NVIDIA's NVML library.
9
+
10
+ It monitors GPU temperature and T.limit, and delays execution when the GPU
11
+ is too hot to avoid thermal throttling. Initialization is done lazily when needed.
12
+ """
13
+
14
# Guard NVML imports: pynvml (provided by nvidia-ml-py) may be absent in a
# broken install, so the module degrades gracefully instead of failing at
# import time.
try:
    from pynvml import (
        NVML_TEMPERATURE_GPU,
        NVMLError_NotSupported,
        nvmlDeviceGetHandleByIndex,
        nvmlDeviceGetMarginTemperature,
        nvmlDeviceGetTemperature,
        nvmlInit,
    )

    # Consulted by init() before any NVML call is attempted.
    PYNVML_AVAILABLE = True
except ImportError:
    PYNVML_AVAILABLE = False
    print(
        "Warning: Cannot import pynvml (provided by nvidia-ml-py). Ensure nsight-python was installed properly with all dependencies."
    )

# NVML handle for GPU index 0; populated lazily by init().
HANDLE: Any = None  # Will be initialized lazily
33
+
34
+
35
def init() -> bool:
    """
    Lazily sets up NVML and the GPU handle, then checks whether temperature
    retrieval is supported on this machine.

    Returns:
        True if temperature retrieval is supported, False otherwise.

    Notes:
        - Uses NVML (NVIDIA Management Library) via pynvml.
        - The GPU handle is cached in the module-level global ``HANDLE`` so
          repeated calls skip re-initialization.
    """
    global HANDLE

    # Without pynvml there is nothing to initialize or query.
    if not PYNVML_AVAILABLE:
        return False

    if HANDLE is None:
        nvmlInit()
        HANDLE = nvmlDeviceGetHandleByIndex(0)

    return is_temp_retrieval_supported()
58
+
59
+
60
def throttle_guard(wait_threshold: int = 10, continue_threshold: int = 40) -> None:
    """
    Delays execution if the GPU T.limit is below a specified threshold.

    Polls the GPU T.limit using NVML; if it is at or below ``wait_threshold``,
    waits in half-second intervals until it reaches ``continue_threshold``.

    Args:
        wait_threshold: The T.limit value below which execution is paused.
            Default: ``10``
        continue_threshold: The T.limit value at or above which execution resumes.
            Default: ``40``
    """
    tlimit = get_gpu_tlimit(HANDLE)

    # T.limit unavailable on this GPU: nothing to guard against.
    if tlimit is None:
        return
    # Plenty of thermal headroom: no need to wait.
    if tlimit > wait_threshold:
        return

    while tlimit is not None and tlimit < continue_threshold:
        temperature = get_gpu_temp(HANDLE)
        tlimit = get_gpu_tlimit(HANDLE)
        print(
            f"Waiting for GPU to cool down. Current temperature: {temperature}°C, T.limit: {tlimit}"
        )
        time.sleep(0.5)
86
+
87
+
88
def is_temp_retrieval_supported() -> bool:
    """
    Checks if the GPU supports margin-temperature (T.limit) retrieval by
    probing NVML once with the cached device handle.
    """
    try:
        nvmlDeviceGetMarginTemperature(HANDLE)
    except Exception:
        print("Warning: Nsight Python Thermovision is not supported on this machine")
        return False
    return True
98
+
99
+
100
def get_gpu_tlimit(handle: Any) -> int | None:
    """
    Returns the GPU T.Limit temperature for the given device handle.

    Args:
        handle: NVML device handle obtained from nvmlDeviceGetHandleByIndex.

    Returns:
        The margin temperature (T.limit), or None if the GPU does not
        support this query. Any other NVML error propagates unchanged.
    """
    try:
        return nvmlDeviceGetMarginTemperature(handle)  # type: ignore[no-any-return]
    except NVMLError_NotSupported as e:
        # Handle the case where the GPU does not support this feature
        print("Error: GPU does not support temperature limit retrieval:", e)
        return None
112
+
113
+
114
def get_gpu_temp(handle: Any) -> int:
    """Returns the current GPU core temperature (°C) for the given NVML device handle."""
    return nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)  # type: ignore[no-any-return]
@@ -0,0 +1,167 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """
5
+ Data transformation utilities for Nsight Python profiling output.
6
+
7
+ This module contains functions that process raw profiling results, aggregate metrics,
8
+ normalize them, and prepare the data for visualization or further statistical analysis.
9
+ """
10
+
11
+ import inspect
12
+ from collections.abc import Callable
13
+ from typing import Any
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+
19
+ def aggregate_data(
20
+ df: pd.DataFrame,
21
+ func: Callable[..., Any],
22
+ normalize_against: str | None,
23
+ output_progress: bool,
24
+ ) -> pd.DataFrame:
25
+ """
26
+ Groups and aggregates profiling data by configuration and annotation.
27
+
28
+ Args:
29
+ df: The raw profiling results.
30
+ func: Function representing kernel configuration parameters.
31
+ normalize_against: Name of the annotation to normalize against.
32
+ output_progress: Toggles the display of data processing logs
33
+
34
+ Returns:
35
+ Aggregated DataFrame and the (possibly normalized) metric name.
36
+ """
37
+ if output_progress:
38
+ print("[NSIGHT-PYTHON] Processing profiled data")
39
+
40
+ # Get the number of arguments in the signature of func
41
+ num_args = len(inspect.signature(func).parameters)
42
+
43
+ # Get the last N fields of the dataframe where N is the number of arguments
44
+ func_fields = df.columns[-num_args:].tolist()
45
+
46
+ # Function to convert non-sortable columns to strings
47
+ def convert_non_sortable_columns(dframe: pd.DataFrame) -> pd.DataFrame:
48
+ for col in dframe.columns:
49
+ # Try sorting the column to check if it's sortable
50
+ try:
51
+ sorted(dframe[col].dropna())
52
+ except TypeError:
53
+ # If sorting fails, convert the column to string
54
+ dframe[col] = dframe[col].astype(str)
55
+ return dframe
56
+
57
+ # Convert non-sortable columns before grouping
58
+ df = convert_non_sortable_columns(df)
59
+
60
+ # Preserve original order by adding an index column
61
+ df = df.reset_index(drop=True)
62
+ df["_original_order"] = df.index
63
+
64
+ # Build named aggregation dict for static fields
65
+ named_aggs = {
66
+ "AvgValue": ("Value", "mean"),
67
+ "StdDev": ("Value", "std"),
68
+ "MinValue": ("Value", "min"),
69
+ "MaxValue": ("Value", "max"),
70
+ "NumRuns": ("Value", "count"),
71
+ "_original_order": (
72
+ "_original_order",
73
+ "min",
74
+ ), # Use min to preserve first occurrence
75
+ }
76
+
77
+ # Add assertion-based unique selection for remaining fields
78
+ remaining_fields = [
79
+ col
80
+ for col in df.columns
81
+ if col not in ["Value", "Annotation", "_original_order"] + func_fields
82
+ ]
83
+
84
+ for col in remaining_fields:
85
+ if col == "Kernel":
86
+ named_aggs[col] = (col, "first")
87
+ else:
88
+ named_aggs[col] = ( # type: ignore[assignment]
89
+ col,
90
+ (
91
+ lambda colname: lambda x: (
92
+ x.unique()[0]
93
+ if len(x.unique()) == 1
94
+ else (_ for _ in ()).throw(
95
+ AssertionError(
96
+ f"Column '{colname}' has multiple values in group: {x.unique()}"
97
+ )
98
+ )
99
+ )
100
+ )(col),
101
+ )
102
+
103
+ # Apply aggregation with named aggregation
104
+ agg_df = df.groupby(["Annotation"] + func_fields).agg(**named_aggs).reset_index()
105
+
106
+ # Compute 95% confidence intervals
107
+ agg_df["CI95_Lower"] = agg_df["AvgValue"] - 1.96 * (
108
+ agg_df["StdDev"] / np.sqrt(agg_df["NumRuns"])
109
+ )
110
+ agg_df["CI95_Upper"] = agg_df["AvgValue"] + 1.96 * (
111
+ agg_df["StdDev"] / np.sqrt(agg_df["NumRuns"])
112
+ )
113
+
114
+ # Compute relative standard deviation as a percentage
115
+ agg_df["RelativeStdDevPct"] = (agg_df["StdDev"] / agg_df["AvgValue"]) * 100
116
+
117
+ # Flag measurements as stable if relative stddev is less than 2%
118
+ agg_df["StableMeasurement"] = agg_df["RelativeStdDevPct"] < 2.0
119
+
120
+ # Flatten the multi-index columns
121
+ agg_df.columns = [col if isinstance(col, str) else col[0] for col in agg_df.columns]
122
+
123
+ # Sort by original order to preserve user-provided configuration order
124
+ agg_df = agg_df.sort_values("_original_order").reset_index(drop=True)
125
+ agg_df = agg_df.drop("_original_order", axis=1) # Remove the helper column
126
+
127
+ do_normalize = normalize_against is not None
128
+ if do_normalize:
129
+
130
+ assert (
131
+ normalize_against in agg_df["Annotation"].values
132
+ ), f"Annotation '{normalize_against}' not found in data."
133
+
134
+ # Create a DataFrame to hold the normalization values
135
+ normalization_df = agg_df[agg_df["Annotation"] == normalize_against][
136
+ func_fields + ["AvgValue"]
137
+ ]
138
+ normalization_df = normalization_df.rename(
139
+ columns={"AvgValue": "NormalizationValue"}
140
+ )
141
+
142
+ # Merge with the original DataFrame to apply normalization
143
+ agg_df = pd.merge(agg_df, normalization_df, on=func_fields)
144
+
145
+ # Normalize the AvgValue by the values of the normalization annotation
146
+ agg_df["AvgValue"] = agg_df["NormalizationValue"] / agg_df["AvgValue"]
147
+
148
+ # Update the metric name to reflect the normalization
149
+ agg_df["Metric"] = (
150
+ agg_df["Metric"].astype(str) + f" relative to {normalize_against}"
151
+ )
152
+
153
+ # Calculate geometric mean for each annotation
154
+ geomean_values = {}
155
+ for annotation in agg_df["Annotation"].unique():
156
+ annotation_data = agg_df[agg_df["Annotation"] == annotation]
157
+ valid_values = annotation_data["AvgValue"].dropna()
158
+ if not valid_values.empty:
159
+ geomean = np.exp(np.mean(np.log(valid_values)))
160
+ geomean_values[annotation] = geomean
161
+ else:
162
+ geomean_values[annotation] = np.nan
163
+
164
+ # Add geomean values to the DataFrame
165
+ agg_df["Geomean"] = agg_df["Annotation"].map(geomean_values)
166
+
167
+ return agg_df