PyPI - jax-hpc-profiler - Versions diffs - 0.2.0__py3-none-any.whl - Mend

jax-hpc-profiler 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

jax_hpc_profiler/__init__.py +9 -0
jax_hpc_profiler/create_argparse.py +158 -0
jax_hpc_profiler/main.py +57 -0
jax_hpc_profiler/plotting.py +214 -0
jax_hpc_profiler/timer.py +185 -0
jax_hpc_profiler/utils.py +396 -0
jax_hpc_profiler-0.2.0.dist-info/LICENSE +674 -0
jax_hpc_profiler-0.2.0.dist-info/METADATA +847 -0
jax_hpc_profiler-0.2.0.dist-info/RECORD +12 -0
jax_hpc_profiler-0.2.0.dist-info/WHEEL +5 -0
jax_hpc_profiler-0.2.0.dist-info/entry_points.txt +2 -0
jax_hpc_profiler-0.2.0.dist-info/top_level.txt +1 -0

jax_hpc_profiler/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .create_argparse import create_argparser
+from .plotting import plot_strong_scaling, plot_weak_scaling
+from .timer import Timer
+from .utils import clean_up_csv, concatenate_csvs, plot_with_pdims_strategy
+# Public API of the package, re-exported from the submodules imported above.
+__all__ = [
+    'create_argparser', 'plot_strong_scaling', 'plot_weak_scaling', 'Timer',
+    'clean_up_csv', 'concatenate_csvs', 'plot_with_pdims_strategy'
+]

jax_hpc_profiler/create_argparse.py ADDED Viewed

@@ -0,0 +1,158 @@
+import argparse
+def create_argparser():
+    """
+    Build the command-line parser, parse ``sys.argv`` and post-process the result.
+    Two sub-commands are defined: ``concat`` (positional ``input`` and
+    ``output``) and ``plot`` (file selection, filtering, styling and scaling
+    options). For ``plot`` the parsed namespace is normalised:
+    ``pdim_strategy`` collapses to ``['plot_all']`` or ``['plot_fastest']``
+    when one of them is mixed with other strategies, and ``plot_columns`` is
+    set to whichever of ``plot_times`` / ``plot_memory`` was supplied.
+    For ``concat`` the namespace is returned as parsed.
+    Returns
+    -------
+    argparse.Namespace
+        Parsed and validated arguments.
+    Raises
+    ------
+    ValueError
+        If the ``plot`` command has neither ``plot_times`` nor ``plot_memory``.
+    """
+    parser = argparse.ArgumentParser(
+        description='HPC Plotter for benchmarking data')
+    # Sub-commands keep concatenation and plotting mutually exclusive
+    subparsers = parser.add_subparsers(dest='command', required=True)
+    concat_parser = subparsers.add_parser('concat',
+                                          help='Concatenate CSV files')
+    concat_parser.add_argument('input',
+                               type=str,
+                               help='Input directory for concatenation')
+    concat_parser.add_argument('output',
+                               type=str,
+                               help='Output directory for concatenation')
+    # Arguments for plotting
+    plot_parser = subparsers.add_parser('plot', help='Plot CSV data')
+    plot_parser.add_argument('-f',
+                             '--csv_files',
+                             nargs='+',
+                             help='List of CSV files to plot',
+                             required=True)
+    plot_parser.add_argument('-g',
+                             '--gpus',
+                             nargs='*',
+                             type=int,
+                             help='List of number of GPUs to plot')
+    plot_parser.add_argument('-d',
+                             '--data_size',
+                             nargs='*',
+                             type=int,
+                             help='List of data sizes to plot')
+    # pdims related arguments
+    plot_parser.add_argument('-fd',
+                             '--filter_pdims',
+                             nargs='*',
+                             help='List of pdims to filter, e.g., 1x4 2x2 4x8')
+    plot_parser.add_argument(
+        '-ps',
+        '--pdim_strategy',
+        choices=['plot_all', 'plot_fastest', 'slab_yz', 'slab_xy', 'pencils'],
+        nargs='*',
+        default=['plot_fastest'],
+        help='Strategy for plotting pdims')
+    # Function and precision related arguments
+    plot_parser.add_argument(
+        '-pr',
+        '--precision',
+        choices=['float32', 'float64'],
+        default=['float32', 'float64'],
+        nargs='*',
+        help='Precision to filter by (float32 or float64)')
+    plot_parser.add_argument('-fn',
+                             '--function_name',
+                             nargs='+',
+                             default=['FFT'],
+                             help='Function names to filter')
+    # Time or memory related arguments (exactly one of the two is required)
+    plotting_group = plot_parser.add_mutually_exclusive_group(required=True)
+    plotting_group.add_argument('-pt',
+                                '--plot_times',
+                                nargs='*',
+                                choices=[
+                                    'jit_time', 'min_time', 'max_time',
+                                    'mean_time', 'std_time', 'last_time'
+                                ],
+                                help='Time columns to plot')
+    plotting_group.add_argument('-pm',
+                                '--plot_memory',
+                                nargs='*',
+                                choices=[
+                                    'generated_code', 'argument_size',
+                                    'output_size', 'temp_size'
+                                ],
+                                help='Memory columns to plot')
+    plot_parser.add_argument('-mu',
+                             '--memory_units',
+                             default='GB',
+                             help='Memory units to plot (KB, MB, GB, TB)')
+    # Plot customization arguments
+    plot_parser.add_argument('-fs',
+                             '--figure_size',
+                             nargs=2,
+                             type=int,
+                             help='Figure size')
+    plot_parser.add_argument('-o',
+                             '--output',
+                             help='Output file (if none then only show plot)',
+                             default=None)
+    plot_parser.add_argument('-db',
+                             '--dark_bg',
+                             action='store_true',
+                             help='Use dark background for plotting')
+    plot_parser.add_argument('-pd',
+                             '--print_decompositions',
+                             action='store_true',
+                             help='Print decompositions on plot')
+    # Backend related arguments
+    plot_parser.add_argument('-b',
+                             '--backends',
+                             nargs='*',
+                             default=['MPI', 'NCCL', 'MPI4JAX'],
+                             help='List of backends to include')
+    # Scaling type argument
+    plot_parser.add_argument('-sc',
+                             '--scaling',
+                             choices=['Weak', 'Strong'],
+                             required=True,
+                             help='Scaling type (Weak or Strong)')
+    # Label customization argument
+    plot_parser.add_argument(
+        '-l',
+        '--label_text',
+        type=str,
+        help=
+        'Custom label for the plot. You can use placeholders: %decomposition% (or %p%), %precision% (or %pr%), %plot_name% (or %pn%), %backend% (or %b%), %node% (or %n%), %methodname% (or %m%)',
+        default="%m%-%f%-%pn%-%pr%-%b%-%p%-%n%")
+    args = parser.parse_args()
+    # Everything below touches attributes that only the 'plot' sub-parser
+    # defines; for any other command the namespace does not carry them, so
+    # return it untouched instead of failing with AttributeError.
+    if args.command != 'plot':
+        return args
+    if 'plot_all' in args.pdim_strategy and len(args.pdim_strategy) > 1:
+        print(
+            "Warning: 'plot_all' strategy is combined with other strategies. Using 'plot_all' only."
+        )
+        args.pdim_strategy = ['plot_all']
+    if 'plot_fastest' in args.pdim_strategy and len(args.pdim_strategy) > 1:
+        print(
+            "Warning: 'plot_fastest' strategy is combined with other strategies. Using 'plot_fastest' only."
+        )
+        args.pdim_strategy = ['plot_fastest']
+    # Expose the selected columns under one name regardless of their kind
+    if args.plot_times is not None:
+        args.plot_columns = args.plot_times
+    elif args.plot_memory is not None:
+        args.plot_columns = args.plot_memory
+    else:
+        raise ValueError('Either plot_times or plot_memory should be provided')
+    return args

jax_hpc_profiler/main.py ADDED Viewed

@@ -0,0 +1,57 @@
+import sys
+from .create_argparse import create_argparser
+from .plotting import plot_strong_scaling, plot_weak_scaling
+from .utils import clean_up_csv, concatenate_csvs
+def main():
+    """
+    Entry point of the command-line interface.
+    Parses the arguments with ``create_argparser`` and dispatches on the
+    sub-command: ``concat`` forwards the input/output directories to
+    ``concatenate_csvs``; ``plot`` loads the CSV files through
+    ``clean_up_csv``, restricts the requested GPU counts and data sizes to
+    those actually available, and calls the weak or strong scaling plot.
+    The process exits with status 1 when nothing is left to plot.
+    """
+    args = create_argparser()
+    if args.command == 'concat':
+        input_dir, output_dir = args.input, args.output
+        concatenate_csvs(input_dir, output_dir)
+    elif args.command == 'plot':
+        dataframes, available_gpu_counts, available_data_sizes = clean_up_csv(
+            args.csv_files, args.precision, args.function_name, args.gpus,
+            args.data_size, args.filter_pdims, args.pdim_strategy,
+            args.backends, args.memory_units)
+        if len(dataframes) == 0:
+            print(f"No dataframes found for the given arguments. Exiting...")
+            sys.exit(1)
+        print(
+            f"requested GPUS: {args.gpus} available GPUS: {available_gpu_counts}"
+        )
+        # '-g' / '-d' have no default, so they are None when omitted on the
+        # command line. Treat that as "no restriction" and plot everything
+        # that was found (assumes the available values are sortable numbers
+        # — TODO confirm against clean_up_csv); otherwise keep only the
+        # requested values that are present in the data.
+        if args.gpus is None:
+            args.gpus = sorted(available_gpu_counts)
+        else:
+            args.gpus = [
+                gpu for gpu in args.gpus if gpu in available_gpu_counts
+            ]
+        if args.data_size is None:
+            args.data_size = sorted(available_data_sizes)
+        else:
+            args.data_size = [
+                data_size for data_size in args.data_size
+                if data_size in available_data_sizes
+            ]
+        if len(args.gpus) == 0:
+            print(
+                f"No dataframes found for the given GPUs. Exiting...")
+            sys.exit(1)
+        if len(args.data_size) == 0:
+            print(
+                f"No dataframes found for the given data sizes. Exiting...")
+            sys.exit(1)
+        if args.scaling == 'Weak':
+            plot_weak_scaling(dataframes, args.gpus, args.figure_size,
+                              args.output, args.dark_bg,
+                              args.print_decompositions, args.backends,
+                              args.precision, args.function_name,
+                              args.plot_columns, args.memory_units,
+                              args.label_text, args.pdim_strategy)
+        elif args.scaling == 'Strong':
+            plot_strong_scaling(dataframes, args.data_size, args.figure_size,
+                                args.output, args.dark_bg,
+                                args.print_decompositions, args.backends,
+                                args.precision, args.function_name,
+                                args.plot_columns, args.memory_units,
+                                args.label_text, args.pdim_strategy)
+# Run the command-line interface when the module is executed as a script.
+if __name__ == "__main__":
+    main()

jax_hpc_profiler/plotting.py ADDED Viewed

@@ -0,0 +1,214 @@
+from itertools import product
+from typing import Dict, List, Optional
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from matplotlib.axes import Axes
+from matplotlib.patches import FancyBboxPatch
+from .utils import inspect_df, plot_with_pdims_strategy
+# Silence NumPy divide-by-zero warnings; note this changes NumPy's global error state at import time.
+np.seterr(divide='ignore')
+# Default font size applied to matplotlib's global rcParams at import time.
+plt.rcParams.update({'font.size': 10})
+def configure_axes(ax: Axes,
+                   x_values: List[int],
+                   y_values: List[float],
+                   xlabel: str,
+                   title: str,
+                   plotting_memory: bool = False,
+                   memory_units: str = 'bytes'):
+    """
+    Apply limits, scales, ticks, labels, guide lines and legend to one subplot.
+    Parameters
+    ----------
+    ax : Axes
+        The axes to configure.
+    x_values : List[int]
+        X coordinates of everything drawn on ``ax``; used for the x limits,
+        the x ticks and the dashed vertical guide lines.
+    y_values : List[float]
+        Y coordinates of everything drawn on ``ax``; the y limits are set to
+        90% of the minimum and 110% of the maximum.
+    xlabel : str
+        The label for the x-axis.
+    title : str
+        The title of the subplot.
+    plotting_memory : bool, optional
+        When False (default) the y-axis is labelled as time, uses a symlog
+        scale and gets one tick per power of ten; when True it is labelled
+        as memory and keeps the default scale and ticks.
+    memory_units : str, optional
+        Unit shown in the y label when ``plotting_memory`` is True.
+    """
+    def to_log2(value):
+        return np.log2(value)
+    def from_log2(value):
+        return 2**value
+    if plotting_memory:
+        ylabel = f'Memory ({memory_units})'
+    else:
+        ylabel = 'Time (milliseconds)'
+    ax.set_xlim([min(x_values), max(x_values)])
+    # Leave a 10% margin below the smallest and above the largest value
+    lower_bound = min(y_values) * 0.9
+    upper_bound = max(y_values) * 1.1
+    ax.set_title(title)
+    ax.set_ylim([lower_bound, upper_bound])
+    # Base-2 spacing on the x-axis
+    ax.set_xscale('function', functions=(to_log2, from_log2))
+    if not plotting_memory:
+        ax.set_yscale('symlog')
+        # One tick per decade enclosing the [lower_bound, upper_bound] range
+        first_exponent = int(np.floor(np.log10(lower_bound)))
+        last_exponent = int(np.ceil(np.log10(upper_bound)))
+        decade_ticks = []
+        for exponent in range(first_exponent, 1 + last_exponent):
+            decade_ticks.append(10**exponent)
+        ax.set_yticks(decade_ticks)
+    ax.set_xticks(x_values)
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    for position in x_values:
+        ax.axvline(x=position, color='gray', linestyle='--', alpha=0.5)
+    ax.legend(loc='lower center',
+              bbox_to_anchor=(0.5, 0.05),
+              ncol=4,
+              prop={'size': 14})
+def plot_scaling(dataframes: Dict[str, pd.DataFrame],
+                 fixed_sizes: List[int],
+                 size_column: str,
+                 fixed_column: str,
+                 xlabel: str,
+                 title: str,
+                 figure_size: tuple = (6, 4),
+                 output: Optional[str] = None,
+                 dark_bg: bool = False,
+                 print_decompositions: bool = False,
+                 backends: List[str] = ['NCCL'],
+                 precisions: List[str] = ['float32'],
+                 functions: List[str] | None = None,
+                 plot_columns: List[str] = ['mean_time'],
+                 memory_units: str = 'bytes',
+                 label_text: str = 'plot',
+                 pdims_strategy: str = 'plot_fastest'):
+    """
+    General scaling plot function based on the number of GPUs or data size.
+    One subplot is drawn per entry of ``fixed_sizes``; inside each subplot
+    every method / backend / precision / function / column combination that
+    has data is drawn through ``plot_with_pdims_strategy``.
+    Parameters
+    ----------
+    dataframes : Dict[str, pd.DataFrame]
+        Dictionary of method names to dataframes.
+    fixed_sizes : List[int]
+        List of fixed sizes (data or GPUs) to plot, one subplot each.
+    size_column : str
+        Column name for the size axis ('x' for weak scaling, 'gpus' for strong scaling).
+    fixed_column : str
+        Column name for the fixed axis ('gpus' for weak scaling, 'x' for strong scaling).
+    xlabel : str
+        Label for the x-axis.
+    title : str
+        Prefix of each subplot title; the fixed size is appended to it.
+    figure_size : tuple, optional
+        Size of the figure, by default (6, 4).
+    output : Optional[str], optional
+        Output file to save the plot, by default None (the plot is shown).
+    dark_bg : bool, optional
+        Whether to use dark background for the plot, by default False.
+    print_decompositions : bool, optional
+        Whether to print decompositions on the plot, by default False.
+    backends : List[str], optional
+        List of backends to include, by default ['NCCL'].
+    precisions : List[str], optional
+        List of precisions to include, by default ['float32'].
+    functions : List[str] | None, optional
+        Function names to include; None means every function present in
+        the data of the method being plotted.
+    plot_columns : List[str], optional
+        Columns to plot, by default ['mean_time']. The axes are styled for
+        memory when the first column name does not contain 'time'.
+    memory_units : str, optional
+        Unit shown on the y-axis for memory plots, by default 'bytes'.
+    label_text : str, optional
+        Label template forwarded to ``plot_with_pdims_strategy``.
+    pdims_strategy : str, optional
+        Strategy for plotting pdims ('plot_all' or 'plot_fastest'), by default 'plot_fastest'.
+        NOTE(review): the CLI forwards a list of strategy names here — confirm
+        what ``plot_with_pdims_strategy`` expects.
+    Raises
+    ------
+    ValueError
+        If ``fixed_sizes`` is empty.
+    """
+    if dark_bg:
+        plt.style.use('dark_background')
+    num_subplots = len(fixed_sizes)
+    if num_subplots == 0:
+        # The grid computation below would otherwise divide by zero
+        raise ValueError('fixed_sizes must contain at least one value to plot')
+    num_rows = int(np.ceil(np.sqrt(num_subplots)))
+    num_cols = int(np.ceil(num_subplots / num_rows))
+    fig, axs = plt.subplots(num_rows, num_cols, figsize=figure_size)
+    if num_subplots > 1:
+        axs = axs.flatten()
+    else:
+        axs = [axs]
+    for i, fixed_size in enumerate(fixed_sizes):
+        ax: Axes = axs[i]
+        # Collected over ALL methods so the axis limits cover every curve
+        x_values = []
+        y_values = []
+        for method, df in dataframes.items():
+            filtered_method_df = df[df[fixed_column] == int(fixed_size)]
+            if filtered_method_df.empty:
+                continue
+            filtered_method_df = filtered_method_df.sort_values(
+                by=[size_column])
+            # Resolve per method without overwriting the `functions`
+            # argument, so auto-detection is not frozen to the first method
+            method_functions = pd.unique(
+                filtered_method_df['function']
+            ) if functions is None else functions
+            combinations = product(backends, precisions, method_functions,
+                                   plot_columns)
+            for backend, precision, function, plot_column in combinations:
+                filtered_params_df = filtered_method_df[
+                    (filtered_method_df['backend'] == backend)
+                    & (filtered_method_df['precision'] == precision) &
+                    (filtered_method_df['function'] == function)]
+                if filtered_params_df.empty:
+                    continue
+                x_vals, y_vals = plot_with_pdims_strategy(
+                    ax, filtered_params_df, method, pdims_strategy,
+                    print_decompositions, size_column, plot_column, label_text)
+                x_values.extend(x_vals)
+                y_values.extend(y_vals)
+        subplot_title = f"{title} {fixed_size}"
+        if len(x_values) == 0:
+            # Nothing was drawn: limits and ticks cannot be derived from
+            # empty data, so only title the (empty) subplot.
+            ax.set_title(subplot_title)
+            continue
+        plotting_memory = 'time' not in plot_columns[0].lower()
+        # Keyword arguments keep the x label and the title from being swapped
+        configure_axes(ax,
+                       x_values,
+                       y_values,
+                       xlabel=xlabel,
+                       title=subplot_title,
+                       plotting_memory=plotting_memory,
+                       memory_units=memory_units)
+    # Remove the unused cells of the subplot grid
+    for i in range(num_subplots, num_rows * num_cols):
+        fig.delaxes(axs[i])
+    fig.tight_layout()
+    rect = FancyBboxPatch((0.1, 0.1),
+                          0.8,
+                          0.8,
+                          boxstyle="round,pad=0.02",
+                          ec="black",
+                          fc="none")
+    fig.patches.append(rect)
+    if output is None:
+        plt.show()
+    else:
+        plt.savefig(output, bbox_inches='tight', transparent=False)
+def plot_strong_scaling(dataframes: Dict[str, pd.DataFrame],
+                        fixed_data_size: List[int],
+                        figure_size: tuple = (6, 4),
+                        output: Optional[str] = None,
+                        dark_bg: bool = False,
+                        print_decompositions: bool = False,
+                        backends: List[str] = ['NCCL'],
+                        precisions: List[str] = ['float32'],
+                        functions: List[str] | None = None,
+                        plot_columns: List[str] = ['mean_time'],
+                        memory_units: str = 'bytes',
+                        label_text: str = 'plot',
+                        pdims_strategy: str = 'plot_fastest'):
+    """
+    Plot strong scaling: one subplot per fixed data size, GPU count on the x-axis.
+    Delegates to ``plot_scaling`` with the 'gpus' column as the varying axis
+    and the 'x' column as the fixed one; every other argument is forwarded
+    unchanged.
+    """
+    plot_scaling(dataframes=dataframes,
+                 fixed_sizes=fixed_data_size,
+                 size_column='gpus',
+                 fixed_column='x',
+                 xlabel='Number of GPUs',
+                 title='Data size',
+                 figure_size=figure_size,
+                 output=output,
+                 dark_bg=dark_bg,
+                 print_decompositions=print_decompositions,
+                 backends=backends,
+                 precisions=precisions,
+                 functions=functions,
+                 plot_columns=plot_columns,
+                 memory_units=memory_units,
+                 label_text=label_text,
+                 pdims_strategy=pdims_strategy)
+def plot_weak_scaling(dataframes: Dict[str, pd.DataFrame],
+                      fixed_gpu_size: List[int],
+                      figure_size: tuple = (6, 4),
+                      output: Optional[str] = None,
+                      dark_bg: bool = False,
+                      print_decompositions: bool = False,
+                      backends: List[str] = ['NCCL'],
+                      precisions: List[str] = ['float32'],
+                      functions: List[str] | None = None,
+                      plot_columns: List[str] = ['mean_time'],
+                      memory_units: str = 'bytes',
+                      label_text: str = 'plot',
+                      pdims_strategy: str = 'plot_fastest'):
+    """
+    Plot weak scaling: one subplot per fixed GPU count, data size on the x-axis.
+    Delegates to ``plot_scaling`` with the 'x' column as the varying axis
+    and the 'gpus' column as the fixed one; every other argument is
+    forwarded unchanged.
+    """
+    plot_scaling(dataframes=dataframes,
+                 fixed_sizes=fixed_gpu_size,
+                 size_column='x',
+                 fixed_column='gpus',
+                 xlabel='Data size',
+                 title='Number of GPUs',
+                 figure_size=figure_size,
+                 output=output,
+                 dark_bg=dark_bg,
+                 print_decompositions=print_decompositions,
+                 backends=backends,
+                 precisions=precisions,
+                 functions=functions,
+                 plot_columns=plot_columns,
+                 memory_units=memory_units,
+                 label_text=label_text,
+                 pdims_strategy=pdims_strategy)

jax_hpc_profiler/timer.py ADDED Viewed

@@ -0,0 +1,185 @@
+import os
+import time
+from functools import partial
+from typing import Any, Callable, List
+import jax
+import jax.numpy as jnp
+import numpy as np
+from jax import make_jaxpr
+from jax.experimental.shard_map import shard_map
+from jax.sharding import Mesh, NamedSharding
+from jax.sharding import PartitionSpec as P
+from tabulate import tabulate
+class Timer:
+    def __init__(self, save_jaxpr=False):
+        """
+        Create an empty timer.
+        Parameters
+        ----------
+        save_jaxpr : bool, optional
+            Stored on the instance; when True ``chrono_jit`` also records the
+            pretty-printed jaxpr of the timed function. By default False.
+        """
+        self.save_jaxpr = save_jaxpr
+        # Filled by chrono_jit: duration of the first (jitted) call, in ms
+        self.jit_time = None
+        # One entry per chrono_fun call, in ms
+        self.times = []
+        # Filled by chrono_jit: textual program dumps and cost/memory figures
+        self.compiled_code = {}
+        self.profiling_data = {}
+    def chrono_jit(self, fun: Callable, *args, ndarray_arg=None) -> np.ndarray:
+        """
+        Time the first jitted call of ``fun`` and collect compilation statistics.
+        The wall-clock time of ``jax.jit(fun)(*args)``, including the wait for
+        the result to be ready, is stored in ``self.jit_time`` in
+        milliseconds. The function is then lowered and compiled explicitly to
+        record the lowered and compiled program text in ``self.compiled_code``
+        and the FLOP estimate and memory sizes in ``self.profiling_data``.
+        Parameters
+        ----------
+        fun : Callable
+            Function to jit and time.
+        *args
+            Positional arguments passed to ``fun``.
+        ndarray_arg : optional
+            When given, ``out[ndarray_arg]`` is the array waited on instead of
+            ``out`` itself (for functions whose output is indexable).
+        Returns
+        -------
+        The output of the jitted call.
+        """
+        start = time.perf_counter()
+        # A single jitted wrapper is reused for the timed call and for lowering
+        jitted = jax.jit(fun)
+        out = jitted(*args)
+        if ndarray_arg is None:
+            out.block_until_ready()
+        else:
+            out[ndarray_arg].block_until_ready()
+        end = time.perf_counter()
+        self.jit_time = (end - start) * 1e3
+        if self.save_jaxpr:
+            jaxpr = make_jaxpr(fun)(*args)
+            self.compiled_code["JAXPR"] = jaxpr.pretty_print()
+        lowered = jitted.lower(*args)
+        compiled = lowered.compile()
+        memory_analysis = compiled.memory_analysis()
+        self.compiled_code["LOWERED"] = lowered.as_text()
+        self.compiled_code["COMPILED"] = compiled.as_text()
+        # Depending on the JAX version, cost_analysis() returns either a
+        # list holding one dict or the dict itself; accept both shapes.
+        cost = compiled.cost_analysis()
+        if isinstance(cost, (list, tuple)):
+            cost = cost[0]
+        self.profiling_data["FLOPS"] = cost['flops']
+        self.profiling_data[
+            "generated_code"] = memory_analysis.generated_code_size_in_bytes
+        self.profiling_data[
+            "argument_size"] = memory_analysis.argument_size_in_bytes
+        self.profiling_data[
+            "output_size"] = memory_analysis.output_size_in_bytes
+        self.profiling_data["temp_size"] = memory_analysis.temp_size_in_bytes
+        return out
+    def chrono_fun(self, fun: Callable, *args, ndarray_arg=None) -> np.ndarray:
+        """
+        Run ``fun(*args)`` once and append its duration to ``self.times``.
+        The measured span covers the call and the wait for the result to be
+        ready; it is recorded in milliseconds.
+        Parameters
+        ----------
+        fun : Callable
+            Function to time.
+        *args
+            Positional arguments passed to ``fun``.
+        ndarray_arg : optional
+            When given, ``out[ndarray_arg]`` is the array waited on instead of
+            the output itself.
+        Returns
+        -------
+        The output of ``fun``.
+        """
+        t_begin = time.perf_counter()
+        result = fun(*args)
+        # Wait on the output itself, or on the selected element of it
+        awaited = result if ndarray_arg is None else result[ndarray_arg]
+        awaited.block_until_ready()
+        t_end = time.perf_counter()
+        elapsed_ms = (t_end - t_begin) * 1e3
+        self.times.append(elapsed_ms)
+        return result
+    def _get_mean_times(self, times_array: jnp.ndarray,
+                        sharding: NamedSharding):
+        """
+        Average ``times_array`` over the mesh axes named in ``sharding``.
+        The array is passed through a ``shard_map`` over ``sharding.mesh``
+        that applies ``jax.lax.pmean`` once per non-None entry of
+        ``sharding.spec``, and the result is returned after it is ready.
+        Parameters
+        ----------
+        times_array : jnp.ndarray
+            Timings to average.
+        sharding : NamedSharding
+            Provides the mesh and the partition spec used for the reduction.
+        Returns
+        -------
+        The averaged array produced by the shard-mapped function.
+        """
+        mesh = sharding.mesh
+        specs = sharding.spec
+        # Keep only the spec entries that actually name something
+        # NOTE(review): entries are used directly as pmean axis names — presumably
+        # each one is a single mesh axis name; confirm for tuple-valued entries.
+        valid_letters = [letter for letter in specs if letter is not None]
+        assert len(valid_letters
+                   ) > 0, "Sharding was provided but with no partition specs"
+        # Input is split according to `specs`; output spec is the empty P()
+        @partial(shard_map,
+                 mesh=mesh,
+                 in_specs=specs,
+                 out_specs=P(),
+                 check_rep=False)
+        def get_mean_times(times):
+            # Reduce over the first named axis, then over each remaining one
+            mean = jax.lax.pmean(times, axis_name=valid_letters[0])
+            for axis_name in valid_letters[1:]:
+                mean = jax.lax.pmean(mean, axis_name=axis_name)
+            return mean
+        times_array = get_mean_times(times_array)
+        times_array.block_until_ready()
+        return times_array
+    def report(self,
+               csv_filename: str,
+               function: str,
+               precision: str,
+               x: int,
+               y: int,
+               z: int,
+               px: int,
+               py: int,
+               backend: str,
+               nodes: int,
+               sharding: NamedSharding | None = None,
+               md_filename: str | None = None,
+               extra_info: dict = {}):
+        times_array = jnp.array(self.times)
+        if md_filename is None:
+            dirname, filename = os.path.dirname(csv_filename), os.path.splitext(os.path.basename(csv_filename))[0]
+            report_folder = filename if dirname == "" else f"{dirname}/{filename}"
+            print(f"report_folder: {report_folder} csv_filename: {csv_filename}")
+            os.makedirs(report_folder, exist_ok=True)
+            md_filename = f"{report_folder}/{x}_{px}_{py}_{backend}_{precision}_{function}.md"
+        if sharding is not None:
+            times_array = self._get_mean_times(times_array, sharding)
+        times_array = np.array(times_array)
+        min_time = np.min(times_array)
+        max_time = np.max(times_array)
+        mean_time = np.mean(times_array)
+        std_time = np.std(times_array)
+        last_time = times_array[-1]
+        flops = self.profiling_data["FLOPS"]
+        generated_code = self.profiling_data["generated_code"]
+        argument_size = self.profiling_data["argument_size"]
+        output_size = self.profiling_data["output_size"]
+        temp_size = self.profiling_data["temp_size"]
+        csv_line = (
+            f"{function},{precision},{x},{y},{z},{px},{py},{backend},{nodes},"
+            f"{self.jit_time:.4f},{min_time:.4f},{max_time:.4f},{mean_time:.4f},{std_time:.4f},{last_time:.4f},"
+            f"{generated_code},{argument_size},{output_size},{temp_size},{flops}\n"
+        )
+        with open(csv_filename, 'a') as f:
+            f.write(csv_line)
+        param_dict = {
+            "Function": function,
+            "Precision": precision,
+            "X": x,
+            "Y": y,
+            "Z": z,
+            "PX": px,
+            "PY": py,
+            "Backend": backend,
+            "Nodes": nodes,
+        }
+        param_dict.update(extra_info)
+        profiling_result = {
+            "JIT Time": self.jit_time,
+            "Min Time": min_time,
+            "Max Time": max_time,
+            "Mean Time": mean_time,
+            "Std Time": std_time,
+            "Last Time": last_time,
+            "Generated Code": generated_code,
+            "Argument Size": argument_size,
+            "Output Size": output_size,
+            "Temporary Size": temp_size,
+            "FLOPS": self.profiling_data["FLOPS"]
+        }
+        with open(md_filename, 'w') as f:
+            f.write(f"# Reporting for {function}\n")
+            f.write(f"## Parameters\n")
+            f.write(tabulate(param_dict.items() , headers=["Parameter" , "Value"] , tablefmt='github'))
+            f.write("\n---\n")
+            f.write(f"## Profiling Data\n")
+            f.write(tabulate(profiling_result.items() , headers=["Parameter" , "Value"] , tablefmt='github'))
+            f.write("\n---\n")
+            f.write(f"## Compiled Code\n")
+            f.write(f"```hlo\n")
+            f.write(self.compiled_code["COMPILED"])
+            f.write(f"\n```\n")
+            f.write("\n---\n")
+            f.write(f"## Lowered Code\n")
+            f.write(f"```hlo\n")
+            f.write(self.compiled_code["LOWERED"])
+            f.write(f"\n```\n")
+            f.write("\n---\n")
+            if self.save_jaxpr:
+                f.write(f"## JAXPR\n")
+                f.write(f"```haskel\n")
+                f.write(self.compiled_code["JAXPR"])
+                f.write(f"\n```\n")