jax-hpc-profiler 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jax_hpc_profiler/__init__.py +9 -0
- jax_hpc_profiler/create_argparse.py +197 -0
- jax_hpc_profiler/main.py +65 -0
- jax_hpc_profiler/plotting.py +313 -0
- jax_hpc_profiler/timer.py +274 -0
- jax_hpc_profiler/utils.py +411 -0
- jax_hpc_profiler-0.2.10.dist-info/LICENSE +674 -0
- jax_hpc_profiler-0.2.10.dist-info/METADATA +902 -0
- jax_hpc_profiler-0.2.10.dist-info/RECORD +12 -0
- jax_hpc_profiler-0.2.10.dist-info/WHEEL +5 -0
- jax_hpc_profiler-0.2.10.dist-info/entry_points.txt +2 -0
- jax_hpc_profiler-0.2.10.dist-info/top_level.txt +1 -0
jax_hpc_profiler/timer.py
@@ -0,0 +1,274 @@
+import os
+import time
+from functools import partial
+from typing import Any, Callable, List, Optional, Tuple
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+from jax import make_jaxpr
+from jax.experimental import mesh_utils
+from jax.experimental.shard_map import shard_map
+from jax.sharding import Mesh, NamedSharding
+from jax.sharding import PartitionSpec as P
+from tabulate import tabulate
+
+
+class Timer:
+
+    def __init__(self, save_jaxpr=False, jax_fn=True, devices=None):
+        self.jit_time = 0.0
+        self.times = []
+        self.profiling_data = {}
+        self.compiled_code = {}
+        self.save_jaxpr = save_jaxpr
+        self.jax_fn = jax_fn
+        self.devices = devices
+
+    def _normalize_memory_units(self, memory_analysis) -> str:
+
+        if not self.jax_fn:
+            return memory_analysis
+
+        sizes_str = ["B", "KB", "MB", "GB", "TB", "PB"]
+        factors = [1, 1024, 1024**2, 1024**3, 1024**4, 1024**5]
+        # The factors are powers of 1024, so index them with log2 // 10
+        # (log10 // 3 would index powers of 1000 against binary factors).
+        factor = 0 if memory_analysis == 0 else int(
+            np.log2(memory_analysis) // 10)
+
+        return f"{memory_analysis / factors[factor]:.2f} {sizes_str[factor]}"
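A quick check of the unit helper above (a minimal sketch; it assumes `Timer` is re-exported by the package `__init__`):

```python
from jax_hpc_profiler import Timer

t = Timer()
print(t._normalize_memory_units(0))            # 0.00 B
print(t._normalize_memory_units(3 * 1024**2))  # 3.00 MB
```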
+
+    def _read_memory_analysis(self, memory_analysis: Any) -> Tuple:
+        if memory_analysis is None:
+            return None, None, None, None
+        return (
+            memory_analysis.generated_code_size_in_bytes,
+            memory_analysis.argument_size_in_bytes,
+            memory_analysis.output_size_in_bytes,
+            memory_analysis.temp_size_in_bytes,
+        )
+
+    def chrono_jit(self, fun: Callable, *args, ndarray_arg=None) -> np.ndarray:
+        # Time the first call, which includes JIT compilation.
+        start = time.perf_counter()
+        out = fun(*args)
+        if self.jax_fn:
+            # JAX dispatch is asynchronous: block so the wall time is real.
+            if ndarray_arg is None:
+                out.block_until_ready()
+            else:
+                out[ndarray_arg].block_until_ready()
+        end = time.perf_counter()
+        self.jit_time = (end - start) * 1e3  # milliseconds
+
+        if self.save_jaxpr:
+            jaxpr = make_jaxpr(fun)(*args)
+            self.compiled_code["JAXPR"] = jaxpr.pretty_print()
+
+        if self.jax_fn:
+            lowered = jax.jit(fun).lower(*args)
+            compiled = lowered.compile()
+            memory_analysis = self._read_memory_analysis(
+                compiled.memory_analysis())
+
+            self.compiled_code["LOWERED"] = lowered.as_text()
+            self.compiled_code["COMPILED"] = compiled.as_text()
+            self.profiling_data["generated_code"] = memory_analysis[0]
+            self.profiling_data["argument_size"] = memory_analysis[1]
+            self.profiling_data["output_size"] = memory_analysis[2]
+            self.profiling_data["temp_size"] = memory_analysis[3]
+
+        return out
+
+    def chrono_fun(self, fun: Callable, *args, ndarray_arg=None) -> np.ndarray:
+        start = time.perf_counter()
+        out = fun(*args)
+        if self.jax_fn:
+            if ndarray_arg is None:
+                out.block_until_ready()
+            else:
+                out[ndarray_arg].block_until_ready()
+        end = time.perf_counter()
+        self.times.append((end - start) * 1e3)
+        return out
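A minimal usage sketch for the two timing entry points (the FFT workload, shapes, and file name are illustrative; `chrono_jit` times the first call, which includes compilation, while `chrono_fun` records steady-state iterations):

```python
import jax
import jax.numpy as jnp

from jax_hpc_profiler import Timer


@jax.jit
def f(a):
    return jnp.fft.fftn(a).real


timer = Timer(save_jaxpr=True)
a = jnp.ones((256, 256, 256), dtype=jnp.float32)

out = timer.chrono_jit(f, a)       # first call: sets timer.jit_time
for _ in range(10):
    out = timer.chrono_fun(f, a)   # appended to timer.times

# Appends one row to the CSV and writes a markdown report next to it.
timer.report("results.csv", function="FFT", x=256)
```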
+
+    def _get_mean_times(self) -> np.ndarray:
+        # Single-process runs need no cross-process reduction.
+        if jax.device_count() == 1 or jax.process_count() == 1:
+            return np.array(self.times)
+
+        if self.devices is None:
+            self.devices = jax.devices()
+
+        mesh = jax.make_mesh((len(self.devices), ), ("x", ),
+                             devices=self.devices)
+        sharding = NamedSharding(mesh, P("x"))
+
+        # Assemble a (num_devices, num_iterations) global array in which
+        # each process contributes its local timings as one row.
+        times_array = jnp.array(self.times)
+        global_shape = (jax.device_count(), times_array.shape[0])
+        global_times = jax.make_array_from_callback(
+            shape=global_shape,
+            sharding=sharding,
+            data_callback=lambda _: jnp.expand_dims(times_array, axis=0),
+        )
+
+        @partial(shard_map,
+                 mesh=mesh,
+                 in_specs=P("x"),
+                 out_specs=P(),
+                 check_rep=False)
+        def get_mean_times(times):
+            return jax.lax.pmean(times, axis_name="x")
+
+        times_array = get_mean_times(global_times)
+        times_array.block_until_ready()
+        return np.array(times_array.addressable_data(0)[0])
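The averaging above is the shard_map + pmean pattern. As a self-contained illustration of the same reduction (not part of the package), it can be run on one host by faking several CPU devices; the XLA flag must be set before jax is imported:

```python
import os

os.environ["XLA_FLAGS"] = "--xla_force_host_platform_device_count=4"

from functools import partial

import jax
import jax.numpy as jnp
from jax.experimental.shard_map import shard_map
from jax.sharding import PartitionSpec as P

mesh = jax.make_mesh((4,), ("x",))


@partial(shard_map, mesh=mesh, in_specs=P("x"), out_specs=P(), check_rep=False)
def mean_over_devices(rows):
    # Each shard holds one row; pmean averages across the "x" axis.
    return jax.lax.pmean(rows, axis_name="x")


rows = jnp.arange(12.0).reshape(4, 3)  # one row of timings per device
print(mean_over_devices(rows))         # [[4.5 5.5 6.5]]
```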
+
+    def report(
+        self,
+        csv_filename: str,
+        function: str,
+        x: int,
+        y: int | None = None,
+        z: int | None = None,
+        precision: str = "float32",
+        px: int = 1,
+        py: int = 1,
+        backend: str = "NCCL",
+        nodes: int = 1,
+        md_filename: str | None = None,
+        npz_data: Optional[dict] = None,
+        extra_info: dict = {},
+    ):
+        if self.jit_time == 0.0 and len(self.times) == 0:
+            print(f"No profiling data to report for {function}")
+            return
+
+        if md_filename is None:
+            dirname, filename = (
+                os.path.dirname(csv_filename),
+                os.path.splitext(os.path.basename(csv_filename))[0],
+            )
+            report_folder = filename if dirname == "" else f"{dirname}/{filename}"
+            os.makedirs(report_folder, exist_ok=True)
+            md_filename = (
+                f"{report_folder}/{x}_{px}_{py}_{backend}_{precision}_{function}.md"
+            )
+
+        if npz_data is not None:
+            dirname, filename = (
+                os.path.dirname(csv_filename),
+                os.path.splitext(os.path.basename(csv_filename))[0],
+            )
+            report_folder = filename if dirname == "" else f"{dirname}/{filename}"
+            os.makedirs(report_folder, exist_ok=True)
+            npz_filename = (
+                f"{report_folder}/{x}_{px}_{py}_{backend}_{precision}_{function}.npz"
+            )
+            np.savez(npz_filename, **npz_data)
+
+        y = x if y is None else y
+        z = x if z is None else z
+
+        times_array = self._get_mean_times()
+        if jax.process_index() == 0:
+
+            min_time = np.min(times_array)
+            max_time = np.max(times_array)
+            mean_time = np.mean(times_array)
+            std_time = np.std(times_array)
+            last_time = times_array[-1]
+
+            if self.jax_fn:
+                generated_code = self.profiling_data["generated_code"]
+                argument_size = self.profiling_data["argument_size"]
+                output_size = self.profiling_data["output_size"]
+                temp_size = self.profiling_data["temp_size"]
+            else:
+                generated_code = "N/A"
+                argument_size = "N/A"
+                output_size = "N/A"
+                temp_size = "N/A"
+
+            csv_line = (
+                f"{function},{precision},{x},{y},{z},{px},{py},{backend},{nodes},"
+                f"{self.jit_time:.4f},{min_time:.4f},{max_time:.4f},{mean_time:.4f},{std_time:.4f},{last_time:.4f},"
+                f"{generated_code},{argument_size},{output_size},{temp_size}\n"
+            )
+
+            with open(csv_filename, "a") as f:
+                f.write(csv_line)
+
+            param_dict = {
+                "Function": function,
+                "Precision": precision,
+                "X": x,
+                "Y": y,
+                "Z": z,
+                "PX": px,
+                "PY": py,
+                "Backend": backend,
+                "Nodes": nodes,
+            }
+            param_dict.update(extra_info)
+            profiling_result = {
+                "JIT Time": self.jit_time,
+                "Min Time": min_time,
+                "Max Time": max_time,
+                "Mean Time": mean_time,
+                "Std Time": std_time,
+                "Last Time": last_time,
+                "Generated Code": self._normalize_memory_units(generated_code),
+                "Argument Size": self._normalize_memory_units(argument_size),
+                "Output Size": self._normalize_memory_units(output_size),
+                "Temporary Size": self._normalize_memory_units(temp_size),
+            }
+            iteration_runs = {}
+            for i in range(len(times_array)):
+                iteration_runs[f"Run {i}"] = times_array[i]
+
+            with open(md_filename, "w") as f:
+                f.write(f"# Reporting for {function}\n")
+                f.write("## Parameters\n")
+                f.write(
+                    tabulate(
+                        param_dict.items(),
+                        headers=["Parameter", "Value"],
+                        tablefmt="github",
+                    ))
+                f.write("\n---\n")
+                f.write("## Profiling Data\n")
+                f.write(
+                    tabulate(
+                        profiling_result.items(),
+                        headers=["Parameter", "Value"],
+                        tablefmt="github",
+                    ))
+                f.write("\n---\n")
+                f.write("## Iteration Runs\n")
+                f.write(
+                    tabulate(
+                        iteration_runs.items(),
+                        headers=["Iteration", "Time"],
+                        tablefmt="github",
+                    ))
+                if self.jax_fn:
+                    f.write("\n---\n")
+                    f.write("## Compiled Code\n")
+                    f.write("```hlo\n")
+                    f.write(self.compiled_code["COMPILED"])
+                    f.write("\n```\n")
+                    f.write("\n---\n")
+                    f.write("## Lowered Code\n")
+                    f.write("```hlo\n")
+                    f.write(self.compiled_code["LOWERED"])
+                    f.write("\n```\n")
+                    f.write("\n---\n")
+                if self.save_jaxpr:
+                    f.write("## JAXPR\n")
+                    f.write("```haskell\n")
+                    f.write(self.compiled_code["JAXPR"])
+                    f.write("\n```\n")
+
+        # Reset the timer
+        self.jit_time = 0.0
+        self.times = []
+        self.profiling_data = {}
+        self.compiled_code = {}
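For downstream analysis, the row appended by `report` can be read back with the column names used in `utils.py` below. Note that `concatenate_csvs` and `clean_up_csv` additionally expect a trailing `flops` column that `report` itself does not write:

```python
import pandas as pd

# Column order as written by Timer.report (no header row).
columns = ["function", "precision", "x", "y", "z", "px", "py", "backend",
           "nodes", "jit_time", "min_time", "max_time", "mean_time",
           "std_div", "last_time", "generated_code", "argument_size",
           "output_size", "temp_size"]

df = pd.read_csv("results.csv", header=None, names=columns, index_col=False)
print(df[["function", "x", "mean_time"]])
```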
jax_hpc_profiler/utils.py
@@ -0,0 +1,411 @@
+import os
+from typing import Dict, List, Optional, Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from matplotlib.axes import Axes
+
+
+def inspect_data(dataframes: Dict[str, pd.DataFrame]):
+    """
+    Inspect the dataframes.
+
+    Parameters
+    ----------
+    dataframes : Dict[str, pd.DataFrame]
+        Dictionary of method names to dataframes.
+    """
+    print("=" * 80)
+    print("Inspecting dataframes...")
+    print("=" * 80)
+    for method, df in dataframes.items():
+        print(f"Method: {method}")
+        inspect_df(df)
+        print("=" * 80)
+
+
+def inspect_df(df: pd.DataFrame):
+    """
+    Inspect the dataframe.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The dataframe to inspect.
+    """
+    print(df.to_markdown())
+    print("-" * 80)
+
+
+params_dict = {
+    "%pn%": "%plot_name%",
+    "%m%": "%method_name%",
+    "%n%": "%node%",
+    "%b%": "%backend%",
+    "%f%": "%function%",
+    "%cn%": "%column_name%",
+    "%pr%": "%precision%",
+    "%p%": "%decomposition%",
+    "%d%": "%data_size%",
+    "%g%": "%nb_gpu%"
+}
+
+
+def expand_label(label_template: str, params: dict) -> str:
+    """
+    Expand the label template with the provided parameters.
+
+    Parameters
+    ----------
+    label_template : str
+        The label template with placeholders.
+    params : dict
+        The dictionary with actual values to replace placeholders.
+
+    Returns
+    -------
+    str
+        The expanded label.
+    """
+    # First expand the short aliases (e.g. %m%) to their long form
+    # (%method_name%), then substitute the actual values.
+    for key, value in params_dict.items():
+        label_template = label_template.replace(key, value)
+
+    for key, value in params.items():
+        label_template = label_template.replace(f"%{key}%", str(value))
+    return label_template
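For example, the two passes first expand the short aliases to their long form and then substitute the caller's values:

```python
label = expand_label("%f% (%pr%) on %g% GPUs, %p%",
                     {"function": "FFT", "precision": "float32",
                      "nb_gpu": 8, "decomposition": "2x4"})
print(label)  # FFT (float32) on 8 GPUs, 2x4
```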
+
+
+def plot_with_pdims_strategy(ax: Axes, df: pd.DataFrame, method: str,
+                             pdims_strategy: List[str],
+                             print_decompositions: bool, x_col: str,
+                             y_col: str, label_template: str):
+    """
+    Plot the data based on the pdims strategy.
+
+    Parameters
+    ----------
+    ax : Axes
+        The axes to plot on.
+    df : pd.DataFrame
+        The dataframe to plot.
+    method : str
+        The method name.
+    pdims_strategy : List[str]
+        Strategy for plotting pdims.
+    print_decompositions : bool
+        Whether to print decompositions on the plot.
+    x_col : str
+        The column name for the x-axis values.
+    y_col : str
+        The column name for the y-axis values.
+    label_template : str
+        Template for plot labels with placeholders.
+
+    Returns
+    -------
+    Tuple
+        The x and y values that were plotted.
+    """
+    label_params = {
+        "plot_name": y_col,
+        "method_name": method,
+        "backend": df['backend'].values[0],
+        "node": df['nodes'].values[0],
+        "precision": df['precision'].values[0],
+        "function": df['function'].values[0],
+    }
+
+    if 'plot_fastest' in pdims_strategy:
+        df_decomp = df.groupby([x_col])
+
+        # Sort each group and keep the fastest decomposition
+        sorted_dfs = []
+        for _, group in df_decomp:
+            group.sort_values(by=[y_col], inplace=True, ascending=True)
+            sorted_dfs.append(group.iloc[0])
+        sorted_df = pd.DataFrame(sorted_dfs)
+        # NOTE: this uses the px/py of the last group iterated above
+        label_params.update({
+            "decomposition":
+            f"{group['px'].values[0]}x{group['py'].values[0]}"
+        })
+        label = expand_label(label_template, label_params)
+        ax.plot(sorted_df[x_col].values,
+                sorted_df[y_col],
+                marker='o',
+                linestyle='-',
+                label=label)
+        # TODO(wassim) : this is not working very well
+        if print_decompositions:
+            for j, (px, py) in enumerate(zip(sorted_df['px'],
+                                             sorted_df['py'])):
+                ax.annotate(
+                    f"{px}x{py}",
+                    (sorted_df[x_col].values[j], sorted_df[y_col].values[j]),
+                    textcoords="offset points",
+                    xytext=(0, 10),
+                    ha='center',
+                    color='red' if j == 0 else 'white')
+        return sorted_df[x_col].values, sorted_df[y_col].values
+
+    elif any(strategy in pdims_strategy
+             for strategy in ['plot_all', 'slab_yz', 'slab_xy', 'pencils']):
+        df_decomp = df.groupby(['decomp'])
+        x_values = []
+        y_values = []
+        for _, group in df_decomp:
+            group.drop_duplicates(subset=[x_col, 'decomp'],
+                                  keep='last',
+                                  inplace=True)
+            group.sort_values(by=[x_col], inplace=True, ascending=False)
+            # filter decomp based on pdims_strategy
+            if 'plot_all' not in pdims_strategy and group['decomp'].values[
+                    0] not in pdims_strategy:
+                continue
+
+            label_params.update({"decomposition": group['decomp'].values[0]})
+            label = expand_label(label_template, label_params)
+            ax.plot(group[x_col].values,
+                    group[y_col],
+                    marker='o',
+                    linestyle='-',
+                    label=label)
+            x_values.extend(group[x_col].values)
+            y_values.extend(group[y_col].values)
+        return x_values, y_values
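A sketch of how this helper is typically driven (the file name is illustrative; `clean_up_csv` below adds the `gpus` and `decomp` columns the helper relies on):

```python
import matplotlib.pyplot as plt

from jax_hpc_profiler.utils import clean_up_csv, plot_with_pdims_strategy

dataframes, _, _ = clean_up_csv(["results.csv"], pdims_strategy=["plot_all"])

fig, ax = plt.subplots()
for method, df in dataframes.items():
    plot_with_pdims_strategy(ax, df, method,
                             pdims_strategy=["plot_all"],
                             print_decompositions=False,
                             x_col="gpus", y_col="mean_time",
                             label_template="%m% (%p%)")
ax.set_xlabel("GPUs")
ax.set_ylabel("mean time (ms)")
ax.legend()
fig.savefig("scaling.png")
```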
+
+
+def concatenate_csvs(root_dir: str, output_dir: str):
+    """
+    Concatenate CSV files and remove duplicates by GPU type.
+
+    Parameters
+    ----------
+    root_dir : str
+        Root directory containing CSV files.
+    output_dir : str
+        Output directory to save concatenated CSV files.
+    """
+    # Iterate over each GPU type directory
+    for gpu in os.listdir(root_dir):
+        gpu_dir = os.path.join(root_dir, gpu)
+
+        # Check if the GPU directory exists and is a directory
+        if not os.path.isdir(gpu_dir):
+            continue
+
+        # Dictionary to hold combined dataframes for each CSV file name
+        combined_dfs = {}
+
+        # List CSVs in the directory and its subdirectories
+        for root, dirs, files in os.walk(gpu_dir):
+            for file in files:
+                if file.endswith('.csv'):
+                    csv_file_path = os.path.join(root, file)
+                    print(f'Concatenating {csv_file_path}...')
+                    df = pd.read_csv(csv_file_path,
+                                     header=None,
+                                     names=[
+                                         "function", "precision", "x", "y",
+                                         "z", "px", "py", "backend", "nodes",
+                                         "jit_time", "min_time", "max_time",
+                                         "mean_time", "std_div", "last_time",
+                                         "generated_code", "argument_size",
+                                         "output_size", "temp_size", "flops"
+                                     ],
+                                     index_col=False)
+                    if file not in combined_dfs:
+                        combined_dfs[file] = df
+                    else:
+                        combined_dfs[file] = pd.concat(
+                            [combined_dfs[file], df], ignore_index=True)
+
+        # Remove duplicates based on specified columns and save
+        for file_name, combined_df in combined_dfs.items():
+            combined_df.drop_duplicates(subset=[
+                "function", "precision", "x", "y", "z", "px", "py", "backend",
+                "nodes"
+            ],
+                                        keep='last',
+                                        inplace=True)
+
+            gpu_output_dir = os.path.join(output_dir, gpu)
+            if not os.path.exists(gpu_output_dir):
+                print(f"Creating directory {gpu_output_dir}")
+                os.makedirs(gpu_output_dir)
+
+            output_file = os.path.join(gpu_output_dir, file_name)
+            print(f"Writing file to {output_file}...")
+            combined_df.to_csv(output_file, index=False)
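Given a results tree like `raw/<gpu-type>/<run>/*.csv` (names illustrative), merging is a single call:

```python
from jax_hpc_profiler.utils import concatenate_csvs

# raw/A100/run1/results.csv + raw/A100/run2/results.csv
#   -> merged/A100/results.csv, deduplicated with keep='last'
concatenate_csvs("raw", "merged")
```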
+
+
+def clean_up_csv(
+    csv_files: List[str],
+    precisions: Optional[List[str]] = None,
+    function_names: Optional[List[str]] = None,
+    gpus: Optional[List[int]] = None,
+    data_sizes: Optional[List[int]] = None,
+    pdims: Optional[List[str]] = None,
+    pdims_strategy: List[str] = ['plot_fastest'],
+    backends: Optional[List[str]] = None,
+    memory_units: str = 'KB',
+) -> Tuple[Dict[str, pd.DataFrame], List[int], List[int]]:
+    """
+    Clean up and aggregate data from CSV files.
+
+    Parameters
+    ----------
+    csv_files : List[str]
+        List of CSV files to process.
+    precisions : Optional[List[str]], optional
+        Precisions to filter by, by default None.
+    function_names : Optional[List[str]], optional
+        Function names to filter by, by default None.
+    gpus : Optional[List[int]], optional
+        List of GPU counts to filter by, by default None.
+    data_sizes : Optional[List[int]], optional
+        List of data sizes to filter by, by default None.
+    pdims : Optional[List[str]], optional
+        List of pdims to filter by, by default None.
+    pdims_strategy : List[str], optional
+        Strategy for plotting pdims, by default ['plot_fastest'].
+    backends : Optional[List[str]], optional
+        List of backends to filter by, by default None.
+    memory_units : str, optional
+        Unit to convert the memory columns to, by default 'KB'.
+
+    Returns
+    -------
+    Tuple[Dict[str, pd.DataFrame], List[int], List[int]]
+        Dictionary of method names to aggregated dataframes, plus the
+        available GPU counts and data sizes found in the files.
+    """
+    dataframes = {}
+    available_gpu_counts = set()
+    available_data_sizes = set()
+    for csv_file in csv_files:
+        file_name = os.path.splitext(os.path.basename(csv_file))[0]
+        ext = os.path.splitext(os.path.basename(csv_file))[1]
+        if ext != '.csv':
+            print(f"Ignoring {csv_file} as it is not a CSV file")
+            continue
+
+        df = pd.read_csv(csv_file,
+                         header=None,
+                         skiprows=1,
+                         names=[
+                             "function", "precision", "x", "y", "z", "px",
+                             "py", "backend", "nodes", "jit_time", "min_time",
+                             "max_time", "mean_time", "std_div", "last_time",
+                             "generated_code", "argument_size", "output_size",
+                             "temp_size", "flops"
+                         ],
+                         dtype={
+                             "function": str,
+                             "precision": str,
+                             "x": int,
+                             "y": int,
+                             "z": int,
+                             "px": int,
+                             "py": int,
+                             "backend": str,
+                             "nodes": int,
+                             "jit_time": float,
+                             "min_time": float,
+                             "max_time": float,
+                             "mean_time": float,
+                             "std_div": float,
+                             "last_time": float,
+                             "generated_code": float,
+                             "argument_size": float,
+                             "output_size": float,
+                             "temp_size": float,
+                             "flops": float
+                         },
+                         index_col=False)
+
+        # Filter precisions
+        if precisions:
+            df = df[df['precision'].isin(precisions)]
+        # Filter function names
+        if function_names:
+            df = df[df['function'].isin(function_names)]
+        # Filter backends
+        if backends:
+            df = df[df['backend'].isin(backends)]
+
+        # Filter data sizes
+        if data_sizes:
+            df = df[df['x'].isin(data_sizes)]
+
+        # Filter pdims
+        if pdims:
+            px_list, py_list = zip(*[map(int, p.split('x')) for p in pdims])
+            df = df[(df['px'].isin(px_list)) & (df['py'].isin(py_list))]
+        # Convert memory columns to the requested memory_units
+        match memory_units:
+            case 'KB':
+                factor = 1024
+            case 'MB':
+                factor = 1024**2
+            case 'GB':
+                factor = 1024**3
+            case 'TB':
+                factor = 1024**4
+            case _:
+                factor = 1
+
+        df['generated_code'] = df['generated_code'] / factor
+        df['argument_size'] = df['argument_size'] / factor
+        df['output_size'] = df['output_size'] / factor
+        df['temp_size'] = df['temp_size'] / factor
+        # In case the same test was run multiple times, keep the last one
+        df = df.drop_duplicates(subset=[
+            "function", "precision", "x", "y", "z", "px", "py", "backend",
+            "nodes"
+        ],
+                                keep='last')
+
+        df['gpus'] = df['px'] * df['py']
+
+        if gpus:
+            df = df[df['gpus'].isin(gpus)]
+
+        if any(strategy in pdims_strategy
+               for strategy in ['plot_all', 'slab_yz', 'slab_xy', 'pencils']):
+
+            def get_decomp_from_px_py(row):
+                if row['px'] > 1 and row['py'] == 1:
+                    return 'slab_yz'
+                elif row['px'] == 1 and row['py'] > 1:
+                    return 'slab_xy'
+                else:
+                    return 'pencils'
+
+            df['decomp'] = df.apply(get_decomp_from_px_py, axis=1)
+            df.drop(columns=['px', 'py'], inplace=True)
+            if 'plot_all' not in pdims_strategy:
+                df = df[df['decomp'].isin(pdims_strategy)]
+
+        # Check available GPU counts and data sizes in the dataset
+        available_gpu_counts.update(df['gpus'].unique())
+        available_data_sizes.update(df['x'].unique())
+
+        if dataframes.get(file_name) is None:
+            dataframes[file_name] = df
+        else:
+            dataframes[file_name] = pd.concat([dataframes[file_name], df])
+
+    print(f"requested GPUs: {gpus} available GPUs: {available_gpu_counts}")
+    print(
+        f"requested data sizes: {data_sizes} available data sizes: {available_data_sizes}"
+    )
+
+    available_gpu_counts = (available_gpu_counts if gpus is None else [
+        gpu for gpu in gpus if gpu in available_gpu_counts
+    ])
+    available_data_sizes = (available_data_sizes if data_sizes is None else [
+        data_size for data_size in data_sizes
+        if data_size in available_data_sizes
+    ])
+
+    return dataframes, available_gpu_counts, available_data_sizes
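Putting the utilities together, a typical flow loads the merged files, filters them, and inspects the result (paths and file names are illustrative):

```python
from jax_hpc_profiler.utils import clean_up_csv, inspect_data

# The basename of each CSV becomes the method key of the returned dict.
dataframes, gpu_counts, data_sizes = clean_up_csv(
    ["merged/A100/jaxdecomp.csv", "merged/A100/mpi4jax.csv"],
    precisions=["float32"],
    memory_units="MB",
)
inspect_data(dataframes)
print(f"GPU counts: {gpu_counts}, data sizes: {data_sizes}")
```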