PyPI - reproplot - Versions diffs - 0.0.1__tar.gz - Mend

reproplot 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

reproplot-0.0.1/LICENSE +21 -0
reproplot-0.0.1/PKG-INFO +33 -0
reproplot-0.0.1/README.md +1 -0
reproplot-0.0.1/pyproject.toml +25 -0
reproplot-0.0.1/src/reproplot_oscarvlld/__init__.py +0 -0
reproplot-0.0.1/src/reproplot_oscarvlld/plotting.py +191 -0
reproplot-0.0.1/src/reproplot_oscarvlld/reprod_exps.py +141 -0
reproplot-0.0.1/src/reproplot_oscarvlld/reprod_plots.py +248 -0
reproplot-0.0.1/src/reproplot_oscarvlld/reproduce.py +157 -0
reproplot-0.0.1/src/reproplot_oscarvlld/utils.py +235 -0

reproplot-0.0.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 Oscar Villemaud, Indy Lab, EPFL
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

reproplot-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,33 @@
+Metadata-Version: 2.1
+Name: reproplot
+Version: 0.0.1
+Summary: General package for experimenting and automatically plotting.
+Author-email: Oscar Villemaud <oscar.villemaud@epfl.ch>
+License: MIT License
+        Copyright (c) 2024 Oscar Villemaud, Indy Lab, EPFL
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+General package for experimenting and automatically plotting.

reproplot-0.0.1/README.md ADDED Viewed

	@@ -0,0 +1 @@
1	+ General package for experimenting and automatically plotting.

reproplot-0.0.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,25 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+license = {file = "LICENSE"}
+name = "reproplot"
+version = "0.0.1"
+authors = [
+  { name="Oscar Villemaud", email="oscar.villemaud@epfl.ch" },
+]
+description = "General package for experimenting and automatically plotting. "
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+]
+[tool.hatch.build.targets.wheel]
+# NOTE(review): the file list of this release places the modules under
+# src/reproplot_oscarvlld/, which the "reproplot/*.py" pattern below does not
+# appear to match, and no tests directory is listed -- confirm that the built
+# wheel actually ships the package code.
+include = [
+  "reproplot/*.py",
+  "/tests",
+]

reproplot-0.0.1/src/reproplot_oscarvlld/__init__.py ADDED Viewed

File without changes

reproplot-0.0.1/src/reproplot_oscarvlld/plotting.py ADDED Viewed

@@ -0,0 +1,191 @@
+###
+ # @file   plotting.py
+ # @author Oscar Villemaud <oscar.villemaud@epfl.ch>
+ #
+ # @section LICENSE
+ #
+ # Copyright © 2024-2026 École Polytechnique Fédérale de Lausanne (EPFL).
+ # All rights reserved.
+ #
+ # @section DESCRIPTION
+ #
+ # Plotting functions based on pyplot.
+###
+import numpy as np
+from matplotlib import pyplot as plt
+from matplotlib import colors
+def _finalize_plot(title, xlab, ylab, fontsize, savepath, extension, show):
+    plt.title(title)
+    plt.xlabel(xlab, size=fontsize)
+    plt.ylabel(ylab, size=fontsize)
+    if savepath is not None:
+        plt.savefig(f"{savepath}.{extension}")
+    if show:
+        plt.show()
+    plt.close()
+def plot(data, legend=None, title="", log=False):
+    """ simple plotting function """
+    def _plot_scaled(curve):
+        if log:
+            plt.semilogy(curve)
+        else:
+            plt.plot(curve)
+    if not hasattr(data[0], '__iter__'):
+        _plot_scaled(data)
+    else:
+        for curve in data:
+            _plot_scaled(curve)
+    if legend is not None:
+        plt.legend(legend)
+    plt.title(title)
+    plt.show()
+    plt.close()
+def seeds_plot(
+    list_list, legend=None, x_vals=None, color=None, style=None,
+    xlog=False, ylog=False, confidence=True, std=False, plot_all=False, x_vlines=False):
+    """ plots one line and confidence interval from multiple seeds """
+    if len(list_list) == 0:
+        return
+    def _log_switch_plot(values, x_vals):
+        if ylog and xlog:
+            plt.loglog(x_vals, values, label=legend, linestyle=style, color=color)
+        elif ylog and not xlog:
+            plt.semilogy(x_vals, values, label=legend, linestyle=style, color=color)
+        elif xlog and not ylog:
+            plt.semilogx(x_vals, values, label=legend, linestyle=style, color=color)
+        else:
+            plt.plot(x_vals, values, label=legend, linestyle=style, color=color)
+    nb_samples = len(list_list)
+    if plot_all:
+        for values in list_list:
+            if x_vals is None:
+                x_vals = list(range(len(values)))
+            _log_switch_plot(values, x_vals)
+    else:
+        if x_vals is None:
+            x_vals = list(range(len(list_list[0])))
+        arr = np.array(list_list)
+        vals = np.nanmean(arr, axis=0)
+        _log_switch_plot(vals, x_vals)
+        if confidence:
+            confs = 1.96 * np.nanstd(arr, axis=0) / nb_samples**0.5
+            plt.plot(x_vals, vals - confs, linestyle=style, color=color, linewidth=0.3)
+            plt.plot(x_vals, vals + confs, linestyle=style, color=color, linewidth=0.3)
+        if std:
+            stds = np.nanstd(arr, axis=0)
+            plt.fill_between(list(x_vals), vals - stds, vals + stds, alpha=0.1, color=color)
+    if x_vlines:
+        for x in x_vals:
+            plt.axvline(x, linewidth=0.3)
+def seeds_plot_together(
+    all_curves, legends=None, title="", xlog=False, ylog=False, confidence=True, std=False,
+    vlines=[], x_vals=None, xlab=None, ylab=None, fontsize=11, xlims=None, ylims=None,
+    savepath=None, figsize=(8, 5), show=False, plot_all=False, x_vlines=False, extension="png",
+    ):
+    """
+    Plots several lines of several seeds (for each line average and confidence interval)
+    Args:
+        - all_curves (float list list list) : order 3 array/list of lists of lists
+                        one sublist is one line, one subsublist is one seed
+        - legends (str list) : labels to use for each line (in order)
+        - title (str) : title of the plot
+        - xlog (bool) : True for x axis log scale
+        - ylog (bool) : True for y axis log scale
+        - confidence (bool) : True to display 95% mean estimate confidence intervals
+        - std (bool) : True to display standard deviation accross seeds
+        - vlines (float list) : list of x coordinates where to add vertical lines
+        - x_vals (list) : x axis values, default is 0 to n
+        - xlab (str) : label of x axis
+        - ylab (str) : label of y axis
+        - fontsize (int) : font size
+        - xlims (float pair) : plot limits for x axis, None for auto
+        - ylims (float pair) : plot limits for y axis, None for auto
+        - savepath (str) : path where to save the plot, not saved if None
+        - figsize (int pair) : dimensions of the plot
+        - show (bool) : True to show the plot
+        - plot_all (bool) : True to display one line for each seed instead of average
+        - x_vlines (bool) : True to draw a vertical line at each data point
+        - extension (str) : format for the saved image file
+    """
+    plt.figure(figsize=figsize)
+    colors = ["orange", "green", "blue", "red", "purple", "black"] * 10
+    styles = ["-", "--", "-.", ":", "-"] * 10
+    if legends is None:
+        legends = [[]] * len(all_curves)
+    for curve, color, style, legend in zip(all_curves, colors, styles, legends):
+        seeds_plot(
+            curve, color=color, x_vals=x_vals, style=style, legend=legend,
+            xlog=xlog, ylog=ylog, confidence=confidence, std=std, plot_all=plot_all, x_vlines=x_vlines)
+    for x in vlines:
+        plt.axvline(x)
+    plt.xlim(xlims)
+    plt.ylim(ylims)
+    if legend is not None:
+        plt.legend(prop={'size': fontsize})
+    _finalize_plot(title, xlab, ylab, fontsize, savepath, extension, show)
+def seeds_plot_color3d(
+        all_seeds, x_vals, y_vals, title, xlab, ylab,
+        savepath, show=False, label="", fontsize=11, std=False,
+        xlog=False, ylog=False, zlog=False, extension="png",
+        **kwargs):
+    """ 3d color plot """
+    all_seeds = np.array(all_seeds)
+    means = np.nanmean(all_seeds, axis=0)
+    if zlog:
+        plt.pcolor(x_vals, y_vals, means, norm=colors.LogNorm())
+    else:
+        plt.pcolor(x_vals, y_vals, means)
+    if xlog:
+        plt.xscale('log')
+    if ylog:
+        plt.yscale('log')
+    if std:
+        stds = np.nanstd(all_seeds, axis=0)
+        for y, row in enumerate(stds):
+            for x, val in enumerate(row):
+                plt.text(x_vals[x], y_vals[y] ,
+                         f"+{round(val, 2)}", ha='center',
+                         va='center', color='black')
+    plt.colorbar(label=label)
+    _finalize_plot(title, xlab, ylab, fontsize, savepath, extension, show)
+def seeds_plot_surface3d(
+        all_seeds, x_vals, y_vals, title, xlab, ylab,
+        savepath, show=False, label="", fontsize=11,
+        xlog=False, ylog=False, zlog=False, angle=None, extension="png",
+        **kwargs):
+    """ 3d color plot """
+    all_seeds = np.array(all_seeds)
+    means = np.nanmean(all_seeds, axis=0)
+    ax = plt.axes(projection='3d')
+    if angle is not None:
+        ax.view_init(*angle)
+    x, y = x_vals, y_vals
+    y_len, x_len = len(y), len(x)
+    x = np.expand_dims(x, axis=1)
+    x = np.repeat(x, [y_len], axis=1).transpose()
+    y = np.expand_dims(y, axis=0)
+    y = np.repeat(y, [x_len], axis=0).transpose()
+    # if xlog:
+    #     ax.set_xscale('log')
+    # if ylog:
+    #     ax.set_yscale('log')
+    if zlog:
+        ax.set_zscale('log')
+    ax.plot_surface(x, y, means, cmap='viridis',\
+                    edgecolor='green')
+    ax.set_zlabel(label, size=fontsize)
+    _finalize_plot(title, xlab, ylab, fontsize, savepath, extension, show)

reproplot-0.0.1/src/reproplot_oscarvlld/reprod_exps.py ADDED Viewed

@@ -0,0 +1,141 @@
+###
+ # @file   reprod_exps.py
+ # @author Oscar Villemaud <oscar.villemaud@epfl.ch>
+ #
+ # @section LICENSE
+ #
+ # Copyright © 2024-2026 École Polytechnique Fédérale de Lausanne (EPFL).
+ # All rights reserved.
+ #
+ # @section DESCRIPTION
+ #
+ # Experiment running.
+###
+import os
+import time
+from tqdm import tqdm
+import traceback
+import multiprocessing as mp
+from .utils import seedall, dump_json, load_json, make_exp_name, make_grid, update_params, scan_runs
+def _run_one_run(experiment_func, params, seed, exp_name, run_path, verb):
+    """ return 0 if run completed, 1 if failed"""
+    if "runs" in verb:
+        print(f"running {exp_name} seed {seed}.")
+    def _handle_error(exc):
+        count = 1
+        while os.path.isdir(f"{run_path[:-1]}_failed_{count}"):
+            count += 1
+        new_path = f"{run_path[:-1]}_failed_{count}"
+        os.rename(run_path, new_path)
+        print(f"{exp_name} seed {seed} failed with error: {exc}, renaming to: failed_{count}")
+        with open(new_path  + "/" + "traceback.txt", 'w') as f:
+            f.write(traceback.format_exc())
+    try:
+        run_time = time.time()
+        seedall(seed)
+        metrics = experiment_func(**params)
+        if type(metrics) is not dict:
+            metrics = {"metric": metrics}
+        run_time = time.time() - run_time
+        metrics["run_time"] = run_time
+        dump_json(metrics, run_path + f"metrics.json")
+        if "runs" in verb:
+            print(f"{exp_name} seed {seed} saved. ({round(run_time)} secs)")
+    except Exception as exc:
+        _handle_error(exc)
+        return 1
+    except KeyboardInterrupt:
+        _handle_error("Keyboard Interrupt")
+        raise KeyboardInterrupt
+    return 0
+def run_experiments(
+        experiment_func=None, res_dir=None,
+        seeds=None, nametag="", params_common=None, diff_plots=None, same_plot=None, same_line=None,
+        exp_tags=None, set_depending_params=None, verb=None):
+    """ run an experiment series given a grid of parameters, saves results in json files
+    Args:
+        - experiment_func (func) : function running a (random) experiment
+                                and outputing a dictionnary of metrics
+        - res_dir (str) : name of the directory where to store results as json
+        - seeds (int list) : list of random seeds to use for reproducibility
+        - nametag (str) : prefix that identifies the series of experiments
+        - params_common (dict) : dictionnary of {"param_name": value}
+                                that are de default parameters of experiment_func
+        - diff_plots (list dict) : dictionnary of {"param_name": list of values} each value combination on a different plot
+        - same_plot (list dict) : dictionnary of {"param_name": list of values} each value combination on a different line of the same plot
+        - same_line (list dict) : dictionnary of {"param_name": list of values}, values on the x axis of the plot (only one parameter)
+        - exp_tags (str list) : list of parameters to put in experiment names
+        - set_depending_params (func) : function editing in-place a dictionnary of parameters
+        - verb (str list) : string codes to indicate verbose
+    """
+    multiprocess = 0
+    dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    nb_runs_found, nb_runs_total, confs_paths =  scan_runs(
+        res_dir=res_dir, dir_path=dir_path,
+        seeds=seeds, nametag=nametag, params_common=params_common,
+        diff_plots=diff_plots, same_plot=same_plot, same_line=same_line,
+        exp_tags=exp_tags, set_depending_params=set_depending_params)
+    nb_runs_needed = nb_runs_total - nb_runs_found
+    print(f"Recovered {nb_runs_found}/{nb_runs_total} runs. Running the remaining {nb_runs_needed}.")
+    # nb_exps = count_combinations(diff_plots) * count_combinations(same_plot) * count_combinations(same_line) * len(seeds)
+    nb_runs_ran = 0
+    init_time = time.time()
+    # TODO use confs_paths ?
+    if multiprocess:
+        mp_pool = mp.Pool(processes=multiprocess)
+    else:
+        nb_failed = 0
+    with tqdm(total=nb_runs_needed, leave=True, disable=("pbar" not in verb)) as pbar:
+        for params1 in make_grid(diff_plots):
+            for params2 in make_grid(same_plot):
+                for params3 in make_grid(same_line):
+                    params = update_params(params_common, params1, params2, params3)
+                    set_depending_params(params)
+                    exp_name = make_exp_name(params, nametag, exp_tags)
+                    exp_dir = f"{dir_path}/{res_dir}/{exp_name}/"
+                    can_run = True
+                    try:  # check if parameters are matching
+                        loaded_params = load_json(exp_dir + "params.json")
+                        if loaded_params != params:
+                            print("WARNING : old and new parameters don't match despite same experiment name !")
+                            print("found :", loaded_params)
+                            print("but has :", params)
+                            can_run = False
+                    except OSError:
+                        os.makedirs(exp_dir)
+                        dump_json(params, exp_dir + "params.json", indent=2)
+                    if can_run:
+                        for seed in seeds:
+                            run_path = exp_dir + f"seed_{seed}/"
+                            if os.path.isdir(run_path):
+                                if "runs" in verb:
+                                    print(exp_name, "seed", seed, "already exists.")
+                            else:
+                                os.makedirs(run_path)
+                                if multiprocess:
+                                        mp_pool.apply_async(
+                                            func=_run_one_run,
+                                            args=(experiment_func, params, seed, exp_name, run_path, verb))
+                                else:
+                                    failed = _run_one_run(experiment_func, params, seed, exp_name, run_path, verb)
+                                    nb_failed += failed
+                                    pbar.update()
+                                nb_runs_ran += 1
+                    else:
+                        print("Skipping experiment")
+    if multiprocess:
+        mp_pool.close()
+        mp_pool.join()
+    time_taken = time.time() - init_time
+    if multiprocess:
+        print("Unable to count fails in multiprocess mode")
+        print(f"{nb_runs_ran} runs ran and {nb_runs_found} recovered in {round(time_taken)} seconds")
+    else:
+        print(f"{nb_runs_ran - nb_failed} runs ran, {nb_runs_found} recovered and {nb_failed} failed in {round(time_taken)} seconds")

reproplot-0.0.1/src/reproplot_oscarvlld/reprod_plots.py ADDED Viewed

@@ -0,0 +1,248 @@
+###
+ # @file   reprod_plots.py
+ # @author Oscar Villemaud <oscar.villemaud@epfl.ch>
+ #
+ # @section LICENSE
+ #
+ # Copyright © 2024-2026 École Polytechnique Fédérale de Lausanne (EPFL).
+ # All rights reserved.
+ #
+ # @section DESCRIPTION
+ #
+ # Data retrieval for plotting.
+###
+import os
+import copy
+from tqdm import tqdm
+import numpy as np
+from .utils import load_json, make_exp_name, make_grid, make_legend, make_plot_name, update_params, make_title
+from .plotting import seeds_plot_together, seeds_plot_color3d, seeds_plot_surface3d
+def _extract_line_data(lineconf, res_dir, nametag, exp_tags, ignore_missing):
+    """ extracts data from json files and puts it in list of lists
+    Args:
+        - lineconf (dict) : dictionnary containing directions to the data to plot on one line
+            lineconf1 = {"confs": conf_list, "seeds":[1, 2], "metric": "metric"}
+            or
+            lineconf1 = {"conf": conf, "seeds":[1, 2], "metric": "metric"}
+        - res_dir (str) : directory containing the data to plot
+        - nametag (str) : prefix identifying the series of experiments
+        - exp_tags (str list) : list of parameters used in experiment names
+        - ignore_missing (bool) : True to plot despite missing data
+    Returns:
+        - (float list list) list of lists of values of the metric, one sublist is one seed
+    """
+    dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    def _handle_missing(ignore_missing, path):
+        if ignore_missing:
+            print(f"{path} not found, plotting without it.")
+        else:
+            raise Exception("{} not found, use ignore_missing=True to plot anyway".format(path))
+    if "confs" in lineconf:  # if the metric chosen gives one value (eg final loss)
+        all_seeds = []
+        for seed in lineconf["seeds"]:
+            one_seed = []
+            for params in lineconf["confs"]:
+                exp_name = make_exp_name(params, nametag, exp_tags)
+                exp_dir = f"{dir_path}/{res_dir}/{exp_name}/"
+                path = exp_dir + f"seed_{seed}/"
+                try:
+                    value = load_json(path + "metrics.json")[lineconf["metric"]]
+                    if type(value) is list:  # implicitely taking value at end of training
+                        value = value[-1]
+                    one_seed.append(value)
+                except OSError:
+                    _handle_missing(ignore_missing, path)
+                    one_seed.append(np.nan)
+            all_seeds.append(one_seed)
+    elif "conf" in lineconf:  # if the metric chosen gives a list of values (eg training loss)
+        all_seeds = []
+        for seed in lineconf["seeds"]:
+            params = lineconf["conf"]
+            exp_name = make_exp_name(params, nametag, exp_tags)
+            exp_dir = f"{dir_path}/{res_dir}/{exp_name}/"
+            path = exp_dir + f"seed_{seed}/"
+            try:
+                one_seed = load_json(path + f"metrics.json")[lineconf["metric"]]
+                all_seeds.append(one_seed)
+            except OSError:
+                _handle_missing(ignore_missing, path)
+    return all_seeds
+def _plot_from_conf(
+        plot_conf, res_dir, plot_dir, nametag, exp_tags,
+        same_line, metric, ignore_missing, plot_kwargs=None,
+        custom_xlab=None, custom_ylab=None):
+    """ creates and saves a plot from a config
+    Args:
+        - plot_conf (dict) : configuration of the plot, including instructions on the data to use
+                        exp : plot_conf = { "filename" : "plot", "title": "title_foo",
+                                "lines": { "legend1" : lineconf1, "legend2" : lineconf2}}
+        - res_dir (str) : directory containing the data to plot
+        - plot_dir (str) : directory where to save the plot
+        - nametag (str) : prefix identifying the series of experiments
+        - exp_tags (str list) : list of parameters used in experiment names
+        - same_line (list dict or empty dict) : {"param_name": list of values} values on x axis,
+                                       empty dict will use training steps as x axis
+        - metric (str) : name of the metric to plot
+        - ignore_missing (bool) : True to plot despite missing data
+        - plot_kwargs (dict) : dictionnary of parameters to forward to seeds_plot_together()
+        - custom_xlab (func) : function that gives x label from x parameter name
+        - custom_ylab (func) : function that gives y label from y parameter name
+    """
+    dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    os.makedirs(f"{dir_path}/{plot_dir}/", exist_ok=True)
+    path = f"{dir_path}/{plot_dir}/{plot_conf['filename']}"
+    if plot_conf["type"] == "lines2d":  # 2D line plot
+        all_lines = []
+        all_legends = []
+        for legend, lineconf in tqdm(plot_conf["lines"].items(), desc=f"Plotting {plot_conf['filename']}"):
+            all_lines.append(_extract_line_data(lineconf, res_dir, nametag, exp_tags, ignore_missing))
+            all_legends.append(legend)
+        if len(same_line) == 0: # implicit values for x axis
+            xlab, xvals = "steps", None
+        else:  # explicit values for x axis
+            xlab, xvals = list(same_line.items())[0]
+        seeds_plot_together(
+            all_lines, all_legends, title=plot_conf["title"], savepath=path,
+            ylab=custom_ylab(metric), xlab=custom_xlab(xlab), x_vals=xvals, **plot_kwargs
+            )
+    elif plot_conf["type"] in ["colors3d", "surface3d"]:  # 3D plots
+        all_rows = []
+        xlab, xvals = list(same_line.items())[0]
+        ylab, yvals = list(same_line.items())[1]
+        for lineconf in tqdm(plot_conf["rows"], desc=f"Plotting {plot_conf['filename']}"):
+            all_rows.append(_extract_line_data(lineconf, res_dir, nametag, exp_tags, ignore_missing))
+        all_seeds = np.transpose(np.array(all_rows), (1, 0, 2))
+        plot_funcs = {"colors3d": seeds_plot_color3d, "surface3d": seeds_plot_surface3d}
+        plot_funcs[plot_conf["type"]](
+                all_seeds, x_vals=xvals, y_vals=yvals, label=metric,
+                title=plot_conf["title"], xlab=custom_xlab(xlab), ylab=custom_ylab(ylab),
+                savepath=path, std=False, **plot_kwargs)
+# Example of a 3D plot configuration ("type" is "colors3d" or "surface3d"):
+#  plot_conf = { "filename" : "plot", "title": "title_foo", "type": "colors3d",
+#                               "rows": [lineconf1, lineconf2]}
+def plot_experiments(
+        res_dir=None, plot_dir=None, metrics=None,
+        seeds=None, nametag="", params_common=None,
+        diff_plots=None, same_plot=None, same_line=None, set_depending_params=None,
+        exp_tags=None, ignore_missing=None, style3Dplot=None,
+        custom_title=None, custom_legend=None,
+        custom_xlab=None, custom_ylab=None,
+    ):
+    """ Plot a series of experiments
+    Args:
+        - res_dir (str) : directory containing the data to plot
+        - plot_dir (str) : directory where to save the plot
+        - metrics ((str | str tuple) list) : list of metrics to plot (batch to put on same plot)
+        - seeds (int list) : seeds of runs to plot
+        - nametag (str) : prefix identifying the series of experiments
+        - exp_tags (str list) : list of parameters used in experiment names
+        - same_line (list dict or empty dict) : {"param_name": list of values} values on x axis,
+                                       empty dict will use training steps as x axis
+        - params_common (dict) : dictionnary of {"param_name": value}
+                                that are de default parameters of experiment_func
+        - diff_plots (list dict) : dictionnary of {"param_name": list of values} each value combination on a different plot
+        - same_plot (list dict) : dictionnary of {"param_name": list of values} each value combination on a different line of the same plot
+        - same_line (list dict) : dictionnary of {"param_name": list of values}, values on the x axis of the plot (only one parameter)
+        - set_depending_params (func) : function editing in-place a dictionnary of parameters
+        - exp_tags (str list) : list of parameters to put in experiment names
+        - ignore_missing (bool) : True to plot despite missing data
+        - style3Dplot (str) : 'surface3d' or 'colors3d'
+        - custom_title (func or str) : (optionnal) function that gives a title from parameters
+        - custom_legend (func) : (optionnal) function that gives a legend from parameters
+        - custom_xlab (func or str) : function that gives x label from x parameter name
+        - custom_ylab (func or str) : function that gives y label from y parameter name
+    """
+    # handling constant functions
+    if type(custom_title) is str:
+        _title = custom_title
+        custom_title = lambda x, y : _title
+    if type(custom_xlab) is str:
+        _xlab = custom_xlab
+        custom_xlab = lambda x : _xlab
+    if type(custom_ylab) is str:
+        _ylab = custom_ylab
+        custom_ylab = lambda x : _ylab
+    # infering plot type
+    if len(same_line) < 2:
+        plot_type = "lines2d"
+    else:
+        plot_type = style3Dplot
+    # searching for config options
+    if os.path.exists("plot_config.json"):
+        plot_params = load_json("plot_config.json")[plot_type]
+    else:
+        print("plot_config.json not found, plotting without config")
+        plot_params = {"_overwrite":{}}
+    plot_tags = list(diff_plots.keys())
+    legend_tags = list(same_plot.keys())
+    # creating plot configs and plotting
+    for metrics_plot in metrics:
+        if type(metrics_plot) is str:  # if only one metric on the plot
+            metrics_plot = [metrics_plot]
+        for params1 in make_grid(diff_plots):
+            if custom_title is None:
+                title = make_title(params1, plot_tags)
+            else:
+                title = custom_title(params1, metrics_plot)
+            plot_conf = {
+                "title": title,
+                "filename": make_plot_name(params1, nametag, metrics_plot[0], plot_tags)}
+            if plot_type == "lines2d":  # if 2D line plot
+                plot_conf["lines"] = {}
+                for params2 in make_grid(same_plot):
+                    if len(metrics_plot) > 1:
+                        prefix = metric + ", "
+                    else:
+                        prefix = ""
+                    for metric in metrics_plot:
+                        def _pick_legend(params):
+                            if custom_legend is None:
+                                return prefix + make_legend(params, legend_tags)
+                            return custom_legend(params, metric)
+                        if same_line == {}:
+                            params = update_params(params_common, params1, params2)
+                            set_depending_params(params)
+                            plot_conf["lines"][_pick_legend(params)] = {
+                                "seeds": seeds, "metric": copy.deepcopy(metric), "conf": params
+                                }
+                        else:
+                            params_list = []
+                            for params3 in make_grid(same_line):
+                                params = update_params(params_common, params1, params2, params3)
+                                set_depending_params(params)
+                                params_list.append(params)
+                            plot_conf["lines"][_pick_legend(params)] = {
+                                "seeds": seeds, "metric": copy.deepcopy(metric), "confs": params_list,
+                                }
+            elif plot_type in ["colors3d", "surface3d"]:  # if 3d plot
+                for metric in metrics_plot:
+                    plot_conf["rows"] = []
+                    x_param, x_values = list(same_line.items())[0]
+                    y_param, y_values = list(same_line.items())[1]
+                    for y_value in y_values:
+                        params_list = []
+                        for x_value in x_values:
+                            params = update_params(params_common, params1, {x_param: x_value, y_param: y_value})
+                            set_depending_params(params)
+                            params_list.append(params)
+                        lineconf = {"seeds": seeds, "metric": copy.deepcopy(metric), "confs": params_list,
+                                    }
+                        plot_conf["rows"].append(lineconf)
+            plot_conf["type"] = plot_type
+            if plot_params is not None:
+                plot_kwargs = update_params(plot_params.get(metric, {}), plot_params["_overwrite"])
+            _plot_from_conf(
+                plot_conf, res_dir, plot_dir,
+                nametag, exp_tags,
+                same_line, metric, ignore_missing, plot_kwargs,
+                custom_xlab, custom_ylab
+                )
+# lineconf = {"confs": conf_list, "seeds":[1, 2], "metric": "metric"}

reproplot-0.0.1/src/reproplot_oscarvlld/reproduce.py ADDED Viewed

@@ -0,0 +1,157 @@
+###
+ # @file   reproduce.py
+ # @author Oscar Villemaud <oscar.villemaud@epfl.ch>
+ #
+ # @section LICENSE
+ #
+ # Copyright © 2024-2026 École Polytechnique Fédérale de Lausanne (EPFL).
+ # All rights reserved.
+ #
+ # @section DESCRIPTION
+ #
+ # reproplot main functions to run, plot and manage experiments.
+###
+import os
+import random
+from tqdm import tqdm
+from .utils import load_json, make_exp_name, check_compatibility
+from .reprod_exps import run_experiments
+from .reprod_plots import plot_experiments
+def rename_exps(
+        directory, new_tag, new_exp_tags, old_tag=None):
+    """ rename experiment names using parameters saved
+    Args:
+        - directory : directory of experiments
+        - new_tag (str) : new prefix for experiment names
+        - new_exp_tags (str list) : new names of metrics to put in experiment names
+        - old_tag (str) : specify to rename only experiments with that tag
+    """
+    print("Renaming experiments of directory :", directory)
+    exp_names_paths = [(f.name, f.path) for f in os.scandir(directory) if f.is_dir()]
+    counter = 0
+    for exp_name, exp_path in exp_names_paths:
+        prefix_cond = True
+        if old_tag is not None:
+            prefix = exp_name.split("-")[0]
+            prefix_cond = prefix == old_tag
+        if prefix_cond:
+            params = load_json(exp_path + "/params.json")
+            exp_name_new = make_exp_name(params, new_tag, tags_list=new_exp_tags)
+            os.rename(exp_path, directory + "/" + exp_name_new)
+            counter += 1
+            print("renaming:", exp_name, "\n into: ", exp_name_new)
+    print(f"Renamed {counter} experiments")
+def index_exps(directory, nametag=None, param_requirements=None):
+    """ gives a summary of available experiments
+    Args:
+        - directory : directory of experiments
+        - nametag (str) : specify to see only experiments with that tag
+        - param_requirements (bool func) : function taking params as input
+                    and outputing True if this experiment should be included
+    """
+    exp_names_paths = [(f.name, f.path) for f in os.scandir(directory) if f.is_dir()]
+    all_params = {}
+    seeds = set()
+    nb_runs, nb_exps = 0, 0
+    for exp_name, exp_path in tqdm(exp_names_paths):
+        loaded_params = load_json(exp_path + "/params.json")
+        select = True
+        if param_requirements is not None:
+            select = param_requirements(loaded_params)
+        if nametag is not None:
+            prefix = exp_name.split("-")[0]
+            select = select and (prefix == nametag)
+        if select:
+            nb_exps += 1
+            for name, value in loaded_params.items():
+                if type(value) is list:
+                    value = tuple(value)
+                if name in all_params:
+                    all_params[name].add(value)
+                else:
+                    all_params[name] = {value}
+            for seed_name in [f.name for f in os.scandir(exp_path) if f.is_dir()]:
+                if "failed" not in seed_name:
+                    seed = int(seed_name[5:])
+                    seeds.add(seed)
+                    nb_runs += 1
+    print(f"found {nb_runs} runs grouped in {nb_exps}/{len(exp_names_paths)} experiments with parameters :")
+    def _custom_key(obj):
+        if obj is None:
+            return 0
+        elif type(obj) is int or float:
+            return obj
+        else:
+            return len(obj)
+    for name, values in all_params.items():
+        print(name, sorted(values, key=_custom_key))
+    print("seeds", sorted(seeds))
+def run_and_plot(
+    seeds=None, res_dir="results_RPP", plot_dir="plots_RPP",
+    metrics=["metric"], experiment_func=None,
+    nametag="", params_common={},
+    diff_plots={}, same_plot={}, same_line={},
+    exp_tags=None, set_depending_params=lambda x : None,
+    no_run=False, no_plot=False,
+    ignore_missing=False, style3Dplot="colors3d", verb=["pbar"],
+    custom_title=None, custom_legend=None, custom_xlab=lambda x:x, custom_ylab=lambda x:x,
+  ):
+  """ run and plot experiments by using all hyperparameters in a grid-search fashion
+    Args:
+        - res_dir (str) : directory containing the data to plot
+        - plot_dir (str) : directory where to save the plot
+        - seeds (int list) : seeds of runs to plot
+        - metrics ((str | str tuple) list) : list of metrics to plot (batch to put on same plot)
+        - nametag (str) : prefix identifying the series of experiments
+        - exp_tags (str list) : list of parameters used in experiment names
+        - same_line (list dict or empty dict) : {"param_name": list of values} values on x axis,
+                                       empty dict will use training steps as x axis
+        - params_common (dict) : dictionnary of {"param_name": value}
+                                that are de default parameters of experiment_func
+        - diff_plots (list dict) : dictionnary of {"param_name": list of values} each value combination on a different plot
+        - same_plot (list dict) : dictionnary of {"param_name": list of values} each value combination on a different line of the same plot
+        - same_line (list dict) : dictionnary of {"param_name": list of values}, values on the x axis of the plot (only one parameter)
+        - set_depending_params (func) : function editing in-place a dictionnary of parameters
+        - exp_tags (str list) : list of parameters to put in experiment names
+        - no_run (bool) : True to disable running new experiments
+        - no_plot (bool) : True to disable plotting
+        - ignore_missing (bool) : True to plot despite missing data
+        - style3Dplot (str) : 'surface3d' or 'colors3d'
+        - verb (str list) : string codes to indicate verbose
+        - custom_title (func or str) : (optionnal) function that gives a title from parameters
+        - custom_legend (func) : (optionnal) function that gives a legend from parameters
+        - custom_xlab (func or str) : (optionnal) function that gives x label from x parameter name
+        - custom_ylab (func or str) : (optionnal) function that gives y label from y parameter name
+  """
+  if exp_tags is None:
+      exp_tags = sorted(list(set(diff_plots.keys()) | set(same_plot.keys()) | set(same_line.keys())))
+  if seeds is None:
+      seeds = [random.randint(1, 99999)]
+      print(f"No seeds specified, using random seed {seeds[0]}")
+  check_compatibility(diff_plots, same_plot, same_line, exp_tags)
+  if not no_run:
+    run_experiments(
+        experiment_func=experiment_func, res_dir=res_dir, seeds=seeds, nametag=nametag,
+        params_common=params_common, diff_plots=diff_plots, same_plot=same_plot, same_line=same_line,
+        exp_tags=exp_tags, set_depending_params=set_depending_params, verb=verb,
+    )
+  if not no_plot:
+    plot_experiments(
+        res_dir=res_dir, plot_dir=plot_dir, seeds=seeds,
+        nametag=nametag, metrics=metrics, same_line=same_line,
+        params_common=params_common, diff_plots=diff_plots, same_plot=same_plot,
+        exp_tags=exp_tags, set_depending_params=set_depending_params,
+        ignore_missing=ignore_missing, style3Dplot=style3Dplot,
+        custom_title=custom_title, custom_legend=custom_legend,
+        custom_xlab=custom_xlab, custom_ylab=custom_ylab,
+    )

reproplot-0.0.1/src/reproplot_oscarvlld/utils.py ADDED Viewed

@@ -0,0 +1,235 @@
+###
+ # @file   utils.py
+ # @author Oscar Villemaud <oscar.villemaud@epfl.ch>
+ #
+ # @section LICENSE
+ #
+ # Copyright © 2024-2026 École Polytechnique Fédérale de Lausanne (EPFL).
+ # All rights reserved.
+ #
+ # @section DESCRIPTION
+ #
+ # Utility functions.
+###
+import os
+import json
+import pickle
+import random
+from itertools import product
+import numpy as np
+import torch
+def dump_json(object, path, indent=0):
+    """ save object in json file
+    Args:
+        - object : python object to save
+        - path (str) : path where to save the object
+        - indent (int) : indent level to use (for readability)
+    """
+    with open(path, "w") as outfile:
+        json.dump(object, outfile, indent=indent)
+def load_json(path):
+    """ load object from json file
+    Args:
+        - path (str) : path where to save the object
+    """
+    with open(path, 'r') as openfile:
+        return json.load(openfile)
+def dump_pickle(object, path):
+    """ save object in pickle file
+    Args:
+        - object : python object to save
+        - path (str) : path where to save the object
+    """
+    with open(path, "wb") as outfile:
+        pickle.dump(object, outfile)
+def load_pickle(path):
+    """ load object from pickle file
+    Args:
+        - path (str) : path where to save the object
+    """
+    with open(path, 'rb') as openfile:
+        return pickle.load(openfile)
+def seedall(seed):
+    """ seed random, numpy and pytorch with the same seed
+    Args:
+        - seed (int) : seed to use
+    """
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    random.seed(seed)
+def make_grid(list_dic):
+    """ build iterator over different combination of params (gridsearch)
+    Args:
+        - list_dic (list dict): dictionnary of {"param_name": list of values}
+    Yields:
+        - (dict)
+    """
+    for params in product(*list_dic.values()):
+        yield {key : param for key, param in zip(list_dic.keys(), params)}
+def count_combinations(list_dic):
+    """ count number of parameter combinations
+    Args:
+        - list_dic : dictionnary of {"metric": list of values}
+    Returns:
+        - (int) number of possible parameter combinations
+    """
+    nb = 1
+    for _, params in list_dic.items():
+        nb *= len(params)
+    return nb
+def update_params(params, *updates):
+    """ add new parameters or replace existing ones
+    Args:
+        - params   : dictionnary of parameters
+        - *updates : dictionnaries of additionnal parameters
+    Return:
+         - (dict) updated dictionnary of parameters
+    """
+    paramsall = params.copy()
+    for new_params in updates:
+        paramsall.update(new_params)
+    return paramsall
+def make_exp_name(params, nametag, tags_list):
+    """ create experiment name
+    Args:
+        - params : dictionnary of {"metric": value}
+        - nametag (str) : prefix to identify experiment series
+        - tag_list (str list) : list of metrics to put in experiment name
+    Returns:
+        - (str) experiment name
+    """
+    exp_name = nametag
+    for name in tags_list:
+        exp_name += f"-{name}_{params[name]}"
+    return exp_name
+def make_plot_name(params, nametag, metric, tags_list):
+    """" result should depend on diff_plot
+    Args:
+        - params : dictionnary of {"metric": value}
+        - nametag (str) : prefix to identify experiment series
+        - metric (str) : metric plotted
+        - tag_list (str list) : list of param names to put in experiment name
+    Returns:
+        - (str) plot name
+    """
+    plot_name = f"{nametag}-{metric}"
+    for name in tags_list:
+        plot_name += f"-{name}_{params[name]}"
+    return plot_name
+def make_title(params, tags_list):
+    """
+    Args:
+        - params : dictionnary of {"metric": value}
+        - tag_list (str list) : list of param names to put in experiment name
+    Returns:
+        - (str) plot title
+    """
+    if len(tags_list) == 0:
+        return "X"
+    # title = f"{tags_list[0]}={params[tags_list[0]]}"
+    title = ""
+    for name in tags_list:
+        if len(title):
+            title += ", "
+        title += f"{name}={params[name]}"
+    return title
+def make_legend(params, tags_list):
+    """ create a legend for a line on a plot
+    from parameters and a list of parameter names
+    Args:
+        - params : dictionnary of {"metric": value}
+        - tag_list (str list) : list of param names to put in experiment name
+    Returns:
+        - (str) line legend
+    """
+    if len(tags_list) == 0:
+        return "X"
+    legend = f"{tags_list[0]}={params[tags_list[0]]}"
+    for name in tags_list[1:]:
+        legend += f", {name}={params[name]}"
+    return legend
+def check_compatibility(
+        diff_plots, same_plot, same_line, exp_tags
+        ):
+    """ check if the same name if generated twice
+    Args:
+        - diff_plots (list dict) : dictionnary of {"param_name": list of values} each value combination on a different plot
+        - same_plot (list dict) : dictionnary of {"param_name": list of values} each value combination on a different line of the same plot
+        - same_line (list dict) : dictionnary of {"param_name": list of values}, values on the x axis of the plot (only one parameter)
+        - exp_tags (str list) : list of parameters to put in experiment names
+    """
+    for param in list(diff_plots.keys()) + list(same_plot.keys()) + list(same_line.keys()):
+        if param not in exp_tags:
+            print(f"WARNING : Experiment names should depend on -{param} to avoid having the same name")
+def scan_runs(
+        res_dir, dir_path=None,
+        seeds=None, nametag="", params_common=None,
+        diff_plots=None, same_plot=None, same_line=None,
+        exp_tags=None, set_depending_params=lambda x: None):
+    """ finds existing and missing runs
+    - res_dir (str) : name of the directory where to store results as json
+    - dir_path (str) : path to directory containing -res_dir
+    - seeds (int list) : list of random seeds to use for reproducibility
+    - nametag (str) : prefix that identifies the series of experiments
+    - params_common (dict) : dictionnary of {"param_name": value}
+                            that are de default parameters of experiment_func
+    - diff_plots (list dict) : dictionnary of {"param_name": list of values} each value combination on a different plot
+    - same_plot (list dict) : dictionnary of {"param_name": list of values} each value combination on a different line of the same plot
+    - same_line (list dict) : dictionnary of {"param_name": list of values}, values on the x axis of the plot (only one parameter)
+    - exp_tags (str list) : list of parameters to put in experiment names
+    - set_depending_params (func) : function editing in-place a dictionnary of parameters
+    """
+    confs_paths = []
+    count_done, count_total = 0, 0
+    for params1 in make_grid(diff_plots):
+        for params2 in make_grid(same_plot):
+            for params3 in make_grid(same_line):
+                params = update_params(params_common, params1, params2, params3)
+                set_depending_params(params)
+                exp_name = make_exp_name(params, nametag, exp_tags)
+                exp_dir = f"{dir_path}/{res_dir}/{exp_name}/"
+                for seed in seeds:
+                    path = exp_dir + f"seed_{seed}/"
+                    if os.path.isdir(path):
+                        count_done += 1
+                    else:
+                        confs_paths.append((params.copy(), path))
+                    count_total += 1
+    return count_done, count_total, confs_paths