halib 0.1.7__py3-none-any.whl → 0.1.99__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +84 -0
- halib/common.py +151 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/filetype/csvfile.py +151 -45
- halib/filetype/ipynb.py +63 -0
- halib/filetype/jsonfile.py +1 -1
- halib/filetype/textfile.py +4 -4
- halib/filetype/videofile.py +44 -33
- halib/filetype/yamlfile.py +95 -0
- halib/gdrive.py +1 -1
- halib/online/gdrive.py +104 -54
- halib/online/gdrive_mkdir.py +29 -17
- halib/online/gdrive_test.py +31 -18
- halib/online/projectmake.py +58 -43
- halib/plot.py +296 -11
- halib/projectmake.py +1 -1
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +100 -0
- halib/research/benchquery.py +131 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +133 -0
- halib/research/mics.py +68 -0
- halib/research/params_gen.py +108 -0
- halib/research/perfcalc.py +336 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/filesys.py +17 -10
- halib/system/__init__.py +0 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +124 -0
- halib/tele_noti.py +166 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/listop.py +13 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +1 -1
- halib-0.1.99.dist-info/METADATA +209 -0
- halib-0.1.99.dist-info/RECORD +64 -0
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/WHEEL +1 -1
- halib-0.1.7.dist-info/METADATA +0 -59
- halib-0.1.7.dist-info/RECORD +0 -30
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info/licenses}/LICENSE.txt +0 -0
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/top_level.txt +0 -0
halib/plot.py
CHANGED
```diff
@@ -1,16 +1,301 @@
+from .common import now_str, norm_str, ConsoleLog
+from .filetype import csvfile
+from .system import filesys as fs
+from functools import partial
+from rich.console import Console
+from rich.pretty import pprint
+import click
+import csv
+import matplotlib
 import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
 import seaborn as sns
-import matplotlib
 
 
-
-
-
-
-
-
-
-
-
+console = Console()
+desktop_path = os.path.expanduser("~/Desktop")
+REQUIRED_COLUMNS = ["epoch", "train_loss", "val_loss", "train_acc", "val_acc"]
+
+import csv
+
+
+def get_delimiter(file_path, bytes=4096):
+    sniffer = csv.Sniffer()
+    data = open(file_path, "r").read(bytes)
+    delimiter = sniffer.sniff(data).delimiter
+    return delimiter
+
+
+# Function to verify that the DataFrame has the required columns, and only the required columns
+def verify_csv(csv_file, required_columns=REQUIRED_COLUMNS):
+    delimiter = get_delimiter(csv_file)
+    df = pd.read_csv(csv_file, sep=delimiter)
+    # change the column names to lower case
+    df.columns = [col.lower() for col in df.columns]
+    for col in required_columns:
+        if col not in df.columns:
+            raise ValueError(
+                f"Required columns are: {REQUIRED_COLUMNS}, but found {df.columns}"
+            )
+    df = df[required_columns].copy()
+    return df
+
+
+def get_valid_tags(csv_files, tags):
+    if tags is not None and len(tags) > 0:
+        assert all(
+            isinstance(tag, str) for tag in tags
+        ), "tags must be a list of strings"
+        assert all(
+            len(tag) > 0 for tag in tags
+        ), "tags must be a list of non-empty strings"
+        valid_tags = tags
+    else:
+        valid_tags = []
+        for csv_file in csv_files:
+            file_name = fs.get_file_name(csv_file, split_file_ext=True)[0]
+            tag = norm_str(file_name)
+            valid_tags.append(tag)
+    return valid_tags
+
+
+def plot_ax(df, ax, metric="loss", tag=""):
+    pprint(locals())
+    # reset plt
+    assert metric in ["loss", "acc"], "metric must be either 'loss' or 'acc'"
+    part = ["train", "val"]
+    for p in part:
+        label = f"{tag}_{p}_{metric}"
+        ax.plot(df["epoch"], df[f"{p}_{metric}"], label=label)
+    return ax
+
+
+def actual_plot_seaborn(frame, csv_files, axes, tags, log):
+    # clear the axes
+    for ax in axes:
+        ax.clear()
+    ls_df = []
+    valid_tags = get_valid_tags(csv_files, tags)
+    for csv_file in csv_files:
+        df = verify_csv(csv_file)
+        if log:
+            with ConsoleLog(f"plotting {csv_file}"):
+                csvfile.fn_display_df(df)
+        ls_df.append(df)
+
+    ls_metrics = ["loss", "acc"]
+    for df_item, tag in zip(ls_df, valid_tags):
+        # add tag to columns,excpet epoch
+        df_item.columns = [
+            f"{tag}_{col}" if col != "epoch" else col for col in df_item.columns
+        ]
+    # merge the dataframes on the epoch column
+    df_combined = ls_df[0]
+    for df_item in ls_df[1:]:
+        df_combined = pd.merge(df_combined, df_item, on="epoch", how="outer")
+    # csvfile.fn_display_df(df_combined)
+
+    for i, metric in enumerate(ls_metrics):
+        tags_str = "+".join(valid_tags) if len(valid_tags) > 1 else valid_tags[0]
+        title = f"{tags_str}_{metric}-by-epoch"
+        cols = [col for col in df_combined.columns if col != "epoch" and metric in col]
+        cols = sorted(cols)
+        # pprint(cols)
+        plot_data = df_combined[cols]
+
+        # line from same csv file (same tag) should have the same marker
+        all_markers = [
+            marker for marker in plt.Line2D.markers if marker and marker != " "
+        ]
+        tag2marker = {tag: marker for tag, marker in zip(valid_tags, all_markers)}
+        plot_markers = []
+        for col in cols:
+            # find the tag:
+            tag = None
+            for valid_tag in valid_tags:
+                if valid_tag in col:
+                    tag = valid_tag
+                    break
+            plot_markers.append(tag2marker[tag])
+        # pprint(list(zip(cols, plot_markers)))
+
+        # create color
+        sequential_palettes = [
+            "Reds",
+            "Greens",
+            "Blues",
+            "Oranges",
+            "Purples",
+            "Greys",
+            "BuGn",
+            "BuPu",
+            "GnBu",
+            "OrRd",
+            "PuBu",
+            "PuRd",
+            "RdPu",
+            "YlGn",
+            "PuBuGn",
+            "YlGnBu",
+            "YlOrBr",
+            "YlOrRd",
+        ]
+        # each csvfile (tag) should have a unique color
+        tag2palette = {
+            tag: palette for tag, palette in zip(valid_tags, sequential_palettes)
+        }
+        plot_colors = []
+        for tag in valid_tags:
+            palette = tag2palette[tag]
+            total_colors = 10
+            ls_colors = sns.color_palette(palette, total_colors).as_hex()
+            num_part = len(ls_metrics)
+            subarr = np.array_split(np.arange(total_colors), num_part)
+            for idx, col in enumerate(cols):
+                if tag in col:
+                    chosen_color = ls_colors[
+                        subarr[int(idx % num_part)].mean().astype(int)
+                    ]
+                    plot_colors.append(chosen_color)
+
+        # pprint(list(zip(cols, plot_colors)))
+        sns.lineplot(
+            data=plot_data,
+            markers=plot_markers,
+            palette=plot_colors,
+            ax=axes[i],
+            dashes=False,
+        )
+        axes[i].set(xlabel="epoch", ylabel=metric, title=title)
+        axes[i].legend()
+        axes[i].grid()
+
+
+def actual_plot(frame, csv_files, axes, tags, log):
+    ls_df = []
+    valid_tags = get_valid_tags(csv_files, tags)
+    for csv_file in csv_files:
+        df = verify_csv(csv_file)
+        if log:
+            with ConsoleLog(f"plotting {csv_file}"):
+                csvfile.fn_display_df(df)
+        ls_df.append(df)
+
+    metric_values = ["loss", "acc"]
+    for i, metric in enumerate(metric_values):
+        for df_item, tag in zip(ls_df, valid_tags):
+            metric_ax = plot_ax(df_item, axes[i], metric, tag)
+
+        # set the title, xlabel, ylabel, legend, and grid
+        tags_str = "+".join(valid_tags) if len(valid_tags) > 1 else valid_tags[0]
+        metric_ax.set(
+            xlabel="epoch", ylabel=metric, title=f"{tags_str}_{metric}-by-epoch"
+        )
+        metric_ax.legend()
+        metric_ax.grid()
+
+
+def plot_csv_files(
+    csv_files,
+    outdir="./out/plot",
+    tags=None,
+    log=False,
+    save_fig=False,
+    update_in_min=1,
+):
+    # if csv_files is a string, convert it to a list
+    if isinstance(csv_files, str):
+        csv_files = [csv_files]
+    # if tags is a string, convert it to a list
+    if isinstance(tags, str):
+        tags = [tags]
+    valid_tags = get_valid_tags(csv_files, tags)
+    assert len(valid_tags) == len(
+        csv_files
+    ), "Unable to determine tags for each csv file"
+    live_update_in_ms = int(update_in_min * 60 * 1000)
+    fig, axes = plt.subplots(2, 1, figsize=(10, 17))
+    if live_update_in_ms:  # live update in min should be > 0
+        from matplotlib.animation import FuncAnimation
+
+        anim = FuncAnimation(
+            fig,
+            partial(
+                actual_plot_seaborn, csv_files=csv_files, axes=axes, tags=tags, log=log
+            ),
+            interval=live_update_in_ms,
+            blit=False,
+            cache_frame_data=False,
+        )
+        plt.show()
+    else:
+        actual_plot_seaborn(None, csv_files, axes, tags, log)
+        plt.show()
+
+    if save_fig:
+        os.makedirs(outdir, exist_ok=True)
+        tags_str = "+".join(valid_tags) if len(valid_tags) > 1 else valid_tags[0]
+        tag = f"{now_str()}_{tags_str}"
+        fig.savefig(f"{outdir}/{tag}_plot.png")
+        enable_plot_pgf()
+        fig.savefig(f"{outdir}/{tag}_plot.pdf")
+    if live_update_in_ms:
+        return anim
+
+
+def enable_plot_pgf():
+    matplotlib.use("pdf")
+    matplotlib.rcParams.update(
+        {
+            "pgf.texsystem": "pdflatex",
+            "font.family": "serif",
+            "text.usetex": True,
+            "pgf.rcfonts": False,
+        }
+    )
+
+
+def save_fig_latex_pgf(filename, directory="."):
+    enable_plot_pgf()
+    if ".pgf" not in filename:
+        filename = f"{directory}/{filename}.pgf"
     plt.savefig(filename)
+
+
+# https: // click.palletsprojects.com/en/8.1.x/api/
+@click.command()
+@click.option("--csvfiles", "-f", multiple=True, type=str, help="csv files to plot")
+@click.option(
+    "--outdir",
+    "-o",
+    type=str,
+    help="output directory for the plot",
+    default=str(desktop_path),
+)
+@click.option(
+    "--tags", "-t", multiple=True, type=str, help="tags for the csv files", default=[]
+)
+@click.option("--log", "-l", is_flag=True, help="log the csv files")
+@click.option("--save_fig", "-s", is_flag=True, help="save the plot as a file")
+@click.option(
+    "--update_in_min",
+    "-u",
+    type=float,
+    help="update the plot every x minutes",
+    default=0.0,
+)
+def main(
+    csvfiles,
+    outdir,
+    tags,
+    log,
+    save_fig,
+    update_in_min,
+):
+    plot_csv_files(list(csvfiles), outdir, list(tags), log, save_fig, update_in_min)
+
+
+if __name__ == "__main__":
+    main()
```
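The diff above turns `halib/plot.py` from a bare import stub into a training-curve plotting tool. For orientation, here is a minimal usage sketch; the CSV paths and tag names are hypothetical, and it assumes each CSV contains the five required columns (`epoch`, `train_loss`, `val_loss`, `train_acc`, `val_acc`).

```python
# Hypothetical usage of the new plot_csv_files API (paths are made up).
from halib.plot import plot_csv_files

plot_csv_files(
    ["runs/baseline.csv", "runs/improved.csv"],  # one curve set per file
    outdir="./out/plot",
    tags=["baseline", "improved"],  # omitted tags are derived from file names
    save_fig=True,                  # writes <timestamp>_<tags>_plot.png/.pdf
    update_in_min=0,                # 0 = plot once; >0 = FuncAnimation live refresh
)
```

The same entry point is also exposed on the command line via click, e.g. `python -m halib.plot -f runs/baseline.csv -f runs/improved.csv -s` (assuming the module is run directly).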
halib/projectmake.py
CHANGED
File without changes
halib/research/base_config.py
ADDED

```python
import os
from rich.pretty import pprint
from abc import ABC, abstractmethod
from dataclass_wizard import YAMLWizard


class NamedConfig(ABC):
    """
    Base class for named configurations.
    All configurations should have a name.
    """

    @abstractmethod
    def get_name(self):
        """
        Get the name of the configuration.
        This method should be implemented in subclasses.
        """
        pass


class ExpBaseConfig(ABC, YAMLWizard):
    """
    Base class for configuration objects.
    What a cfg class must have:
    1 - a dataset cfg
    2 - a metric cfg
    3 - a method cfg
    """

    # Save to yaml fil
    def save_to_outdir(
        self, filename: str = "__config.yaml", outdir=None, override: bool = False
    ) -> None:
        """
        Save the configuration to the output directory.
        """
        if outdir is not None:
            output_dir = outdir
        else:
            output_dir = self.get_outdir()
        os.makedirs(output_dir, exist_ok=True)
        assert (output_dir is not None) and (
            os.path.isdir(output_dir)
        ), f"Output directory '{output_dir}' does not exist or is not a directory."
        file_path = os.path.join(output_dir, filename)
        if os.path.exists(file_path) and not override:
            pprint(
                f"File '{file_path}' already exists. Use 'override=True' to overwrite."
            )
        else:
            # method of YAMLWizard to_yaml_file
            self.to_yaml_file(file_path)

    @classmethod
    @abstractmethod
    # load from a custom YAML file
    def from_custom_yaml_file(cls, yaml_file: str):
        """Load a configuration from a custom YAML file."""
        pass

    @abstractmethod
    def get_cfg_name(self):
        """
        Get the name of the configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_outdir(self):
        """
        Get the output directory for the configuration.
        This method should be implemented in subclasses.
        """
        return None

    @abstractmethod
    def get_general_cfg(self):
        """
        Get the general configuration like output directory, log settings, SEED, etc.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_dataset_cfg(self) -> NamedConfig:
        """
        Get the dataset configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_metric_cfg(self) -> NamedConfig:
        """
        Get the metric configuration.
        This method should be implemented in subclasses.
        """
        pass
```
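`ExpBaseConfig` is abstract, so a project supplies a concrete subclass. Below is a minimal sketch of what such a subclass might look like; all class names, fields, and defaults here are hypothetical, and it assumes dataclass-wizard's stock `from_yaml_file` reader is an acceptable implementation of `from_custom_yaml_file`.

```python
# Hypothetical concrete configs; only the abstract surface shown in the
# diff above is implemented. Field names and defaults are illustrative.
from dataclasses import dataclass, field
from halib.research.base_config import ExpBaseConfig, NamedConfig


@dataclass
class DatasetCfg(NamedConfig):
    name: str = "cifar10"

    def get_name(self):
        return self.name


@dataclass
class MetricCfg(NamedConfig):
    name: str = "accuracy"

    def get_name(self):
        return self.name


@dataclass
class MyExpConfig(ExpBaseConfig):
    outdir: str = "./out/exp1"
    dataset: DatasetCfg = field(default_factory=DatasetCfg)
    metric: MetricCfg = field(default_factory=MetricCfg)

    @classmethod
    def from_custom_yaml_file(cls, yaml_file: str):
        return cls.from_yaml_file(yaml_file)  # YAMLWizard reader

    def get_cfg_name(self):
        return "exp1"

    def get_outdir(self):
        return self.outdir

    def get_general_cfg(self):
        return {"seed": 42}

    def get_dataset_cfg(self) -> NamedConfig:
        return self.dataset

    def get_metric_cfg(self) -> NamedConfig:
        return self.metric


cfg = MyExpConfig()
cfg.save_to_outdir()  # writes ./out/exp1/__config.yaml
```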
halib/research/base_exp.py
ADDED

```python
from abc import ABC, abstractmethod

from ..research.base_config import ExpBaseConfig
from ..research.perfcalc import PerfCalc
from ..research.metrics import MetricsBackend

# ! SEE https://github.com/hahv/base_exp for sample usage
class BaseExperiment(PerfCalc, ABC):
    """
    Base class for experiments.
    Orchestrates the experiment pipeline using a pluggable metrics backend.
    """

    def __init__(self, config: ExpBaseConfig):
        self.config = config
        self.metric_backend = None

    # -----------------------
    # PerfCalc Required Methods
    # -----------------------
    def get_dataset_name(self):
        return self.config.get_dataset_cfg().get_name()

    def get_experiment_name(self):
        return self.config.get_cfg_name()

    def get_metric_backend(self):
        if not self.metric_backend:
            self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
        return self.metric_backend

    # -----------------------
    # Abstract Experiment Steps
    # -----------------------
    @abstractmethod
    def init_general(self, general_cfg):
        """Setup general settings like SEED, logging, env variables."""
        pass

    @abstractmethod
    def prepare_dataset(self, dataset_cfg):
        """Load/prepare dataset."""
        pass

    @abstractmethod
    def prepare_metrics(self, metric_cfg) -> MetricsBackend:
        """
        Prepare the metrics for the experiment.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def exec_exp(self, *args, **kwargs):
        """Run experiment process, e.g.: training/evaluation loop.
        Return: raw_metrics_data, and extra_data as input for calc_and_save_exp_perfs
        """
        pass

    def eval_exp(self):
        """Optional: re-run evaluation from saved results."""
        pass

    # -----------------------
    # Main Experiment Runner
    # -----------------------
    def run_exp(self, do_calc_metrics=True, *args, **kwargs):
        """
        Run the whole experiment pipeline.
        Params:
            + 'outfile' to save csv file results,
            + 'outdir' to set output directory for experiment results.
            + 'return_df' to return a DataFrame of results instead of a dictionary.

        Full pipeline:
            1. Init
            2. Dataset
            3. Metrics Preparation
            4. Save Config
            5. Execute
            6. Calculate & Save Metrics
        """
        self.init_general(self.config.get_general_cfg())
        self.prepare_dataset(self.config.get_dataset_cfg())
        self.prepare_metrics(self.config.get_metric_cfg())

        # Save config before running
        self.config.save_to_outdir()

        # Execute experiment
        results = self.exec_exp(*args, **kwargs)
        if do_calc_metrics:
            metrics_data, extra_data = results
            # Calculate & Save metrics
            perf_results = self.calc_and_save_exp_perfs(
                raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
            )
            return perf_results
        else:
            return results
```
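`BaseExperiment` leaves the pipeline steps abstract. The sketch below shows the shape of a subclass; it is structural only, since `PerfCalc` (see `halib/research/perfcalc.py` in this release) may impose further requirements not visible in this hunk, and `MyExpConfig` refers to the hypothetical config sketch above.

```python
# Structural sketch of a BaseExperiment subclass. Only the abstract methods
# named in the diff above are overridden; the bodies are illustrative.
from halib.research.base_exp import BaseExperiment


class MyExperiment(BaseExperiment):
    def init_general(self, general_cfg):
        print("general settings:", general_cfg)  # e.g. seed, logging

    def prepare_dataset(self, dataset_cfg):
        self.data = list(range(10))  # stand-in for real data loading

    def prepare_metrics(self, metric_cfg):
        return None  # would return a concrete MetricsBackend

    def exec_exp(self, *args, **kwargs):
        raw_metrics_data = {"acc": 0.9}  # produced by the training loop
        extra_data = {}
        return raw_metrics_data, extra_data


# exp = MyExperiment(MyExpConfig())
# exp.run_exp(do_calc_metrics=False)  # skips calc_and_save_exp_perfs
```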
halib/research/benchquery.py
ADDED

```python
import pandas as pd
from rich.pretty import pprint
from argparse import ArgumentParser

def cols_to_col_groups(df):
    columns = list(df.columns)
    # pprint(columns)

    col_groups = []
    current_group = []

    def have_unnamed(col_group):
        return any("unnamed" in col.lower() for col in col_group)

    for i, col in enumerate(columns):
        # Add the first column to the current group
        if not current_group:
            current_group.append(col)
            continue

        prev_col = columns[i - 1]
        # Check if current column is "unnamed" or shares base name with previous
        # Assuming "equal" means same base name (before any suffix like '_1')
        base_prev = (
            prev_col.split("_")[0].lower() if "_" in prev_col else prev_col.lower()
        )
        base_col = col.split("_")[0].lower() if "_" in col else col.lower()
        is_unnamed = "unnamed" in col.lower()
        is_equal = base_col == base_prev

        if is_unnamed or is_equal:
            # Add to current group
            current_group.append(col)
        else:
            # Start a new group
            col_groups.append(current_group)
            current_group = [col]
    # Append the last group
    if current_group:
        col_groups.append(current_group)
    meta_dict = {"common_cols": [], "db_cols": []}
    for group in col_groups:
        if not have_unnamed(group):
            meta_dict["common_cols"].extend(group)
        else:
            # find the first unnamed column
            named_col = next(
                (col for col in group if "unnamed" not in col.lower()), None
            )
            group_cols = [f"{named_col}_{i}" for i in range(len(group))]
            meta_dict["db_cols"].extend(group_cols)
    return meta_dict

# def bech_by_db_name(df, db_list="db1, db2", key_metrics="p, r, f1, acc"):


def str_2_list(input_str, sep=","):
    out_ls = []
    if len(input_str.strip()) == 0:
        return out_ls
    if sep not in input_str:
        out_ls.append(input_str.strip())
        return out_ls
    else:
        out_ls = [item.strip() for item in input_str.split(sep) if item.strip()]
        return out_ls


def filter_bech_df_by_db_and_metrics(df, db_list="", key_metrics=""):
    meta_cols_dict = cols_to_col_groups(df)
    op_df = df.copy()
    op_df.columns = (
        meta_cols_dict["common_cols"].copy() + meta_cols_dict["db_cols"].copy()
    )
    filterd_cols = []
    filterd_cols.extend(meta_cols_dict["common_cols"])

    selected_db_list = str_2_list(db_list)
    db_filted_cols = []
    if len(selected_db_list) > 0:
        for db_name in db_list.split(","):
            db_name = db_name.strip()
            for col_name in meta_cols_dict["db_cols"]:
                if db_name.lower() in col_name.lower():
                    db_filted_cols.append(col_name)
    else:
        db_filted_cols = meta_cols_dict["db_cols"]

    filterd_cols.extend(db_filted_cols)
    df_filtered = op_df[filterd_cols].copy()
    df_filtered

    selected_metrics_ls = str_2_list(key_metrics)
    if len(selected_metrics_ls) > 0:
        # get the second row as metrics row (header)
        metrics_row = df_filtered.iloc[0].copy()
        # only get the values in columns in (db_filterd_cols)
        metrics_values = metrics_row[db_filted_cols].values
        keep_metrics_cols = []
        # create a zip of db_filted_cols and metrics_values (in that metrics_row)
        metrics_list = list(zip(metrics_values, db_filted_cols))
        selected_metrics_ls = [metric.strip().lower() for metric in selected_metrics_ls]
        for metric, col_name in metrics_list:
            if metric.lower() in selected_metrics_ls:
                keep_metrics_cols.append(col_name)

    else:
        pprint("No metrics selected, keeping all db columns")
        keep_metrics_cols = db_filted_cols

    final_filterd_cols = meta_cols_dict["common_cols"].copy() + keep_metrics_cols
    df_final = df_filtered[final_filterd_cols].copy()
    return df_final


def parse_args():
    parser = ArgumentParser(
        description="desc text")
    parser.add_argument('-csv', '--csv', type=str, help='CSV file path', default=r"E:\Dev\__halib\test\bench.csv")
    return parser.parse_args()


def main():
    args = parse_args()
    csv_file = args.csv
    df = pd.read_csv(csv_file, sep=";", encoding="utf-8")
    filtered_df = filter_bech_df_by_db_and_metrics(df, "bowfire", "acc")
    print(filtered_df)

if __name__ == "__main__":
    main()
```
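`benchquery` expects a benchmark table with two header rows: the pandas header row names each dataset group once (blank cells become `Unnamed: N` columns), and the first data row carries the per-column metric names. A small self-contained sketch, with made-up numbers, of how `filter_bech_df_by_db_and_metrics` slices such a table:

```python
# Hypothetical benchmark table mirroring the two-header-row CSV layout
# that cols_to_col_groups() keys on ("Unnamed: N" marks group continuations).
import pandas as pd

from halib.research.benchquery import filter_bech_df_by_db_and_metrics

df = pd.DataFrame(
    {
        "method": ["", "A", "B"],      # row 0 holds the metric names
        "year": ["", "2021", "2022"],
        "bowfire": ["p", 0.80, 0.70],  # dataset group: p / r / acc
        "Unnamed: 3": ["r", 0.60, 0.50],
        "Unnamed: 4": ["acc", 0.90, 0.85],
    }
)

# Keep the common columns plus only the "acc" column of the "bowfire" group.
out = filter_bech_df_by_db_and_metrics(df, db_list="bowfire", key_metrics="acc")
print(out)  # columns: method, year, bowfire_2
```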