halib 0.1.99__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. halib/__init__.py +12 -6
  2. halib/common/__init__.py +0 -0
  3. halib/common/common.py +207 -0
  4. halib/common/rich_color.py +285 -0
  5. halib/exp/__init__.py +0 -0
  6. halib/exp/core/__init__.py +0 -0
  7. halib/exp/core/base_config.py +167 -0
  8. halib/exp/core/base_exp.py +147 -0
  9. halib/exp/core/param_gen.py +189 -0
  10. halib/exp/core/wandb_op.py +117 -0
  11. halib/exp/data/__init__.py +0 -0
  12. halib/exp/data/dataclass_util.py +41 -0
  13. halib/exp/data/dataset.py +208 -0
  14. halib/exp/data/torchloader.py +165 -0
  15. halib/exp/perf/__init__.py +0 -0
  16. halib/exp/perf/flop_calc.py +190 -0
  17. halib/exp/perf/gpu_mon.py +58 -0
  18. halib/exp/perf/perfcalc.py +440 -0
  19. halib/exp/perf/perfmetrics.py +137 -0
  20. halib/exp/perf/perftb.py +778 -0
  21. halib/exp/perf/profiler.py +507 -0
  22. halib/exp/viz/__init__.py +0 -0
  23. halib/exp/viz/plot.py +754 -0
  24. halib/filetype/csvfile.py +3 -9
  25. halib/filetype/ipynb.py +3 -5
  26. halib/filetype/jsonfile.py +0 -3
  27. halib/filetype/textfile.py +0 -1
  28. halib/filetype/videofile.py +119 -3
  29. halib/filetype/yamlfile.py +8 -16
  30. halib/online/projectmake.py +7 -6
  31. halib/online/tele_noti.py +165 -0
  32. halib/research/base_exp.py +75 -18
  33. halib/research/core/__init__.py +0 -0
  34. halib/research/core/base_config.py +144 -0
  35. halib/research/core/base_exp.py +157 -0
  36. halib/research/core/param_gen.py +108 -0
  37. halib/research/core/wandb_op.py +117 -0
  38. halib/research/data/__init__.py +0 -0
  39. halib/research/data/dataclass_util.py +41 -0
  40. halib/research/data/dataset.py +208 -0
  41. halib/research/data/torchloader.py +165 -0
  42. halib/research/dataset.py +1 -1
  43. halib/research/metrics.py +4 -0
  44. halib/research/mics.py +8 -2
  45. halib/research/perf/__init__.py +0 -0
  46. halib/research/perf/flop_calc.py +190 -0
  47. halib/research/perf/gpu_mon.py +58 -0
  48. halib/research/perf/perfcalc.py +363 -0
  49. halib/research/perf/perfmetrics.py +137 -0
  50. halib/research/perf/perftb.py +778 -0
  51. halib/research/perf/profiler.py +301 -0
  52. halib/research/perfcalc.py +57 -32
  53. halib/research/viz/__init__.py +0 -0
  54. halib/research/viz/plot.py +754 -0
  55. halib/system/_list_pc.csv +6 -0
  56. halib/system/filesys.py +60 -20
  57. halib/system/path.py +106 -0
  58. halib/utils/dict.py +9 -0
  59. halib/utils/list.py +12 -0
  60. halib-0.2.21.dist-info/METADATA +192 -0
  61. halib-0.2.21.dist-info/RECORD +109 -0
  62. halib-0.1.99.dist-info/METADATA +0 -209
  63. halib-0.1.99.dist-info/RECORD +0 -64
  64. {halib-0.1.99.dist-info → halib-0.2.21.dist-info}/WHEEL +0 -0
  65. {halib-0.1.99.dist-info → halib-0.2.21.dist-info}/licenses/LICENSE.txt +0 -0
  66. {halib-0.1.99.dist-info → halib-0.2.21.dist-info}/top_level.txt +0 -0
halib/research/perf/profiler.py
@@ -0,0 +1,301 @@
+ import os
+ import time
+ import json
+
+ from pathlib import Path
+ from pprint import pprint
+ from threading import Lock
+ from loguru import logger
+
+ from plotly.subplots import make_subplots
+ import plotly.graph_objects as go
+ import plotly.express as px # for dynamic color scales
+
+ from ...common.common import ConsoleLog
+
+
+ class zProfiler:
+     """A singleton profiler to measure execution time of contexts and steps.
+
+     Args:
+         interval_report (int): Frequency of periodic reports (0 to disable).
+         stop_to_view (bool): Pause execution to view reports if True (only in debug mode).
+         output_file (str): Path to save the profiling report.
+         report_format (str): Output format for reports ("json" or "csv").
+
+     Example:
+         prof = zProfiler()
+         prof.ctx_start("my_context")
+         prof.step_start("my_context", "step1")
+         time.sleep(0.1)
+         prof.step_end("my_context", "step1")
+         prof.ctx_end("my_context")
+     """
+
+     _instance = None
+     _lock = Lock()
+
+     def __new__(cls, *args, **kwargs):
+         with cls._lock:
+             if cls._instance is None:
+                 cls._instance = super().__new__(cls)
+             return cls._instance
+
+     def __init__(self):
+         if not hasattr(self, "_initialized"):
+             self.time_dict = {}
+             self._initialized = True
+
+     def ctx_start(self, ctx_name="ctx_default"):
+         if not isinstance(ctx_name, str) or not ctx_name:
+             raise ValueError("ctx_name must be a non-empty string")
+         if ctx_name not in self.time_dict:
+             self.time_dict[ctx_name] = {
+                 "start": time.perf_counter(),
+                 "step_dict": {},
+                 "report_count": 0,
+             }
+         self.time_dict[ctx_name]["report_count"] += 1
+
+     def ctx_end(self, ctx_name="ctx_default", report_func=None):
+         if ctx_name not in self.time_dict:
+             return
+         self.time_dict[ctx_name]["end"] = time.perf_counter()
+         self.time_dict[ctx_name]["duration"] = (
+             self.time_dict[ctx_name]["end"] - self.time_dict[ctx_name]["start"]
+         )
+
+     def step_start(self, ctx_name, step_name):
+         if not isinstance(step_name, str) or not step_name:
+             raise ValueError("step_name must be a non-empty string")
+         if ctx_name not in self.time_dict:
+             return
+         if step_name not in self.time_dict[ctx_name]["step_dict"]:
+             self.time_dict[ctx_name]["step_dict"][step_name] = []
+         self.time_dict[ctx_name]["step_dict"][step_name].append([time.perf_counter()])
+
+     def step_end(self, ctx_name, step_name):
+         if (
+             ctx_name not in self.time_dict
+             or step_name not in self.time_dict[ctx_name]["step_dict"]
+         ):
+             return
+         self.time_dict[ctx_name]["step_dict"][step_name][-1].append(time.perf_counter())
+
+     def _step_dict_to_detail(self, ctx_step_dict):
+         """
+         'ctx_step_dict': {
+         │ │ 'preprocess': [
+         │ │ │ [278090.947465806, 278090.960484853],
+         │ │ │ [278091.178424035, 278091.230944486],
+         │ │ 'infer': [
+         │ │ │ [278090.960490534, 278091.178424035],
+         │ │ │ [278091.230944486, 278091.251378469],
+         │ }
+         """
+         assert (
+             len(ctx_step_dict.keys()) > 0
+         ), "step_dict must have only one key (step_name) for detail."
+         normed_ctx_step_dict = {}
+         for step_name, time_list in ctx_step_dict.items():
+             if not isinstance(ctx_step_dict[step_name], list):
+                 raise ValueError(f"Step data for {step_name} must be a list")
+             # step_name = list(ctx_step_dict.keys())[0] # ! debug
+             normed_time_ls = []
+             for idx, time_data in enumerate(time_list):
+                 elapsed_time = -1
+                 if len(time_data) == 2:
+                     start, end = time_data[0], time_data[1]
+                     elapsed_time = end - start
+                 normed_time_ls.append((idx, elapsed_time)) # including step
+             normed_ctx_step_dict[step_name] = normed_time_ls
+         return normed_ctx_step_dict
+
+     def get_report_dict(self, with_detail=False):
+         report_dict = {}
+         for ctx_name, ctx_dict in self.time_dict.items():
+             report_dict[ctx_name] = {
+                 "duration": ctx_dict.get("duration", 0.0),
+                 "step_dict": {
+                     "summary": {"avg_time": {}, "percent_time": {}},
+                     "detail": {},
+                 },
+             }
+
+             if with_detail:
+                 report_dict[ctx_name]["step_dict"]["detail"] = (
+                     self._step_dict_to_detail(ctx_dict["step_dict"])
+                 )
+             avg_time_list = []
+             epsilon = 1e-5
+             for step_name, step_list in ctx_dict["step_dict"].items():
+                 durations = []
+                 try:
+                     for time_data in step_list:
+                         if len(time_data) != 2:
+                             continue
+                         start, end = time_data
+                         durations.append(end - start)
+                 except Exception as e:
+                     logger.error(
+                         f"Error processing step {step_name} in context {ctx_name}: {e}"
+                     )
+                     continue
+                 if not durations:
+                     continue
+                 avg_time = sum(durations) / len(durations)
+                 if avg_time < epsilon:
+                     continue
+                 avg_time_list.append((step_name, avg_time))
+             total_avg_time = (
+                 sum(time for _, time in avg_time_list) or 1e-10
+             ) # Avoid division by zero
+             for step_name, avg_time in avg_time_list:
+                 report_dict[ctx_name]["step_dict"]["summary"]["percent_time"][
+                     f"per_{step_name}"
+                 ] = (avg_time / total_avg_time) * 100.0
+                 report_dict[ctx_name]["step_dict"]["summary"]["avg_time"][
+                     f"avg_{step_name}"
+                 ] = avg_time
+             report_dict[ctx_name]["step_dict"]["summary"][
+                 "total_avg_time"
+             ] = total_avg_time
+             report_dict[ctx_name]["step_dict"]["summary"] = dict(
+                 sorted(report_dict[ctx_name]["step_dict"]["summary"].items())
+             )
+         return report_dict
+
+     @classmethod
+     def plot_formatted_data(
+         cls, profiler_data, outdir=None, file_format="png", do_show=False, tag=""
+     ):
+         """
+         Plot each context in a separate figure with bar + pie charts.
+         Save each figure in the specified format (png or svg).
+         """
+
+         if outdir is not None:
+             os.makedirs(outdir, exist_ok=True)
+
+         if file_format.lower() not in ["png", "svg"]:
+             raise ValueError("file_format must be 'png' or 'svg'")
+
+         results = {} # {context: fig}
+
+         for ctx, ctx_data in profiler_data.items():
+             summary = ctx_data["step_dict"]["summary"]
+             avg_times = summary["avg_time"]
+             percent_times = summary["percent_time"]
+
+             step_names = [s.replace("avg_", "") for s in avg_times.keys()]
+             # pprint(f'{step_names=}')
+             n_steps = len(step_names)
+
+             assert n_steps > 0, "No steps found for context: {}".format(ctx)
+             # Generate dynamic colors
+             colors = px.colors.sample_colorscale(
+                 "Viridis", [i / (n_steps - 1) for i in range(n_steps)]
+             ) if n_steps > 1 else [px.colors.sample_colorscale("Viridis", [0])[0]]
+             # pprint(f'{len(colors)} colors generated for {n_steps} steps')
+             color_map = dict(zip(step_names, colors))
+
+             # Create figure
+             fig = make_subplots(
+                 rows=1,
+                 cols=2,
+                 subplot_titles=[f"Avg Time", f"% Time"],
+                 specs=[[{"type": "bar"}, {"type": "pie"}]],
+             )
+
+             # Bar chart
+             fig.add_trace(
+                 go.Bar(
+                     x=step_names,
+                     y=list(avg_times.values()),
+                     text=[f"{v*1000:.2f} ms" for v in avg_times.values()],
+                     textposition="outside",
+                     marker=dict(color=[color_map[s] for s in step_names]),
+                     name="", # unified legend
+                     showlegend=False,
+                 ),
+                 row=1,
+                 col=1,
+             )
+
+             # Pie chart (colors match bar)
+             fig.add_trace(
+                 go.Pie(
+                     labels=step_names,
+                     values=list(percent_times.values()),
+                     marker=dict(colors=[color_map[s] for s in step_names]),
+                     hole=0.4,
+                     name="",
+                     showlegend=True,
+                 ),
+                 row=1,
+                 col=2,
+             )
+             tag_str = tag if tag and len(tag) > 0 else ""
+             # Layout
+             fig.update_layout(
+                 title_text=f"[{tag_str}] Context Profiler: {ctx}",
+                 width=1000,
+                 height=400,
+                 showlegend=True,
+                 legend=dict(title="Steps", x=1.05, y=0.5, traceorder="normal"),
+                 hovermode="x unified",
+             )
+
+             fig.update_xaxes(title_text="Steps", row=1, col=1)
+             fig.update_yaxes(title_text="Avg Time (ms)", row=1, col=1)
+
+             # Show figure
+             if do_show:
+                 fig.show()
+
+             # Save figure
+             if outdir is not None:
+                 file_prefix = ctx if len(tag_str) == 0 else f"{tag_str}_{ctx}"
+                 file_path = os.path.join(outdir, f"{file_prefix}_summary.{file_format.lower()}")
+                 fig.write_image(file_path)
+                 print(f"Saved figure: {file_path}")
+
+             results[ctx] = fig
+
+         return results
+
+     def report_and_plot(self, outdir=None, file_format="png", do_show=False, tag=""):
+         """
+         Generate the profiling report and plot the formatted data.
+
+         Args:
+             outdir (str): Directory to save figures. If None, figures are only shown.
+             file_format (str): Target file format, "png" or "svg". Default is "png".
+             do_show (bool): Whether to display the plots. Default is False.
+         """
+         report = self.get_report_dict()
+         return self.plot_formatted_data(
+             report, outdir=outdir, file_format=file_format, do_show=do_show, tag=tag
+         )
+
+     def meta_info(self):
+         """
+         Print the structure of the profiler's time dictionary.
+         Useful for debugging and understanding the profiler's internal state.
+         """
+         for ctx_name, ctx_dict in self.time_dict.items():
+             with ConsoleLog(f"Context: {ctx_name}"):
+                 step_names = list(ctx_dict['step_dict'].keys())
+                 for step_name in step_names:
+                     pprint(f"Step: {step_name}")
+
+     def save_report_dict(self, output_file, with_detail=False):
+         try:
+             report = self.get_report_dict(with_detail=with_detail)
+             with open(output_file, "w") as f:
+                 json.dump(report, f, indent=4)
+         except Exception as e:
+             logger.error(f"Failed to save report to {output_file}: {e}")
halib/research/perfcalc.py
@@ -3,12 +3,9 @@ import glob
  from typing import Optional, Tuple
  import pandas as pd

- from rich.pretty import pprint
-
  from abc import ABC, abstractmethod
  from collections import OrderedDict

- from ..filetype import csvfile
  from ..system import filesys as fs
  from ..common import now_str
  from ..research.perftb import PerfTB
@@ -19,6 +16,7 @@ REQUIRED_COLS = ["experiment", "dataset"]
  CSV_FILE_POSTFIX = "__perf"
  METRIC_PREFIX = "metric_"

+
  class PerfCalc(ABC): # Abstract base class for performance calculation
      @abstractmethod
      def get_experiment_name(self) -> str:
@@ -44,29 +42,32 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
          """
          pass

-     def valid_proc_extra_data(
-         self, proc_extra_data
-     ):
+     def valid_proc_extra_data(self, proc_extra_data):
          # make sure that all items in proc_extra_data are dictionaries, with same keys
          if proc_extra_data is None or len(proc_extra_data) == 0:
              return
          if not all(isinstance(item, dict) for item in proc_extra_data):
              raise TypeError("All items in proc_extra_data must be dictionaries")

-         if not all(item.keys() == proc_extra_data[0].keys() for item in proc_extra_data):
-             raise ValueError("All dictionaries in proc_extra_data must have the same keys")
+         if not all(
+             item.keys() == proc_extra_data[0].keys() for item in proc_extra_data
+         ):
+             raise ValueError(
+                 "All dictionaries in proc_extra_data must have the same keys"
+             )

-     def valid_proc_metric_raw_data(
-         self, metric_names, proc_metric_raw_data
-     ):
+     def valid_proc_metric_raw_data(self, metric_names, proc_metric_raw_data):
          # make sure that all items in proc_metric_raw_data are dictionaries, with same keys as metric_names
-         assert isinstance(proc_metric_raw_data, list) and len(proc_metric_raw_data) > 0, \
-             "raw_data_for_metrics must be a non-empty list of dictionaries"
+         assert (
+             isinstance(proc_metric_raw_data, list) and len(proc_metric_raw_data) > 0
+         ), "raw_data_for_metrics must be a non-empty list of dictionaries"

          # make sure that all items in proc_metric_raw_data are dictionaries with keys as metric_names
          if not all(isinstance(item, dict) for item in proc_metric_raw_data):
              raise TypeError("All items in raw_data_for_metrics must be dictionaries")
-         if not all( set(item.keys()) == set(metric_names) for item in proc_metric_raw_data):
+         if not all(
+             set(item.keys()) == set(metric_names) for item in proc_metric_raw_data
+         ):
              raise ValueError(
                  "All dictionaries in raw_data_for_metrics must have the same keys as metric_names"
              )
@@ -75,21 +76,30 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
      def calc_exp_perf_metrics(
          self, metric_names, raw_metrics_data, extra_data=None, *args, **kwargs
      ):
-         assert isinstance(raw_metrics_data, dict) or isinstance(raw_metrics_data, list), \
-             "raw_data_for_metrics must be a dictionary or a list"
+         assert isinstance(raw_metrics_data, dict) or isinstance(
+             raw_metrics_data, list
+         ), "raw_data_for_metrics must be a dictionary or a list"

          if extra_data is not None:
-             assert isinstance(extra_data, type(raw_metrics_data)), \
-                 "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
+             assert isinstance(
+                 extra_data, type(raw_metrics_data)
+             ), "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
          # prepare raw_metric data for processing
-         proc_metric_raw_data_ls = raw_metrics_data if isinstance(raw_metrics_data, list) else [raw_metrics_data.copy()]
+         proc_metric_raw_data_ls = (
+             raw_metrics_data
+             if isinstance(raw_metrics_data, list)
+             else [raw_metrics_data.copy()]
+         )
          self.valid_proc_metric_raw_data(metric_names, proc_metric_raw_data_ls)
          # prepare extra data for processing
          proc_extra_data_ls = []
          if extra_data is not None:
-             proc_extra_data_ls = extra_data if isinstance(extra_data, list) else [extra_data.copy()]
-             assert len(proc_extra_data_ls) == len(proc_metric_raw_data_ls), \
-                 "extra_data must have the same length as raw_data_for_metrics if it is a list"
+             proc_extra_data_ls = (
+                 extra_data if isinstance(extra_data, list) else [extra_data.copy()]
+             )
+             assert len(proc_extra_data_ls) == len(
+                 proc_metric_raw_data_ls
+             ), "extra_data must have the same length as raw_data_for_metrics if it is a list"
          # validate the extra_data
          self.valid_proc_extra_data(proc_extra_data_ls)

@@ -102,7 +112,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                  "experiment": self.get_experiment_name(),
              }
              custom_fields = []
-             if len(proc_extra_data_ls)> 0:
+             if len(proc_extra_data_ls) > 0:
                  # add extra data to the output dictionary
                  extra_data_item = proc_extra_data_ls[idx]
                  out_dict.update(extra_data_item)
@@ -110,7 +120,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              metric_results = metrics_backend.calc_metrics(
                  metrics_data_dict=raw_metrics_data, *args, **kwargs
              )
-             metric_results_prefix = {f"metric_{k}": v for k, v in metric_results.items()}
+             metric_results_prefix = {
+                 f"metric_{k}": v for k, v in metric_results.items()
+             }
              out_dict.update(metric_results_prefix)
              ordered_cols = (
                  REQUIRED_COLS + custom_fields + list(metric_results_prefix.keys())
@@ -126,7 +138,7 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
      #! outfile - if provided, will save the output to a CSV file with the given path
      #! outdir - if provided, will save the output to a CSV file in the given directory with a generated filename
      #! return_df - if True, will return a DataFrame instead of a dictionary
-     def calc_and_save_exp_perfs(
+     def calc_perfs(
          self,
          raw_metrics_data: Union[List[dict], dict],
          extra_data: Optional[Union[List[dict], dict]] = None,
@@ -140,9 +152,11 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
          """
          metric_names = self.get_metric_backend().metric_names
          out_dict_list = self.calc_exp_perf_metrics(
-             metric_names=metric_names, raw_metrics_data=raw_metrics_data,
+             metric_names=metric_names,
+             raw_metrics_data=raw_metrics_data,
              extra_data=extra_data,
-             *args, **kwargs
+             *args,
+             **kwargs,
          )
          csv_outfile = kwargs.get("outfile", None)
          if csv_outfile is not None:
@@ -176,13 +190,18 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
          return "__perf.csv" in exp_file_name

      @classmethod
-     def gen_perf_report_for_multip_exps(
-         cls, indir: str, exp_csv_filter_fn=default_exp_csv_filter_fn, include_file_name=False, csv_sep=";"
+     def get_perftb_for_multi_exps(
+         cls,
+         indir: str,
+         exp_csv_filter_fn=default_exp_csv_filter_fn,
+         include_file_name=False,
+         csv_sep=";",
      ) -> PerfTB:
          """
          Generate a performance report by scanning experiment subdirectories.
          Must return a dictionary with keys as metric names and values as performance tables.
          """
+
          def get_df_for_all_exp_perf(csv_perf_files, csv_sep=";"):
              """
              Create a single DataFrame from all CSV files.
@@ -194,7 +213,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              for csv_file in csv_perf_files:
                  temp_df = pd.read_csv(csv_file, sep=csv_sep)
                  if FILE_NAME_COL:
-                     temp_df[FILE_NAME_COL] = fs.get_file_name(csv_file, split_file_ext=False)
+                     temp_df[FILE_NAME_COL] = fs.get_file_name(
+                         csv_file, split_file_ext=False
+                     )
                  # csvfile.fn_display_df(temp_df)
                  temp_df_cols = temp_df.columns.tolist()
                  for col in temp_df_cols:
@@ -205,7 +226,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              for csv_file in csv_perf_files:
                  temp_df = pd.read_csv(csv_file, sep=csv_sep)
                  if FILE_NAME_COL:
-                     temp_df[FILE_NAME_COL] = fs.get_file_name(csv_file, split_file_ext=False)
+                     temp_df[FILE_NAME_COL] = fs.get_file_name(
+                         csv_file, split_file_ext=False
+                     )
                  # Drop all-NA columns to avoid dtype inconsistency
                  temp_df = temp_df.dropna(axis=1, how="all")
                  # ensure all columns are present in the final DataFrame
@@ -215,7 +238,9 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
                  df = pd.concat([df, temp_df], ignore_index=True)
              # assert that REQUIRED_COLS are present in the DataFrame
              # pprint(df.columns.tolist())
-             sticky_cols = REQUIRED_COLS + ([FILE_NAME_COL] if include_file_name else []) # columns that must always be present
+             sticky_cols = REQUIRED_COLS + (
+                 [FILE_NAME_COL] if include_file_name else []
+             ) # columns that must always be present
              for col in sticky_cols:
                  if col not in df.columns:
                      raise ValueError(
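
In halib/research/perfcalc.py the public entry points were renamed: calc_and_save_exp_perfs becomes calc_perfs, and gen_perf_report_for_multip_exps becomes get_perftb_for_multi_exps, with the bodies otherwise reformatted. A rough sketch of driving the renamed API, using only what is visible in these hunks, is given below; the metric backend is a stand-in exposing just the two members used here (metric_names and calc_metrics), and PerfCalc may declare further abstract methods (for example one supplying the required "dataset" column) that are not part of this diff.

    # Hedged sketch of the renamed PerfCalc API; names below that do not appear
    # in the hunks above (StubMetricBackend, MyExp, the paths) are hypothetical.
    from halib.research.perfcalc import PerfCalc

    class StubMetricBackend:
        # Stand-in backend: only the members referenced in this diff are provided.
        metric_names = ["accuracy"]

        def calc_metrics(self, metrics_data_dict, *args, **kwargs):
            preds, labels = metrics_data_dict["accuracy"]
            return {"accuracy": sum(p == y for p, y in zip(preds, labels)) / len(labels)}

    class MyExp(PerfCalc):
        def get_experiment_name(self) -> str:
            return "exp_baseline"

        def get_metric_backend(self):
            return StubMetricBackend()

    exp = MyExp()
    # calc_perfs (formerly calc_and_save_exp_perfs) accepts a dict or a list of dicts keyed
    # by metric name; "outfile", "outdir" and "return_df" are optional keyword arguments.
    results = exp.calc_perfs(raw_metrics_data={"accuracy": ([1, 0, 1], [1, 1, 1])}, outdir="perf_out")

    # Aggregate the per-experiment "__perf.csv" files found under a directory into one PerfTB:
    perftb = PerfCalc.get_perftb_for_multi_exps(indir="experiments")
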