halib 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +94 -0
- halib/common/__init__.py +0 -0
- halib/common/common.py +326 -0
- halib/common/rich_color.py +285 -0
- halib/common.py +151 -0
- halib/csvfile.py +48 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/exp/__init__.py +0 -0
- halib/exp/core/__init__.py +0 -0
- halib/exp/core/base_config.py +167 -0
- halib/exp/core/base_exp.py +147 -0
- halib/exp/core/param_gen.py +170 -0
- halib/exp/core/wandb_op.py +117 -0
- halib/exp/data/__init__.py +0 -0
- halib/exp/data/dataclass_util.py +41 -0
- halib/exp/data/dataset.py +208 -0
- halib/exp/data/torchloader.py +165 -0
- halib/exp/perf/__init__.py +0 -0
- halib/exp/perf/flop_calc.py +190 -0
- halib/exp/perf/gpu_mon.py +58 -0
- halib/exp/perf/perfcalc.py +470 -0
- halib/exp/perf/perfmetrics.py +137 -0
- halib/exp/perf/perftb.py +778 -0
- halib/exp/perf/profiler.py +507 -0
- halib/exp/viz/__init__.py +0 -0
- halib/exp/viz/plot.py +754 -0
- halib/filesys.py +117 -0
- halib/filetype/__init__.py +0 -0
- halib/filetype/csvfile.py +192 -0
- halib/filetype/ipynb.py +61 -0
- halib/filetype/jsonfile.py +19 -0
- halib/filetype/textfile.py +12 -0
- halib/filetype/videofile.py +266 -0
- halib/filetype/yamlfile.py +87 -0
- halib/gdrive.py +179 -0
- halib/gdrive_mkdir.py +41 -0
- halib/gdrive_test.py +37 -0
- halib/jsonfile.py +22 -0
- halib/listop.py +13 -0
- halib/online/__init__.py +0 -0
- halib/online/gdrive.py +229 -0
- halib/online/gdrive_mkdir.py +53 -0
- halib/online/gdrive_test.py +50 -0
- halib/online/projectmake.py +131 -0
- halib/online/tele_noti.py +165 -0
- halib/plot.py +301 -0
- halib/projectmake.py +115 -0
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +157 -0
- halib/research/benchquery.py +131 -0
- halib/research/core/__init__.py +0 -0
- halib/research/core/base_config.py +144 -0
- halib/research/core/base_exp.py +157 -0
- halib/research/core/param_gen.py +108 -0
- halib/research/core/wandb_op.py +117 -0
- halib/research/data/__init__.py +0 -0
- halib/research/data/dataclass_util.py +41 -0
- halib/research/data/dataset.py +208 -0
- halib/research/data/torchloader.py +165 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +137 -0
- halib/research/mics.py +74 -0
- halib/research/params_gen.py +108 -0
- halib/research/perf/__init__.py +0 -0
- halib/research/perf/flop_calc.py +190 -0
- halib/research/perf/gpu_mon.py +58 -0
- halib/research/perf/perfcalc.py +363 -0
- halib/research/perf/perfmetrics.py +137 -0
- halib/research/perf/perftb.py +778 -0
- halib/research/perf/profiler.py +301 -0
- halib/research/perfcalc.py +361 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/viz/__init__.py +0 -0
- halib/research/viz/plot.py +754 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/__init__.py +0 -0
- halib/sys/cmd.py +8 -0
- halib/sys/filesys.py +124 -0
- halib/system/__init__.py +0 -0
- halib/system/_list_pc.csv +6 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +164 -0
- halib/system/path.py +106 -0
- halib/tele_noti.py +166 -0
- halib/textfile.py +13 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict.py +317 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/list.py +17 -0
- halib/utils/listop.py +13 -0
- halib/utils/slack.py +86 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +139 -0
- halib-0.2.30.dist-info/METADATA +237 -0
- halib-0.2.30.dist-info/RECORD +110 -0
- halib-0.2.30.dist-info/WHEEL +5 -0
- halib-0.2.30.dist-info/licenses/LICENSE.txt +17 -0
- halib-0.2.30.dist-info/top_level.txt +1 -0
halib/exp/perf/profiler.py

@@ -0,0 +1,507 @@
import os
import time
import json
from pathlib import Path
from pprint import pprint
from threading import Lock
from functools import wraps
from loguru import logger

# Plotting libraries
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px  # for dynamic color scales

from contextlib import contextmanager

from ...common.common import ConsoleLog
from ...system.path import *


# ==========================================
# 1. The Decorator
# ==========================================
def check_enabled(func):
    """
    Decorator to skip method execution if the profiler is disabled.

    This acts as a guard clause for the entire function. If the profiler
    instance has `enabled=False`, the decorated function is not executed at
    all, saving processing time and avoiding side effects.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        # Gracefully handle cases where 'enabled' is not set yet (default to True)
        if not getattr(self, "enabled", True):
            return  # Exit immediately, returning None
        return func(self, *args, **kwargs)

    return wrapper

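# Usage sketch (illustrative, not part of the original module): any instance
# method decorated with @check_enabled becomes a no-op when the owning
# profiler is disabled:
#
#     prof = zProfiler(enabled=False)
#     prof.ctx_start("ctx")   # returns None immediately; no state is recorded
#     prof.ctx_end("ctx")     # likewise a no-op
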
# ==========================================
# 2. The Class
# ==========================================
class ContextScope:
    """Helper to remember the current context name so you don't have to repeat it."""

    def __init__(self, profiler, ctx_name):
        self.profiler = profiler
        self.ctx_name = ctx_name

    @contextmanager
    def step(self, step_name):
        # Automatically passes the stored ctx_name plus the new step_name
        with self.profiler.measure(self.ctx_name, step_name):
            yield


class zProfiler:
    """A singleton profiler to measure execution time of contexts and steps.

    Args:
        enabled (bool, optional): Whether profiling is active. If None, the
            current state is kept (defaults to True on first initialization).

    Example (using the context manager):
        prof = zProfiler()
        with prof.measure("my_context") as ctx:
            with ctx.step("step1"):
                time.sleep(0.1)
            with ctx.step("step2"):
                time.sleep(0.2)

    Example (using the raw methods):
        prof = zProfiler()
        prof.ctx_start("my_context")
        prof.step_start("my_context", "step1")
        time.sleep(0.1)
        prof.step_end("my_context", "step1")
        prof.ctx_end("my_context")
    """

    _instance = None
    _lock = Lock()

    def __new__(cls, *args, **kwargs):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, enabled=None):
        """
        Args:
            enabled (bool, optional):
                - If True/False: updates the enabled state immediately.
                - If None: keeps the current state (defaults to True on first init).
        """
        # 1. First-time initialization
        if not hasattr(self, "_initialized"):
            self.enabled = enabled if enabled is not None else True
            self.time_dict = {}
            self._initialized = True

        # 2. Already initialized: update 'enabled' only if explicitly passed
        elif enabled is not None:
            self.enabled = enabled

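    # Usage sketch (illustrative): because zProfiler is a singleton, every
    # construction returns the same object, and passing `enabled` flips its
    # state in place:
    #
    #     a = zProfiler()               # first call creates the instance
    #     b = zProfiler(enabled=False)  # same object, now disabled
    #     assert a is b and b.enabled is False
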
    @check_enabled
    def ctx_start(self, ctx_name="ctx_default"):
        if not isinstance(ctx_name, str) or not ctx_name:
            raise ValueError("ctx_name must be a non-empty string")
        if ctx_name not in self.time_dict:
            self.time_dict[ctx_name] = {
                "start": time.perf_counter(),
                "step_dict": {},
                "report_count": 0,
            }
        self.time_dict[ctx_name]["report_count"] += 1

    @check_enabled
    def ctx_end(self, ctx_name="ctx_default", report_func=None):
        if ctx_name not in self.time_dict:
            return
        self.time_dict[ctx_name]["end"] = time.perf_counter()
        self.time_dict[ctx_name]["duration"] = (
            self.time_dict[ctx_name]["end"] - self.time_dict[ctx_name]["start"]
        )

    @check_enabled
    def step_start(self, ctx_name, step_name):
        if not isinstance(step_name, str) or not step_name:
            raise ValueError("step_name must be a non-empty string")
        if ctx_name not in self.time_dict:
            return
        if step_name not in self.time_dict[ctx_name]["step_dict"]:
            self.time_dict[ctx_name]["step_dict"][step_name] = []
        self.time_dict[ctx_name]["step_dict"][step_name].append([time.perf_counter()])

    @check_enabled
    def step_end(self, ctx_name, step_name):
        if (
            ctx_name not in self.time_dict
            or step_name not in self.time_dict[ctx_name]["step_dict"]
        ):
            return
        self.time_dict[ctx_name]["step_dict"][step_name][-1].append(time.perf_counter())

    @contextmanager
    def measure(self, ctx_name, step_name=None):
        if step_name is None:
            # --- Context Mode ---
            self.ctx_start(ctx_name)
            try:
                # Yield the helper object bound to the current context name
                yield ContextScope(self, ctx_name)
            finally:
                self.ctx_end(ctx_name)
        else:
            # --- Step Mode ---
            self.step_start(ctx_name, step_name)
            try:
                yield
            finally:
                self.step_end(ctx_name, step_name)

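    # Usage sketch (illustrative): measure() can also time a single step
    # directly, without the ContextScope helper (`batches` and `model` are
    # placeholders):
    #
    #     prof = zProfiler()
    #     prof.ctx_start("infer_loop")
    #     for batch in batches:
    #         with prof.measure("infer_loop", "forward"):
    #             model(batch)
    #     prof.ctx_end("infer_loop")
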
    def _step_dict_to_detail(self, ctx_step_dict):
        """
        Normalize raw [start, end] pairs into (iteration_idx, elapsed) tuples.

        Example input:
            'ctx_step_dict': {
                'preprocess': [
                    [278090.947465806, 278090.960484853],
                    [278091.178424035, 278091.230944486],
                ],
                'infer': [
                    [278090.960490534, 278091.178424035],
                    [278091.230944486, 278091.251378469],
                ],
            }
        """
        assert len(ctx_step_dict.keys()) > 0, (
            "ctx_step_dict must contain at least one step for detail."
        )
        normed_ctx_step_dict = {}
        for step_name, time_list in ctx_step_dict.items():
            if not isinstance(time_list, list):
                raise ValueError(f"Step data for {step_name} must be a list")
            normed_time_ls = []
            for idx, time_data in enumerate(time_list):
                elapsed_time = -1
                if len(time_data) == 2:
                    start, end = time_data[0], time_data[1]
                    elapsed_time = end - start
                normed_time_ls.append((idx, elapsed_time))  # keep the step index
            normed_ctx_step_dict[step_name] = normed_time_ls
        return normed_ctx_step_dict

    def get_report_dict(self, with_detail=False):
        report_dict = {}
        for ctx_name, ctx_dict in self.time_dict.items():
            report_dict[ctx_name] = {
                "duration": ctx_dict.get("duration", 0.0),
                "step_dict": {
                    "summary": {"avg_time": {}, "percent_time": {}},
                    "detail": {},
                },
            }

            if with_detail:
                report_dict[ctx_name]["step_dict"]["detail"] = (
                    self._step_dict_to_detail(ctx_dict["step_dict"])
                )
            avg_time_list = []
            epsilon = 1e-5
            for step_name, step_list in ctx_dict["step_dict"].items():
                durations = []
                try:
                    for time_data in step_list:
                        if len(time_data) != 2:
                            continue
                        start, end = time_data
                        durations.append(end - start)
                except Exception as e:
                    logger.error(
                        f"Error processing step {step_name} in context {ctx_name}: {e}"
                    )
                    continue
                if not durations:
                    continue
                avg_time = sum(durations) / len(durations)
                if avg_time < epsilon:
                    continue
                avg_time_list.append((step_name, avg_time))
            total_avg_time = (
                sum(t for _, t in avg_time_list) or 1e-10
            )  # Avoid division by zero
            for step_name, avg_time in avg_time_list:
                report_dict[ctx_name]["step_dict"]["summary"]["percent_time"][
                    f"per_{step_name}"
                ] = (avg_time / total_avg_time) * 100.0
                report_dict[ctx_name]["step_dict"]["summary"]["avg_time"][
                    f"avg_{step_name}"
                ] = avg_time
            report_dict[ctx_name]["step_dict"]["summary"]["total_avg_time"] = (
                total_avg_time
            )
            report_dict[ctx_name]["step_dict"]["summary"] = dict(
                sorted(report_dict[ctx_name]["step_dict"]["summary"].items())
            )
        return report_dict

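    # Shape sketch (illustrative values) of the dict returned by
    # get_report_dict():
    #
    #     {
    #         "my_context": {
    #             "duration": 0.31,
    #             "step_dict": {
    #                 "summary": {
    #                     "avg_time": {"avg_step1": 0.10, "avg_step2": 0.20},
    #                     "percent_time": {"per_step1": 33.3, "per_step2": 66.7},
    #                     "total_avg_time": 0.30,
    #                 },
    #                 "detail": {},  # populated only when with_detail=True
    #             },
    #         },
    #     }
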
    def get_report_dataframes(self):
        """
        Returns two pandas DataFrames containing profiling data.

        Returns:
            tuple: (df_summary, df_detail)
                - df_summary: aggregated stats (context, step, avg, %, count)
                - df_detail: raw duration for every single iteration
        """
        try:
            import pandas as pd
        except ImportError:
            logger.error(
                "pandas is required for get_report_dataframes(); please pip install pandas."
            )
            return None, None

        # Get the full data structure
        data = self.get_report_dict(with_detail=True)

        summary_rows = []
        detail_rows = []

        for ctx_name, ctx_data in data.items():
            summary = ctx_data["step_dict"]["summary"]
            detail_dict = ctx_data["step_dict"]["detail"]

            # --- 1. Build summary data ---
            # Iterate keys in 'avg_time' to ensure we capture all steps
            for avg_key, avg_val in summary["avg_time"].items():
                step_name = avg_key.replace("avg_", "")

                # Get the corresponding percentage
                per_key = f"per_{step_name}"
                percent_val = summary["percent_time"].get(per_key, 0.0)

                # Get the sample count from the detail list length
                count = len(detail_dict.get(step_name, []))

                summary_rows.append(
                    {
                        "context_name": ctx_name,
                        "step_name": step_name,
                        "avg_time_sec": avg_val,
                        "percent_total": percent_val,
                        "sample_count": count,
                    }
                )

            # --- 2. Build detail data ---
            for step_name, time_list in detail_dict.items():
                # time_list format: [(idx, duration), (idx, duration), ...]
                for item in time_list:
                    if len(item) == 2:
                        idx, duration = item
                        detail_rows.append(
                            {
                                "context_name": ctx_name,
                                "step_name": step_name,
                                "iteration_idx": idx,
                                "duration_sec": duration,
                            }
                        )

        # Create the DataFrames
        df_summary = pd.DataFrame(summary_rows)
        df_detail = pd.DataFrame(detail_rows)

        # Reorder columns for readability
        if not df_summary.empty:
            df_summary = df_summary[
                [
                    "context_name",
                    "step_name",
                    "avg_time_sec",
                    "percent_total",
                    "sample_count",
                ]
            ]

        if not df_detail.empty:
            df_detail = df_detail[
                ["context_name", "step_name", "iteration_idx", "duration_sec"]
            ]

        return df_summary, df_detail

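    # Usage sketch (illustrative, requires pandas): inspect the aggregated
    # table interactively:
    #
    #     df_summary, df_detail = zProfiler().get_report_dataframes()
    #     if df_summary is not None:
    #         print(df_summary.sort_values("percent_total", ascending=False))
    #     # df_summary columns: context_name, step_name, avg_time_sec,
    #     # percent_total, sample_count
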
    def get_report_csv_files(self, outdir, tag="profiler"):
        """
        Exports profiling data to two CSV files:
            1. {tag}_summary.csv: aggregated stats (avg time, %)
            2. {tag}_detailed_logs.csv: raw duration for every iteration

        Args:
            outdir (str): Directory to save files.
            tag (str): Optional prefix for filenames.
        """
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        tag_str = f"{tag}_" if tag else ""
        summary_file = os.path.join(outdir, f"{tag_str}summary.csv")
        detail_file = os.path.join(outdir, f"{tag_str}detailed_logs.csv")

        df_summary, df_detail = self.get_report_dataframes()
        if df_summary is not None:
            df_summary.to_csv(summary_file, index=False, sep=";", encoding="utf-8")
            logger.info(f"Saved summary CSV to: {summary_file}")
        if df_detail is not None:
            df_detail.to_csv(detail_file, index=False, sep=";", encoding="utf-8")
            logger.info(f"Saved detailed logs CSV to: {detail_file}")

    @classmethod
    def plot_formatted_data(
        cls, profiler_data, outdir=None, file_format="png", do_show=False, tag=""
    ):
        """
        Plot each context in a separate figure with bar + pie charts.
        Save each figure in the specified format (png or svg).
        """
        if outdir is not None:
            os.makedirs(outdir, exist_ok=True)

        if file_format.lower() not in ["png", "svg"]:
            raise ValueError("file_format must be 'png' or 'svg'")

        results = {}  # {context: fig}

        for ctx, ctx_data in profiler_data.items():
            summary = ctx_data["step_dict"]["summary"]
            avg_times = summary["avg_time"]
            percent_times = summary["percent_time"]

            step_names = [s.replace("avg_", "") for s in avg_times.keys()]
            n_steps = len(step_names)

            assert n_steps > 0, f"No steps found for context: {ctx}"
            # Generate dynamic colors
            colors = (
                px.colors.sample_colorscale(
                    "Viridis", [i / (n_steps - 1) for i in range(n_steps)]
                )
                if n_steps > 1
                else [px.colors.sample_colorscale("Viridis", [0])[0]]
            )
            color_map = dict(zip(step_names, colors))

            # Create the figure
            fig = make_subplots(
                rows=1,
                cols=2,
                subplot_titles=["Avg Time", "% Time"],
                specs=[[{"type": "bar"}, {"type": "pie"}]],
            )

            # Bar chart
            fig.add_trace(
                go.Bar(
                    x=step_names,
                    y=[v * 1000 for v in avg_times.values()],  # seconds -> ms, to match the axis label
                    text=[f"{v * 1000:.2f} ms" for v in avg_times.values()],
                    textposition="outside",
                    marker=dict(color=[color_map[s] for s in step_names]),
                    name="",  # unified legend
                    showlegend=False,
                ),
                row=1,
                col=1,
            )

            # Pie chart (colors match the bars)
            fig.add_trace(
                go.Pie(
                    labels=step_names,
                    values=list(percent_times.values()),
                    marker=dict(colors=[color_map[s] for s in step_names]),
                    hole=0.4,
                    name="",
                    showlegend=True,
                ),
                row=1,
                col=2,
            )
            tag_str = tag if tag else ""
            # Layout
            fig.update_layout(
                title_text=f"[{tag_str}] Context Profiler: {ctx}",
                width=1000,
                height=400,
                showlegend=True,
                legend=dict(title="Steps", x=1.05, y=0.5, traceorder="normal"),
                hovermode="x unified",
            )

            fig.update_xaxes(title_text="Steps", row=1, col=1)
            fig.update_yaxes(title_text="Avg Time (ms)", row=1, col=1)

            # Show the figure
            if do_show:
                fig.show()

            # Save the figure
            if outdir is not None:
                file_prefix = ctx if len(tag_str) == 0 else f"{tag_str}_{ctx}"
                file_path = os.path.join(
                    outdir, f"{file_prefix}_summary.{file_format.lower()}"
                )
                fig.write_image(file_path)
                pprint("Saved figure to: 🔽")
                pprint_local_path(file_path)

            results[ctx] = fig

        return results

    def report_and_plot(self, outdir=None, file_format="png", do_show=False, tag=""):
        """
        Generate the profiling report and plot the formatted data.

        Args:
            outdir (str): Directory to save figures. If None, figures are only shown.
            file_format (str): Target file format, "png" or "svg". Default is "png".
            do_show (bool): Whether to display the plots. Default is False.
        """
        report = self.get_report_dict()
        return self.plot_formatted_data(
            report, outdir=outdir, file_format=file_format, do_show=do_show, tag=tag
        )

    def meta_info(self):
        """
        Print the structure of the profiler's time dictionary.
        Useful for debugging and understanding the profiler's internal state.
        """
        for ctx_name, ctx_dict in self.time_dict.items():
            with ConsoleLog(f"Context: {ctx_name}"):
                step_names = list(ctx_dict["step_dict"].keys())
                for step_name in step_names:
                    pprint(f"Step: {step_name}")

    def save_report_dict(self, output_file, with_detail=False):
        try:
            report = self.get_report_dict(with_detail=with_detail)
            with open(output_file, "w") as f:
                json.dump(report, f, indent=4)
        except Exception as e:
            logger.error(f"Failed to save report to {output_file}: {e}")
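
# Minimal end-to-end sketch (illustrative addition, not part of the released
# module; assumes the package's relative imports resolve, i.e. this file runs
# as part of halib):
if __name__ == "__main__":
    prof = zProfiler()
    with prof.measure("demo_ctx") as ctx:
        with ctx.step("sleep_short"):
            time.sleep(0.05)
        with ctx.step("sleep_long"):
            time.sleep(0.10)
    # Aggregated timings; CSV export and plotting are also available via
    # get_report_csv_files() and report_and_plot().
    pprint(prof.get_report_dict())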