halib 0.1.91__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. halib/__init__.py +12 -6
  2. halib/common/__init__.py +0 -0
  3. halib/common/common.py +207 -0
  4. halib/common/rich_color.py +285 -0
  5. halib/common.py +53 -10
  6. halib/exp/__init__.py +0 -0
  7. halib/exp/core/__init__.py +0 -0
  8. halib/exp/core/base_config.py +167 -0
  9. halib/exp/core/base_exp.py +147 -0
  10. halib/exp/core/param_gen.py +189 -0
  11. halib/exp/core/wandb_op.py +117 -0
  12. halib/exp/data/__init__.py +0 -0
  13. halib/exp/data/dataclass_util.py +41 -0
  14. halib/exp/data/dataset.py +208 -0
  15. halib/exp/data/torchloader.py +165 -0
  16. halib/exp/perf/__init__.py +0 -0
  17. halib/exp/perf/flop_calc.py +190 -0
  18. halib/exp/perf/gpu_mon.py +58 -0
  19. halib/exp/perf/perfcalc.py +440 -0
  20. halib/exp/perf/perfmetrics.py +137 -0
  21. halib/exp/perf/perftb.py +778 -0
  22. halib/exp/perf/profiler.py +507 -0
  23. halib/exp/viz/__init__.py +0 -0
  24. halib/exp/viz/plot.py +754 -0
  25. halib/filetype/csvfile.py +3 -9
  26. halib/filetype/ipynb.py +61 -0
  27. halib/filetype/jsonfile.py +0 -3
  28. halib/filetype/textfile.py +0 -1
  29. halib/filetype/videofile.py +119 -3
  30. halib/filetype/yamlfile.py +16 -1
  31. halib/online/projectmake.py +7 -6
  32. halib/online/tele_noti.py +165 -0
  33. halib/research/base_exp.py +75 -18
  34. halib/research/core/__init__.py +0 -0
  35. halib/research/core/base_config.py +144 -0
  36. halib/research/core/base_exp.py +157 -0
  37. halib/research/core/param_gen.py +108 -0
  38. halib/research/core/wandb_op.py +117 -0
  39. halib/research/data/__init__.py +0 -0
  40. halib/research/data/dataclass_util.py +41 -0
  41. halib/research/data/dataset.py +208 -0
  42. halib/research/data/torchloader.py +165 -0
  43. halib/research/dataset.py +6 -7
  44. halib/research/flop_csv.py +34 -0
  45. halib/research/flops.py +156 -0
  46. halib/research/metrics.py +4 -0
  47. halib/research/mics.py +59 -1
  48. halib/research/perf/__init__.py +0 -0
  49. halib/research/perf/flop_calc.py +190 -0
  50. halib/research/perf/gpu_mon.py +58 -0
  51. halib/research/perf/perfcalc.py +363 -0
  52. halib/research/perf/perfmetrics.py +137 -0
  53. halib/research/perf/perftb.py +778 -0
  54. halib/research/perf/profiler.py +301 -0
  55. halib/research/perfcalc.py +60 -35
  56. halib/research/perftb.py +2 -1
  57. halib/research/plot.py +480 -218
  58. halib/research/viz/__init__.py +0 -0
  59. halib/research/viz/plot.py +754 -0
  60. halib/system/_list_pc.csv +6 -0
  61. halib/system/filesys.py +60 -20
  62. halib/system/path.py +106 -0
  63. halib/utils/dict.py +9 -0
  64. halib/utils/list.py +12 -0
  65. halib/utils/video.py +6 -0
  66. halib-0.2.21.dist-info/METADATA +192 -0
  67. halib-0.2.21.dist-info/RECORD +109 -0
  68. halib-0.1.91.dist-info/METADATA +0 -201
  69. halib-0.1.91.dist-info/RECORD +0 -61
  70. {halib-0.1.91.dist-info → halib-0.2.21.dist-info}/WHEEL +0 -0
  71. {halib-0.1.91.dist-info → halib-0.2.21.dist-info}/licenses/LICENSE.txt +0 -0
  72. {halib-0.1.91.dist-info → halib-0.2.21.dist-info}/top_level.txt +0 -0
halib/filetype/csvfile.py CHANGED
@@ -1,19 +1,13 @@
+ import csv
+ import textwrap
  import pandas as pd
+ import pygwalker as pyg
  from tabulate import tabulate
  from rich.console import Console
- from rich import print as rprint
- from rich import inspect
- from rich.pretty import pprint
- from tqdm import tqdm
- from loguru import logger
  from itables import init_notebook_mode, show
- import pygwalker as pyg
- import textwrap
- import csv

  console = Console()

-
  def read(file, separator=","):
      df = pd.read_csv(file, separator)
      return df
halib/filetype/ipynb.py ADDED
@@ -0,0 +1,61 @@
+ import ipynbname
+ from pathlib import Path
+ from contextlib import contextmanager
+
+ from ..common.common import now_str
+
+ @contextmanager
+ def gen_ipynb_name(
+     filename,
+     add_time_stamp=False,
+     nb_prefix="nb__",
+     separator="__",
+ ):
+     """
+     Context manager that prefixes the filename with the notebook name.
+     Output: NotebookName_OriginalName.ext
+     """
+     try:
+         nb_name = ipynbname.name()
+     except FileNotFoundError:
+         nb_name = "script"  # Fallback
+
+     p = Path(filename)
+
+     # --- FIX START ---
+
+     # 1. Get the parts separately
+     original_stem = p.stem  # "test" (no extension)
+     extension = p.suffix  # ".csv"
+
+     now_string = now_str() if add_time_stamp else ""
+
+     # 2. Construct the base name (Notebook + Separator + OriginalName)
+     base_name = f"{nb_prefix}{nb_name}{separator}{original_stem}"
+
+     # 3. Append timestamp if needed
+     if now_string:
+         base_name = f"{base_name}{separator}{now_string}"
+
+     # 4. Add the extension at the VERY END
+     new_filename = f"{base_name}{extension}"
+
+     # --- FIX END ---
+
+     final_path = p.parent / new_filename
+
+     # Assuming you use 'rich' console based on your snippet
+     # console.rule()
+     # print(f"📝 Saving as: {final_path}")
+
+     yield str(final_path)
+
+
+ if __name__ == "__main__":
+     # --- Usage Example ---
+     # Assume Notebook Name is: "MyThesisWork"
+     filename = "results.csv"
+     with gen_ipynb_name(filename) as filename_ipynb:
+         # filename_ipynb is now: "MyThesisWork_results.csv"
+         print(f"File to save: {filename_ipynb}")
+         # df.to_csv(filename_ipynb)
halib/filetype/jsonfile.py CHANGED
@@ -1,17 +1,14 @@
  import json

-
  def read(file):
      with open(file) as f:
          data = json.load(f)
      return data

-
  def write(data_dict, outfile):
      with open(outfile, "w") as json_file:
          json.dump(data_dict, json_file)

-
  def beautify(json_str):
      formatted_json = json_str
      try:
halib/filetype/textfile.py CHANGED
@@ -4,7 +4,6 @@ def read_line_by_line(file_path):
      lines = [line.rstrip() for line in lines]
      return lines

-
  def write(lines, outfile, append=False):
      mode = "a" if append else "w"
      with open(outfile, mode, encoding="utf-8") as f:
halib/filetype/videofile.py CHANGED
@@ -1,11 +1,128 @@
+ import os
  import cv2
- import textfile
  import enlighten
+
  from enum import Enum
- from ..system import filesys
  from tube_dl import Youtube, Playlist
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

+ from . import textfile
+ from . import csvfile
+ from ..system import filesys
+
+ class VideoUtils:
+     @staticmethod
+     def _default_meta_extractor(video_path):
+         """Default video metadata extractor function."""
+         # Open the video file
+         cap = cv2.VideoCapture(video_path)
+
+         # Check if the video was opened successfully
+         if not cap.isOpened():
+             print(f"Error: Could not open video file {video_path}")
+             return None
+
+         # Get the frame count
+         frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+         # Get the FPS
+         fps = cap.get(cv2.CAP_PROP_FPS)
+
+         # get frame size
+         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+         # Release the video capture object
+         cap.release()
+
+         meta_dict = {
+             "video_path": video_path,
+             "width": width,
+             "height": height,
+             "frame_count": frame_count,
+             "fps": fps,
+         }
+         return meta_dict
+
+     @staticmethod
+     def get_video_meta_dict(video_path, meta_dict_extractor_func=None):
+         assert os.path.exists(video_path), f"Video file {video_path} does not exist"
+         if meta_dict_extractor_func and callable(meta_dict_extractor_func):
+             assert (
+                 meta_dict_extractor_func.__code__.co_argcount == 1
+             ), "meta_dict_extractor_func must take exactly one argument (video_path)"
+             meta_dict = meta_dict_extractor_func(video_path)
+             assert isinstance(
+                 meta_dict, dict
+             ), "meta_dict_extractor_func must return a dictionary"
+             assert "video_path" in meta_dict, "meta_dict must contain 'video_path'"
+         else:
+             meta_dict = VideoUtils._default_meta_extractor(video_path=video_path)
+         return meta_dict
+
+     @staticmethod
+     def get_video_dir_meta_df(
+         video_dir,
+         video_exts=[".mp4", ".avi", ".mov", ".mkv"],
+         search_recursive=False,
+         csv_outfile=None,
+     ):
+         assert os.path.exists(video_dir), f"Video directory {video_dir} does not exist"
+         video_files = filesys.filter_files_by_extension(
+             video_dir, video_exts, recursive=search_recursive
+         )
+         assert (
+             len(video_files) > 0
+         ), f"No video files found in {video_dir} with extensions {video_exts}"
+         video_meta_list = []
+         for vfile in video_files:
+             meta_dict = VideoUtils.get_video_meta_dict(vfile)
+             if meta_dict:
+                 video_meta_list.append(meta_dict)
+         dfmk = csvfile.DFCreator()
+         columns = list(video_meta_list[0].keys())
+         assert len(columns) > 0, "No video metadata found"
+         assert "video_path" in columns, "video_path column not found in video metadata"
+         # move video_path to the first column
+         columns.remove("video_path")
+         columns.insert(0, "video_path")
+         dfmk.create_table("video_meta", columns)
+         rows = [[meta[col] for col in columns] for meta in video_meta_list]
+         dfmk.insert_rows("video_meta", rows)
+         dfmk.fill_table_from_row_pool("video_meta")
+
+         if csv_outfile:
+             dfmk["video_meta"].to_csv(csv_outfile, index=False, sep=";")
+         return dfmk["video_meta"].copy()
+
+
+     # -----------------------------
+     # FFmpeg Horizontal Stack
+     # -----------------------------
+     @staticmethod
+     def hstack(video_files, output_file):
+         """Horizontally stack multiple videos using FFmpeg."""
+         tmp_file = "video_list.txt"
+         try:
+             with open(tmp_file, "w") as f:
+                 for video in video_files:
+                     f.write(f"file '{video}'\n")
+
+             ffmpeg_cmd = (
+                 f"ffmpeg -f concat -safe 0 -i {tmp_file} "
+                 f'-filter_complex "[0:v][1:v][2:v]hstack=inputs={len(video_files)}[v]" '
+                 f'-map "[v]" -c:v libx264 -preset fast -crf 22 {output_file}'
+             )
+
+             os.system(ffmpeg_cmd)
+             print(f"[INFO] Video stacked successfully: {output_file}")
+
+         except Exception as e:
+             print(f"[ERROR] Video stacking failed: {e}")
+         finally:
+             if os.path.exists(tmp_file):
+                 os.remove(tmp_file)
+

  class VideoResolution(Enum):
      VR480p = "720x480"
@@ -57,7 +174,6 @@ def trim_video(source, destination, start_time, end_time)

  progress_bar = None

-
  def on_progress(bytes_done, total_bytes):
      global progress_bar
      if progress_bar is None:
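The new VideoUtils helpers above are plain static methods, so they can be called directly; a minimal usage sketch (the directory and file names below are placeholders, hstack assumes ffmpeg is on PATH, and the filter string in the diff is written for exactly three inputs):

import halib  # assumes the wheel is installed
from halib.filetype.videofile import VideoUtils

# Build a metadata DataFrame (video_path, width, height, frame_count, fps)
# for every video under a folder, optionally dumping it to a ';'-separated CSV.
meta_df = VideoUtils.get_video_dir_meta_df(
    video_dir="data/videos",          # placeholder directory
    search_recursive=True,
    csv_outfile="video_meta.csv",     # optional
)
print(meta_df.head())

# Horizontally stack three clips with FFmpeg (placeholder file names).
VideoUtils.hstack(["a.mp4", "b.mp4", "c.mp4"], "stacked.mp4")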
halib/filetype/yamlfile.py CHANGED
@@ -2,10 +2,13 @@ import time
  import networkx as nx
  from rich import inspect
  from rich.pretty import pprint
- from omegaconf import OmegaConf
  from rich.console import Console
+
+ from omegaconf import OmegaConf
  from argparse import ArgumentParser

+ from ..system.path import *
+
  console = Console()


@@ -52,6 +55,18 @@ def load_yaml(yaml_file, to_dict=False, log_info=False):
      return omgconf


+ def load_yaml_with_PC_abbr(
+     yaml_file, abbr_disk_map=ABBR_DISK_MAP
+ ):
+     # load yaml file
+     data_dict = load_yaml(yaml_file=yaml_file, to_dict=True)
+     # Normalize paths in the loaded data
+     data_dict = normalize_paths(
+         data_dict, get_working_disk(abbr_disk_map), get_os_platform()
+     )
+     return data_dict
+
+
  def parse_args():
      parser = ArgumentParser(description="desc text")
      parser.add_argument(
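A hedged usage sketch of the new load_yaml_with_PC_abbr helper ("config.yaml" is a placeholder; ABBR_DISK_MAP, get_working_disk, normalize_paths and get_os_platform come from the halib.system.path star-import shown above and are not part of this hunk):

from halib.filetype.yamlfile import load_yaml_with_PC_abbr

# Load a YAML config and rewrite any disk-abbreviated paths for the
# current machine/OS before returning a plain dict.
cfg = load_yaml_with_PC_abbr("config.yaml")
print(cfg)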
halib/online/projectmake.py CHANGED
@@ -1,17 +1,18 @@
  # coding=utf-8
- import json
+
  import os
+ import json
+ import pycurl
  import shutil
- from argparse import ArgumentParser
- from io import BytesIO
+ import certifi
  import subprocess
+ from io import BytesIO
+
+ from argparse import ArgumentParser

- import certifi
- import pycurl
  from ..filetype import jsonfile
  from ..system import filesys

-
  def get_curl(url, user_and_pass, verbose=True):
      c = pycurl.Curl()
      c.setopt(pycurl.VERBOSE, verbose)
halib/online/tele_noti.py ADDED
@@ -0,0 +1,165 @@
+ # Watch a log file and send a telegram message when train reaches a certain epoch or end
+
+ import os
+ import yaml
+ import asyncio
+ import telegram
+ import pandas as pd
+
+ from rich.pretty import pprint
+ from rich.console import Console
+ import plotly.graph_objects as go
+
+ from ..system import filesys as fs
+ from ..filetype import textfile, csvfile
+
+ from argparse import ArgumentParser
+
+ tele_console = Console()
+
+
+ def parse_args():
+     parser = ArgumentParser(description="desc text")
+     parser.add_argument(
+         "-cfg",
+         "--cfg",
+         type=str,
+         help="yaml file for tele",
+         default=r"E:\Dev\__halib\halib\online\tele_noti_cfg.yaml",
+     )
+
+     return parser.parse_args()
+
+ def get_watcher_message_df(target_file, num_last_lines):
+     file_ext = fs.get_file_name(target_file, split_file_ext=True)[1]
+     supported_ext = [".txt", ".log", ".csv"]
+     assert (
+         file_ext in supported_ext
+     ), f"File extension {file_ext} not supported. Supported extensions are {supported_ext}"
+     last_lines_df = None
+     if file_ext in [".txt", ".log"]:
+         lines = textfile.read_line_by_line(target_file)
+         if num_last_lines > len(lines):
+             num_last_lines = len(lines)
+         last_line_arr = lines[-num_last_lines:]
+         # add a line start with word "epoch"
+         epoch_info_list = "Epoch: n/a"
+         for line in reversed(lines):
+             if "epoch" in line.lower():
+                 epoch_info_list = line
+                 break
+         last_line_arr.insert(0, epoch_info_list)  # insert at the beginning
+         dfCreator = csvfile.DFCreator()
+         dfCreator.create_table("last_lines", ["line"])
+         last_line_arr = [[line] for line in last_line_arr]
+         dfCreator.insert_rows("last_lines", last_line_arr)
+         dfCreator.fill_table_from_row_pool("last_lines")
+         last_lines_df = dfCreator["last_lines"].copy()
+     else:
+         df = pd.read_csv(target_file)
+         num_rows = len(df)
+         if num_last_lines > num_rows:
+             num_last_lines = num_rows
+         last_lines_df = df.tail(num_last_lines)
+     return last_lines_df
+
+
+ def df2img(df: pd.DataFrame, output_img_dir, decimal_places, out_img_scale):
+     df = df.round(decimal_places)
+     fig = go.Figure(
+         data=[
+             go.Table(
+                 header=dict(values=list(df.columns), align="center"),
+                 cells=dict(
+                     values=df.values.transpose(),
+                     fill_color=[["white", "lightgrey"] * df.shape[0]],
+                     align="center",
+                 ),
+             )
+         ]
+     )
+     if not os.path.exists(output_img_dir):
+         os.makedirs(output_img_dir)
+     img_path = os.path.normpath(os.path.join(output_img_dir, "last_lines.png"))
+     fig.write_image(img_path, scale=out_img_scale)
+     return img_path
+
+
+ def compose_message_and_img_path(
+     target_file, project, num_last_lines, decimal_places, out_img_scale, output_img_dir
+ ):
+     context_msg = f">> Project: {project} \n>> File: {target_file} \n>> Last {num_last_lines} lines:"
+     msg_df = get_watcher_message_df(target_file, num_last_lines)
+     try:
+         img_path = df2img(msg_df, output_img_dir, decimal_places, out_img_scale)
+     except Exception as e:
+         pprint(f"Error: {e}")
+         img_path = None
+     return context_msg, img_path
+
+
+ async def send_to_telegram(cfg_dict, interval_in_sec):
+     # pprint(cfg_dict)
+     token = cfg_dict["telegram"]["token"]
+     chat_id = cfg_dict["telegram"]["chat_id"]
+
+     noti_settings = cfg_dict["noti_settings"]
+     project = noti_settings["project"]
+     target_file = noti_settings["target_file"]
+     num_last_lines = noti_settings["num_last_lines"]
+     output_img_dir = noti_settings["output_img_dir"]
+     decimal_places = noti_settings["decimal_places"]
+     out_img_scale = noti_settings["out_img_scale"]
+
+     bot = telegram.Bot(token=token)
+     async with bot:
+         try:
+             context_msg, img_path = compose_message_and_img_path(
+                 target_file,
+                 project,
+                 num_last_lines,
+                 decimal_places,
+                 out_img_scale,
+                 output_img_dir,
+             )
+             time_now = next_time = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
+             sep_line = "-" * 50
+             context_msg = f"{sep_line}\n>> Time: {time_now}\n{context_msg}"
+             # calculate the next time to send message
+             next_time = pd.Timestamp.now() + pd.Timedelta(seconds=interval_in_sec)
+             next_time = next_time.strftime("%Y-%m-%d %H:%M:%S")
+             next_time_info = f"Next msg: {next_time}"
+             tele_console.rule()
+             tele_console.print("[green] Send message to telegram [/green]")
+             tele_console.print(
+                 f"[red] Next message will be sent at <{next_time}> [/red]"
+             )
+             await bot.send_message(text=context_msg, chat_id=chat_id)
+             if img_path:
+                 await bot.send_photo(chat_id=chat_id, photo=open(img_path, "rb"))
+             await bot.send_message(text=next_time_info, chat_id=chat_id)
+         except Exception as e:
+             pprint(f"Error: {e}")
+             pprint("Message not sent to telegram")
+
+
+ async def run_forever(cfg_path):
+     cfg_dict = yaml.safe_load(open(cfg_path, "r"))
+     noti_settings = cfg_dict["noti_settings"]
+     interval_in_min = noti_settings["interval_in_min"]
+     interval_in_sec = int(interval_in_min * 60)
+     pprint(
+         f"Message will be sent every {interval_in_min} minutes or {interval_in_sec} seconds"
+     )
+     while True:
+         await send_to_telegram(cfg_dict, interval_in_sec)
+         await asyncio.sleep(interval_in_sec)
+
+
+ async def main():
+     args = parse_args()
+     await run_forever(args.cfg)
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
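The watcher is driven entirely by a YAML config whose keys are read in send_to_telegram and run_forever above. A minimal sketch of how it might be wired up; the token, chat id, paths and values below are placeholders, not values from this diff:

import asyncio
from halib.online import tele_noti

# Keys mirror what the module reads: telegram.token / telegram.chat_id
# plus the noti_settings block. All values here are made up.
CFG_TEXT = """
telegram:
  token: "<bot-token>"
  chat_id: "<chat-id>"
noti_settings:
  project: "my_project"
  target_file: "logs/train.log"
  num_last_lines: 10
  output_img_dir: "tele_imgs"
  decimal_places: 4
  out_img_scale: 2
  interval_in_min: 30
"""

with open("tele_noti_cfg.yaml", "w") as f:
    f.write(CFG_TEXT)

# run_forever loops indefinitely, posting the last lines of the target
# file every interval_in_min minutes.
asyncio.run(tele_noti.run_forever("tele_noti_cfg.yaml"))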
halib/research/base_exp.py CHANGED
@@ -1,5 +1,5 @@
  from abc import ABC, abstractmethod
-
+ from typing import Tuple, Any, Optional
  from ..research.base_config import ExpBaseConfig
  from ..research.perfcalc import PerfCalc
  from ..research.metrics import MetricsBackend
@@ -14,6 +14,8 @@ class BaseExperiment(PerfCalc, ABC):
      def __init__(self, config: ExpBaseConfig):
          self.config = config
          self.metric_backend = None
+         # Flag to track if init_general/prepare_dataset has run
+         self._is_env_ready = False

      # -----------------------
      # PerfCalc Required Methods
@@ -51,50 +53,105 @@
          pass

      @abstractmethod
-     def exec_exp(self, *args, **kwargs):
+     def before_exec_exp_once(self, *args, **kwargs):
+         """Optional: any setup before exec_exp. Note this is called once per run_exp."""
+         pass
+
+     @abstractmethod
+     def exec_exp(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
          """Run experiment process, e.g.: training/evaluation loop.
-         Return: raw_metrics_data, and extra_data as input for calc_and_save_exp_perfs
+         Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
          """
          pass

-     def eval_exp(self):
-         """Optional: re-run evaluation from saved results."""
+     @abstractmethod
+     def exec_eval(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
+         """Run evaluation process.
+         Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
+         """
          pass

+     # -----------------------
+     # Internal Helpers
+     # -----------------------
+     def _validate_and_unpack(self, results):
+         if results is None:
+             return None
+         if not isinstance(results, (tuple, list)) or len(results) != 2:
+             raise ValueError("exec must return (metrics_data, extra_data)")
+         return results[0], results[1]
+
+     def _prepare_environment(self, force_reload: bool = False):
+         """
+         Common setup. Skips if already initialized, unless force_reload is True.
+         """
+         if self._is_env_ready and not force_reload:
+             # Environment is already prepared, skipping setup.
+             return
+
+         # 1. Run Setup
+         self.init_general(self.config.get_general_cfg())
+         self.prepare_dataset(self.config.get_dataset_cfg())
+
+         # 2. Update metric backend (refresh if needed)
+         self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
+
+         # 3. Mark as ready
+         self._is_env_ready = True
+
      # -----------------------
      # Main Experiment Runner
      # -----------------------
-     def run_exp(self, do_calc_metrics=True, *args, **kwargs):
+     def run_exp(self, should_calc_metrics=True, reload_env=False, *args, **kwargs):
          """
          Run the whole experiment pipeline.
-         Params:
+         :param reload_env: If True, forces dataset/general init to run again.
+         :param should_calc_metrics: Whether to calculate and save metrics after execution.
+         :kwargs Params:
          + 'outfile' to save csv file results,
          + 'outdir' to set output directory for experiment results.
          + 'return_df' to return a DataFrame of results instead of a dictionary.

          Full pipeline:
          1. Init
-         2. Dataset
-         3. Metrics Preparation
-         4. Save Config
-         5. Execute
-         6. Calculate & Save Metrics
+         2. Prepare Environment (General + Dataset + Metrics)
+         3. Save Config
+         4. Execute
+         5. Calculate & Save Metrics
          """
-         self.init_general(self.config.get_general_cfg())
-         self.prepare_dataset(self.config.get_dataset_cfg())
-         self.prepare_metrics(self.config.get_metric_cfg())
+         self._prepare_environment(force_reload=reload_env)

+         # Any pre-exec setup (loading models, etc)
+         self.before_exec_exp_once(*args, **kwargs)
          # Save config before running
          self.config.save_to_outdir()

          # Execute experiment
          results = self.exec_exp(*args, **kwargs)
-         if do_calc_metrics:
-             metrics_data, extra_data = results
+
+         if should_calc_metrics and results is not None:
+             metrics_data, extra_data = self._validate_and_unpack(results)
              # Calculate & Save metrics
-             perf_results = self.calc_and_save_exp_perfs(
+             perf_results = self.calc_perfs(
                  raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
              )
              return perf_results
          else:
              return results
+
+     # -----------------------
+     # Main Experiment Evaluator
+     # -----------------------
+     def eval_exp(self, reload_env=False, *args, **kwargs):
+         """
+         Run evaluation only.
+         :param reload_env: If True, forces dataset/general init to run again.
+         """
+         self._prepare_environment(force_reload=reload_env)
+         results = self.exec_eval(*args, **kwargs)
+         if results is not None:
+             metrics_data, extra_data = self._validate_and_unpack(results)
+             return self.calc_perfs(
+                 raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
+             )
+         return None
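The reworked BaseExperiment now requires exec_eval and before_exec_exp_once in addition to exec_exp, and both run_exp and eval_exp funnel setup through _prepare_environment. A sketch of the new contract, limited to the hooks visible in this hunk; the PerfCalc-required methods, the concrete ExpBaseConfig, and build_model are omitted or hypothetical, so a real subclass would need more than this:

from halib.research.base_exp import BaseExperiment

class MyExperiment(BaseExperiment):
    def before_exec_exp_once(self, *args, **kwargs):
        # one-time setup per run_exp call, e.g. constructing the model
        self.model = build_model(self.config)   # hypothetical helper

    def exec_exp(self, *args, **kwargs):
        # training loop; return (raw_metrics_data, extra_data) or None
        return {"acc": 0.90}, {"ckpt": "last.pt"}   # placeholder values

    def exec_eval(self, *args, **kwargs):
        # evaluation-only path used by eval_exp()
        return {"acc": 0.88}, None

# exp = MyExperiment(my_config)            # my_config: an ExpBaseConfig subclass
# exp.run_exp(should_calc_metrics=True)    # env setup -> save config -> exec_exp -> calc_perfs
# exp.eval_exp(reload_env=True)            # force env re-init, then exec_eval -> calc_perfs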