halib 0.1.89.tar.gz → 0.2.13.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {halib-0.1.89 → halib-0.2.13}/.gitignore +0 -1
- halib-0.2.13/MANIFEST.in +5 -0
- {halib-0.1.89 → halib-0.2.13}/PKG-INFO +21 -3
- {halib-0.1.89 → halib-0.2.13}/README.md +19 -2
- {halib-0.1.89 → halib-0.2.13}/halib/__init__.py +3 -3
- halib-0.2.13/halib/common/common.py +178 -0
- halib-0.2.13/halib/exp/core/base_config.py +167 -0
- halib-0.2.13/halib/exp/core/base_exp.py +147 -0
- halib-0.2.13/halib/exp/core/param_gen.py +181 -0
- {halib-0.1.89/halib/research → halib-0.2.13/halib/exp/core}/wandb_op.py +5 -4
- {halib-0.1.89/halib/utils → halib-0.2.13/halib/exp/data}/dataclass_util.py +3 -2
- {halib-0.1.89/halib/research → halib-0.2.13/halib/exp/data}/dataset.py +6 -7
- {halib-0.1.89/halib/research → halib-0.2.13/halib/exp/data}/torchloader.py +12 -9
- halib-0.2.13/halib/exp/perf/flop_calc.py +190 -0
- {halib-0.1.89/halib/research → halib-0.2.13/halib/exp/perf}/perfcalc.py +195 -91
- halib-0.1.89/halib/research/metrics.py → halib-0.2.13/halib/exp/perf/perfmetrics.py +4 -0
- {halib-0.1.89/halib/research → halib-0.2.13/halib/exp/perf}/perftb.py +6 -7
- {halib-0.1.89/halib/research → halib-0.2.13/halib/exp/perf}/profiler.py +6 -6
- halib-0.2.13/halib/exp/viz/__init__.py +0 -0
- halib-0.2.13/halib/exp/viz/plot.py +754 -0
- halib-0.2.13/halib/filetype/__init__.py +0 -0
- {halib-0.1.89 → halib-0.2.13}/halib/filetype/csvfile.py +3 -9
- halib-0.2.13/halib/filetype/ipynb.py +61 -0
- {halib-0.1.89 → halib-0.2.13}/halib/filetype/jsonfile.py +0 -3
- {halib-0.1.89 → halib-0.2.13}/halib/filetype/textfile.py +0 -1
- {halib-0.1.89 → halib-0.2.13}/halib/filetype/videofile.py +91 -2
- {halib-0.1.89 → halib-0.2.13}/halib/filetype/yamlfile.py +16 -1
- halib-0.2.13/halib/online/__init__.py +0 -0
- {halib-0.1.89 → halib-0.2.13}/halib/online/projectmake.py +7 -6
- {halib-0.1.89/halib/utils → halib-0.2.13/halib/online}/tele_noti.py +1 -2
- halib-0.2.13/halib/system/__init__.py +0 -0
- halib-0.2.13/halib/system/_list_pc.csv +6 -0
- halib-0.2.13/halib/system/filesys.py +164 -0
- halib-0.2.13/halib/system/path.py +106 -0
- halib-0.2.13/halib/utils/__init__.py +0 -0
- halib-0.1.89/halib/utils/listop.py → halib-0.2.13/halib/utils/list.py +0 -1
- {halib-0.1.89 → halib-0.2.13}/halib.egg-info/PKG-INFO +21 -3
- halib-0.2.13/halib.egg-info/SOURCES.txt +54 -0
- {halib-0.1.89 → halib-0.2.13}/halib.egg-info/requires.txt +1 -0
- {halib-0.1.89 → halib-0.2.13}/setup.py +2 -1
- halib-0.1.89/MANIFEST.in +0 -4
- halib-0.1.89/guide_publish_pip.pdf +0 -0
- halib-0.1.89/halib/common.py +0 -108
- halib-0.1.89/halib/cuda.py +0 -39
- halib-0.1.89/halib/online/gdrive_test.py +0 -50
- halib-0.1.89/halib/research/base_config.py +0 -100
- halib-0.1.89/halib/research/base_exp.py +0 -100
- halib-0.1.89/halib/research/mics.py +0 -16
- halib-0.1.89/halib/research/plot.py +0 -496
- halib-0.1.89/halib/system/filesys.py +0 -124
- halib-0.1.89/halib/utils/video.py +0 -76
- halib-0.1.89/halib.egg-info/SOURCES.txt +0 -49
- {halib-0.1.89 → halib-0.2.13}/GDriveFolder.txt +0 -0
- {halib-0.1.89 → halib-0.2.13}/LICENSE.txt +0 -0
- {halib-0.1.89/halib/filetype → halib-0.2.13/halib/common}/__init__.py +0 -0
- {halib-0.1.89/halib → halib-0.2.13/halib/common}/rich_color.py +0 -0
- {halib-0.1.89/halib/online → halib-0.2.13/halib/exp}/__init__.py +0 -0
- {halib-0.1.89/halib/research → halib-0.2.13/halib/exp/core}/__init__.py +0 -0
- {halib-0.1.89/halib/system → halib-0.2.13/halib/exp/data}/__init__.py +0 -0
- {halib-0.1.89/halib/utils → halib-0.2.13/halib/exp/perf}/__init__.py +0 -0
- {halib-0.1.89/halib/utils → halib-0.2.13/halib/exp/perf}/gpu_mon.py +0 -0
- {halib-0.1.89 → halib-0.2.13}/halib/online/gdrive.py +0 -0
- {halib-0.1.89 → halib-0.2.13}/halib/online/gdrive_mkdir.py +0 -0
- {halib-0.1.89 → halib-0.2.13}/halib/system/cmd.py +0 -0
- halib-0.1.89/halib/utils/dict_op.py → halib-0.2.13/halib/utils/dict.py +0 -0
- {halib-0.1.89 → halib-0.2.13}/halib.egg-info/dependency_links.txt +0 -0
- {halib-0.1.89 → halib-0.2.13}/halib.egg-info/top_level.txt +0 -0
- {halib-0.1.89 → halib-0.2.13}/setup.cfg +0 -0
halib-0.2.13/MANIFEST.in
ADDED

{halib-0.1.89 → halib-0.2.13}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: halib
-Version: 0.1.89
+Version: 0.2.13
 Summary: Small library for common tasks
 Author: Hoang Van Ha
 Author-email: hoangvanhauit@gmail.com
@@ -40,6 +40,7 @@ Requires-Dist: timebudget
 Requires-Dist: tqdm
 Requires-Dist: tube_dl
 Requires-Dist: wandb
+Requires-Dist: ipynbname
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -50,9 +51,26 @@ Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
 
-Helper package for coding and automation
+# Helper package for coding and automation
 
-**Version 0.
+**Version 0.2.13**
++ reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
++ update `exp/perfcalc.py` to allow save computed performance to csv file (without explicit calling method `calc_perfs`)
+
+**Version 0.2.1**
++ `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
+**Version 0.1.99**
++ `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)
+
+**Version 0.1.96**
++ `research/plot`: add `PlotHelper` class to plot train history + plot grid of images (e.g., image samples from dataset or model outputs)
+
+
+**Version 0.1.91**
++ `research/param_gen`: add `ParamGen` class to generate parameter list from yaml file for hyperparameter search (grid search, random search, etc.)
+
+**Version 0.1.90**
 
 + `research/profiler`: add `zProfiler` class to measure execution time of contexts and steps, with support for dynamic color scales in plots.
 
{halib-0.1.89 → halib-0.2.13}/README.md

@@ -1,6 +1,23 @@
-Helper package for coding and automation
+# Helper package for coding and automation
 
-**Version 0.
+**Version 0.2.13**
++ reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
++ update `exp/perfcalc.py` to allow save computed performance to csv file (without explicit calling method `calc_perfs`)
+
+**Version 0.2.1**
++ `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
+
+**Version 0.1.99**
++ `filetype/ipynb`: add `gen_ipynb_name` generator to create file name based on current notebook name as prefix (with optional timestamp)
+
+**Version 0.1.96**
++ `research/plot`: add `PlotHelper` class to plot train history + plot grid of images (e.g., image samples from dataset or model outputs)
+
+
+**Version 0.1.91**
++ `research/param_gen`: add `ParamGen` class to generate parameter list from yaml file for hyperparameter search (grid search, random search, etc.)
+
+**Version 0.1.90**
 
 + `research/profiler`: add `zProfiler` class to measure execution time of contexts and steps, with support for dynamic color scales in plots.
 
{halib-0.1.89 → halib-0.2.13}/halib/__init__.py

@@ -56,8 +56,7 @@ from .filetype.yamlfile import load_yaml
 from .system import cmd
 from .system import filesys as fs
 from .filetype import csvfile
-from .
-from .common import (
+from .common.common import (
     console,
     console_log,
     ConsoleLog,
@@ -65,6 +64,7 @@ from .common import (
     norm_str,
     pprint_box,
     pprint_local_path,
+    tcuda
 )
 
 # for log
@@ -76,7 +76,7 @@ from timebudget import timebudget
 import omegaconf
 from omegaconf import OmegaConf
 from omegaconf.dictconfig import DictConfig
-from .rich_color import rcolor_str, rcolor_palette, rcolor_palette_all, rcolor_all_str
+from .common.rich_color import rcolor_str, rcolor_palette, rcolor_palette_all, rcolor_all_str
 
 # for visualization
 import seaborn as sns
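Because `halib/__init__.py` keeps re-exporting these helpers at the package root, existing imports continue to work after the move to `halib/common/common.py`. A minimal usage sketch (illustration only, not part of the diff; it assumes halib 0.2.13 is installed and uses the names from the import block above):

# Usage sketch: names re-exported by halib/__init__.py shown above.
from halib import ConsoleLog, console_log, pprint_box, tcuda

@console_log  # wraps the call in <func_name> ... </func_name> console rules
def train_step():
    return {"loss": 0.42}

with ConsoleLog("demo"):
    pprint_box(train_step(), title="step result")
    tcuda()  # reports how many GPUs torch / tensorflow can see, if installed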
halib-0.2.13/halib/common/common.py
ADDED

@@ -0,0 +1,178 @@
+import os
+import re
+import arrow
+import importlib
+
+import rich
+from rich import print
+from rich.panel import Panel
+from rich.console import Console
+from rich.pretty import pprint, Pretty
+
+from pathlib import Path, PureWindowsPath
+
+
+console = Console()
+
+
+def seed_everything(seed=42):
+    import random
+    import numpy as np
+
+    random.seed(seed)
+    np.random.seed(seed)
+    # import torch if it is available
+    try:
+        import torch
+
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+    except ImportError:
+        pprint("torch not imported, skipping torch seed_everything")
+        pass
+
+
+def now_str(sep_date_time="."):
+    assert sep_date_time in [
+        ".",
+        "_",
+        "-",
+    ], "sep_date_time must be one of '.', '_', or '-'"
+    now_string = arrow.now().format(f"YYYYMMDD{sep_date_time}HHmmss")
+    return now_string
+
+
+def norm_str(in_str):
+    # Replace one or more whitespace characters with a single underscore
+    norm_string = re.sub(r"\s+", "_", in_str)
+    # Remove leading and trailing spaces
+    norm_string = norm_string.strip()
+    return norm_string
+
+
+def pprint_box(obj, title="", border_style="green"):
+    """
+    Pretty print an object in a box.
+    """
+    rich.print(
+        Panel(Pretty(obj, expand_all=True), title=title, border_style=border_style)
+    )
+
+
+def console_rule(msg, do_norm_msg=True, is_end_tag=False):
+    msg = norm_str(msg) if do_norm_msg else msg
+    if is_end_tag:
+        console.rule(f"</{msg}>")
+    else:
+        console.rule(f"<{msg}>")
+
+
+def console_log(func):
+    def wrapper(*args, **kwargs):
+        console_rule(func.__name__)
+        result = func(*args, **kwargs)
+        console_rule(func.__name__, is_end_tag=True)
+        return result
+
+    return wrapper
+
+
+class ConsoleLog:
+    def __init__(self, message):
+        self.message = message
+
+    def __enter__(self):
+        console_rule(self.message)
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        console_rule(self.message, is_end_tag=True)
+        if exc_type is not None:
+            print(f"An exception of type {exc_type} occurred.")
+            print(f"Exception message: {exc_value}")
+
+
+def linux_to_wins_path(path: str) -> str:
+    """
+    Convert a Linux-style WSL path (/mnt/c/... or /mnt/d/...) to a Windows-style path (C:\...).
+    """
+    # Handle only /mnt/<drive>/... style
+    if (
+        path.startswith("/mnt/")
+        and len(path) > 6
+        and path[5].isalpha()
+        and path[6] == "/"
+    ):
+        drive = path[5].upper()  # Extract drive letter
+        win_path = f"{drive}:{path[6:]}"  # Replace "/mnt/c/" with "C:/"
+    else:
+        win_path = path  # Return unchanged if not a WSL-style path
+    # Normalize to Windows-style backslashes
+    return str(PureWindowsPath(win_path))
+
+
+def pprint_local_path(
+    local_path: str, get_wins_path: bool = False, tag: str = ""
+) -> str:
+    """
+    Pretty-print a local path with emoji and clickable file:// URI.
+
+    Args:
+        local_path: Path to file or directory (Linux or Windows style).
+        get_wins_path: If True on Linux, convert WSL-style path to Windows style before printing.
+        tag: Optional console log tag.
+
+    Returns:
+        The file URI string.
+    """
+    p = Path(local_path).resolve()
+    type_str = "📄" if p.is_file() else "📁" if p.is_dir() else "❓"
+
+    if get_wins_path and os.name == "posix":
+        # Try WSL → Windows conversion
+        converted = linux_to_wins_path(str(p))
+        if converted != str(p):  # Conversion happened
+            file_uri = str(PureWindowsPath(converted).as_uri())
+        else:
+            file_uri = p.as_uri()
+    else:
+        file_uri = p.as_uri()
+
+    content_str = f"{type_str} [link={file_uri}]{file_uri}[/link]"
+
+    if tag:
+        with ConsoleLog(tag):
+            console.print(content_str)
+    else:
+        console.print(content_str)
+
+    return file_uri
+
+
+def tcuda():
+    NOT_INSTALLED = "Not Installed"
+    GPU_AVAILABLE = "GPU(s) Available"
+    ls_lib = ["torch", "tensorflow"]
+    lib_stats = {lib: NOT_INSTALLED for lib in ls_lib}
+    for lib in ls_lib:
+        spec = importlib.util.find_spec(lib)
+        if spec:
+            if lib == "torch":
+                import torch
+
+                lib_stats[lib] = str(torch.cuda.device_count()) + " " + GPU_AVAILABLE
+            elif lib == "tensorflow":
+                import tensorflow as tf
+
+                lib_stats[lib] = (
+                    str(len(tf.config.list_physical_devices("GPU")))
+                    + " "
+                    + GPU_AVAILABLE
+                )
+    console.rule("<CUDA Library Stats>")
+    pprint(lib_stats)
+    console.rule("</CUDA Library Stats>")
+    return lib_stats
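For illustration (not part of the diff), the smaller helpers in this new module can also be imported directly by their 0.2.13 path; a short sketch assuming halib 0.2.13 is installed:

# Sketch only: helpers defined in halib/common/common.py above.
from halib.common.common import seed_everything, now_str, norm_str, pprint_local_path

seed_everything(123)                      # seeds random/numpy (and torch when present)
run_name = norm_str("my run") + "_" + now_str("-")   # e.g. "my_run_20250101-120000"
print(run_name)
pprint_local_path(".", tag="output dir")  # prints a clickable file:// URI for the cwd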
halib-0.2.13/halib/exp/core/base_config.py
ADDED

@@ -0,0 +1,167 @@
+import os
+from rich.pretty import pprint
+from abc import ABC, abstractmethod
+from typing import List, Optional, TypeVar, Generic
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from dataclass_wizard import YAMLWizard
+
+
+class NamedCfg(ABC):
+    """
+    Base class for named configurations.
+    All configurations should have a name.
+    """
+
+    @abstractmethod
+    def get_name(self):
+        """
+        Get the name of the configuration.
+        This method should be implemented in subclasses.
+        """
+        pass
+
+
+@dataclass
+class AutoNamedCfg(YAMLWizard, NamedCfg):
+    """
+    Mixin that automatically implements get_name() by returning self.name.
+    Classes using this MUST have a 'name' field.
+    """
+
+    name: Optional[str] = None
+
+    def get_name(self):
+        return self.name
+
+    def __post_init__(self):
+        # Enforce the "MUST" rule here
+        if self.name is None:
+            # We allow None during initial load, but it must be set before usage
+            # or handled by the loader.
+            pass
+
+
+T = TypeVar("T", bound=AutoNamedCfg)
+
+
+class BaseSelectorCfg(Generic[T]):
+    """
+    Base class to handle the logic of selecting an item from a list by name.
+    """
+
+    def _resolve_selection(self, items: List[T], selected_name: str, context: str) -> T:
+        if selected_name is None:
+            raise ValueError(f"No {context} selected in the configuration.")
+
+        # Create a lookup dict for O(1) access, or just iterate if list is short
+        for item in items:
+            if item.name == selected_name:
+                return item
+
+        raise ValueError(
+            f"{context.capitalize()} '{selected_name}' not found in the configuration list."
+        )
+
+
+class ExpBaseCfg(ABC, YAMLWizard):
+    """
+    Base class for configuration objects.
+    What a cfg class must have:
+    1 - a dataset cfg
+    2 - a metric cfg
+    3 - a method cfg
+    """
+
+    cfg_name: Optional[str] = None
+
+    # Save to yaml fil
+    def save_to_outdir(
+        self, filename: str = "__config.yaml", outdir=None, override: bool = False
+    ) -> None:
+        """
+        Save the configuration to the output directory.
+        """
+        if outdir is not None:
+            output_dir = outdir
+        else:
+            output_dir = self.get_outdir()
+        os.makedirs(output_dir, exist_ok=True)
+        assert (output_dir is not None) and (
+            os.path.isdir(output_dir)
+        ), f"Output directory '{output_dir}' does not exist or is not a directory."
+        file_path = os.path.join(output_dir, filename)
+        if os.path.exists(file_path) and not override:
+            pprint(
+                f"File '{file_path}' already exists. Use 'override=True' to overwrite."
+            )
+        else:
+            # method of YAMLWizard to_yaml_file
+            self.to_yaml_file(file_path)
+
+    @classmethod
+    @abstractmethod
+    # load from a custom YAML file
+    def from_custom_yaml_file(cls, yaml_file: str):
+        """Load a configuration from a custom YAML file."""
+        pass
+
+    def get_cfg_name(self, sep: str = "__", *args, **kwargs) -> str:
+        # auto get the config name from dataset, method, metric
+        # 2. Generate the canonical Config Name
+        name_parts = []
+        general_info = self.get_general_cfg().get_name()
+        dataset_info = self.get_dataset_cfg().get_name()
+        method_info = self.get_method_cfg().get_name()
+        name_parts = [
+            general_info,
+            f"ds_{dataset_info}",
+            f"mt_{method_info}",
+        ]
+        if "extra" in kwargs:
+            extra_info = kwargs["extra"]
+            assert isinstance(extra_info, str), "'extra' kwarg must be a string."
+            name_parts.append(extra_info)
+        self.cfg_name = sep.join(name_parts)
+        return self.cfg_name
+
+    @abstractmethod
+    def get_outdir(self):
+        """
+        Get the output directory for the configuration.
+        This method should be implemented in subclasses.
+        """
+        return None
+
+    @abstractmethod
+    def get_general_cfg(self) -> NamedCfg:
+        """
+        Get the general configuration like output directory, log settings, SEED, etc.
+        This method should be implemented in subclasses.
+        """
+        pass
+
+    @abstractmethod
+    def get_dataset_cfg(self) -> NamedCfg:
+        """
+        Get the dataset configuration.
+        This method should be implemented in subclasses.
+        """
+        pass
+
+    @abstractmethod
+    def get_method_cfg(self) -> NamedCfg:
+        """
+        Get the method configuration.
+        This method should be implemented in subclasses.
+        """
+        pass
+
+    @abstractmethod
+    def get_metric_cfg(self) -> NamedCfg:
+        """
+        Get the metric configuration.
+        This method should be implemented in subclasses.
+        """
+        pass
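To make the contract concrete, a minimal subclass might look like the sketch below. This is illustrative only: the field names, defaults, and the reuse of YAMLWizard's `from_yaml_file` are assumptions, while the overridden methods are exactly the abstract ones `ExpBaseCfg` declares above.

# Hypothetical config sketch; only the overridden method names come from ExpBaseCfg above.
from dataclasses import dataclass, field

from halib.exp.core.base_config import AutoNamedCfg, ExpBaseCfg, NamedCfg


@dataclass
class GeneralCfg(AutoNamedCfg):
    name: str = "exp1"        # assumed fields/defaults
    outdir: str = "zout"
    seed: int = 42


@dataclass
class DatasetCfg(AutoNamedCfg):
    name: str = "cifar10"
    data_dir: str = "data/cifar10"


@dataclass
class MethodCfg(AutoNamedCfg):
    name: str = "baseline"
    lr: float = 1e-3


@dataclass
class MetricCfg(AutoNamedCfg):
    name: str = "clf_metrics"
    metrics: list = field(default_factory=lambda: ["accuracy"])


@dataclass
class MyExpCfg(ExpBaseCfg):
    general: GeneralCfg = field(default_factory=GeneralCfg)
    dataset: DatasetCfg = field(default_factory=DatasetCfg)
    method: MethodCfg = field(default_factory=MethodCfg)
    metric: MetricCfg = field(default_factory=MetricCfg)

    @classmethod
    def from_custom_yaml_file(cls, yaml_file: str):
        # Assumes YAMLWizard's from_yaml_file, the counterpart of the
        # to_yaml_file call used by save_to_outdir above.
        return cls.from_yaml_file(yaml_file)

    def get_outdir(self):
        return self.general.outdir

    def get_general_cfg(self) -> NamedCfg:
        return self.general

    def get_dataset_cfg(self) -> NamedCfg:
        return self.dataset

    def get_method_cfg(self) -> NamedCfg:
        return self.method

    def get_metric_cfg(self) -> NamedCfg:
        return self.metric


cfg = MyExpCfg()
print(cfg.get_cfg_name())  # "exp1__ds_cifar10__mt_baseline"
cfg.save_to_outdir()       # writes zout/__config.yaml via YAMLWizard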
halib-0.2.13/halib/exp/core/base_exp.py
ADDED

@@ -0,0 +1,147 @@
+from abc import ABC, abstractmethod
+from typing import Tuple, Any, Optional
+from .base_config import ExpBaseCfg
+from ..perf.perfcalc import PerfCalc
+from ..perf.perfmetrics import MetricsBackend
+
+
+class ExpHook:
+    """Base interface for all experiment hooks."""
+    def on_before_run(self, exp): pass
+    def on_after_run(self, exp, results): pass
+
+
+# ! SEE https://github.com/hahv/base_exp for sample usage
+class BaseExp(PerfCalc, ABC):
+    """
+    Base class for experiments.
+    Orchestrates the experiment pipeline using a pluggable metrics backend.
+    """
+
+    def __init__(self, config: ExpBaseCfg):
+        self.config = config
+        self.metric_backend = None
+        # Flag to track if init_general/prepare_dataset has run
+        self._is_env_ready = False
+        self.hooks = []
+
+    def register_hook(self, hook: ExpHook):
+        self.hooks.append(hook)
+
+    def _trigger_hooks(self, method_name: str, *args, **kwargs):
+        for hook in self.hooks:
+            method = getattr(hook, method_name, None)
+            if callable(method):
+                method(*args, **kwargs)
+
+    # -----------------------
+    # PerfCalc Required Methods
+    # -----------------------
+    def get_dataset_name(self):
+        return self.config.get_dataset_cfg().get_name()
+
+    def get_experiment_name(self):
+        return self.config.get_cfg_name()
+
+    def get_metric_backend(self):
+        if not self.metric_backend:
+            self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
+        return self.metric_backend
+
+    # -----------------------
+    # Abstract Experiment Steps
+    # -----------------------
+    @abstractmethod
+    def init_general(self, general_cfg):
+        """Setup general settings like SEED, logging, env variables."""
+        pass
+
+    @abstractmethod
+    def prepare_dataset(self, dataset_cfg):
+        """Load/prepare dataset."""
+        pass
+
+    @abstractmethod
+    def prepare_metrics(self, metric_cfg) -> MetricsBackend:
+        """
+        Prepare the metrics for the experiment.
+        This method should be implemented in subclasses.
+        """
+        pass
+
+    @abstractmethod
+    def exec_exp(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
+        """Run experiment process, e.g.: training/evaluation loop.
+        Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
+        """
+        pass
+
+    # -----------------------
+    # Internal Helpers
+    # -----------------------
+    def _validate_and_unpack(self, results):
+        if results is None:
+            return None
+        if not isinstance(results, (tuple, list)) or len(results) != 2:
+            raise ValueError("exec must return (metrics_data, extra_data)")
+        return results[0], results[1]
+
+    def _prepare_environment(self, force_reload: bool = False):
+        """
+        Common setup. Skips if already initialized, unless force_reload is True.
+        """
+        if self._is_env_ready and not force_reload:
+            # Environment is already prepared, skipping setup.
+            return
+
+        # 1. Run Setup
+        self.init_general(self.config.get_general_cfg())
+        self.prepare_dataset(self.config.get_dataset_cfg())
+
+        # 2. Update metric backend (refresh if needed)
+        self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
+
+        # 3. Mark as ready
+        self._is_env_ready = True
+
+    # -----------------------
+    # Main Experiment Runner
+    # -----------------------
+    def run_exp(self, should_calc_metrics=True, reload_env=False, *args, **kwargs):
+        """
+        Run the whole experiment pipeline.
+        :param reload_env: If True, forces dataset/general init to run again.
+        :param should_calc_metrics: Whether to calculate and save metrics after execution.
+        :kwargs Params:
+            + 'outfile' to save csv file results,
+            + 'outdir' to set output directory for experiment results.
+            + 'return_df' to return a DataFrame of results instead of a dictionary.
+
+        Full pipeline:
+        1. Init
+        2. Prepare Environment (General + Dataset + Metrics)
+        3. Save Config
+        4. Execute
+        5. Calculate & Save Metrics
+        """
+        self._prepare_environment(force_reload=reload_env)
+
+        self._trigger_hooks("before_run", self)
+
+        # Save config before running
+        self.config.save_to_outdir()
+
+        # Execute experiment
+        results = self.exec_exp(*args, **kwargs)
+
+        if should_calc_metrics and results is not None:
+            metrics_data, extra_data = self._validate_and_unpack(results)
+            # Calculate & Save metrics
+            perf_results = self.calc_perfs(
+                raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
+            )
+            self._trigger_hooks("after_run", self, perf_results)
+            return perf_results
+        else:
+            self._trigger_hooks("after_run", self, results)
+            return results
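For orientation, a rough sketch of a concrete experiment built on this base class follows. The toy data, the stand-in metric backend, and `MyExpCfg` (from the config sketch above) are assumptions, and depending on what `PerfCalc` requires a real subclass may need more; the repository linked in the source comment (github.com/hahv/base_exp) has the author's own sample usage.

# Rough sketch only: MyExpCfg is the hypothetical config from the earlier sketch,
# and the toy data / stand-in metric backend below are not halib API.
from halib.exp.core.base_exp import BaseExp


class MyExp(BaseExp):
    def init_general(self, general_cfg):
        pass  # e.g. seed and logging setup from general_cfg

    def prepare_dataset(self, dataset_cfg):
        self.samples = [(0, 0), (1, 1)]  # toy (input, label) pairs

    def prepare_metrics(self, metric_cfg):
        # A real subclass would build and return a MetricsBackend here;
        # returning the cfg keeps this sketch self-contained.
        return metric_cfg

    def exec_exp(self, *args, **kwargs):
        preds = [x for x, _ in self.samples]   # dummy "model" output
        labels = [y for _, y in self.samples]
        return {"preds": preds, "labels": labels}, {"note": "toy run"}


exp = MyExp(MyExpCfg())
# should_calc_metrics=False keeps the sketch from reaching PerfCalc.calc_perfs
results = exp.run_exp(should_calc_metrics=False)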