PyPI - halib - Versions diffs - 0.1.47__tar.gz → 0.1.49__tar.gz - Mend

halib 0.1.47tar.gz → 0.1.49tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

{halib-0.1.47/halib.egg-info → halib-0.1.49}/PKG-INFO RENAMED Viewed

@@ -1,20 +1,52 @@
 Metadata-Version: 2.1
 Name: halib
-Version: 0.1.47
+Version: 0.1.49
 Summary: Small library for common tasks
 Author: Hoang Van Ha
 Author-email: hoangvanhauit@gmail.com
-License: UNKNOWN
-Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE.txt
+Requires-Dist: arrow
+Requires-Dist: click
+Requires-Dist: enlighten
+Requires-Dist: kaleido==0.1.*
+Requires-Dist: loguru
+Requires-Dist: more-itertools
+Requires-Dist: moviepy
+Requires-Dist: networkx
+Requires-Dist: numpy
+Requires-Dist: omegaconf
+Requires-Dist: opencv-python
+Requires-Dist: pandas
+Requires-Dist: Pillow
+Requires-Dist: Pyarrow
+Requires-Dist: pycurl
+Requires-Dist: python-telegram-bot
+Requires-Dist: requests
+Requires-Dist: rich
+Requires-Dist: scikit-learn
+Requires-Dist: matplotlib
+Requires-Dist: seaborn
+Requires-Dist: plotly
+Requires-Dist: pygwalker
+Requires-Dist: tabulate
+Requires-Dist: itables
+Requires-Dist: timebudget
+Requires-Dist: tqdm
+Requires-Dist: tube_dl
+Requires-Dist: wandb
 Helper package for coding and automation
+**Version 0.1.49**
++ add `research` package to help with research tasks, including `benchquery` for benchmarking queries from dataframe
++ add `wandb_op` module for easily syncing offline data to Weights & Biases (wandb) and batch-deleting wandb runs.
 **Version 0.1.47**
 + add `pprint_box` to print object/string in a box frame (like in `inspect`)
@@ -122,5 +154,3 @@ New Features
 New Features
 + add support to upload local to google drive.

{halib-0.1.47 → halib-0.1.49}/README.md RENAMED Viewed

@@ -1,5 +1,10 @@
 Helper package for coding and automation
+**Version 0.1.49**
++ add `research` package to help with research tasks, including `benchquery` for benchmarking queries from dataframe
++ add `wandb_op` module for easily syncing offline data to Weights & Biases (wandb) and batch-deleting wandb runs.
 **Version 0.1.47**
 + add `pprint_box` to print object/string in a box frame (like in `inspect`)

halib-0.1.49/halib/research/benchquery.py ADDED Viewed

@@ -0,0 +1,131 @@
+import pandas as pd
+from rich.pretty import pprint
+from argparse import ArgumentParser
+def cols_to_col_groups(df):
+    """Group adjacent columns of ``df`` and classify them as common or per-dataset.
+    Columns are scanned left to right. A column joins the group of the column
+    before it when its own name contains "unnamed" (case-insensitive) or when
+    both names share the same lower-cased text before the first "_";
+    otherwise it opens a new group.
+    Args:
+        df: object exposing ``columns`` with string labels (e.g. a DataFrame).
+    Returns:
+        dict with two lists. ``"common_cols"`` holds, unchanged, the labels of
+        every group without an "unnamed" member. ``"db_cols"`` holds, for each
+        remaining group, the labels ``<first named member>_<position>``
+        (``None`` is used as the prefix when the group has no named member).
+    """
+    def base_name(label):
+        # text before the first "_" (the whole label when there is none)
+        return label.split("_")[0].lower()
+    def is_unnamed(label):
+        return "unnamed" in label.lower()
+    groups = []
+    previous = None
+    for label in df.columns:
+        # the very first column always opens a group; later ones may extend the last group
+        if groups and (is_unnamed(label) or base_name(label) == base_name(previous)):
+            groups[-1].append(label)
+        else:
+            groups.append([label])
+        previous = label
+    common_cols = []
+    db_cols = []
+    for members in groups:
+        if any(is_unnamed(member) for member in members):
+            # name the whole group after its first properly named column
+            prefix = next((member for member in members if not is_unnamed(member)), None)
+            db_cols.extend(f"{prefix}_{position}" for position in range(len(members)))
+        else:
+            common_cols.extend(members)
+    return {"common_cols": common_cols, "db_cols": db_cols}
+# def bech_by_db_name(df, db_list="db1, db2", key_metrics="p, r, f1, acc"):
+def str_2_list(input_str, sep=","):
+    """Split ``input_str`` on ``sep`` into stripped, non-empty items.
+    Args:
+        input_str: text to split.
+        sep: separator between items (default ",").
+    Returns:
+        list of the stripped items; empty when ``input_str`` is blank.
+    """
+    if not input_str.strip():
+        return []
+    pieces = (raw.strip() for raw in input_str.split(sep))
+    return [piece for piece in pieces if piece]
+def filter_bech_df_by_db_and_metrics(df, db_list="", key_metrics=""):
+    """Keep the common columns plus the dataset/metric columns requested by the caller.
+    The columns of ``df`` are first relabelled with the names produced by
+    ``cols_to_col_groups`` (common columns, then ``<dataset>_<i>`` columns).
+    Args:
+        df: benchmark table whose first row (``iloc[0]``) holds the metric
+            name of every dataset column.
+        db_list: comma-separated dataset names. A dataset column is kept when
+            one of the names occurs in its label (case-insensitive). Blank
+            entries are ignored; an empty value keeps all dataset columns.
+        key_metrics: comma-separated metric names, matched case-insensitively
+            against the first row. An empty value keeps every selected
+            dataset column.
+    Returns:
+        A new DataFrame with the common columns followed by the selected
+        dataset columns.
+    """
+    meta_cols_dict = cols_to_col_groups(df)
+    common_cols = list(meta_cols_dict["common_cols"])
+    db_cols = list(meta_cols_dict["db_cols"])
+    op_df = df.copy()
+    # NOTE(review): this relabelling assumes all common columns precede the
+    # dataset columns in ``df`` - confirm against the expected CSV layout.
+    op_df.columns = common_cols + db_cols
+    # use the parsed names so blank entries (e.g. a trailing comma) cannot
+    # turn into an empty pattern that matches every column
+    selected_db_list = [name.lower() for name in str_2_list(db_list)]
+    if selected_db_list:
+        db_filted_cols = []
+        for db_name in selected_db_list:
+            for col_name in db_cols:
+                # a column matching several names is still selected only once
+                if db_name in col_name.lower() and col_name not in db_filted_cols:
+                    db_filted_cols.append(col_name)
+    else:
+        db_filted_cols = db_cols
+    df_filtered = op_df[common_cols + db_filted_cols].copy()
+    selected_metrics_ls = [metric.lower() for metric in str_2_list(key_metrics)]
+    if selected_metrics_ls:
+        # the first data row carries the metric name of each dataset column
+        metrics_values = df_filtered.iloc[0][db_filted_cols].values
+        keep_metrics_cols = [
+            col_name
+            for metric, col_name in zip(metrics_values, db_filted_cols)
+            # str() keeps non-text cells (e.g. NaN) from raising on .lower()
+            if str(metric).strip().lower() in selected_metrics_ls
+        ]
+    else:
+        pprint("No metrics selected, keeping all db columns")
+        keep_metrics_cols = db_filted_cols
+    return df_filtered[common_cols + keep_metrics_cols].copy()
+def parse_args():
+    """Parse the command-line options of the benchmark query script.
+    Returns:
+        argparse.Namespace whose ``csv`` attribute is the CSV file path.
+    """
+    parser = ArgumentParser(
+        description="Filter a benchmark CSV by dataset and metric")
+    # NOTE(review): the default points at a developer-local Windows path; pass --csv explicitly elsewhere
+    parser.add_argument('-csv', '--csv', type=str, help='CSV file path', default=r"E:\Dev\__halib\test\bench.csv")
+    return parser.parse_args()
+def main():
+    """Load the CSV given on the command line and print its "bowfire"/"acc" subset."""
+    cli_args = parse_args()
+    # the file is read as semicolon-separated, UTF-8 encoded text
+    table = pd.read_csv(cli_args.csv, sep=";", encoding="utf-8")
+    print(filter_bech_df_by_db_and_metrics(table, "bowfire", "acc"))
+if __name__ == "__main__":
+    main()

{halib-0.1.47/halib → halib-0.1.49/halib/research}/plot.py RENAMED Viewed

@@ -1,6 +1,6 @@
-from .common import now_str, norm_str, ConsoleLog
-from .filetype import csvfile
-from .system import filesys as fs
+from ..common import now_str, norm_str, ConsoleLog
+from ..filetype import csvfile
+from ..system import filesys as fs
 from functools import partial
 from rich.console import Console
 from rich.pretty import pprint

{halib-0.1.47/halib → halib-0.1.49/halib/research}/torchloader.py RENAMED Viewed

@@ -6,9 +6,9 @@
 * @desc this module works as a utility tool for finding the best configuration for dataloader (num_workers, batch_size, pin_memory, etc.) that fits your hardware.
 """
 from argparse import ArgumentParser
-from .common import *
-from .filetype import csvfile
-from .filetype.yamlfile import load_yaml
+from ..common import *
+from ..filetype import csvfile
+from ..filetype.yamlfile import load_yaml
 from rich import inspect
 from torch.utils.data import DataLoader
 from torchvision import datasets, transforms

halib-0.1.49/halib/research/wandb_op.py ADDED Viewed

@@ -0,0 +1,116 @@
+import glob
+from rich.pretty import pprint
+import os
+import subprocess
+import argparse
+import wandb
+from tqdm import tqdm
+from rich.console import Console
+# module-wide rich console used by sync_runs and delete_runs for their output
+console = Console()
+def sync_runs(outdir):
+    """Run ``wandb sync`` on every run directory found below ``outdir``.
+    Each immediate sub-directory of ``outdir`` is searched for entries
+    matching ``wandb/*run-*``. Every match is passed to the ``wandb sync``
+    command, whose output is echoed to the console.
+    Args:
+        outdir: parent directory holding the sub-directories to search.
+    Raises:
+        AssertionError: if ``outdir`` does not exist or has no sub-directory.
+    """
+    outdir = os.path.abspath(outdir)
+    assert os.path.exists(outdir), f"Output directory {outdir} does not exist."
+    sub_dirs = [name for name in os.listdir(outdir) if os.path.isdir(os.path.join(outdir, name))]
+    assert len(sub_dirs) > 0, f"No subdirectories found in {outdir}."
+    console.rule("Parent Directory")
+    console.print(f"[yellow]{outdir}[/yellow]")
+    wandb_dirs = []
+    for sub_dir in sub_dirs:
+        exp_dir = os.path.join(outdir, sub_dir)
+        # escape the directory part so "[", "?" or "*" in a folder name are matched literally
+        wandb_dirs.extend(glob.glob(f"{glob.escape(exp_dir)}/wandb/*run-*"))
+    if not wandb_dirs:
+        console.print(f"No wandb runs found in {outdir}.")
+        return
+    console.print(f"Found [bold]{len(wandb_dirs)}[/bold] wandb runs in {outdir}.")
+    for i, wandb_dir in enumerate(wandb_dirs, start=1):
+        console.rule(f"Syncing wandb run {i}/{len(wandb_dirs)}")
+        console.print(f"Syncing: {wandb_dir}")
+        # the context manager closes the pipe and waits for the child on
+        # exit, including when the loop is left early or printing raises
+        with subprocess.Popen(
+            ["wandb", "sync", wandb_dir],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+        ) as process:
+            for line in process.stdout:
+                console.print(line.strip())
+                # stop reading once the W&B API reports an error
+                if " ERROR Error while calling W&B API" in line:
+                    break
+        if process.returncode != 0:
+            console.print(f"[red]Error syncing {wandb_dir}. Return code: {process.returncode}[/red]")
+        else:
+            console.print(f"Successfully synced {wandb_dir}.")
+def delete_runs(project, pattern=None):
+    """Delete runs of a W&B project after an interactive confirmation.
+    Args:
+        project: project identifier passed to ``wandb.Api().runs``.
+        pattern: when not ``None``, only runs whose name contains this text
+            are deleted; otherwise every run is deleted.
+    Nothing is deleted unless the user answers "y" at the prompt.
+    """
+    console.rule("Delete W&B Runs")
+    prompt_parts = [
+        "Are you sure you want to delete all runs in",
+        f" \n\tproject: [red]{project}[/red]",
+    ]
+    if pattern:
+        prompt_parts.append(f"\n\tpattern: [blue]{pattern}[/blue]")
+    console.print("".join(prompt_parts))
+    answer = input("This action cannot be undone. [y/N]: ")
+    if answer.strip().lower() != "y":
+        print("Cancelled.")
+        return
+    print("Confirmed. Proceeding...")
+    runs = wandb.Api().runs(project)
+    console.rule("Deleting W&B Runs")
+    if len(runs) == 0:
+        print("No runs found in the project.")
+        return
+    deleted = 0
+    for run in tqdm(runs):
+        # skip runs whose name does not contain the requested pattern
+        if pattern is not None and pattern not in run.name:
+            continue
+        run.delete()
+        console.print(f"Deleted run: [red]{run.name}[/red]")
+        deleted += 1
+    console.print(f"Total runs deleted: {deleted}")
+def valid_argument(args):
+    """Check that the parsed arguments fit the requested operation.
+    Args:
+        args: namespace with ``op`` and, depending on it, ``outdir`` or ``project``.
+    Raises:
+        ValueError: if ``args.op`` is neither "sync" nor "delete".
+        AssertionError: if the sync directory does not exist or the project
+            name to delete from is not a non-empty string.
+    """
+    operation = args.op
+    if operation not in ("sync", "delete"):
+        raise ValueError(f"Unknown operation: {args.op}")
+    if operation == "sync":
+        assert os.path.exists(args.outdir), f"Output directory {args.outdir} does not exist."
+        return
+    project = args.project
+    assert isinstance(project, str) and len(project.strip()) > 0, "Project name must be a non-empty string."
+def parse_args():
+    """Parse the command-line options of the W&B helper script.
+    Returns:
+        argparse.Namespace with ``op`` ("sync" or "delete"), ``project``,
+        ``outdir`` and ``pattern``.
+    """
+    parser = argparse.ArgumentParser(description="Operations on W&B runs")
+    parser.add_argument("-op", "--op", type=str, help="Operation to perform", default="sync", choices=["delete", "sync"])
+    parser.add_argument("-prj", "--project", type=str, default="fire-paper2-2025", help="W&B project name")
+    # the help text replaces a leftover placeholder and describes how "sync" uses the directory
+    parser.add_argument("-outdir", "--outdir", type=str, help="Parent directory whose sub-directories hold the wandb runs to sync", default="./zout/train")
+    parser.add_argument("-pt", "--pattern",
+        type=str,
+        default=None,
+        help="Run name pattern to match for deletion",
+    )
+    return parser.parse_args()
+def main():
+    """Parse and validate the command line, then run the selected W&B operation."""
+    args = parse_args()
+    # validation raises on invalid arguments, so nothing below runs in that case
+    valid_argument(args)
+    requested = args.op
+    if requested == "sync":
+        sync_runs(args.outdir)
+        return
+    if requested == "delete":
+        delete_runs(args.project, args.pattern)
+        return
+    raise ValueError(f"Unknown operation: {requested}")
+if __name__ == "__main__":
+    main()

halib-0.1.49/halib/system/__init__.py ADDED Viewed

File without changes

halib-0.1.49/halib/utils/__init__.py ADDED Viewed

File without changes

{halib-0.1.47/halib → halib-0.1.49/halib/utils}/tele_noti.py RENAMED Viewed

@@ -10,8 +10,8 @@ from rich.pretty import pprint
 from rich.console import Console
 import plotly.graph_objects as go
-from .system import filesys as fs
-from .filetype import textfile, csvfile
+from ..system import filesys as fs
+from ..filetype import textfile, csvfile
 from argparse import ArgumentParser

{halib-0.1.47 → halib-0.1.49/halib.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,20 +1,52 @@
 Metadata-Version: 2.1
 Name: halib
-Version: 0.1.47
+Version: 0.1.49
 Summary: Small library for common tasks
 Author: Hoang Van Ha
 Author-email: hoangvanhauit@gmail.com
-License: UNKNOWN
-Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE.txt
+Requires-Dist: arrow
+Requires-Dist: click
+Requires-Dist: enlighten
+Requires-Dist: kaleido==0.1.*
+Requires-Dist: loguru
+Requires-Dist: more-itertools
+Requires-Dist: moviepy
+Requires-Dist: networkx
+Requires-Dist: numpy
+Requires-Dist: omegaconf
+Requires-Dist: opencv-python
+Requires-Dist: pandas
+Requires-Dist: Pillow
+Requires-Dist: Pyarrow
+Requires-Dist: pycurl
+Requires-Dist: python-telegram-bot
+Requires-Dist: requests
+Requires-Dist: rich
+Requires-Dist: scikit-learn
+Requires-Dist: matplotlib
+Requires-Dist: seaborn
+Requires-Dist: plotly
+Requires-Dist: pygwalker
+Requires-Dist: tabulate
+Requires-Dist: itables
+Requires-Dist: timebudget
+Requires-Dist: tqdm
+Requires-Dist: tube_dl
+Requires-Dist: wandb
 Helper package for coding and automation
+**Version 0.1.49**
++ add `research` package to help with research tasks, including `benchquery` for benchmarking queries from dataframe
++ add `wandb_op` module for easily syncing offline data to Weights & Biases (wandb) and batch-deleting wandb runs.
 **Version 0.1.47**
 + add `pprint_box` to print object/string in a box frame (like in `inspect`)
@@ -122,5 +154,3 @@ New Features
 New Features
 + add support to upload local to google drive.

{halib-0.1.47 → halib-0.1.49}/halib.egg-info/SOURCES.txt RENAMED Viewed

@@ -7,12 +7,7 @@ setup.py
 halib/__init__.py
 halib/common.py
 halib/cuda.py
-halib/dataset.py
-halib/listop.py
-halib/plot.py
 halib/rich_color.py
-halib/tele_noti.py
-halib/torchloader.py
 halib.egg-info/PKG-INFO
 halib.egg-info/SOURCES.txt
 halib.egg-info/dependency_links.txt
@@ -29,8 +24,17 @@ halib/online/gdrive.py
 halib/online/gdrive_mkdir.py
 halib/online/gdrive_test.py
 halib/online/projectmake.py
+halib/research/__init__.py
+halib/research/benchquery.py
+halib/research/dataset.py
+halib/research/plot.py
+halib/research/torchloader.py
+halib/research/wandb_op.py
 halib/system/__init__.py
 halib/system/cmd.py
 halib/system/filesys.py
+halib/utils/__init__.py
+halib/utils/listop.py
+halib/utils/tele_noti.py
 test/test15.py
 test/test_df_creator.py

{halib-0.1.47 → halib-0.1.49}/halib.egg-info/requires.txt RENAMED Viewed

@@ -26,3 +26,4 @@ itables
 timebudget
 tqdm
 tube_dl
+wandb

{halib-0.1.47 → halib-0.1.49}/setup.py RENAMED Viewed

@@ -8,7 +8,7 @@ with open("requirements.txt") as f:
 setuptools.setup(
     name="halib",
-    version="0.1.47",
+    version="0.1.49",
     author="Hoang Van Ha",
     author_email="hoangvanhauit@gmail.com",
     description="Small library for common tasks",