halib 0.1.7__py3-none-any.whl → 0.1.99__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +84 -0
- halib/common.py +151 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/filetype/csvfile.py +151 -45
- halib/filetype/ipynb.py +63 -0
- halib/filetype/jsonfile.py +1 -1
- halib/filetype/textfile.py +4 -4
- halib/filetype/videofile.py +44 -33
- halib/filetype/yamlfile.py +95 -0
- halib/gdrive.py +1 -1
- halib/online/gdrive.py +104 -54
- halib/online/gdrive_mkdir.py +29 -17
- halib/online/gdrive_test.py +31 -18
- halib/online/projectmake.py +58 -43
- halib/plot.py +296 -11
- halib/projectmake.py +1 -1
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +100 -0
- halib/research/benchquery.py +131 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +133 -0
- halib/research/mics.py +68 -0
- halib/research/params_gen.py +108 -0
- halib/research/perfcalc.py +336 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/filesys.py +17 -10
- halib/system/__init__.py +0 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +124 -0
- halib/tele_noti.py +166 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/listop.py +13 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +1 -1
- halib-0.1.99.dist-info/METADATA +209 -0
- halib-0.1.99.dist-info/RECORD +64 -0
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/WHEEL +1 -1
- halib-0.1.7.dist-info/METADATA +0 -59
- halib-0.1.7.dist-info/RECORD +0 -30
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info/licenses}/LICENSE.txt +0 -0
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/top_level.txt +0 -0
halib/filetype/csvfile.py
CHANGED
@@ -6,87 +6,193 @@ from rich import inspect
 from rich.pretty import pprint
 from tqdm import tqdm
 from loguru import logger
+from itables import init_notebook_mode, show
+import pygwalker as pyg
+import textwrap
+import csv
 
 console = Console()
 
+
 def read(file, separator=","):
     df = pd.read_csv(file, separator)
     return df
 
 
+def read_auto_sep(filepath, sample_size=2048, **kwargs):
+    """
+    Read a CSV file with automatic delimiter detection.
+
+    Parameters
+    ----------
+    filepath : str
+        Path to the CSV file.
+    sample_size : int, optional
+        Number of bytes to read for delimiter sniffing.
+    **kwargs : dict
+        Extra keyword args passed to pandas.read_csv.
+
+    Returns
+    -------
+    df : pandas.DataFrame
+    """
+    with open(filepath, "r", newline="", encoding=kwargs.get("encoding", "utf-8")) as f:
+        sample = f.read(sample_size)
+        f.seek(0)
+    try:
+        dialect = csv.Sniffer().sniff(sample, delimiters=[",", ";", "\t", "|", ":"])
+        sep = dialect.delimiter
+    except csv.Error:
+        sep = ","  # fallback if detection fails
+
+    return pd.read_csv(filepath, sep=sep, **kwargs)
+
 # for append, mode = 'a'
-def
-    if not outfile.endswith(
-        outfile = f
+def fn_write(df, outfile, mode="w", header=True, index_label=None):
+    if not outfile.endswith(".csv"):
+        outfile = f"{outfile}.csv"
     if index_label is not None:
         df.to_csv(outfile, mode=mode, header=header, index_label=index_label)
     else:
         df.to_csv(outfile, mode=mode, header=header, index=False)
 
 
-def
+def fn_make_df_with_columns(columns):
     df = pd.DataFrame(columns=columns)
     return df
 
 
-def
-    row_data =
+def fn_insert_rows(df, singleRow_or_rowList):
+    row_data = (
+        singleRow_or_rowList
+        if type(singleRow_or_rowList[0]) is list
+        else [singleRow_or_rowList]
+    )
     new_row_df = pd.DataFrame(row_data, columns=df.columns)
     df = pd.concat([df, new_row_df], ignore_index=True)
     return df
 
+# Auto-wrap function for each cell
+def auto_wrap(cell, width=40):
+    return textwrap.fill(str(cell), width=width)
+
+def fn_display_df(df, max_col_width=40):
+    # Apply wrapping; tablefmt="psql" for PostgreSQL-like output
+    # wrapped_df = df.applymap(lambda x: auto_wrap(x, width=max_col_width))
+    # fix the future warning of applymap
+    wrapped_df = df.apply(
+        lambda col: col.map(lambda x: auto_wrap(x, width=max_col_width))
+    )
+    print(tabulate(wrapped_df, headers="keys", tablefmt="grid", numalign="right"))
+
+def showdf(df, display_mode="itable", in_jupyter=True, all_interactive=False):
+    if display_mode == "itable":
+        if in_jupyter:
+            init_notebook_mode(all_interactive=all_interactive)
+        show(
+            df,
+            # layout={"top1": "searchPanes"},
+            # searchPanes={"layout": "column-3", "cascadePanes": True},
+            caption="table caption",
+            layout={"top1": "searchBuilder"},
+            buttons=["csvHtml5", "excelHtml5", "colvis"],
+            search={"regex": True, "caseInsensitive": True},
+            paging=False,  # no paging
+            scrollY="300px",  # height of table
+            scrollCollapse=True,
+            showIndex=True,  # show row no.
+            select=True,  # allow row selected
+            keys=True,  # enable navigate using arrow keys
+        )
+    elif display_mode == "pygwalker":
+        return pyg.walk(df)
+    else:
+        raise ValueError("Invalid display mode, current support [itable, pygwalker]")
 
-def display_df(df):
-    print(tabulate(df, headers='keys', tablefmt='psql', numalign="right"))
-
-
-def config_display_pd(max_rows=None, max_columns=None,
-                      display_width=1000, col_header_justify='center',
-                      precision=10):
-    pd.set_option('display.max_rows', max_rows)
-    pd.set_option('display.max_columns', max_columns)
-    pd.set_option('display.width', display_width)
-    pd.set_option('display.colheader_justify', col_header_justify)
-    pd.set_option('display.precision', precision)
 
+def fn_config_display_pd(
+    max_rows=None,
+    max_columns=None,
+    display_width=1000,
+    col_header_justify="center",
+    precision=10,
+):
+    pd.set_option("display.max_rows", max_rows)
+    pd.set_option("display.max_columns", max_columns)
+    pd.set_option("display.width", display_width)
+    pd.set_option("display.colheader_justify", col_header_justify)
+    pd.set_option("display.precision", precision)
 
 
 class DFCreator(dict):
     """docstring for ClassName."""
-
-    def __init__(self
-
-
+
+    def __init__(self, *arg, **kw):
+        super(DFCreator, self).__init__(*arg, **kw)
+        self.row_pool_dict = {}
+
     def create_table(self, table_name, columns):
         self[table_name] = pd.DataFrame(columns=columns)
-
+        self.row_pool_dict[table_name] = []
+
+    """Instead of inserting to dataframe, insert to row pool for fast computation"""
+
     def insert_rows(self, table_name, singleRow_or_rowList):
-
-
+        rows_data = (
+            singleRow_or_rowList
+            if type(singleRow_or_rowList[0]) is list
+            else [singleRow_or_rowList]
+        )
+        self.row_pool_dict[table_name].extend(rows_data)
+
+    """Fill from row pool to actual table dataframe"""
+
+    def fill_table_from_row_pool(self, table_name):
+        if len(self.row_pool_dict[table_name]) > 0:
+            # concat row pool to table dataframe
+            self[table_name] = fn_insert_rows(
+                self[table_name], self.row_pool_dict[table_name]
+            )
+            # free the pool
+            self.row_pool_dict[table_name] = []
+
+    def write_table(
+        self,
+        table_name,
+        output_dir,
+        out_file_name=None,
+        mode="w",
+        header=True,
+        index_label=None,
+    ):
+        self.fill_table_from_row_pool(table_name)
+
+        if not out_file_name:
+            outfile = f"{output_dir}/{table_name}.csv"
+        else:
+            outfile = f"{output_dir}/{out_file_name}.csv"
+
+        fn_write(self[table_name], outfile, mode, header, index_label)
+
+    def write_all_table(self, output_dir, mode="w", header=True, index_label=None):
+        for table_name in self.keys():
+            outfile = f"{output_dir}/{table_name}.csv"
+            fn_write(self[table_name], outfile, mode, header, index_label)
+
     def display_table(self, table_name):
-
-
-
-        write(self[table_name], outfile, mode, header, index_label)
-
-
+        self.fill_table_from_row_pool(table_name)
+        fn_display_df(self[table_name])
+
     def display_table_schema(self, table_name):
         columns = list(self[table_name].columns)
-        console.print(f
-
+        console.print(f"TABLE {table_name}: {columns}", style="bold blue")
+
     def display_all_table_schema(self):
-
-
-
-
+        table_names = list(self.keys())
+        for table_name in table_names:
+            self.display_table_schema(table_name)
+
     def display_all_table(self):
         for table_name in self.keys():
             console.rule(table_name)
-
-    def write_all_table(self, output_dir, mode='w', header=True, index_label=None):
-        for table_name in self.keys():
-            outfile = f'{output_dir}/{table_name}.csv'
-            write(self[table_name], outfile, mode, header, index_label)
-
-
-
+            self.display_table(table_name)
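Usage note (not part of the diff): the new csvfile API above adds delimiter sniffing via read_auto_sep and a row-pool workflow on DFCreator, where rows are buffered and only concatenated into the DataFrame when a table is written or displayed. A minimal sketch of how these pieces combine; the file name, column names, and output directory are placeholders, and the output directory is assumed to already exist.

from halib.filetype import csvfile

# Delimiter is sniffed from the first 2048 bytes; falls back to "," on failure.
df = csvfile.read_auto_sep("results.csv")

# Buffer rows in the pool, then flush and write in one step.
dfc = csvfile.DFCreator()
dfc.create_table("metrics", columns=["epoch", "acc"])
dfc.insert_rows("metrics", [[1, 0.91], [2, 0.93]])   # a list of rows
dfc.write_table("metrics", output_dir="./out")       # flushes the pool, writes ./out/metrics.csv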
halib/filetype/ipynb.py
ADDED
@@ -0,0 +1,63 @@
+from contextlib import contextmanager
+from pathlib import Path
+
+import ipynbname
+
+from ..common import console, now_str
+
+
+@contextmanager
+def gen_ipynb_name(
+    filename,
+    add_time_stamp=False,
+    nb_prefix="nb__",
+    separator="__",
+):
+    """
+    Context manager that prefixes the filename with the notebook name.
+    Output: NotebookName_OriginalName.ext
+    """
+    try:
+        nb_name = ipynbname.name()
+    except FileNotFoundError:
+        nb_name = "script"  # Fallback
+
+    p = Path(filename)
+
+    # --- FIX START ---
+
+    # 1. Get the parts separately
+    original_stem = p.stem  # "test" (no extension)
+    extension = p.suffix  # ".csv"
+
+    now_string = now_str() if add_time_stamp else ""
+
+    # 2. Construct the base name (Notebook + Separator + OriginalName)
+    base_name = f"{nb_prefix}{nb_name}{separator}{original_stem}"
+
+    # 3. Append timestamp if needed
+    if now_string:
+        base_name = f"{base_name}{separator}{now_string}"
+
+    # 4. Add the extension at the VERY END
+    new_filename = f"{base_name}{extension}"
+
+    # --- FIX END ---
+
+    final_path = p.parent / new_filename
+
+    # Assuming you use 'rich' console based on your snippet
+    # console.rule()
+    # print(f"📝 Saving as: {final_path}")
+
+    yield str(final_path)
+
+
+if __name__ == "__main__":
+    # --- Usage Example ---
+    # Assume Notebook Name is: "MyThesisWork"
+    filename = "results.csv"
+    with gen_ipynb_name(filename) as filename_ipynb:
+        # filename_ipynb is now: "MyThesisWork_results.csv"
+        print(f"File to save: {filename_ipynb}")
+        # df.to_csv(filename_ipynb)
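For context (not part of the diff): gen_ipynb_name is a context manager that yields a path prefixed with the current notebook's name (falling back to "script" outside Jupyter). A short sketch mirroring the module's own usage example; the notebook name, DataFrame, and file name are placeholders.

import pandas as pd
from halib.filetype.ipynb import gen_ipynb_name

df = pd.DataFrame({"a": [1, 2]})
# In a notebook called "analysis", this yields something like
# "nb__analysis__results.csv"; with add_time_stamp=True a timestamp is appended.
with gen_ipynb_name("results.csv", add_time_stamp=True) as out_path:
    df.to_csv(out_path, index=False)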
halib/filetype/jsonfile.py
CHANGED
halib/filetype/textfile.py
CHANGED
@@ -1,13 +1,13 @@
 def read_line_by_line(file_path):
-    with open(file_path,
+    with open(file_path, "r") as file:
         lines = file.readlines()
         lines = [line.rstrip() for line in lines]
         return lines
 
 
 def write(lines, outfile, append=False):
-    mode =
-    with open(outfile, mode, encoding=
+    mode = "a" if append else "w"
+    with open(outfile, mode, encoding="utf-8") as f:
         for line in lines:
             f.write(line)
-            f.write(
+            f.write("\n")
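As a reminder of the call signatures touched above, a minimal round trip; the file name and line contents are illustrative only.

from halib.filetype import textfile

lines = ["first line", "second line"]
textfile.write(lines, "notes.txt")                      # mode "w"; each line gets a trailing "\n"
textfile.write(["appended"], "notes.txt", append=True)  # mode "a"
print(textfile.read_line_by_line("notes.txt"))          # ['first line', 'second line', 'appended']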
halib/filetype/videofile.py
CHANGED
@@ -1,36 +1,35 @@
-from enum import Enum
-
 import cv2
+import textfile
 import enlighten
-from
+from enum import Enum
+from ..system import filesys
 from tube_dl import Youtube, Playlist
-
-from halib.sys import filesys
-from halib.filetype import textfile
+from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 
 
 class VideoResolution(Enum):
-    VR480p =
-    VR576p =
-    VR720p_hd =
-    VR1080p_full_hd =
-    VR4K_uhd =
-    VR8K_uhd =
+    VR480p = "720x480"
+    VR576p = "1280x720"
+    VR720p_hd = "1280x720"
+    VR1080p_full_hd = "1920x1080 "
+    VR4K_uhd = "3840x2160"
+    VR8K_uhd = "7680x4320"
 
     def __str__(self):
-        return
+        return "%s" % self.value
 
 
 def get_video_resolution_size(video_resolution):
-    separator =
+    separator = "x"
     resolution_str = str(video_resolution)
     info_arr = resolution_str.split(separator)
     width, height = int(info_arr[0]), int(info_arr[1])
     return width, height
 
 
-def get_videos_by_resolution(
-
+def get_videos_by_resolution(
+    directory, video_resolution, video_ext="mp4", include_better=True
+):
     video_paths = filesys.filter_files_by_extension(directory, video_ext)
     filtered_video_paths = []
     for path in video_paths:
@@ -62,7 +61,9 @@ progress_bar = None
 def on_progress(bytes_done, total_bytes):
     global progress_bar
     if progress_bar is None:
-        progress_bar = enlighten.get_manager().counter(
+        progress_bar = enlighten.get_manager().counter(
+            total=20, desc="Downloading", unit="byte", color="blue"
+        )
 
     progress_bar.total = total_bytes
     progress_bar.count = bytes_done
@@ -74,33 +75,38 @@ def on_progress(bytes_done, total_bytes):
 
 
 def get_youtube_url(full_url_or_video_code):
-    if
+    if "youtube" in full_url_or_video_code:
         url = full_url_or_video_code
     else:
-        url = f
+        url = f"https://youtube.com/watch?v={full_url_or_video_code}"
     return url
 
 
-def download_yt_video(
-
+def download_yt_video(
+    full_url_or_video_code,
+    save_folder="./",
+    report_progress=False,
+    video_idx="1",
+    total_video="1",
+):
     url = get_youtube_url(full_url_or_video_code)
     filesys.make_dir(save_folder)
     filesys.change_current_dir(save_folder)
     try:
         yt = Youtube(url)
-        title_en = yt.title.encode(
+        title_en = yt.title.encode("ascii", "ignore")
         file_download = yt.formats.first()
         if report_progress:
-            print(f
+            print(f"\n[{video_idx}/{total_video}][DOWNLOAD]{title_en}")
         file_download.download(onprogress=on_progress, skip_existing=True)
     except TypeError:
-        print(f
+        print(f"[ERROR] download {url}")
 
 
-def download_playlist(
-
-
-    print(f
+def download_playlist(
+    playlist_url, save_folder="./", report_progress=False, start_pattern=None
+):
+    print(f"[DOWNLOAD PLAYLIST] {playlist_url}")
     pl = Playlist(playlist_url).videos
     total_video = len(pl)
     should_start = False
@@ -108,7 +114,7 @@ def download_playlist(playlist_url, save_folder='./',
     count = 0
     for idx, code in enumerate(pl):
         try:
-            url = f
+            url = f"https://youtube.com/watch?v={code}"
             yt = Youtube(url)
             count += 1
             if start_pattern is None:
@@ -116,12 +122,16 @@ def download_playlist(playlist_url, save_folder='./',
             elif start_pattern in yt.title:
                 should_start = True
             if should_start:
-                download_yt_video(
-
-
+                download_yt_video(
+                    url,
+                    save_folder,
+                    report_progress,
+                    video_idx=str(count),
+                    total_video=str(total_video),
+                )
 
         except TypeError:
-            print(f
+            print(f"[ERROR] download {url}")
     enlighten.get_manager().stop()
 
 
@@ -134,6 +144,7 @@ def download_multiple_playlist_in_files(text_file, report_progress=False):
         plUrl = folder_plUrl.split()[1]
         download_playlist(plUrl, save_folder=folder, report_progress=report_progress)
 
+
 # test code
 # pl = 'https://youtube.com/playlist?list=PLYaaU301HUe03PabLEGbMGB8nhHgq58Zr'
 # download_playlist(pl, './test', report_progress=True)
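A hedged sketch of the download helpers as refactored above, with signatures taken from the diff; the video code and save folder are placeholders, the playlist URL is the one from the module's own test comment, and actual downloads depend on the tube_dl package working against current YouTube.

from halib.filetype import videofile

# Single video: accepts a full URL or just the video code.
videofile.download_yt_video("dQw4w9WgXcQ", save_folder="./videos", report_progress=True)

# Whole playlist; start_pattern skips videos until a title containing it is seen.
videofile.download_playlist(
    "https://youtube.com/playlist?list=PLYaaU301HUe03PabLEGbMGB8nhHgq58Zr",
    save_folder="./videos",
    report_progress=True,
)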
halib/filetype/yamlfile.py
ADDED
@@ -0,0 +1,95 @@
+import time
+import networkx as nx
+from rich import inspect
+from rich.pretty import pprint
+from omegaconf import OmegaConf
+from rich.console import Console
+from argparse import ArgumentParser
+
+from ..research.mics import *
+
+console = Console()
+
+
+def _load_yaml_recursively(
+    yaml_file, yaml_files=[], share_nx_graph=nx.DiGraph(), log_info=False
+):
+    conf = OmegaConf.load(yaml_file)
+    yaml_files.append(yaml_file)
+    if "__base__" in conf:
+        parent = conf["__base__"]
+        if isinstance(parent, str):
+            parent = [parent]
+        for p in parent:
+            edge = (yaml_file, p)
+            share_nx_graph.add_edge(*edge)
+            for cycle in nx.simple_cycles(share_nx_graph):
+                assert False, f"Cyclic dependency detected: {cycle}"
+            # update conf with parent; BY loading parent and merging with conf (the child)
+            conf = OmegaConf.merge(
+                _load_yaml_recursively(p, yaml_files, share_nx_graph), conf
+            )
+    if log_info:
+        console.rule()
+        console.print(f"current yaml_file: {yaml_file}")
+        inspect(yaml_files)
+        pprint(OmegaConf.to_container(conf, resolve=True))
+        time.sleep(1)
+    return conf
+
+
+def load_yaml(yaml_file, to_dict=False, log_info=False):
+    yaml_files = []
+    share_nx_graph = nx.DiGraph()
+    omgconf = _load_yaml_recursively(
+        yaml_file,
+        yaml_files=yaml_files,
+        share_nx_graph=share_nx_graph,
+        log_info=log_info,
+    )
+
+    if to_dict:
+        return OmegaConf.to_container(omgconf, resolve=True)
+    else:
+        return omgconf
+
+def load_yaml_with_PC_abbr(
+    yaml_file, pc_abbr_to_working_disk=DEFAULT_ABBR_WORKING_DISK
+):
+    # current PC abbreviation
+    pc_abbr = get_PC_abbr_name()
+
+    # current plaftform: windows or linux
+    current_platform = platform.system().lower()
+
+    assert pc_abbr in pc_abbr_to_working_disk, f"The is no mapping for {pc_abbr} to <working_disk>"
+
+    # working disk
+    working_disk = pc_abbr_to_working_disk.get(pc_abbr)
+
+    # load yaml file
+    data_dict = load_yaml(yaml_file=yaml_file, to_dict=True)
+
+    # Normalize paths in the loaded data
+    data_dict = normalize_paths(data_dict, working_disk, current_platform)
+    return data_dict
+
+
+def parse_args():
+    parser = ArgumentParser(description="desc text")
+    parser.add_argument(
+        "-cfg", "--cfg", type=str, help="cfg file", default="cfg__default.yaml"
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    cfg_file = args.cfg
+    cfg = load_yaml(cfg_file, to_dict=True)
+    console.rule()
+    pprint(cfg)
+
+
+if __name__ == "__main__":
+    main()
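The yamlfile module above implements config inheritance: a child YAML names its parent(s) under __base__, parents are loaded recursively and merged with OmegaConf (child values win), and a networkx graph guards against cyclic includes. A small illustration under two hypothetical config files placed side by side; the keys and values are made up.

# base.yaml contains:
#     lr: 0.001
#     batch_size: 32
#
# exp.yaml contains:
#     __base__: base.yaml
#     batch_size: 64

from halib.filetype import yamlfile

cfg = yamlfile.load_yaml("exp.yaml", to_dict=True)
# The child overrides batch_size and inherits lr; the __base__ key remains in the merge:
# {'__base__': 'base.yaml', 'lr': 0.001, 'batch_size': 64}
print(cfg)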
halib/gdrive.py
CHANGED
@@ -12,7 +12,7 @@ from pydrive.auth import GoogleAuth
 from pydrive.drive import GoogleDrive
 from pydrive.files import GoogleDriveFileList
 
-from halib.
+from halib.system import filesys
 from halib.filetype import textfile
 
 # Import general libraries
|