py2ls 0.2.4.8__py3-none-any.whl → 0.2.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py CHANGED
@@ -1,8 +1,9 @@
  import numpy as np
- import pandas as pd
+ import pandas as pd
  import sys, os
- from IPython.display import display
+ from IPython.display import display
  from typing import List, Optional, Union
+
  try:
  get_ipython().run_line_magic("load_ext", "autoreload")
  get_ipython().run_line_magic("autoreload", "2")
@@ -10,11 +11,14 @@ except NameError:
  pass

  import warnings
+
  warnings.simplefilter("ignore", category=pd.errors.SettingWithCopyWarning)
  warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

- def run_once_within(duration=60): # default 60s
+
+ def run_once_within(duration=60): # default 60s
  import time
+
  """
  usage:
  if run_once_within():
@@ -26,7 +30,9 @@ def run_once_within(duration=60): # default 60s
  run_once_within.time_last = None
  time_curr = time.time()

- if (run_once_within.time_last is None) or (time_curr - run_once_within.time_last >= duration):
+ if (run_once_within.time_last is None) or (
+ time_curr - run_once_within.time_last >= duration
+ ):
  run_once_within.time_last = time_curr  # Update the last execution time
  return True
  else:
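
Illustration (editorial sketch, not part of the released diff): the reworked `run_once_within` above throttles repeated execution by caching a timestamp as an attribute on the function object and returning True only when `duration` seconds have passed since the last True result. A minimal usage sketch, assuming the function is imported from py2ls.ips:

    from py2ls.ips import run_once_within

    for _ in range(3):
        if run_once_within(duration=60):  # True on the first call, False for the next 60 s
            print("expensive setup runs once")
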
@@ -42,13 +48,14 @@ def plt_font(dir_font: str = "/System/Library/Fonts/Hiragino Sans GB.ttc"):
  """
  import matplotlib.pyplot as plt
  from matplotlib import font_manager
- slashtype = "/" if 'mac' in get_os() else "\\"
+
+ slashtype = "/" if "mac" in get_os() else "\\"
  if slashtype in dir_font:
  font_manager.fontManager.addfont(dir_font)
  fontname = os.path.basename(dir_font).split(".")[0]
  else:
  if "cn" in dir_font.lower() or "ch" in dir_font.lower():
- fontname = "Hiragino Sans GB" # default Chinese font
+ fontname = "Hiragino Sans GB"  # default Chinese font
  else:
  fontname = dir_font

@@ -62,6 +69,7 @@ def plt_font(dir_font: str = "/System/Library/Fonts/Hiragino Sans GB.ttc"):
  plt.rcParams["font.sans-serif"] = ["Arial"]
  return fontname

+
  # set 'dir_save'
  if "dar" in sys.platform:
  dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
@@ -133,6 +141,7 @@ def run_every(when: str = None, job=None, wait: int = 60):
  """
  import schedule
  import time
+
  if job is None:
  print("No job provided!")
  return
@@ -180,6 +189,7 @@ def run_at(when: str, job=None, wait: int = 60):
  """
  from datetime import datetime
  import time
+
  if job is None:
  print("No job provided!")
  return
@@ -260,11 +270,12 @@ def get_timezone(timezone: str | list = None):
  def is_package_installed(package_name):
  """Check if a package is installed."""
  import importlib.util
+
  package_spec = importlib.util.find_spec(package_name)
  return package_spec is not None


- def upgrade(module="py2ls",uninstall=False):
+ def upgrade(module="py2ls", uninstall=False):
  """
  Installs or upgrades a specified Python module.

@@ -273,6 +284,7 @@ def upgrade(module="py2ls",uninstall=False):
  uninstall (bool): If True, uninstalls the webdriver-manager before upgrading.
  """
  import subprocess
+
  if not is_package_installed(module):
  try:
  subprocess.check_call([sys.executable, "-m", "pip", "install", module])
@@ -310,6 +322,7 @@ def get_version(pkg):

  def rm_folder(folder_path, verbose=True):
  import shutil
+
  try:
  shutil.rmtree(folder_path)
  if verbose:
@@ -329,6 +342,7 @@ def fremove(path, verbose=True):
  try:
  if os.path.isdir(path):
  import shutil
+
  shutil.rmtree(path)
  if verbose:
  print(f"Successfully deleted folder {path}")
@@ -364,11 +378,13 @@ def fremove(path, verbose=True):

  def get_cwd():
  from pathlib import Path
+
  # Get the current script's directory as a Path object
- current_directory = Path(__file__).resolve().parent
-
+ current_directory = Path(__file__).resolve().parent
+
  return current_directory

+
  def search(
  query,
  limit=5,
@@ -380,6 +396,7 @@ def search(
  **kwargs,
  ):
  from duckduckgo_search import DDGS
+
  if "te" in kind.lower():
  results = DDGS().text(query, max_results=limit)
  res = pd.DataFrame(results)
@@ -413,6 +430,7 @@ def echo(*args, **kwargs):
  """
  global dir_save
  from duckduckgo_search import DDGS
+
  query = None
  model = kwargs.get("model", "gpt")
  verbose = kwargs.get("verbose", True)
@@ -461,10 +479,12 @@ def echo(*args, **kwargs):
  res = DDGS().chat(query, model=model_valid)
  if verbose:
  from pprint import pp
+
  pp(res)
  if log:
  from datetime import datetime
  import time
+
  dt_str = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S")
  res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
  if bool(os.path.basename(dir_save)):
@@ -487,6 +507,7 @@ def ai(*args, **kwargs):

  def detect_lang(text, output="lang", verbose=True):
  from langdetect import detect
+
  dir_curr_script = os.path.dirname(os.path.abspath(__file__))
  dir_lang_code = dir_curr_script + "/data/lang_code_iso639.json"
  print(dir_curr_script, os.getcwd(), dir_lang_code)
@@ -516,13 +537,14 @@ def is_text(s):

  from typing import Any, Union

+
  def shared(*args, strict=True, n_shared=2, verbose=True):
  """
  check the shared elelements in two list.
  usage:
  list1 = [1, 2, 3, 4, 5]
  list2 = [4, 5, 6, 7, 8]
- list3 = [5, 6, 9, 10]
+ list3 = [5, 6, 9, 10]
  a = shared(list1, list2,list3)
  """
  if verbose:
@@ -538,26 +560,34 @@ def shared(*args, strict=True, n_shared=2, verbose=True):
  print(f"{' ' * 2}All inputs must be lists.")
  return []
  first_list = flattened_lists[0]
- shared_elements = [item for item in first_list if all(item in lst for lst in flattened_lists)]
+ shared_elements = [
+ item for item in first_list if all(item in lst for lst in flattened_lists)
+ ]
  if strict:
- # Strict mode: require elements to be in all lists
- shared_elements = set(flattened_lists[0])
- for lst in flattened_lists[1:]:
- shared_elements.intersection_update(lst)
+ # Strict mode: require elements to be in all lists
+ shared_elements = set(flattened_lists[0])
+ for lst in flattened_lists[1:]:
+ shared_elements.intersection_update(lst)
  else:
  from collections import Counter
+
  all_elements = [item for sublist in flattened_lists for item in sublist]
  element_count = Counter(all_elements)
  # Get elements that appear in at least n_shared lists
- shared_elements = [item for item, count in element_count.items() if count >= n_shared]
+ shared_elements = [
+ item for item, count in element_count.items() if count >= n_shared
+ ]

  shared_elements = flatten(shared_elements, verbose=verbose)
  if verbose:
- elements2show = shared_elements if len(shared_elements)<10 else shared_elements[:5]
+ elements2show = (
+ shared_elements if len(shared_elements) < 10 else shared_elements[:5]
+ )
  print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
  print("********* checking shared elements *********")
  return shared_elements

+
  def not_shared(*args, strict=True, n_shared=2, verbose=False):
  """
  To find the elements in list1 that are not shared with list2 while maintaining the original order of list1
@@ -568,7 +598,7 @@ def not_shared(*args, strict=True, n_shared=2, verbose=False):
  """
  _common = shared(*args, strict=strict, n_shared=n_shared, verbose=verbose)
  list1 = flatten(args[0], verbose=verbose)
- _not_shared=[item for item in list1 if item not in _common]
+ _not_shared = [item for item in list1 if item not in _common]
  return _not_shared


@@ -578,29 +608,41 @@ def flatten(nested: Any, unique_list=True, verbose=False):
  Parameters:
  nested : Any, Can be a list, tuple, dictionary, or set.
  Returns: list, A flattened list.
- """
+ """
  flattened_list = []
  stack = [nested]
  while stack:
  current = stack.pop()
  if isinstance(current, dict):
- stack.extend(current.values())
+ stack.extend(current.values())
  elif isinstance(current, (list, tuple, set)):
  stack.extend(current)
  elif isinstance(current, pd.Series):
  stack.extend(current)
- elif isinstance(current, (pd.Index,np.ndarray)): # df.columns df.index are object of type pd.Index
+ elif isinstance(
+ current, (pd.Index, np.ndarray)
+ ): # df.columns df.index are object of type pd.Index
  stack.extend(current.tolist())
  else:
  flattened_list.append(current)
  if verbose:
- print(f"{' '*2}<in info: {len(unique(flattened_list))} elements after flattened>")
+ print(
+ f"{' '*2}<in info: {len(unique(flattened_list))} elements after flattened>"
+ )
  if unique_list:
  return unique(flattened_list)[::-1]
  else:
  return flattened_list
-
- def strcmp(search_term, candidates, ignore_case=True,get_rank=False, verbose=False, scorer="WR"):
+
+
+ def strcmp(
+ search_term,
+ candidates,
+ ignore_case=True,
+ get_rank=False,
+ verbose=False,
+ scorer="WR",
+ ):
  """
  Compares a search term with a list of candidate strings and finds the best match based on similarity score.

@@ -614,13 +656,14 @@ def strcmp(search_term, candidates, ignore_case=True,get_rank=False, verbose=Fal
  tuple: A tuple containing the best match and its index in the candidates list.
  """
  from fuzzywuzzy import fuzz, process
+
  def to_lower(s, ignore_case=True):
  # Converts a string or list of strings to lowercase if ignore_case is True.
  if ignore_case:
  if isinstance(s, str):
  return s.lower()
  elif isinstance(s, list):
- s=[str(i) for i in s]# convert all to str
+ s = [str(i) for i in s]  # convert all to str
  return [elem.lower() for elem in s]
  return s

@@ -630,12 +673,15 @@ def strcmp(search_term, candidates, ignore_case=True,get_rank=False, verbose=Fal
  similarity_scores = [fuzz.partial_ratio(str1_, word) for word in str2_]
  elif "W" in scorer.lower():
  similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
- elif "ratio" in scorer.lower() or "stri" in scorer.lower():#Ratio (Strictest)
+ elif "ratio" in scorer.lower() or "stri" in scorer.lower():  # Ratio (Strictest)
  similarity_scores = [fuzz.ratio(str1_, word) for word in str2_]
  else:
  similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
  if get_rank:
- idx = [similarity_scores.index(i) for i in sorted(similarity_scores,reverse=True)]
+ idx = [
+ similarity_scores.index(i)
+ for i in sorted(similarity_scores, reverse=True)
+ ]
  if verbose:
  display([candidates[ii] for ii in idx])
  return [candidates[ii] for ii in idx]
@@ -663,6 +709,7 @@ def strcmp(search_term, candidates, ignore_case=True,get_rank=False, verbose=Fal
  # str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
  # best_match, idx = strcmp(str1, str2, ignore_case=1)

+
  def cn2pinyin(
  cn_str: Union[str, list] = None,
  sep: str = " ",
@@ -727,19 +774,21 @@ def cn2pinyin(
  style = Style.PL
  else:
  style = Style.NORMAL
- if not isinstance(cn_str,list):
- cn_str=[cn_str]
- pinyin_flat=[]
+ if not isinstance(cn_str, list):
+ cn_str = [cn_str]
+ pinyin_flat = []
  for cn_str_ in cn_str:
  pinyin_string = pinyin(cn_str_, style=style)
  pinyin_flat.append(sep.join([item[0] for item in pinyin_string]))
- if len(pinyin_flat)==1:
+ if len(pinyin_flat) == 1:
  return pinyin_flat[0]
  else:
  return pinyin_flat

+
  def counter(list_, verbose=True):
  from collections import Counter
+
  c = Counter(list_)
  # Print the name counts
  for item, count in c.items():
@@ -769,6 +818,7 @@ def str2time(time_str, fmt="24"):
  - str: The converted time string.
  """
  from datetime import datetime
+
  def time_len_corr(time_str):
  time_str_ = (
  ssplit(time_str, by=[":", " ", "digital_num"]) if ":" in time_str else None
@@ -830,6 +880,7 @@ def str2date(date_str, fmt="%Y-%m-%d_%H:%M:%S"):
  - str: The converted date string.
  """
  from dateutil import parser
+
  try:
  date_obj = parser.parse(date_str)
  except ValueError as e:
@@ -847,6 +898,7 @@ def str2date(date_str, fmt="%Y-%m-%d_%H:%M:%S"):

  def str2num(s, *args, **kwargs):
  import re
+
  delimiter = kwargs.get("sep", None)
  round_digits = kwargs.get("round", None)
  if delimiter is not None:
@@ -863,6 +915,7 @@ def str2num(s, *args, **kwargs):
  num = float(s)
  except ValueError:
  from numerizer import numerize
+
  try:
  numerized = numerize(s)
  num = int(numerized) if "." not in numerized else float(numerized)
@@ -1067,13 +1120,12 @@ def inch2px(*inch, dpi=300) -> list:
  # Case 1: When the user passes a single argument that is a list or tuple, e.g., inch2px([1, 2]) or inch2px((1, 2))
  if len(inch) == 1 and isinstance(inch[0], (list, tuple)):
  return [i * dpi for i in inch[0]]
-
+
  # Case 2: When the user passes multiple arguments directly, e.g., inch2px(1, 2)
  else:
  return [i * dpi for i in inch]


-
  def cm2inch(*inch) -> list:
  """
  Usage:
@@ -1191,6 +1243,7 @@ def paper_size(paper_type_str="a4"):

  def docx2pdf(dir_docx, dir_pdf=None):
  from docx2pdf import convert
+
  if dir_pdf:
  convert(dir_docx, dir_pdf)
  else:
@@ -1199,6 +1252,7 @@ def docx2pdf(dir_docx, dir_pdf=None):

  def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=300):
  import img2pdf as image2pdf
+
  def mm_to_point(size):
  return (image2pdf.mm_to_pt(size[0]), image2pdf.mm_to_pt(size[1]))

@@ -1253,6 +1307,7 @@ def pdf2ppt(dir_pdf, dir_ppt):
  from PyPDF2 import PdfReader
  from pptx.util import Inches
  from pptx import Presentation
+
  prs = Presentation()

  # Open the PDF file
@@ -1282,6 +1337,7 @@ def pdf2ppt(dir_pdf, dir_ppt):

  def ssplit(text, by="space", verbose=False, strict=False, **kws):
  import re
+
  if isinstance(text, list):
  nested_list = [ssplit(i, by=by, verbose=verbose, **kws) for i in text]
  flat_list = [item for sublist in nested_list for item in sublist]
@@ -1331,6 +1387,7 @@ def ssplit(text, by="space", verbose=False, strict=False, **kws):
  def split_by_sent_num(text, n=10):
  from nltk.tokenize import sent_tokenize
  from itertools import pairwise
+
  # split text into sentences
  text_split_by_sent = sent_tokenize(text)
  cut_loc_array = np.arange(0, len(text_split_by_sent), n)
@@ -1404,11 +1461,13 @@ def ssplit(text, by="space", verbose=False, strict=False, **kws):
  return split_by_camel_case(text)
  elif ("word" in by) and not strict:
  from nltk.tokenize import word_tokenize
+
  if verbose:
  print(f"splited by word")
  return word_tokenize(text)
  elif ("sen" in by and not "num" in by) and not strict:
  from nltk.tokenize import sent_tokenize
+
  if verbose:
  print(f"splited by sentence")
  return sent_tokenize(text)
@@ -1459,10 +1518,12 @@ def ssplit(text, by="space", verbose=False, strict=False, **kws):

  def pdf2img(dir_pdf, dir_save=None, page=None, kind="png", verbose=True, **kws):
  from pdf2image import convert_from_path, pdfinfo_from_path
+
  df_dir_img_single_page = pd.DataFrame()
  dir_single_page = []
  if verbose:
  from pprint import pp
+
  pp(pdfinfo_from_path(dir_pdf))
  if isinstance(page, tuple) and page:
  page = list(page)
@@ -1582,6 +1643,7 @@ def unzip(dir_path, output_dir=None):
  if os.path.exists(output_dir):
  if os.path.isdir(output_dir): # check if it is a folder
  import shutil
+
  shutil.rmtree(output_dir) # remove folder
  else:
  os.remove(output_dir) # remove file
@@ -1600,6 +1662,7 @@ def unzip(dir_path, output_dir=None):
  output_file = os.path.splitext(dir_path)[0] # remove the .gz extension
  try:
  import shutil
+
  with gzip.open(dir_path, "rb") as gz_file:
  with open(output_file, "wb") as out_file:
  shutil.copyfileobj(gz_file, out_file)
@@ -1607,11 +1670,14 @@ def unzip(dir_path, output_dir=None):
  except FileNotFoundError:
  print(f"Error: The file '{dir_path}' was not found.")
  except PermissionError:
- print(f"Error: Permission denied when accessing '{dir_path}' or writing to '{output_file}'.")
+ print(
+ f"Error: Permission denied when accessing '{dir_path}' or writing to '{output_file}'."
+ )
  except Exception as e:
  try:
  import tarfile
- with tarfile.open(dir_path, 'r:gz') as tar:
+
+ with tarfile.open(dir_path, "r:gz") as tar:
  tar.extractall(path=output_file)
  except Exception as final_e:
  print(f"An final unexpected error occurred: {final_e}")
@@ -1698,9 +1764,9 @@ def is_df_abnormal(df: pd.DataFrame, verbose=False) -> bool:
  """
  if not isinstance(df, pd.DataFrame):
  if verbose:
- print('not pd.DataFrame')
+ print("not pd.DataFrame")
  return False
- df.columns = df.columns.astype(str)# 把它变成str, 这样就可以进行counts运算了
+ df.columns = df.columns.astype(str) # 把它变成str, 这样就可以进行counts运算了
  # Initialize a list to hold messages about abnormalities
  messages = []
  is_abnormal = False
@@ -1729,28 +1795,28 @@ def is_df_abnormal(df: pd.DataFrame, verbose=False) -> bool:
  if verbose:
  print(f'len(column_names) == 1 and delimiter_counts["\t"] > 1')
  if verbose:
- print("1",is_abnormal)
+ print("1", is_abnormal)
  if any(delimiter_counts[d] > 3 for d in delimiter_counts if d != ""):
  messages.append("Abnormal: Too many delimiters in column names.")
  is_abnormal = True
  if verbose:
  print(f'any(delimiter_counts[d] > 3 for d in delimiter_counts if d != "")')
  if verbose:
- print("2",is_abnormal)
+ print("2", is_abnormal)
  if delimiter_counts[""] > 3:
  messages.append("Abnormal: There are empty column names.")
  is_abnormal = True
  if verbose:
  print(f'delimiter_counts[""] > 3')
  if verbose:
- print("3",is_abnormal)
+ print("3", is_abnormal)
  if any(delimiter_counts[d] > 3 for d in ["\t", ",", "\n"]):
  messages.append("Abnormal: Some column names contain unexpected characters.")
  is_abnormal = True
  if verbose:
  print(f'any(delimiter_counts[d] > 3 for d in ["\t", ",", "\n"])')
  if verbose:
- print("4",is_abnormal)
+ print("4", is_abnormal)
  # # Check for missing values
  # missing_values = df.isnull().sum()
  # if missing_values.any():
@@ -1769,9 +1835,9 @@ def is_df_abnormal(df: pd.DataFrame, verbose=False) -> bool:
  messages.append(f"Abnormal: Columns with constant values: {constant_columns}")
  is_abnormal = True
  if verbose:
- print(f'df.columns[df.nunique() == 1].tolist()')
+ print(f"df.columns[df.nunique() == 1].tolist()")
  if verbose:
- print("5",is_abnormal)
+ print("5", is_abnormal)
  # Check for an unreasonable number of rows or columns
  if actual_shape[0] < 2 or actual_shape[1] < 2:
  messages.append(
@@ -1779,9 +1845,9 @@ def is_df_abnormal(df: pd.DataFrame, verbose=False) -> bool:
  )
  is_abnormal = True
  if verbose:
- print(f'actual_shape[0] < 2 or actual_shape[1] < 2')
+ print(f"actual_shape[0] < 2 or actual_shape[1] < 2")
  if verbose:
- print("6",is_abnormal)
+ print("6", is_abnormal)
  # Compile results
  if verbose:
  print("\n".join(messages))
@@ -1798,8 +1864,10 @@ def fload(fpath, kind=None, **kwargs):
  Returns:
  content: The content loaded from the file.
  """
+
  def read_mplstyle(style_file):
  import matplotlib.pyplot as plt
+
  # Load the style file
  plt.style.use(style_file)

@@ -1812,10 +1880,11 @@ def fload(fpath, kind=None, **kwargs):
  for i, j in style_dict.items():
  print(f"\n{i}::::{j}")
  return style_dict
+
  # #example usage:
  # style_file = "/ std-colors.mplstyle"
  # style_dict = read_mplstyle(style_file)
-
+
  def load_txt_md(fpath):
  with open(fpath, "r") as file:
  content = file.read()
@@ -1825,28 +1894,30 @@ def fload(fpath, kind=None, **kwargs):
  # with open(fpath, "r") as file:
  # content = file.read()
  # return content
- def load_html(fpath,**kwargs):
- return pd.read_html(fpath,**kwargs)
+ def load_html(fpath, **kwargs):
+ return pd.read_html(fpath, **kwargs)

  def load_json(fpath, **kwargs):
- output=kwargs.pop("output","json")
- if output=='json':
+ output = kwargs.pop("output", "json")
+ if output == "json":
  import json
+
  with open(fpath, "r") as file:
  content = json.load(file)
  return content
  else:
- return pd.read_json(fpath,**kwargs)
+ return pd.read_json(fpath, **kwargs)

  def load_yaml(fpath):
  import yaml
+
  with open(fpath, "r") as file:
  content = yaml.safe_load(file)
  return content

-
  def load_xml(fpath, fsize_thr: int = 100):
  from lxml import etree
+
  def load_small_xml(fpath):
  tree = etree.parse(fpath)
  root = tree.getroot()
@@ -1905,7 +1976,7 @@ def fload(fpath, kind=None, **kwargs):
  if line.startswith(char):
  return char
  return None
-
+
  def _get_chunks(df_fake):
  """
  helper func for 'load_csv'
@@ -1926,20 +1997,22 @@ def fload(fpath, kind=None, **kwargs):
  encoding = kwargs.pop("encoding", "utf-8")
  on_bad_lines = kwargs.pop("on_bad_lines", "skip")
  comment = kwargs.pop("comment", None)
- fmt=kwargs.pop("fmt",False)
- chunksize=kwargs.pop("chunksize", None)
- engine='c' if chunksize else engine # when chunksize, recommend 'c'
- low_memory=kwargs.pop("low_memory",True)
- low_memory=False if chunksize else True # when chunksize, recommend low_memory=False
- verbose=kwargs.pop("verbose",False)
+ fmt = kwargs.pop("fmt", False)
+ chunksize = kwargs.pop("chunksize", None)
+ engine = "c" if chunksize else engine # when chunksize, recommend 'c'
+ low_memory = kwargs.pop("low_memory", True)
+ low_memory = (
+ False if chunksize else True
+ ) # when chunksize, recommend low_memory=False
+ verbose = kwargs.pop("verbose", False)
  if run_once_within():
  use_pd("read_csv", verbose=verbose)
-
+
  if comment is None:
  comment = get_comment(
  fpath, comment=None, encoding="utf-8", lines_to_check=5
  )
-
+
  try:
  df = pd.read_csv(
  fpath,
@@ -1956,9 +2029,9 @@ def fload(fpath, kind=None, **kwargs):
  **kwargs,
  )
  if chunksize:
- df=_get_chunks(df)
+ df = _get_chunks(df)
  print(df.shape)
- if is_df_abnormal(df, verbose=0): # raise error
+ if is_df_abnormal(df, verbose=0):  # raise error
  raise ValueError("the df is abnormal")
  except:
  try:
@@ -1991,7 +2064,7 @@ def fload(fpath, kind=None, **kwargs):
  **kwargs,
  )
  if chunksize:
- df=_get_chunks(df)
+ df = _get_chunks(df)
  print(df.shape)
  if is_df_abnormal(df, verbose=0):
  raise ValueError("the df is abnormal")
@@ -2026,7 +2099,7 @@ def fload(fpath, kind=None, **kwargs):
  **kwargs,
  )
  if chunksize:
- df=_get_chunks(df)
+ df = _get_chunks(df)
  print(df.shape)
  if is_df_abnormal(df, verbose=0):
  raise ValueError("the df is abnormal")
@@ -2049,7 +2122,7 @@ def fload(fpath, kind=None, **kwargs):
  **kwargs,
  )
  if chunksize:
- df=_get_chunks(df)
+ df = _get_chunks(df)
  print(df.shape)
  if not is_df_abnormal(df, verbose=0): # normal
  display(df.head(2))
@@ -2059,7 +2132,7 @@ def fload(fpath, kind=None, **kwargs):
  pass
  else:
  if not chunksize:
- engines = [None,"c", "python"]
+ engines = [None, "c", "python"]
  for engine in engines:
  separators = [",", "\t", ";", "|", " "]
  for sep in separators:
@@ -2080,11 +2153,19 @@ def fload(fpath, kind=None, **kwargs):
  # display(df.head(2))
  # print(f"is_df_abnormal:{is_df_abnormal(df, verbose=0)}")
  if chunksize:
- df=_get_chunks(df)
+ df = _get_chunks(df)
  print(df.shape)
  if not is_df_abnormal(df, verbose=0):
- display(df.head(2)) if isinstance(df, pd.DataFrame) else display("it is not a DataFrame")
- print(f"shape: {df.shape}") if isinstance(df, pd.DataFrame) else display("it is not a DataFrame")
+ (
+ display(df.head(2))
+ if isinstance(df, pd.DataFrame)
+ else display("it is not a DataFrame")
+ )
+ (
+ print(f"shape: {df.shape}")
+ if isinstance(df, pd.DataFrame)
+ else display("it is not a DataFrame")
+ )
  return df
  except EmptyDataError as e:
  continue
@@ -2096,19 +2177,18 @@ def fload(fpath, kind=None, **kwargs):

  def load_excel(fpath, **kwargs):
  engine = kwargs.get("engine", "openpyxl")
- verbose=kwargs.pop("verbose",False)
+ verbose = kwargs.pop("verbose", False)
  if run_once_within():
  use_pd("read_excel", verbose=verbose)
  df = pd.read_excel(fpath, engine=engine, **kwargs)
  try:
- meata=pd.ExcelFile(fpath)
+ meata = pd.ExcelFile(fpath)
  print(f"n_sheet={len(meata.sheet_names)},\t'sheetname = 0 (default)':")
- [print(f"{i}:\t{i_}") for i,i_ in enumerate(meata.sheet_names)]
+ [print(f"{i}:\t{i_}") for i, i_ in enumerate(meata.sheet_names)]
  except:
  pass
  return df

-
  def load_parquet(fpath, **kwargs):
  """
  Load a Parquet file into a Pandas DataFrame with advanced options.
@@ -2124,16 +2204,16 @@ def fload(fpath, kind=None, **kwargs):
  Returns:
  - df (DataFrame): The loaded DataFrame.
  """
-
+
  engine = kwargs.get("engine", "pyarrow")
  verbose = kwargs.pop("verbose", False)
-
+
  if run_once_within():
  use_pd("read_parquet", verbose=verbose)
  try:
  df = pd.read_parquet(fpath, engine=engine, **kwargs)
  if verbose:
- if 'columns' in kwargs:
+ if "columns" in kwargs:
  print(f"Loaded columns: {kwargs['columns']}")
  else:
  print("Loaded all columns.")
@@ -2142,11 +2222,12 @@ def fload(fpath, kind=None, **kwargs):
  print(f"An error occurred while loading the Parquet file: {e}")
  df = None

- return df
+ return df

  def load_ipynb(fpath, **kwargs):
  import nbformat
  from nbconvert import MarkdownExporter
+
  as_version = kwargs.get("as_version", 4)
  with open(fpath, "r") as file:
  nb = nbformat.read(file, as_version=as_version)
@@ -2177,6 +2258,7 @@ def fload(fpath, kind=None, **kwargs):
  If the specified page is not found, it returns the string "Page is not found".
  """
  from PyPDF2 import PdfReader
+
  text_dict = {}
  with open(fpath, "rb") as file:
  pdf_reader = PdfReader(file)
@@ -2207,6 +2289,7 @@ def fload(fpath, kind=None, **kwargs):

  def load_docx(fpath):
  from docx import Document
+
  doc = Document(fpath)
  content = [para.text for para in doc.paragraphs]
  return content
@@ -2216,21 +2299,55 @@ def fload(fpath, kind=None, **kwargs):
  kind = kind.lower()
  kind = kind.lstrip(".").lower()
  img_types = [
- "bmp","eps","gif","png","jpg","jpeg","jpeg2000","tiff","tif",
- "icns","ico","im","msp","pcx","ppm","sgi","spider","tga","webp",
+ "bmp",
+ "eps",
+ "gif",
+ "png",
+ "jpg",
+ "jpeg",
+ "jpeg2000",
+ "tiff",
+ "tif",
+ "icns",
+ "ico",
+ "im",
+ "msp",
+ "pcx",
+ "ppm",
+ "sgi",
+ "spider",
+ "tga",
+ "webp",
  ]
  doc_types = [
- "docx","pdf",
- "txt","csv","xlsx","tsv","parquet","snappy",
- "md","html",
- "json","yaml","xml",
+ "docx",
+ "pdf",
+ "txt",
+ "csv",
+ "xlsx",
+ "tsv",
+ "parquet",
+ "snappy",
+ "md",
+ "html",
+ "json",
+ "yaml",
+ "xml",
  "ipynb",
- "mtx"
+ "mtx",
  ]
  zip_types = [
- "gz","zip","7z","rar","tgz",
- "tar","tar.gz","tar.bz2",
- "bz2","xz","gzip"
+ "gz",
+ "zip",
+ "7z",
+ "rar",
+ "tgz",
+ "tar",
+ "tar.gz",
+ "tar.bz2",
+ "bz2",
+ "xz",
+ "gzip",
  ]
  other_types = ["fcs"]
  supported_types = [*doc_types, *img_types, *zip_types, *other_types]
@@ -2266,17 +2383,17 @@ def fload(fpath, kind=None, **kwargs):
  return load_yaml(fpath)
  elif kind == "xml":
  return load_xml(fpath)
- elif kind in ["csv","tsv"]:
- verbose=kwargs.pop('verbose',False)
+ elif kind in ["csv", "tsv"]:
+ verbose = kwargs.pop("verbose", False)
  if run_once_within():
  use_pd("read_csv")
  content = load_csv(fpath, **kwargs)
  return content
- elif kind=='pkl':
- verbose=kwargs.pop('verbose',False)
+ elif kind == "pkl":
+ verbose = kwargs.pop("verbose", False)
  if run_once_within():
  use_pd("read_pickle")
- return pd.read_pickle(fpath,**kwargs)
+ return pd.read_pickle(fpath, **kwargs)
  elif kind in ["ods", "ods", "odt"]:
  engine = kwargs.get("engine", "odf")
  kwargs.pop("engine", None)
@@ -2286,38 +2403,39 @@ def fload(fpath, kind=None, **kwargs):
  kwargs.pop("engine", None)
  content = load_excel(fpath, engine=engine, **kwargs)
  print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) else None
- display(content.head(3)) if isinstance(content, pd.DataFrame) else None
+ display(content.head(3)) if isinstance(content, pd.DataFrame) else None
  return content
  elif kind == "xlsx":
  content = load_excel(fpath, **kwargs)
  display(content.head(3)) if isinstance(content, pd.DataFrame) else None
  print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) else None
  return content
- elif kind=='mtx':
+ elif kind == "mtx":
  from scipy.io import mmread
- dat_mtx=mmread(fpath)
- content=pd.DataFrame.sparse.from_spmatrix(dat_mtx,**kwargs)
+
+ dat_mtx = mmread(fpath)
+ content = pd.DataFrame.sparse.from_spmatrix(dat_mtx, **kwargs)
  display(content.head(3)) if isinstance(content, pd.DataFrame) else None
  print(f"shape: {content.shape}")
  return content
  elif kind == "ipynb":
  return load_ipynb(fpath, **kwargs)
- elif kind in ['parquet','snappy']:
- verbose=kwargs.pop('verbose',False)
+ elif kind in ["parquet", "snappy"]:
+ verbose = kwargs.pop("verbose", False)
  if run_once_within():
  use_pd("read_parquet")
- return load_parquet(fpath,**kwargs)
- elif kind =='feather':
- verbose=kwargs.pop('verbose',False)
+ return load_parquet(fpath, **kwargs)
+ elif kind == "feather":
+ verbose = kwargs.pop("verbose", False)
  if run_once_within():
  use_pd("read_feather")
- content=pd.read_feather(fpath,**kwargs)
+ content = pd.read_feather(fpath, **kwargs)
  return content
- elif kind =='h5':
- content=pd.read_hdf(fpath,**kwargs)
+ elif kind == "h5":
+ content = pd.read_hdf(fpath, **kwargs)
  return content
- elif kind =='pkl':
- content=pd.read_pickle(fpath,**kwargs)
+ elif kind == "pkl":
+ content = pd.read_pickle(fpath, **kwargs)
  return content
  elif kind == "pdf":
  # print('usage:load_pdf(fpath, page="all", verbose=False)')
@@ -2325,11 +2443,13 @@ def fload(fpath, kind=None, **kwargs):
  elif kind.lower() in img_types:
  print(f'Image ".{kind}" is loaded.')
  return load_img(fpath)
- elif kind=="gz" and fpath.endswith(".soft.gz"):
+ elif kind == "gz" and fpath.endswith(".soft.gz"):
  import GEOparse
+
  return GEOparse.get_GEO(filepath=fpath)
  elif kind.lower() in zip_types:
  from pprint import pp
+
  keep = kwargs.get("keep", False)
  fpath_unzip = unzip(fpath)
  if os.path.isdir(fpath_unzip):
@@ -2364,7 +2484,7 @@ def fload(fpath, kind=None, **kwargs):
  meta, data = fcsparser.parse(fpath, reformat_meta=True)
  return meta, data

- elif kind=="mplstyle":
+ elif kind == "mplstyle":
  return read_mplstyle(fpath)

  else:
@@ -2408,7 +2528,7 @@ def fupdate(fpath, content=None, how="head"):
  """
  Update a file by adding new content at the top and moving the old content to the bottom.
  If the file is a JSON file, merge the new content with the old content.
-
+
  Parameters
  ----------
  fpath : str
@@ -2416,7 +2536,7 @@ def fupdate(fpath, content=None, how="head"):
  content : str or dict, optional
  The new content to add at the top of the file (for text) or merge (for JSON).
  If not provided, the function will not add any new content.
-
+
  Notes
  -----
  - If the file at `fpath` does not exist, it will be created.
@@ -2425,14 +2545,20 @@ def fupdate(fpath, content=None, how="head"):
  """
  content = content or ""
  file_ext = os.path.splitext(fpath)[1]
- how_s=["head", "tail","start","end","beginning", "stop",'last',"before"]
+ how_s = ["head", "tail", "start", "end", "beginning", "stop", "last", "before"]
  how = strcmp(how, how_s)[0]
  print(how)
- add_where = 'head' if how in ["head", "start","beginning", "before"] else "tail"
+ add_where = "head" if how in ["head", "start", "beginning", "before"] else "tail"
  if "json" in file_ext.lower():
- old_content=fload(fpath,kind='json') if os.path.exists(fpath) else {}
- updated_content = {**content,**old_content} if add_where=="head" else {**old_content, **content} if isinstance(content, dict) else old_content
- fsave(fpath,updated_content)
+ old_content = fload(fpath, kind="json") if os.path.exists(fpath) else {}
+ updated_content = (
+ {**content, **old_content}
+ if add_where == "head"
+ else (
+ {**old_content, **content} if isinstance(content, dict) else old_content
+ )
+ )
+ fsave(fpath, updated_content)
  else:
  # Handle text file
  if os.path.exists(fpath):
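
Illustration (editorial sketch, not part of the released diff): in the reformatted JSON branch of `fupdate` above, the dict-unpacking order decides which side wins when keys collide. A minimal sketch with hypothetical data:

    old_content = {"a": 1, "b": 2}      # content already on disk
    content = {"b": 99, "c": 3}         # new content passed to fupdate

    # add_where == "head": existing values take precedence on conflicts
    head = {**content, **old_content}   # {"b": 2, "c": 3, "a": 1}
    # otherwise: the new content overwrites existing keys
    tail = {**old_content, **content}   # {"a": 1, "b": 99, "c": 3}
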
@@ -2443,7 +2569,7 @@ def fupdate(fpath, content=None, how="head"):

  # Write new content at the top followed by old content
  with open(fpath, "w") as file:
- if add_where=="head":
+ if add_where == "head":
  file.write(content + "\n")
  file.write(old_content)
  else:
@@ -2478,7 +2604,9 @@ def filter_kwargs(kws, valid_kwargs):
  }
  return kwargs_filtered

- str_space_speed='sapce cmp:parquet(0.56GB)<feather(1.14GB)<csv(6.55GB)<pkl=h5("26.09GB")\nsaving time: pkl=feather("13s")<parquet("35s")<h5("2m31s")<csv("58m")\nloading time: pkl("6.9s")<parquet("16.1s")=feather("15s")<h5("2m 53s")<csv(">>>30m")'
+
+ str_space_speed = 'sapce cmp:parquet(0.56GB)<feather(1.14GB)<csv(6.55GB)<pkl=h5("26.09GB")\nsaving time: pkl=feather("13s")<parquet("35s")<h5("2m31s")<csv("58m")\nloading time: pkl("6.9s")<parquet("16.1s")=feather("15s")<h5("2m 53s")<csv(">>>30m")'
+

  def fsave(
  fpath,
@@ -2515,6 +2643,7 @@ def fsave(

  def save_docx(fpath, content, font_name, font_size, spacing):
  import docx
+
  if isinstance(content, str):
  content = content.split(". ")
  doc = docx.Document()
@@ -2543,6 +2672,7 @@ def fsave(

  def save_pdf(fpath, content, font_name, font_size):
  from fpdf import FPDF
+
  pdf = FPDF()
  pdf.add_page()
  # pdf.add_font('Arial','',r'/System/Library/Fonts/Supplemental/Arial.ttf',uni=True)
@@ -2555,7 +2685,7 @@ def fsave(
  def save_csv(fpath, data, **kwargs):
  # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html

- verbose=kwargs.pop("verbose",False)
+ verbose = kwargs.pop("verbose", False)
  if run_once_within():
  use_pd("to_csv", verbose=verbose)
  kwargs_csv = dict(
@@ -2586,7 +2716,7 @@ def fsave(
  df.to_csv(fpath, **kwargs_valid)

  def save_xlsx(fpath, data, **kwargs):
- verbose=kwargs.pop("verbose",False)
+ verbose = kwargs.pop("verbose", False)
  sheet_name = kwargs.pop("sheet_name", "Sheet1")
  if run_once_within():
  use_pd("to_excel", verbose=verbose)
@@ -2595,9 +2725,21 @@ def fsave(
  else:
  # Remove non-relevant kwargs
  irrelevant_keys = [
- "format", "usage", "cell", "width", "height", "height_max", "merge",
- "shade", "comment", "link", "protect", "number_format", "conditional_format",
- "index_default"]
+ "format",
+ "usage",
+ "cell",
+ "width",
+ "height",
+ "height_max",
+ "merge",
+ "shade",
+ "comment",
+ "link",
+ "protect",
+ "number_format",
+ "conditional_format",
+ "index_default",
+ ]
  for key in irrelevant_keys:
  kwargs.pop(key, None)

@@ -2605,19 +2747,21 @@ def fsave(
  # Check if the file exists, then append the sheet, otherwise create a new file
  try:
  # Use ExcelWriter with append mode if the file exists
- with pd.ExcelWriter(fpath, engine='openpyxl', mode='a', if_sheet_exists='new') as writer:
+ with pd.ExcelWriter(
+ fpath, engine="openpyxl", mode="a", if_sheet_exists="new"
+ ) as writer:
  df.to_excel(writer, sheet_name=sheet_name, index=False, **kwargs)
  except FileNotFoundError:
  # If file doesn't exist, create a new one
  df.to_excel(fpath, sheet_name=sheet_name, index=False, **kwargs)

-
  def save_ipynb(fpath, data, **kwargs):
  # Split the content by code fences to distinguish between code and markdown
  import nbformat
+
  parts = data.split("```")
  cells = []
-
+
  for i, part in enumerate(parts):
  if i % 2 == 0:
  # Even index: markdown content
@@ -2638,17 +2782,18 @@ def fsave(

  def save_json(fpath_fname, var_dict_or_df):
  import json
+
  def _convert_js(data):
  if isinstance(data, pd.DataFrame):
- return data.to_dict(orient="list")
+ return data.to_dict(orient="list")
  elif isinstance(data, np.ndarray):
  return data.tolist()
  elif isinstance(data, dict):
  return {key: _convert_js(value) for key, value in data.items()}
- return data
+ return data

  serializable_data = _convert_js(var_dict_or_df)
-
+
  # Save the serializable data to the JSON file
  with open(fpath_fname, "w") as f_json:
  json.dump(serializable_data, f_json, indent=4)
@@ -2660,11 +2805,13 @@ def fsave(

  def save_yaml(fpath, data, **kwargs):
  import yaml
+
  with open(fpath, "w") as file:
  yaml.dump(data, file, **kwargs)

  def save_xml(fpath, data):
  from lxml import etree
+
  root = etree.Element("root")
  if isinstance(data, dict):
  for key, val in data.items():
@@ -2675,24 +2822,37 @@ def fsave(
  tree = etree.ElementTree(root)
  tree.write(fpath, pretty_print=True, xml_declaration=True, encoding="UTF-8")

- def save_parquet(fpath:str, data:pd.DataFrame, **kwargs):
- engine = kwargs.pop("engine","auto") # auto先试pyarrow, 不行就转为fastparquet, {‘auto’, ‘pyarrow’, ‘fastparquet’}
- compression=kwargs.pop("compression",None) # Use None for no compression. Supported options: ‘snappy’, ‘gzip’, ‘brotli’, ‘lz4’, ‘zstd’
+ def save_parquet(fpath: str, data: pd.DataFrame, **kwargs):
+ engine = kwargs.pop(
+ "engine", "auto"
+ ) # auto先试pyarrow, 不行就转为fastparquet, {‘auto’, ‘pyarrow’, ‘fastparquet’}
+ compression = kwargs.pop(
+ "compression", None
+ ) # Use None for no compression. Supported options: ‘snappy’, ‘gzip’, ‘brotli’, ‘lz4’, ‘zstd’
  try:
  # Attempt to save with "pyarrow" if engine is set to "auto"
- data.to_parquet(fpath, engine=engine, compression=compression, **kwargs)
- print(f"DataFrame successfully saved to {fpath} with engine '{engine}' and {compression} compression.")
+ data.to_parquet(fpath, engine=engine, compression=compression, **kwargs)
+ print(
+ f"DataFrame successfully saved to {fpath} with engine '{engine}' and {compression} compression."
+ )
  except Exception as e:
- print(f"Error using with engine '{engine}' and {compression} compression: {e}")
+ print(
+ f"Error using with engine '{engine}' and {compression} compression: {e}"
+ )
  if "Sparse" in str(e):
  try:
  # Handle sparse data by converting columns to dense
  print("Attempting to convert sparse columns to dense format...")
- data = data.apply(lambda x: x.sparse.to_dense() if pd.api.types.is_sparse(x) else x)
- save_parquet(fpath, data=data,**kwargs)
+ data = data.apply(
+ lambda x: (
+ x.sparse.to_dense() if pd.api.types.is_sparse(x) else x
+ )
+ )
+ save_parquet(fpath, data=data, **kwargs)
  except Exception as last_e:
- print(f"After converted sparse columns to dense format, Error using with engine '{engine}' and {compression} compression: {last_e}")
-
+ print(
+ f"After converted sparse columns to dense format, Error using with engine '{engine}' and {compression} compression: {last_e}"
+ )

  if kind is None:
  _, kind = os.path.splitext(fpath)
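
Illustration (editorial sketch, not part of the released diff): the `save_parquet` helper above falls back to densifying sparse columns and retrying when `DataFrame.to_parquet` rejects sparse dtypes. A standalone sketch of that fallback, using a hypothetical DataFrame and output path:

    import pandas as pd

    df = pd.DataFrame({"x": pd.arrays.SparseArray([0, 0, 1])})
    try:
        df.to_parquet("out.parquet", engine="auto", compression=None)
    except Exception as e:
        if "Sparse" in str(e):
            # convert sparse columns to dense, then retry once
            df = df.apply(
                lambda c: c.sparse.to_dense() if isinstance(c.dtype, pd.SparseDtype) else c
            )
            df.to_parquet("out.parquet", engine="auto", compression=None)
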
@@ -2739,92 +2899,95 @@ def fsave(
  save_yaml(fpath, content, **kwargs)
  elif kind == "ipynb":
  save_ipynb(fpath, content, **kwargs)
- elif kind.lower() in ["parquet","pq","big","par"]:
- verbose=kwargs.pop('verbose',False)
+ elif kind.lower() in ["parquet", "pq", "big", "par"]:
+ verbose = kwargs.pop("verbose", False)
  if verbose:
  print(str_space_speed)
  use_pd("to_parquet")
  return None
- compression=kwargs.pop("compression",None) # Use None for no compression. Supported options: ‘snappy’, ‘gzip’, ‘brotli’, ‘lz4’, ‘zstd’
+ compression = kwargs.pop(
+ "compression", None
+ ) # Use None for no compression. Supported options: ‘snappy’, ‘gzip’, ‘brotli’, ‘lz4’, ‘zstd’
  # fix the fpath ends
  _fpath, _ext = os.path.splitext(fpath)
- fpath = _fpath+_ext.replace(kind, 'parquet')
+ fpath = _fpath + _ext.replace(kind, "parquet")
  if compression is not None:
  if not fpath.endswith(compression):
- fpath=fpath+f".{compression}"
- save_parquet(fpath=fpath, data=content,compression=compression,**kwargs)
- elif kind.lower() in ["pkl","pk","pickle","pick"]:
- # Pickle: Although not as efficient in terms of I/O speed and storage as Parquet or Feather,
- # Pickle is convenient if you want to preserve exact Python object types.
- verbose=kwargs.pop('verbose',False)
+ fpath = fpath + f".{compression}"
+ save_parquet(fpath=fpath, data=content, compression=compression, **kwargs)
+ elif kind.lower() in ["pkl", "pk", "pickle", "pick"]:
+ # Pickle: Although not as efficient in terms of I/O speed and storage as Parquet or Feather,
+ # Pickle is convenient if you want to preserve exact Python object types.
+ verbose = kwargs.pop("verbose", False)
  if verbose:
  print(str_space_speed)
  use_pd("to_pickle")
  return None
  _fpath, _ext = os.path.splitext(fpath)
- fpath = _fpath+_ext.replace(kind, 'pkl')
- compression=kwargs.pop("compression",None)
+ fpath = _fpath + _ext.replace(kind, "pkl")
+ compression = kwargs.pop("compression", None)
  if compression is not None:
  if not fpath.endswith(compression["method"]):
- fpath=fpath+f".{compression["method"]}"
+ fpath = fpath + f".{compression['method']}"
  if isinstance(content, pd.DataFrame):
- content.to_pickle(fpath,**kwargs)
+ content.to_pickle(fpath, **kwargs)
  else:
  try:
  print("trying to convert it as a DataFrame...")
- content=pd.DataFrame(content)
- content.to_pickle(fpath,**kwargs)
+ content = pd.DataFrame(content)
+ content.to_pickle(fpath, **kwargs)
  except Exception as e:
  raise ValueError(
- f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
- )
- elif kind.lower() in ["fea",'feather','ft','fe','feat','fether']:
- # Feather: The Feather format, based on Apache Arrow, is designed for fast I/O operations. It's
- # optimized for data analytics tasks and is especially fast when working with Pandas.
-
- verbose=kwargs.pop('verbose',False)
+ f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
+ )
+ elif kind.lower() in ["fea", "feather", "ft", "fe", "feat", "fether"]:
+ # Feather: The Feather format, based on Apache Arrow, is designed for fast I/O operations. It's
+ # optimized for data analytics tasks and is especially fast when working with Pandas.
+
+ verbose = kwargs.pop("verbose", False)
  if verbose:
  print(str_space_speed)
  use_pd("to_feather")
  return None
  _fpath, _ext = os.path.splitext(fpath)
- fpath = _fpath+_ext.replace(kind, 'feather')
+ fpath = _fpath + _ext.replace(kind, "feather")
  if isinstance(content, pd.DataFrame):
- content.to_feather(fpath,**kwargs)
+ content.to_feather(fpath, **kwargs)
  else:
  try:
  print("trying to convert it as a DataFrame...")
- content=pd.DataFrame(content)
+ content = pd.DataFrame(content)
  content.to_feather(fpath, **kwargs)
  except Exception as e:
  raise ValueError(
- f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
- )
- elif kind.lower() in ["hd",'hdf','h','h5']:
+ f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
+ )
+ elif kind.lower() in ["hd", "hdf", "h", "h5"]:
  # particularly useful for large datasets and can handle complex data structures
- verbose=kwargs.pop('verbose',False)
+ verbose = kwargs.pop("verbose", False)
  if verbose:
  print(str_space_speed)
  use_pd("to_hdf")
  _fpath, _ext = os.path.splitext(fpath)
- fpath = _fpath+_ext.replace(kind, 'h5')
- compression=kwargs.pop("compression",None)
+ fpath = _fpath + _ext.replace(kind, "h5")
+ compression = kwargs.pop("compression", None)
  if compression is not None:
  if not fpath.endswith(compression):
- fpath=fpath+f".{compression}"
+ fpath = fpath + f".{compression}"
  if isinstance(content, pd.DataFrame):
- content.to_hdf(fpath,key='content',**kwargs)
+ content.to_hdf(fpath, key="content", **kwargs)
  else:
  try:
  print("trying to convert it as a DataFrame...")
- content=pd.DataFrame(content)
- content.to_hdf(fpath,**kwargs)
+ content = pd.DataFrame(content)
+ content.to_hdf(fpath, **kwargs)
  except Exception as e:
  raise ValueError(
- f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
- )
+ f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
+ )
  else:
  from . import netfinder
+
  try:
  netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
  except:
@@ -2948,6 +3111,7 @@ def isa(content, kind):
  return is_str_color(content)
  elif "html" in kind.lower():
  import re
+
  if content is None or not isinstance(content, str):
  return False
  # Remove leading and trailing whitespace
@@ -2997,8 +3161,8 @@ def listdir(
  verbose=True,
  ):
  if kind is None:
- ls=os.listdir(rootdir)
- ls = [f for f in ls if not f.startswith('.') and not f.startswith('~')]
+ ls = os.listdir(rootdir)
+ ls = [f for f in ls if not f.startswith(".") and not f.startswith("~")]
  print(ls)
  df_all = pd.DataFrame(
  {
@@ -3029,7 +3193,7 @@ def listdir(

  if os.path.isdir(rootdir):
  ls = os.listdir(rootdir)
- ls = [f for f in ls if not f.startswith('.') and not f.startswith('~')]
+ ls = [f for f in ls if not f.startswith(".") and not f.startswith("~")]
  fd = [".fd", ".fld", ".fol", ".fd", ".folder"]
  i = 0
  f = {
@@ -3108,6 +3272,7 @@ def listdir(
  return f
  else:
  from box import Box
+
  if "l" in orient.lower(): # list # default
  res_output = Box(f.to_dict(orient="list"))
  return res_output
@@ -3151,7 +3316,7 @@ def mkdir_nest(fpath: str) -> str:
  # Split the full path into directories
  f_slash = "/" if "mac" in get_os().lower() else "\\"
  if os.path.isdir(fpath):
- fpath =fpath+f_slash if not fpath.endswith(f_slash) else fpath
+ fpath = fpath + f_slash if not fpath.endswith(f_slash) else fpath
  return fpath
  dir_parts = fpath.split(f_slash) # Split the path by the OS-specific separator

@@ -3181,27 +3346,27 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
  - str: The path of the created directory or an error message.
  """

- rootdir = []
+ rootdir = []
  if chdir is None:
  return mkdir_nest(pardir)
  if isinstance(chdir, str):
- chdir = [chdir]
+ chdir = [chdir]
  chdir = list(set(chdir))
  if isinstance(pardir, str): # Dir_parents should be 'str' type
- pardir = os.path.normpath(pardir)
+ pardir = os.path.normpath(pardir)
  if "mac" in get_os().lower() or "lin" in get_os().lower():
  stype = "/"
  elif "win" in get_os().lower():
  stype = "\\"
  else:
  stype = "/"
-
+
  if os.path.isdir(pardir):
  os.chdir(pardir) # Set current path
  # Check if subdirectories are not empty
  if chdir:
- chdir.sort()
- for folder in chdir:
+ chdir.sort()
+ for folder in chdir:
  child_tmp = os.path.join(pardir, folder)
  if not os.path.isdir(child_tmp):
  os.mkdir("./" + folder)
@@ -3221,7 +3386,7 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
  # Dir is the main output, if only one dir, then str type is inconvenient
  if len(rootdir) == 1:
  rootdir = rootdir[0]
- rootdir=rootdir+stype if not rootdir.endswith(stype) else rootdir
+ rootdir = rootdir + stype if not rootdir.endswith(stype) else rootdir

  return rootdir

@@ -3236,6 +3401,7 @@ def split_path(fpath):
3236
3401
  def figsave(*args, dpi=300):
3237
3402
  import matplotlib.pyplot as plt
3238
3403
  from PIL import Image
3404
+
3239
3405
  dir_save = None
3240
3406
  fname = None
3241
3407
  img = None
@@ -3250,7 +3416,7 @@ def figsave(*args, dpi=300):
3250
3416
  img = arg # Store the PIL image if provided
3251
3417
 
3252
3418
  if dir_save is None:
3253
- dir_save="./"
3419
+ dir_save = "./"
3254
3420
 
3255
3421
  # dir_save=dir_save+f_slash if not dir_save.endswith(f_slash) else dir_save
3256
3422
  dir_par = f_slash.join(dir_save.split(f_slash)[:-1])
@@ -3343,8 +3509,9 @@ def figsave(*args, dpi=300):
3343
3509
 
3344
3510
  def is_str_color(s):
3345
3511
  # Regular expression pattern for hexadecimal color codes
3346
- if isinstance(s,str):
3512
+ if isinstance(s, str):
3347
3513
  import re
3514
+
3348
3515
  color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
3349
3516
  return re.match(color_code_pattern, s) is not None
3350
3517
  else:
@@ -3372,6 +3539,7 @@ def isnum(s):
3372
3539
 
3373
3540
  def is_image(fpath):
3374
3541
  import mimetypes
3542
+
3375
3543
  mime_type, _ = mimetypes.guess_type(fpath)
3376
3544
  if mime_type and mime_type.startswith("image"):
3377
3545
  return True
@@ -3381,6 +3549,7 @@ def is_image(fpath):
3381
3549
 
3382
3550
  def is_document(fpath):
3383
3551
  import mimetypes
3552
+
3384
3553
  mime_type, _ = mimetypes.guess_type(fpath)
3385
3554
  if mime_type and (
3386
3555
  mime_type.startswith("text/")
@@ -3402,6 +3571,7 @@ def is_document(fpath):
3402
3571
 
3403
3572
  def is_zip(fpath):
3404
3573
  import mimetypes
3574
+
3405
3575
  mime_type, _ = mimetypes.guess_type(fpath)
3406
3576
  if mime_type == "application/zip":
3407
3577
  return True
@@ -3411,6 +3581,7 @@ def is_zip(fpath):
3411
3581
 
3412
3582
  def adjust_spines(ax=None, spines=["left", "bottom"], distance=2):
3413
3583
  import matplotlib.pyplot as plt
3584
+
3414
3585
  if ax is None:
3415
3586
  ax = plt.gca()
3416
3587
  for loc, spine in ax.spines.items():
@@ -3500,6 +3671,7 @@ def apply_filter(img, *args):
3500
3671
  PIL.Image: The filtered image.
3501
3672
  """
3502
3673
  from PIL import ImageFilter
3674
+
3503
3675
  def correct_filter_name(filter_name):
3504
3676
  if "bl" in filter_name.lower() and "box" not in filter_name.lower():
3505
3677
  return "BLUR"
@@ -3742,7 +3914,8 @@ def imgsets(img, **kwargs):
3742
3914
  return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
3743
3915
 
3744
3916
  import matplotlib.pyplot as plt
3745
- from PIL import ImageEnhance,ImageOps
3917
+ from PIL import ImageEnhance, ImageOps
3918
+
3746
3919
  # Load image if input is a file path
3747
3920
  if isinstance(img, str):
3748
3921
  img = load_img(img)
@@ -3807,6 +3980,7 @@ def imgsets(img, **kwargs):
3807
3980
  img_update = ImageOps.pad(img_update, size=value)
3808
3981
  elif "rem" in k.lower() or "rm" in k.lower() or "back" in k.lower():
3809
3982
  from rembg import remove, new_session
3983
+
3810
3984
  if isinstance(value, bool):
3811
3985
  session = new_session("isnet-general-use")
3812
3986
  img_update = remove(img_update, session=session)
@@ -3846,6 +4020,7 @@ def imgsets(img, **kwargs):
3846
4020
  img_update = remove(img_update)
3847
4021
  elif "bg" in k.lower() and "color" in k.lower():
3848
4022
  from rembg import remove
4023
+
3849
4024
  if isinstance(value, list):
3850
4025
  value = tuple(value)
3851
4026
  if isinstance(value, tuple): # replace the background color
@@ -3879,6 +4054,7 @@ def thumbnail(dir_img_list, figsize=(10, 10), dpi=100, dir_save=None, kind=".png
3879
4054
  """
3880
4055
  import matplotlib.pyplot as plt
3881
4056
  from PIL import Image
4057
+
3882
4058
  num_images = len(dir_img_list)
3883
4059
  if not kind.startswith("."):
3884
4060
  kind = "." + kind
@@ -3917,12 +4093,11 @@ def thumbnail(dir_img_list, figsize=(10, 10), dpi=100, dir_save=None, kind=".png
3917
4093
  # thumbnail(listdir(fpath,'png').fpath.to_list(),dir_save=dirname(fpath))
3918
4094
 
3919
4095
 
3920
-
3921
4096
  # search and find the directory of the library installed locally
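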
3922
4097
  def dir_lib(lib_oi):
3923
4098
  """
3924
4099
  # example usage:
3925
- # dir_lib("seaborn")
4100
+ # dir_lib("seaborn")
3926
4101
  """
3927
4102
  import site
3928
4103
 
@@ -3941,6 +4116,7 @@ def dir_lib(lib_oi):
3941
4116
  print(f"Cannot find the {lib_oi} in site-packages directory.")
3942
4117
  return dir_list
3943
4118
 
4119
+
3944
4120
  class FileInfo:
3945
4121
  def __init__(
3946
4122
  self,
@@ -4018,6 +4194,7 @@ class FileInfo:
4018
4194
 
4019
4195
  def finfo(fpath):
4020
4196
  import time
4197
+
4021
4198
  fname, fmt = os.path.splitext(fpath)
4022
4199
  dir_par = os.path.dirname(fpath) + "/"
4023
4200
  data = {
@@ -4033,6 +4210,7 @@ def finfo(fpath):
4033
4210
  extra_info = {}
4034
4211
  if data["kind"] == ".pdf":
4035
4212
  from pdf2image import pdfinfo_from_path
4213
+
4036
4214
  extra_info = pdfinfo_from_path(fpath)
4037
4215
 
4038
4216
  return FileInfo(
@@ -4047,6 +4225,7 @@ def finfo(fpath):
4047
4225
  extra_info=extra_info,
4048
4226
  )
4049
4227
 
4228
+
4050
4229
  # ! format excel file
4051
4230
  def hex2argb(hex_color):
4052
4231
  """
@@ -4078,7 +4257,10 @@ def hex2argb(hex_color):
4078
4257
  return hex_color[-9:]
4079
4258
  else:
4080
4259
  return "F" * (9 - len(hex_color)) + hex_color
4081
- raise ValueError("Invalid hex color format. Use RRGGBB, #RRGGBB, or aARRGGBB format.")
4260
+ raise ValueError(
4261
+ "Invalid hex color format. Use RRGGBB, #RRGGBB, or aARRGGBB format."
4262
+ )
4263
+
4082
4264
 
4083
4265
  def format_excel(
4084
4266
  df=None,
@@ -4137,7 +4319,15 @@ def format_excel(
4137
4319
  font_bold = False
4138
4320
  font_strike = False
4139
4321
  font_italic = False
4140
- kws_font = ["name","size","bold","underline","color","strike","italic"]
4322
+ kws_font = [
4323
+ "name",
4324
+ "size",
4325
+ "bold",
4326
+ "underline",
4327
+ "color",
4328
+ "strike",
4329
+ "italic",
4330
+ ]
4141
4331
  for k_, v_ in cell.get(K, {}).items():
4142
4332
  if strcmp(k_, kws_font)[0] == "name":
4143
4333
  font_name = v_
@@ -4167,9 +4357,31 @@ def format_excel(
4167
4357
  if strcmp(K, kws_cell)[0] == "fill":
4168
4358
  #! fill
4169
4359
  kws_fill = ["start_color", "end_color", "fill_type", "color"]
4170
- kws_fill_type = ["darkVertical","lightDown","lightGrid","solid","darkDown","lightGray","lightUp","gray0625","lightVertical","lightHorizontal",
4171
- "darkHorizontal","gray125","darkUp","mediumGray","darkTrellis","darkGray","lightTrellis","darkGrid"]
4172
- start_color, end_color, fill_type = "FFFFFF", "FFFFFF", "solid" # default
4360
+ kws_fill_type = [
4361
+ "darkVertical",
4362
+ "lightDown",
4363
+ "lightGrid",
4364
+ "solid",
4365
+ "darkDown",
4366
+ "lightGray",
4367
+ "lightUp",
4368
+ "gray0625",
4369
+ "lightVertical",
4370
+ "lightHorizontal",
4371
+ "darkHorizontal",
4372
+ "gray125",
4373
+ "darkUp",
4374
+ "mediumGray",
4375
+ "darkTrellis",
4376
+ "darkGray",
4377
+ "lightTrellis",
4378
+ "darkGrid",
4379
+ ]
4380
+ start_color, end_color, fill_type = (
4381
+ "FFFFFF",
4382
+ "FFFFFF",
4383
+ "solid",
4384
+ ) # default
4173
4385
  for k, v in cell.get(K, {}).items():
4174
4386
  if strcmp(k, kws_fill)[0] == "color":
4175
4387
  start_color, end_color = hex2argb(v), hex2argb(v)
@@ -4241,27 +4453,78 @@ def format_excel(
4241
4453
 
4242
4454
  if strcmp(K, kws_cell)[0] == "border":
4243
4455
  #! border
4244
- kws_border = ["color_left","color_l","color_right","color_r","color_top","color_t","color_bottom","color_b",
4245
- "color_diagonal","color_d","color_outline","color_o","color_vertical","color_v","color_horizontal",
4246
- "color_h","color","style_left","style_l","style_right","style_r","style_top","style_t","style_bottom","style_b",
4247
- "style_diagonal","style_d","style_outline","style_o","style_vertical","style_v","style_horizontal",
4248
- "style_h","style"]
4456
+ kws_border = [
4457
+ "color_left",
4458
+ "color_l",
4459
+ "color_right",
4460
+ "color_r",
4461
+ "color_top",
4462
+ "color_t",
4463
+ "color_bottom",
4464
+ "color_b",
4465
+ "color_diagonal",
4466
+ "color_d",
4467
+ "color_outline",
4468
+ "color_o",
4469
+ "color_vertical",
4470
+ "color_v",
4471
+ "color_horizontal",
4472
+ "color_h",
4473
+ "color",
4474
+ "style_left",
4475
+ "style_l",
4476
+ "style_right",
4477
+ "style_r",
4478
+ "style_top",
4479
+ "style_t",
4480
+ "style_bottom",
4481
+ "style_b",
4482
+ "style_diagonal",
4483
+ "style_d",
4484
+ "style_outline",
4485
+ "style_o",
4486
+ "style_vertical",
4487
+ "style_v",
4488
+ "style_horizontal",
4489
+ "style_h",
4490
+ "style",
4491
+ ]
4249
4492
  # * border color
4250
- border_color_l, border_color_r, border_color_t, border_color_b = ("FF000000","FF000000","FF000000","FF000000")
4251
- border_color_d, border_color_o, border_color_v, border_color_h = ("FF000000","FF000000","FF000000","FF000000")
4493
+ border_color_l, border_color_r, border_color_t, border_color_b = (
4494
+ "FF000000",
4495
+ "FF000000",
4496
+ "FF000000",
4497
+ "FF000000",
4498
+ )
4499
+ border_color_d, border_color_o, border_color_v, border_color_h = (
4500
+ "FF000000",
4501
+ "FF000000",
4502
+ "FF000000",
4503
+ "FF000000",
4504
+ )
4252
4505
  # get colors config
4253
4506
  for k, v in cell.get(K, {}).items():
4254
4507
  if strcmp(k, kws_border)[0] in ["color"]:
4255
4508
  border_color_all = hex2argb(v)
4256
4509
  # if a single color is given, all border colors are first set to that same value
4257
4510
  # only afterwards are the individual border colors overridden
4258
- border_color_l, border_color_r, border_color_t, border_color_b = (
4511
+ (
4512
+ border_color_l,
4513
+ border_color_r,
4514
+ border_color_t,
4515
+ border_color_b,
4516
+ ) = (
4259
4517
  border_color_all,
4260
4518
  border_color_all,
4261
4519
  border_color_all,
4262
4520
  border_color_all,
4263
4521
  )
4264
- border_color_d, border_color_o, border_color_v, border_color_h = (
4522
+ (
4523
+ border_color_d,
4524
+ border_color_o,
4525
+ border_color_v,
4526
+ border_color_h,
4527
+ ) = (
4265
4528
  border_color_all,
4266
4529
  border_color_all,
4267
4530
  border_color_all,
@@ -4284,10 +4547,31 @@ def format_excel(
4284
4547
  elif strcmp(k, kws_border)[0] in ["color_horizontal", "color_h"]:
4285
4548
  border_color_h = hex2argb(v)
4286
4549
  # *border style
4287
- border_styles = ["thin","medium","thick","dotted","dashed",
4288
- "hair","mediumDashed","dashDot","dashDotDot","slantDashDot","none"]
4289
- border_style_l, border_style_r, border_style_t, border_style_b = (None,None,None,None)
4290
- border_style_d, border_style_o, border_style_v, border_style_h = (None,None,None,None)
4550
+ border_styles = [
4551
+ "thin",
4552
+ "medium",
4553
+ "thick",
4554
+ "dotted",
4555
+ "dashed",
4556
+ "hair",
4557
+ "mediumDashed",
4558
+ "dashDot",
4559
+ "dashDotDot",
4560
+ "slantDashDot",
4561
+ "none",
4562
+ ]
4563
+ border_style_l, border_style_r, border_style_t, border_style_b = (
4564
+ None,
4565
+ None,
4566
+ None,
4567
+ None,
4568
+ )
4569
+ border_style_d, border_style_o, border_style_v, border_style_h = (
4570
+ None,
4571
+ None,
4572
+ None,
4573
+ None,
4574
+ )
4291
4575
  # get styles config
4292
4576
  for k, v in cell.get(K, {}).items():
4293
4577
  # if not "style" in k:
@@ -4296,13 +4580,23 @@ def format_excel(
4296
4580
  border_style_all = strcmp(v, border_styles)[0]
4297
4581
  # if a single style is given, all border styles are first set to that same value
4298
4582
  # only afterwards are the individual border styles overridden
4299
- border_style_l, border_style_r, border_style_t, border_style_b = (
4583
+ (
4584
+ border_style_l,
4585
+ border_style_r,
4586
+ border_style_t,
4587
+ border_style_b,
4588
+ ) = (
4300
4589
  border_style_all,
4301
4590
  border_style_all,
4302
4591
  border_style_all,
4303
4592
  border_style_all,
4304
4593
  )
4305
- border_style_d, border_style_o, border_style_v, border_style_h = (
4594
+ (
4595
+ border_style_d,
4596
+ border_style_o,
4597
+ border_style_v,
4598
+ border_style_h,
4599
+ ) = (
4306
4600
  border_style_all,
4307
4601
  border_style_all,
4308
4602
  border_style_all,
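For illustration only (not part of the package diff): the font, fill and border keyword lists in this function mirror openpyxl's styling vocabulary (aRGB colors such as "FF000000", fill types such as "solid", border styles such as "thin"). Assuming openpyxl is the backend, as the later cell_.border / cell_.alignment assignments suggest, a minimal sketch of the style objects format_excel assembles:

    # Illustrative sketch, assuming openpyxl as the styling backend.
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Border, Side, Alignment

    wb = Workbook()
    ws = wb.active
    ws["A1"] = "header"
    ws["A1"].font = Font(name="Arial", size=11, bold=True, color="FF000000")
    ws["A1"].fill = PatternFill(start_color="FFFFFF00", end_color="FFFFFF00", fill_type="solid")
    thin = Side(style="thin", color="FF000000")
    ws["A1"].border = Border(left=thin, right=thin, top=thin, bottom=thin)
    ws["A1"].alignment = Alignment(horizontal="center", vertical="center")
    wb.save("formatted.xlsx")  # hypothetical output path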
@@ -4348,6 +4642,7 @@ def format_excel(
4348
4642
  cell_.alignment = cell_alignment
4349
4643
  if border:
4350
4644
  cell_.border = border
4645
+
4351
4646
  if not isinstance(df, pd.DataFrame):
4352
4647
  try:
4353
4648
  print(f"is loading file {os.path.basename(df)}")
@@ -4697,6 +4992,7 @@ def preview(var):
4697
4992
  """Master function to preview formatted variables in Jupyter."""
4698
4993
  from bs4 import BeautifulSoup
4699
4994
  from IPython.display import display, HTML, Markdown
4995
+
4700
4996
  if isinstance(var, str):
4701
4997
  if isa(var, "html"):
4702
4998
  display(HTML(var)) # Render as HTML
@@ -4714,6 +5010,7 @@ def preview(var):
4714
5010
 
4715
5011
  elif isinstance(var, list) or isinstance(var, dict):
4716
5012
  import json
5013
+
4717
5014
  # Display JSON
4718
5015
  json_str = json.dumps(var, indent=4)
4719
5016
  display(Markdown(f"```json\n{json_str}\n```"))
@@ -4728,6 +5025,7 @@ def preview(var):
4728
5025
 
4729
5026
  elif isinstance(var, dict):
4730
5027
  import json
5028
+
4731
5029
  # Handle dictionary formatting
4732
5030
  json_str = json.dumps(var, indent=4)
4733
5031
  display(Markdown(f"```json\n{json_str}\n```"))
@@ -4735,12 +5033,15 @@ def preview(var):
4735
5033
  else:
4736
5034
  # If the format is not recognized, print a message
4737
5035
  print("Format not recognized or unsupported.")
5036
+
5037
+
4738
5038
  # # Example usages:
4739
5039
  # preview("This is a plain text message.")
4740
5040
  # preview("# This is a Markdown header")
4741
5041
  # preview(pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]}))
4742
5042
  # preview({"key": "value", "numbers": [1, 2, 3]})
4743
5043
 
5044
+
4744
5045
  def _df_outlier(
4745
5046
  data,
4746
5047
  columns=None,
@@ -4880,51 +5181,53 @@ def df_outlier(
4880
5181
  processed_data = pd.concat([_outlier_df_tmp, non_numeric_data], axis=1)
4881
5182
  processed_data = processed_data[col_names_org]
4882
5183
  return processed_data
4883
-
4884
5184
 
4885
5185
 
4886
5186
  def df_extend(data: pd.DataFrame, column, axis=0, sep=None, prefix="col"):
4887
5187
  """
4888
5188
  Extend a DataFrame by the list elements in the column.
4889
-
5189
+
4890
5190
  Parameters:
4891
5191
  ----------
4892
5192
  data : pd.DataFrame
4893
5193
  The input DataFrame to be extended.
4894
-
5194
+
4895
5195
  column : str
4896
5196
  The name of the column to be split.
4897
-
5197
+
4898
5198
  axis : int, optional
4899
- The axis along which to expand the DataFrame.
5199
+ The axis along which to expand the DataFrame.
4900
5200
  - 0 (default): Expand the specified column into multiple rows.
4901
5201
  - 1: Expand the specified column into multiple columns.
4902
-
5202
+
4903
5203
  sep : str, optional
4904
5204
  The separator used to split the values in the specified column.
4905
5205
  Must be provided for the function to work correctly.
4906
5206
  """
4907
-
4908
- data = data.copy()
5207
+
5208
+ data = data.copy()
4909
5209
  mask = data[column].str.contains(sep, na=False)
4910
5210
  data = data.copy()
4911
5211
  if mask.any():
4912
- data[column] = (
4913
- data[column]
4914
- .apply(lambda x: x.split(sep) if isinstance(x, str) else x) # Only split if x is a string
4915
- )
4916
-
5212
+ data[column] = data[column].apply(
5213
+ lambda x: x.split(sep) if isinstance(x, str) else x
5214
+ ) # Only split if x is a string
5215
+
4917
5216
  # Strip spaces from each item in the lists
4918
- data[column] = data[column].apply(lambda x: [item.strip() for item in x] if isinstance(x, list) else x)
4919
-
5217
+ data[column] = data[column].apply(
5218
+ lambda x: [item.strip() for item in x] if isinstance(x, list) else x
5219
+ )
5220
+
4920
5221
  data = data.explode(column, ignore_index=True)
4921
5222
  return data
5223
+
5224
+
4922
5225
  # ! DataFrame
4923
5226
  def df_astype(
4924
5227
  data: pd.DataFrame,
4925
5228
  columns: Optional[Union[str, List[str]]] = None,
4926
5229
  astype: str = "datetime",
4927
- skip_row:Union[str,list]=None,
5230
+ skip_row: Union[str, list] = None,
4928
5231
  fmt: Optional[str] = None,
4929
5232
  inplace: bool = True,
4930
5233
  errors: str = "coerce", # Can be "ignore", "raise", or "coerce"
@@ -4982,7 +5285,8 @@ def df_astype(
4982
5285
  "second",
4983
5286
  "time",
4984
5287
  "week",
4985
- "date","day",
5288
+ "date",
5289
+ "day",
4986
5290
  "month",
4987
5291
  "year",
4988
5292
  ]
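For illustration only (not part of the package diff): the list above enumerates the date/time keywords that df_astype resolves through strcmp. A hypothetical call, based only on the signature visible earlier in this diff (the DataFrame, column name and format string are invented):

    # Hypothetical usage sketch -- not part of the package code.
    import pandas as pd
    from py2ls.ips import df_astype  # the module this diff modifies

    df = pd.DataFrame({"when": ["2024-01-01", "2024-02-15"]})
    # inplace=True by default per the signature above, so the column is converted in place
    df_astype(df, columns="when", astype="datetime", fmt="%Y-%m-%d")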
@@ -4990,18 +5294,18 @@ def df_astype(
4990
5294
  if not inplace:
4991
5295
  data = data.copy()
4992
5296
  if skip_row is not None:
4993
- data = data.drop(index=skip_row, errors='ignore')
5297
+ data = data.drop(index=skip_row, errors="ignore")
4994
5298
  # If columns is None, apply to all columns
4995
5299
  if columns is None:
4996
5300
  columns = data.columns.tolist()
4997
5301
  # correct the astype input
4998
- if isinstance(astype,str):
5302
+ if isinstance(astype, str):
4999
5303
  astype = strcmp(astype, astypes)[0]
5000
5304
  print(f"converting as type: {astype}")
5001
- elif isinstance(astype,dict):
5305
+ elif isinstance(astype, dict):
5002
5306
  for col, dtype in astype.items():
5003
- dtype='date' if dtype=="day" else dtype
5004
- data["col"]=data["col"].adtype(strcmp(dtype, astypes)[0])
5307
+ dtype = "date" if dtype == "day" else dtype
5308
+ data["col"] = data["col"].adtype(strcmp(dtype, astypes)[0])
5005
5309
  return data if not inplace else None
5006
5310
 
5007
5311
  # Ensure columns is a list
@@ -5112,13 +5416,15 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
5112
5416
  if column not in data.columns:
5113
5417
  raise ValueError(f"Column '{column}' does not exist in the DataFrame.")
5114
5418
 
5115
- if isinstance(by, str) and 'count' in by.lower():
5419
+ if isinstance(by, str) and "count" in by.lower():
5116
5420
  # Count occurrences of each value in the specified column
5117
5421
  value_counts = df[column].value_counts()
5118
5422
 
5119
5423
  # Determine the order based on counts
5120
5424
  count_ascending = kwargs.pop("count_ascending", ascending)
5121
- sorted_counts = value_counts.sort_values(ascending=count_ascending).index.tolist()
5425
+ sorted_counts = value_counts.sort_values(
5426
+ ascending=count_ascending
5427
+ ).index.tolist()
5122
5428
 
5123
5429
  # Convert to a categorical type with the new order
5124
5430
  df[column] = pd.Categorical(df[column], categories=sorted_counts, ordered=True)
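For illustration only (not part of the package diff): the branch above orders a column by how often each value occurs, by turning the column into an ordered Categorical. The same idea as a standalone pandas sketch (the sample data is invented):

    # Standalone pandas sketch of sorting rows by value frequency.
    import pandas as pd

    df = pd.DataFrame({"fruit": ["apple", "pear", "apple", "plum", "apple", "pear"]})
    order = df["fruit"].value_counts().sort_values(ascending=False).index.tolist()
    df["fruit"] = pd.Categorical(df["fruit"], categories=order, ordered=True)
    print(df.sort_values("fruit"))  # rows with the most frequent value come first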
@@ -5236,6 +5542,7 @@ def df_merge(
5236
5542
  )
5237
5543
  return df_merged
5238
5544
 
5545
+
5239
5546
  def df_drop_duplicates(
5240
5547
  data: pd.DataFrame,
5241
5548
  by: Union[
@@ -5244,16 +5551,16 @@ def df_drop_duplicates(
5244
5551
  keep="first", # Options: 'first', 'last', or False (drop all duplicates)
5245
5552
  ignore_index=True,
5246
5553
  inplace: bool = False,
5247
- verbose=True
5554
+ verbose=True,
5248
5555
  ):
5249
5556
  """
5250
5557
  data (pd.DataFrame): DataFrame to drop duplicates from.
5251
5558
  by (str): Specify by to drop duplicates:
5252
5559
  - 'index': Drop duplicates based on the DataFrame index.
5253
5560
  - Column name(s) for row-wise duplicate checking.
5254
- keep (str): Which duplicates to keep:
5255
- 'first',
5256
- 'last',
5561
+ keep (str): Which duplicates to keep:
5562
+ 'first',
5563
+ 'last',
5257
5564
  False (drop all duplicates).
5258
5565
  inplace (bool): Whether to modify the original DataFrame in place.
5259
5566
  """
@@ -5263,8 +5570,8 @@ def df_drop_duplicates(
5263
5570
  result = data[~data.index.duplicated(keep=keep)]
5264
5571
  else:
5265
5572
  # Drop duplicates row-wise based on column(s)
5266
- result = data.drop_duplicates(subset=by, keep=keep,ignore_index=ignore_index)
5267
- if original_shape!=result.shape or verbose:
5573
+ result = data.drop_duplicates(subset=by, keep=keep, ignore_index=ignore_index)
5574
+ if original_shape != result.shape or verbose:
5268
5575
  print(f"\nshape:{original_shape} (before drop_duplicates)")
5269
5576
  print(f"shape:{result.shape} (after drop_duplicates)")
5270
5577
  if inplace:
@@ -5274,16 +5581,18 @@ def df_drop_duplicates(
5274
5581
  return None
5275
5582
  else:
5276
5583
  return result
5584
+
5585
+
5277
5586
  #! fillna()
5278
5587
  def df_fillna(
5279
5588
  data: pd.DataFrame,
5280
5589
  method: str = "knn",
5281
- axis: int = 0,# column-wise
5590
+ axis: int = 0, # column-wise
5282
5591
  constant: float = None,
5283
5592
  n_neighbors: int = 5, # KNN-specific
5284
- max_iter: int = 10, # Iterative methods specific
5593
+ max_iter: int = 10, # Iterative methods specific
5285
5594
  inplace: bool = False,
5286
- random_state:int = 1
5595
+ random_state: int = 1,
5287
5596
  ) -> pd.DataFrame:
5288
5597
  """
5289
5598
  Fill missing values in a DataFrame using specified imputation method.
@@ -5299,11 +5608,11 @@ def df_fillna(
5299
5608
  - 'iterative': Use Iterative imputation; each feature with missing values as a function of other features and estimates them iteratively
5300
5609
  - 'mice' (Multivariate Imputation by Chained Equations): A special case of iterative imputation.
5301
5610
  # - 'missforest': A random forest-based imputation method. Uses a random forest model to predict and fill missing values
5302
- # - 'softimpute': Matrix factorization imputation.A matrix factorization technique where missing values are imputed by
5611
+ # - 'softimpute': Matrix factorization imputation.A matrix factorization technique where missing values are imputed by
5303
5612
  # reconstructing the data matrix using low-rank approximation
5304
5613
  # - EM (Expectation-Maximization): Often used in advanced statistics to estimate missing values in a probabilistic framework.
5305
5614
  # - 'svd': Use IterativeSVD (matrix factorization via Singular Value Decomposition).
5306
-
5615
+
5307
5616
  axis (int): The axis along which to impute:
5308
5617
  - 0: Impute column-wise (default).
5309
5618
  - 1: Impute row-wise.
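For illustration only (not part of the package diff): hypothetical calls for two of the imputation methods described above; see also the commented example further down in this diff (the toy DataFrame is invented):

    # Hypothetical usage sketch -- not part of the package code.
    import numpy as np
    import pandas as pd
    from py2ls.ips import df_fillna  # the module this diff modifies

    df = pd.DataFrame({"A": [1.0, np.nan, 3.0], "B": [np.nan, 5.0, 6.0]})
    filled_knn = df_fillna(df, method="knn", n_neighbors=2)        # KNN-based imputation
    filled_const = df_fillna(df, method="constant", constant=0.0)  # fill with a fixed value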
@@ -5312,7 +5621,7 @@ def df_fillna(
5312
5621
 
5313
5622
  """
5314
5623
  if isinstance(data, pd.Series):
5315
- data=pd.DataFrame(data)
5624
+ data = pd.DataFrame(data)
5316
5625
  # handle None
5317
5626
  for col in data.columns:
5318
5627
  data[col] = data[col].apply(lambda x: np.nan if x is None else x)
@@ -5322,13 +5631,19 @@ def df_fillna(
5322
5631
  # Separate numeric and non-numeric columns
5323
5632
  numeric_data = data.select_dtypes(include=[np.number])
5324
5633
  non_numeric_data = data.select_dtypes(exclude=[np.number])
5325
-
5634
+
5326
5635
  if data.empty:
5327
5636
  raise ValueError("Input DataFrame is empty.")
5328
5637
 
5329
5638
  # Validate method
5330
- methods = ["mean", "median", "most_frequent",
5331
- "constant", "knn", "iterative"]#,"missforest","softimpute","svd"]
5639
+ methods = [
5640
+ "mean",
5641
+ "median",
5642
+ "most_frequent",
5643
+ "constant",
5644
+ "knn",
5645
+ "iterative",
5646
+ ] # ,"missforest","softimpute","svd"]
5332
5647
  method = strcmp(method, methods)[0]
5333
5648
 
5334
5649
  # If using constant method, ask for a constant value
@@ -5342,17 +5657,20 @@ def df_fillna(
5342
5657
  # Initialize SimpleImputer with the chosen method
5343
5658
  if method == "constant":
5344
5659
  from sklearn.impute import SimpleImputer
5660
+
5345
5661
  imputer = SimpleImputer(strategy=method, fill_value=constant)
5346
5662
  elif method == "knn":
5347
5663
  from sklearn.impute import KNNImputer
5664
+
5348
5665
  imputer = KNNImputer(n_neighbors=n_neighbors)
5349
5666
  elif method == "iterative" or method == "mice":
5350
5667
  from sklearn.experimental import enable_iterative_imputer
5351
5668
  from sklearn.impute import IterativeImputer
5352
5669
 
5353
- imputer = IterativeImputer(max_iter=max_iter, random_state=random_state)
5354
- else: # mean, median, most_frequent
5670
+ imputer = IterativeImputer(max_iter=max_iter, random_state=random_state)
5671
+ else: # mean, median, most_frequent
5355
5672
  from sklearn.impute import SimpleImputer
5673
+
5356
5674
  imputer = SimpleImputer(strategy=method)
5357
5675
 
5358
5676
  # Fit and transform the data
@@ -5376,23 +5694,29 @@ def df_fillna(
5376
5694
  # Handle non-numeric data imputation
5377
5695
  if not non_numeric_data.empty:
5378
5696
  from sklearn.impute import SimpleImputer
5697
+
5379
5698
  if method == "constant":
5380
- non_numeric_imputer = SimpleImputer(strategy="constant", fill_value=constant)
5699
+ non_numeric_imputer = SimpleImputer(
5700
+ strategy="constant", fill_value=constant
5701
+ )
5381
5702
  else:
5382
5703
  non_numeric_imputer = SimpleImputer(strategy="most_frequent")
5383
-
5704
+
5384
5705
  # Impute non-numeric columns column-wise (axis=0)
5385
5706
  imputed_non_numeric = non_numeric_imputer.fit_transform(non_numeric_data)
5386
-
5707
+
5387
5708
  # Convert imputed non-numeric array back to DataFrame with original index and column names
5388
5709
  imputed_non_numeric_df = pd.DataFrame(
5389
- imputed_non_numeric, index=non_numeric_data.index, columns=non_numeric_data.columns
5710
+ imputed_non_numeric,
5711
+ index=non_numeric_data.index,
5712
+ columns=non_numeric_data.columns,
5390
5713
  )
5391
5714
  else:
5392
5715
  imputed_non_numeric_df = pd.DataFrame(index=data.index)
5393
5716
 
5394
-
5395
- imputed_data = pd.concat([imputed_data, imputed_non_numeric_df], axis=1).reindex(columns=data.columns)
5717
+ imputed_data = pd.concat([imputed_data, imputed_non_numeric_df], axis=1).reindex(
5718
+ columns=data.columns
5719
+ )
5396
5720
 
5397
5721
  if inplace:
5398
5722
  # Modify the original DataFrame
@@ -5401,6 +5725,8 @@ def df_fillna(
5401
5725
  else:
5402
5726
  # Return the modified DataFrame
5403
5727
  return imputed_data[col_names_org]
5728
+
5729
+
5404
5730
  # # example
5405
5731
  # data = {
5406
5732
  # "A": [1, 2, np.nan, 4, 5],
@@ -5430,14 +5756,15 @@ def df_fillna(
5430
5756
  # display(df)
5431
5757
  # display(df_fillna(data=df, method=method_name, inplace=False, axis=0))
5432
5758
 
5759
+
5433
5760
  def df_encoder(
5434
5761
  data: pd.DataFrame,
5435
- method: str = "dummy",#'dummy', 'onehot', 'ordinal', 'label', 'target', 'binary'
5762
+ method: str = "dummy", #'dummy', 'onehot', 'ordinal', 'label', 'target', 'binary'
5436
5763
  columns=None,
5437
5764
  target_column=None, # Required for 'target' encoding method
5438
- **kwargs
5765
+ **kwargs,
5439
5766
  ) -> pd.DataFrame:
5440
- """
5767
+ """
5441
5768
  Methods explained:
5442
5769
  - 'dummy': pandas' `get_dummies` to create dummy variables for categorical columns, which is another form of one-hot encoding, but with a simpler interface.
5443
5770
 
@@ -5454,18 +5781,20 @@ def df_encoder(
5454
5781
 
5455
5782
  # Select categorical columns
5456
5783
  categorical_cols = data.select_dtypes(exclude=np.number).columns.tolist()
5457
- methods = ["dummy","onehot", "ordinal", "label", "target", "binary"]
5784
+ methods = ["dummy", "onehot", "ordinal", "label", "target", "binary"]
5458
5785
  method = strcmp(method, methods)[0]
5459
5786
 
5460
5787
  if columns is None:
5461
5788
  columns = categorical_cols
5462
5789
 
5463
5790
  # pd.get_dummies()
5464
- if method=='dummy':
5465
- dtype=kwargs.pop("dtype",int)
5466
- drop_first=kwargs.pop("drop_first",True)
5791
+ if method == "dummy":
5792
+ dtype = kwargs.pop("dtype", int)
5793
+ drop_first = kwargs.pop("drop_first", True)
5467
5794
  try:
5468
- encoded_df = pd.get_dummies(data[columns], drop_first=drop_first, dtype=dtype, **kwargs)
5795
+ encoded_df = pd.get_dummies(
5796
+ data[columns], drop_first=drop_first, dtype=dtype, **kwargs
5797
+ )
5469
5798
  return pd.concat([data.drop(columns, axis=1), encoded_df], axis=1)
5470
5799
  except Exception as e:
5471
5800
  # print(f"Warning, 没有进行转换, 因为: {e}")
@@ -5518,8 +5847,9 @@ def df_encoder(
5518
5847
  encoded_data = encoder.fit_transform(data[columns])
5519
5848
  return pd.concat([data.drop(columns, axis=1), encoded_data], axis=1)
5520
5849
 
5850
+
5521
5851
  def df_scaler(
5522
- data: pd.DataFrame, # should be numeric dtype
5852
+ data: pd.DataFrame, # should be numeric dtype
5523
5853
  method="standard",
5524
5854
  columns=None, # default, select all numeric col/row
5525
5855
  inplace=False,
@@ -5603,6 +5933,8 @@ def df_scaler(
5603
5933
  scaled_df = data.copy()
5604
5934
  scaled_df.loc[numeric_rows.index] = scaled_data
5605
5935
  return scaled_df
5936
+
5937
+
5606
5938
  def df_special_characters_cleaner(
5607
5939
  data: pd.DataFrame, where=["column", "content", "index"]
5608
5940
  ) -> pd.DataFrame:
@@ -5628,6 +5960,8 @@ def df_special_characters_cleaner(
5628
5960
  data.index = data.index.str.replace(r"[^\w\s]", "_", regex=True)
5629
5961
 
5630
5962
  return data
5963
+
5964
+
5631
5965
  def df_cluster(
5632
5966
  data: pd.DataFrame,
5633
5967
  columns: Optional[list] = None,
@@ -5636,7 +5970,7 @@ def df_cluster(
5636
5970
  scale: bool = True,
5637
5971
  plot: Union[str, list] = "all",
5638
5972
  inplace: bool = True,
5639
- ax = None,
5973
+ ax=None,
5640
5974
  ):
5641
5975
  from sklearn.preprocessing import StandardScaler
5642
5976
  from sklearn.cluster import KMeans
@@ -5952,24 +6286,23 @@ def df_reducer(
5952
6286
  umap_neighbors: int = 15, # UMAP-specific
5953
6287
  umap_min_dist: float = 0.1, # UMAP-specific
5954
6288
  tsne_perplexity: int = 30, # t-SNE-specific
5955
- hue:str = None,# lda-specific
6289
+ hue: str = None, # lda-specific
5956
6290
  scale: bool = True,
5957
6291
  fill_missing: bool = True,
5958
6292
  debug: bool = False,
5959
6293
  inplace: bool = True, # replace the original data
5960
- plot_:bool = False,# plot scatterplot, but no 'hue',so it is meaningless
6294
+ plot_: bool = False, # plot scatterplot, but no 'hue',so it is meaningless
5961
6295
  random_state=1,
5962
- ax = None,
6296
+ ax=None,
5963
6297
  figsize=None,
5964
- **kwargs
5965
- ) -> pd.DataFrame:
6298
+ **kwargs,
6299
+ ) -> pd.DataFrame:
5966
6300
  dict_methods = {
5967
6301
  #!Linear Dimensionality Reduction: For simplifying data with techniques that assume linearity.
5968
6302
  "pca": "pca(Principal Component Analysis): \n\tUseful for reducing dimensionality of continuous data while retaining variance. Advantage: Simplifies data, speeds up computation, reduces noise. Limitation: Assumes linear relationships, may lose interpretability in transformed dimensions.",
5969
6303
  "lda": "lda(Linear Discriminant Analysis):\n\tUseful for supervised dimensionality reduction when class separability is important. Advantage: Enhances separability between classes, can improve classification performance. Limitation: Assumes normal distribution and equal class covariances, linear boundaries only.",
5970
6304
  "factor": "factor(Factor Analysis):\n\tSuitable for datasets with observed and underlying latent variables. Advantage: Reveals hidden structure in correlated data, dimensionality reduction with interpretable factors. Limitation: Assumes factors are linear combinations, less effective for nonlinear data.",
5971
6305
  "svd": "svd(Singular Value Decomposition):\n\tSuitable for matrix decomposition, dimensionality reduction in tasks like topic modeling or image compression. Advantage: Efficient, preserves variance, useful in linear transformations. Limitation: Assumes linear relationships, sensitive to noise, may not capture non-linear structure.",
5972
-
5973
6306
  #! Non-linear Dimensionality Reduction (Manifold Learning)
5974
6307
  "umap": "umap(Uniform Manifold Approximation and Projection):\n\tBest for high-dimensional data visualization (e.g., embeddings). Advantage: Captures complex structure while preserving both local and global data topology. Limitation: Non-deterministic results can vary, sensitive to parameter tuning.",
5975
6308
  "tsne": "tsne(t-Distributed Stochastic Neighbor Embedding):\n\tt-SNE excels at preserving local structure (i.e., clusters), but it often loses global. relationships, causing clusters to appear in arbitrary proximities to each other. Ideal for clustering and visualizing high-dimensional data, especially for clear cluster separation. Advantage: Captures local relationships effectively. Limitation: Computationally intensive, does not preserve global structure well, requires parameter tuning.",
@@ -5977,28 +6310,40 @@ def df_reducer(
5977
6310
  "lle": "lle(Locally Linear Embedding):\n\tUseful for non-linear dimensionality reduction when local relationships are important (e.g., manifold learning). Advantage: Preserves local data structure, good for manifold-type data. Limitation: Sensitive to noise and number of neighbors, not effective for global structure.",
5978
6311
  "kpca": "kpca(Kernel Principal Component Analysis):\n\tGood for non-linear data with complex structure, enhancing separability. Advantage: Extends PCA to capture non-linear relationships. Limitation: Computationally expensive, sensitive to kernel and parameter choice, less interpretable.",
5979
6312
  "ica": "ica(Independent Component Analysis):\n\tEffective for blind source separation (e.g., EEG, audio signal processing).is generally categorized under Non-linear Dimensionality Reduction, but it also serves a distinct role in Blind Source Separation. While ICA is commonly used for dimensionality reduction, particularly in contexts where data sources need to be disentangled (e.g., separating mixed signals like EEG or audio data), it focuses on finding statistically independent components rather than maximizing variance (like PCA) or preserving distances (like MDS or UMAP). Advantage: Extracts independent signals/components, useful in mixed signal scenarios. Limitation: Assumes statistical independence, sensitive to noise and algorithm choice.",
5980
-
5981
6313
  #! Anomaly Detection: Specialized for detecting outliers or unusual patterns
5982
6314
  "isolation_forest": "Isolation Forest:\n\tDesigned for anomaly detection, especially in high-dimensional data. Advantage: Effective in detecting outliers, efficient for large datasets. Limitation: Sensitive to contamination ratio parameter, not ideal for highly structured or non-anomalous data.",
5983
6315
  }
5984
6316
 
5985
6317
  from sklearn.preprocessing import StandardScaler
5986
6318
  from sklearn.impute import SimpleImputer
5987
- if plot_:
5988
- import matplotlib.pyplot as plt
6319
+
6320
+ if plot_:
6321
+ import matplotlib.pyplot as plt
5989
6322
  import seaborn as sns
5990
6323
  # Check valid method input
5991
- methods=["pca", "umap","tsne","factor","isolation_forest","lda","kpca","ica","mds","lle","svd"]
5992
- method=strcmp(method, methods)[0]
6324
+ methods = [
6325
+ "pca",
6326
+ "umap",
6327
+ "tsne",
6328
+ "factor",
6329
+ "isolation_forest",
6330
+ "lda",
6331
+ "kpca",
6332
+ "ica",
6333
+ "mds",
6334
+ "lle",
6335
+ "svd",
6336
+ ]
6337
+ method = strcmp(method, methods)[0]
5993
6338
  print(f"\nprocessing with using {dict_methods[method]}:")
5994
- xlabel,ylabel=None,None
6339
+ xlabel, ylabel = None, None
5995
6340
  if columns is None:
5996
- columns = data.select_dtypes(include='number').columns.tolist()
6341
+ columns = data.select_dtypes(include="number").columns.tolist()
5997
6342
  if hue is None:
5998
- hue = data.select_dtypes(exclude='number').columns.tolist()
6343
+ hue = data.select_dtypes(exclude="number").columns.tolist()
5999
6344
  if isinstance(hue, list):
6000
6345
  print("Warning: hue is a list, only select the 1st one")
6001
- hue=hue[0]
6346
+ hue = hue[0]
6002
6347
  if not hue:
6003
6348
  # Select columns if specified, else use all columns
6004
6349
  X = data[columns].values if columns else data.values
@@ -6018,11 +6363,12 @@ def df_reducer(
6018
6363
  X = scaler.fit_transform(X)
6019
6364
 
6020
6365
  # Apply PCA if selected
6021
- if method == "pca":
6366
+ if method == "pca":
6022
6367
  from sklearn.decomposition import PCA
6368
+
6023
6369
  pca = PCA(n_components=n_components)
6024
6370
  X_reduced = pca.fit_transform(X)
6025
-
6371
+
6026
6372
  # Additional PCA information
6027
6373
  explained_variance = pca.explained_variance_ratio_
6028
6374
  singular_values = pca.singular_values_
@@ -6038,56 +6384,72 @@ def df_reducer(
6038
6384
  # Plot explained variance
6039
6385
  cumulative_variance = np.cumsum(explained_variance)
6040
6386
  plt.figure(figsize=(8, 5))
6041
- plt.plot(range(1, len(cumulative_variance) + 1), cumulative_variance, marker="o")
6387
+ plt.plot(
6388
+ range(1, len(cumulative_variance) + 1), cumulative_variance, marker="o"
6389
+ )
6042
6390
  plt.title("Cumulative Explained Variance by Principal Components")
6043
6391
  plt.xlabel("Number of Principal Components")
6044
6392
  plt.ylabel("Cumulative Explained Variance")
6045
6393
  plt.axhline(y=0.95, color="r", linestyle="--", label="Threshold (95%)")
6046
- plt.axvline(x=n_components, color="g", linestyle="--", label=f"n_components = {n_components}")
6394
+ plt.axvline(
6395
+ x=n_components,
6396
+ color="g",
6397
+ linestyle="--",
6398
+ label=f"n_components = {n_components}",
6399
+ )
6047
6400
  plt.legend()
6048
6401
  plt.grid()
6049
6402
  plt.show()
6050
6403
 
6051
6404
  # Prepare reduced DataFrame with additional PCA info
6052
6405
  pca_df = pd.DataFrame(
6053
- X_reduced, index=data.index,
6054
- columns=[f"PC_{i+1}" for i in range(n_components)]
6055
- )
6406
+ X_reduced,
6407
+ index=data.index,
6408
+ columns=[f"PC_{i+1}" for i in range(n_components)],
6409
+ )
6056
6410
  # pca_df["Explained Variance"] = np.tile(explained_variance[:n_components], (pca_df.shape[0], 1))
6057
6411
  # pca_df["Singular Values"] = np.tile(singular_values[:n_components], (pca_df.shape[0], 1))
6058
6412
  # Expand explained variance to multiple columns if needed
6059
6413
  for i in range(n_components):
6060
- pca_df[f"Explained Variance PC_{i+1}"] = np.tile(format(explained_variance[i]*100,".3f")+"%", (pca_df.shape[0], 1))
6414
+ pca_df[f"Explained Variance PC_{i+1}"] = np.tile(
6415
+ format(explained_variance[i] * 100, ".3f") + "%", (pca_df.shape[0], 1)
6416
+ )
6061
6417
  for i in range(n_components):
6062
- pca_df[f"Singular Values PC_{i+1}"] = np.tile(singular_values[i], (pca_df.shape[0], 1))
6418
+ pca_df[f"Singular Values PC_{i+1}"] = np.tile(
6419
+ singular_values[i], (pca_df.shape[0], 1)
6420
+ )
6063
6421
  if hue:
6064
- pca_df[hue]=y
6065
- elif method =='lda':
6422
+ pca_df[hue] = y
6423
+ elif method == "lda":
6066
6424
  from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
6067
-
6425
+
6068
6426
  if "hue" not in locals() or hue is None:
6069
- raise ValueError("LDA requires a 'hue' col parameter to specify class labels.")
6427
+ raise ValueError(
6428
+ "LDA requires a 'hue' col parameter to specify class labels."
6429
+ )
6070
6430
 
6071
6431
  lda_reducer = LinearDiscriminantAnalysis(n_components=n_components)
6072
6432
  X_reduced = lda_reducer.fit_transform(X, y)
6073
-
6433
+
6074
6434
  # Prepare reduced DataFrame with additional LDA info
6075
6435
  lda_df = pd.DataFrame(
6076
- X_reduced, index=data.index,
6077
- columns=[f"LDA_{i+1}" for i in range(n_components)]
6436
+ X_reduced,
6437
+ index=data.index,
6438
+ columns=[f"LDA_{i+1}" for i in range(n_components)],
6078
6439
  )
6079
6440
  if debug:
6080
6441
  print(f"LDA completed: Reduced to {n_components} components.")
6081
6442
  print("Class separability achieved by LDA.")
6082
6443
  if hue:
6083
- lda_df[hue]=y
6444
+ lda_df[hue] = y
6084
6445
  # Apply UMAP if selected
6085
6446
  elif method == "umap":
6086
6447
  import umap
6448
+
6087
6449
  umap_reducer = umap.UMAP(
6088
6450
  n_neighbors=umap_neighbors,
6089
6451
  min_dist=umap_min_dist,
6090
- n_components=n_components
6452
+ n_components=n_components,
6091
6453
  )
6092
6454
  X_reduced = umap_reducer.fit_transform(X)
6093
6455
 
@@ -6102,45 +6464,57 @@ def df_reducer(
6102
6464
 
6103
6465
  # Prepare reduced DataFrame with additional UMAP info
6104
6466
  umap_df = pd.DataFrame(
6105
- X_reduced, index=data.index,
6106
- columns=[f"UMAP_{i+1}" for i in range(n_components)]
6467
+ X_reduced,
6468
+ index=data.index,
6469
+ columns=[f"UMAP_{i+1}" for i in range(n_components)],
6107
6470
  )
6108
6471
  umap_df["Embedding"] = embedding[:, 0] # Example of embedding data
6109
6472
  umap_df["Trustworthiness"] = trustworthiness[:, 0] # Trustworthiness metric
6110
6473
  if hue:
6111
- umap_df[hue]=y
6474
+ umap_df[hue] = y
6112
6475
  elif method == "tsne":
6113
6476
  from sklearn.manifold import TSNE
6114
- tsne = TSNE(n_components=n_components, perplexity=tsne_perplexity, random_state=random_state)
6115
- X_reduced = tsne.fit_transform(X)
6477
+
6478
+ tsne = TSNE(
6479
+ n_components=n_components,
6480
+ perplexity=tsne_perplexity,
6481
+ random_state=random_state,
6482
+ )
6483
+ X_reduced = tsne.fit_transform(X)
6116
6484
  tsne_df = pd.DataFrame(
6117
- X_reduced,
6485
+ X_reduced,
6118
6486
  index=data.index,
6119
- columns=[f"tSNE_{i+1}" for i in range(n_components)]
6487
+ columns=[f"tSNE_{i+1}" for i in range(n_components)],
6488
+ )
6489
+ tsne_df["Perplexity"] = np.tile(
6490
+ f"Perplexity: {tsne_perplexity}", (tsne_df.shape[0], 1)
6120
6491
  )
6121
- tsne_df["Perplexity"] = np.tile(f"Perplexity: {tsne_perplexity}", (tsne_df.shape[0], 1))
6122
6492
  if hue:
6123
- tsne_df[hue]=y
6493
+ tsne_df[hue] = y
6124
6494
  # Apply Factor Analysis if selected
6125
6495
  elif method == "factor":
6126
6496
  from sklearn.decomposition import FactorAnalysis
6497
+
6127
6498
  factor = FactorAnalysis(n_components=n_components, random_state=random_state)
6128
6499
  X_reduced = factor.fit_transform(X)
6129
6500
  # Factor Analysis does not directly provide explained variance, but we can approximate it
6130
6501
  fa_variance = factor.noise_variance_
6131
6502
  # Prepare reduced DataFrame with additional Factor Analysis info
6132
6503
  factor_df = pd.DataFrame(
6133
- X_reduced,
6504
+ X_reduced,
6134
6505
  index=data.index,
6135
- columns=[f"Factor_{i+1}" for i in range(n_components)]
6506
+ columns=[f"Factor_{i+1}" for i in range(n_components)],
6507
+ )
6508
+ factor_df["Noise Variance"] = np.tile(
6509
+ format(np.mean(fa_variance) * 100, ".3f") + "%", (factor_df.shape[0], 1)
6136
6510
  )
6137
- factor_df["Noise Variance"] = np.tile(format(np.mean(fa_variance) * 100, ".3f") + "%", (factor_df.shape[0], 1))
6138
6511
  if hue:
6139
- factor_df[hue]=y
6512
+ factor_df[hue] = y
6140
6513
  # Apply Isolation Forest for outlier detection if selected
6141
6514
  elif method == "isolation_forest":
6142
6515
  from sklearn.decomposition import PCA
6143
6516
  from sklearn.ensemble import IsolationForest
6517
+
6144
6518
  # Step 1: Apply PCA for dimensionality reduction to 2 components
6145
6519
  pca = PCA(n_components=n_components)
6146
6520
  X_pca = pca.fit_transform(X)
@@ -6150,87 +6524,108 @@ def df_reducer(
6150
6524
 
6151
6525
  # Prepare reduced DataFrame with additional PCA info
6152
6526
  iso_forest_df = pd.DataFrame(
6153
- X_pca, index=data.index,
6154
- columns=[f"PC_{i+1}" for i in range(n_components)]
6527
+ X_pca, index=data.index, columns=[f"PC_{i+1}" for i in range(n_components)]
6155
6528
  )
6156
6529
 
6157
- isolation_forest = IsolationForest(n_estimators=100, contamination='auto',random_state=1)
6530
+ isolation_forest = IsolationForest(
6531
+ n_estimators=100, contamination="auto", random_state=1
6532
+ )
6158
6533
  isolation_forest.fit(X)
6159
- anomaly_scores = isolation_forest.decision_function(X) # Anomaly score: larger is less anomalous
6534
+ anomaly_scores = isolation_forest.decision_function(
6535
+ X
6536
+ ) # Anomaly score: larger is less anomalous
6160
6537
  # Predict labels: 1 (normal), -1 (anomaly)
6161
- anomaly_labels = isolation_forest.fit_predict(X)
6538
+ anomaly_labels = isolation_forest.fit_predict(X)
6162
6539
  # Add anomaly scores and labels to the DataFrame
6163
6540
  iso_forest_df["Anomaly Score"] = anomaly_scores
6164
6541
  iso_forest_df["Anomaly Label"] = anomaly_labels
6165
6542
  # add info from pca
6166
6543
  for i in range(n_components):
6167
- iso_forest_df[f"Explained Variance PC_{i+1}"] = np.tile(format(explained_variance[i]*100,".3f")+"%", (iso_forest_df.shape[0], 1))
6544
+ iso_forest_df[f"Explained Variance PC_{i+1}"] = np.tile(
6545
+ format(explained_variance[i] * 100, ".3f") + "%",
6546
+ (iso_forest_df.shape[0], 1),
6547
+ )
6168
6548
  for i in range(n_components):
6169
- iso_forest_df[f"Singular Values PC_{i+1}"] = np.tile(singular_values[i], (iso_forest_df.shape[0], 1))
6549
+ iso_forest_df[f"Singular Values PC_{i+1}"] = np.tile(
6550
+ singular_values[i], (iso_forest_df.shape[0], 1)
6551
+ )
6170
6552
  if hue:
6171
- iso_forest_df[hue]=y
6172
- #* Apply Kernel PCA if selected
6553
+ iso_forest_df[hue] = y
6554
+ # * Apply Kernel PCA if selected
6173
6555
  elif method == "kpca":
6174
6556
  from sklearn.decomposition import KernelPCA
6175
- kpca = KernelPCA(n_components=n_components, kernel="rbf", random_state=random_state)
6557
+
6558
+ kpca = KernelPCA(
6559
+ n_components=n_components, kernel="rbf", random_state=random_state
6560
+ )
6176
6561
  X_reduced = kpca.fit_transform(X)
6177
6562
 
6178
6563
  # Prepare reduced DataFrame with KPCA info
6179
6564
  kpca_df = pd.DataFrame(
6180
- X_reduced,
6565
+ X_reduced,
6181
6566
  index=data.index,
6182
- columns=[f"KPCA_{i+1}" for i in range(n_components)]
6567
+ columns=[f"KPCA_{i+1}" for i in range(n_components)],
6183
6568
  )
6184
6569
  if debug:
6185
6570
  print("Kernel PCA completed with RBF kernel.")
6186
6571
  if hue:
6187
- kpca_df[hue]=y
6188
- #* Apply ICA if selected
6572
+ kpca_df[hue] = y
6573
+ # * Apply ICA if selected
6189
6574
  elif method == "ica":
6190
6575
  from sklearn.decomposition import FastICA
6576
+
6191
6577
  ica = FastICA(n_components=n_components, random_state=random_state)
6192
6578
  X_reduced = ica.fit_transform(X)
6193
6579
 
6194
6580
  # Prepare reduced DataFrame with ICA info
6195
6581
  ica_df = pd.DataFrame(
6196
- X_reduced, index=data.index,
6197
- columns=[f"ICA_{i+1}" for i in range(n_components)]
6582
+ X_reduced,
6583
+ index=data.index,
6584
+ columns=[f"ICA_{i+1}" for i in range(n_components)],
6198
6585
  )
6199
6586
  if debug:
6200
6587
  print("Independent Component Analysis (ICA) completed.")
6201
6588
  if hue:
6202
- ica_df[hue]=y
6203
- #* Apply MDS if selected
6589
+ ica_df[hue] = y
6590
+ # * Apply MDS if selected
6204
6591
  elif method == "mds":
6205
6592
  from sklearn.manifold import MDS
6593
+
6206
6594
  mds = MDS(n_components=n_components, random_state=random_state)
6207
6595
  X_reduced = mds.fit_transform(X)
6208
6596
 
6209
6597
  # Prepare reduced DataFrame with MDS info
6210
6598
  mds_df = pd.DataFrame(
6211
- X_reduced, index=data.index,
6212
- columns=[f"MDS_{i+1}" for i in range(n_components)]
6599
+ X_reduced,
6600
+ index=data.index,
6601
+ columns=[f"MDS_{i+1}" for i in range(n_components)],
6213
6602
  )
6214
6603
  if debug:
6215
6604
  print("Multidimensional Scaling (MDS) completed.")
6216
6605
  if hue:
6217
- mds_df[hue]=y
6218
- #* Apply Locally Linear Embedding (LLE) if selected
6606
+ mds_df[hue] = y
6607
+ # * Apply Locally Linear Embedding (LLE) if selected
6219
6608
  elif method == "lle":
6220
6609
  from sklearn.manifold import LocallyLinearEmbedding
6221
- lle = LocallyLinearEmbedding(n_components=n_components, n_neighbors=umap_neighbors, random_state=random_state)
6610
+
6611
+ lle = LocallyLinearEmbedding(
6612
+ n_components=n_components,
6613
+ n_neighbors=umap_neighbors,
6614
+ random_state=random_state,
6615
+ )
6222
6616
  X_reduced = lle.fit_transform(X)
6223
6617
 
6224
6618
  # Prepare reduced DataFrame with LLE info
6225
6619
  lle_df = pd.DataFrame(
6226
- X_reduced, index=data.index,
6227
- columns=[f"LLE_{i+1}" for i in range(n_components)]
6620
+ X_reduced,
6621
+ index=data.index,
6622
+ columns=[f"LLE_{i+1}" for i in range(n_components)],
6228
6623
  )
6229
6624
  if debug:
6230
6625
  print("Locally Linear Embedding (LLE) completed.")
6231
6626
  if hue:
6232
- lle_df[hue]=y
6233
- #* Apply Singular Value Decomposition (SVD) if selected
6627
+ lle_df[hue] = y
6628
+ # * Apply Singular Value Decomposition (SVD) if selected
6234
6629
  elif method == "svd":
6235
6630
  # Using NumPy's SVD for dimensionality reduction
6236
6631
  U, s, Vt = np.linalg.svd(X, full_matrices=False)
@@ -6238,11 +6633,12 @@ def df_reducer(
6238
6633
 
6239
6634
  # Prepare reduced DataFrame with SVD info
6240
6635
  svd_df = pd.DataFrame(
6241
- X_reduced, index=data.index,
6242
- columns=[f"SVD_{i+1}" for i in range(n_components)]
6636
+ X_reduced,
6637
+ index=data.index,
6638
+ columns=[f"SVD_{i+1}" for i in range(n_components)],
6243
6639
  )
6244
6640
  if hue:
6245
- svd_df[hue]=y
6641
+ svd_df[hue] = y
6246
6642
  if debug:
6247
6643
  print("Singular Value Decomposition (SVD) completed.")
6248
6644
 
@@ -6250,17 +6646,17 @@ def df_reducer(
6250
6646
  if method == "pca":
6251
6647
  reduced_df = pca_df
6252
6648
  colname_met = "PC_"
6253
- xlabel= f"PC_1 ({pca_df["Explained Variance PC_1"].tolist()[0]})"
6254
- ylabel= f"PC_2 ({pca_df["Explained Variance PC_2"].tolist()[0]})"
6649
+ xlabel = f"PC_1 ({pca_df["Explained Variance PC_1"].tolist()[0]})"
6650
+ ylabel = f"PC_2 ({pca_df["Explained Variance PC_2"].tolist()[0]})"
6255
6651
  elif method == "umap":
6256
6652
  reduced_df = umap_df
6257
- colname_met = "UMAP_"
6653
+ colname_met = "UMAP_"
6258
6654
  elif method == "tsne":
6259
6655
  reduced_df = tsne_df
6260
- colname_met = "tSNE_"
6656
+ colname_met = "tSNE_"
6261
6657
  elif method == "factor":
6262
6658
  reduced_df = factor_df
6263
- colname_met = "Factor_"
6659
+ colname_met = "Factor_"
6264
6660
  elif method == "isolation_forest":
6265
6661
  reduced_df = iso_forest_df # Already a DataFrame for outliers
6266
6662
  colname_met = "PC_"
@@ -6269,7 +6665,8 @@ def df_reducer(
6269
6665
  data=iso_forest_df[iso_forest_df["Anomaly Label"] == 1],
6270
6666
  x="PC_1",
6271
6667
  y="PC_2",
6272
- label="normal", c="b",
6668
+ label="normal",
6669
+ c="b",
6273
6670
  )
6274
6671
  ax = sns.scatterplot(
6275
6672
  ax=ax,
@@ -6277,73 +6674,86 @@ def df_reducer(
6277
6674
  x="PC_1",
6278
6675
  y="PC_2",
6279
6676
  c="r",
6280
- label="outlier", marker="+", s=30,
6677
+ label="outlier",
6678
+ marker="+",
6679
+ s=30,
6281
6680
  )
6282
- elif method=='lda':
6283
- reduced_df=lda_df
6284
- colname_met="LDA_"
6285
- elif method=="kpca":
6286
- reduced_df=kpca_df
6287
- colname_met="KPCA_"
6288
- elif method=="ica":
6289
- reduced_df=ica_df
6290
- colname_met="ICA_"
6291
- elif method=="mds":
6292
- reduced_df=mds_df
6293
- colname_met="MDS_"
6294
- elif method=="lle":
6295
- reduced_df=lle_df
6296
- colname_met="LLE_"
6297
- elif method=="svd":
6298
- reduced_df=svd_df
6299
- colname_met="SVD_"
6681
+ elif method == "lda":
6682
+ reduced_df = lda_df
6683
+ colname_met = "LDA_"
6684
+ elif method == "kpca":
6685
+ reduced_df = kpca_df
6686
+ colname_met = "KPCA_"
6687
+ elif method == "ica":
6688
+ reduced_df = ica_df
6689
+ colname_met = "ICA_"
6690
+ elif method == "mds":
6691
+ reduced_df = mds_df
6692
+ colname_met = "MDS_"
6693
+ elif method == "lle":
6694
+ reduced_df = lle_df
6695
+ colname_met = "LLE_"
6696
+ elif method == "svd":
6697
+ reduced_df = svd_df
6698
+ colname_met = "SVD_"
6300
6699
  # Quick plots
6301
6700
  if plot_ and (not method in ["isolation_forest"]):
6302
6701
  from .plot import plotxy
6702
+
6303
6703
  if ax is None:
6304
6704
  if figsize is None:
6305
- _, ax = plt.subplots(figsize=cm2inch(8,8))
6705
+ _, ax = plt.subplots(figsize=cm2inch(8, 8))
6306
6706
  else:
6307
6707
  _, ax = plt.subplots(figsize=figsize)
6308
6708
  else:
6309
- ax=ax.cla()
6310
- ax=plotxy(data=reduced_df,
6311
- x=colname_met+"1",
6312
- y=colname_met+"2",
6313
- hue=hue,
6314
- s=1,
6315
- edgecolor='none',
6316
- kind='scater',
6317
- figsets=dict(legend=dict(loc='best',markerscale=4),
6318
- xlabel=xlabel if xlabel else None,
6319
- ylabel=ylabel if ylabel else None),
6320
- ax=ax,
6321
- verbose=False,
6322
- **kwargs
6323
- )
6709
+ ax = ax.cla()
6710
+ ax = plotxy(
6711
+ data=reduced_df,
6712
+ x=colname_met + "1",
6713
+ y=colname_met + "2",
6714
+ hue=hue,
6715
+ s=1,
6716
+ edgecolor="none",
6717
+ kind="scater",
6718
+ figsets=dict(
6719
+ legend=dict(loc="best", markerscale=4),
6720
+ xlabel=xlabel if xlabel else None,
6721
+ ylabel=ylabel if ylabel else None,
6722
+ ),
6723
+ ax=ax,
6724
+ verbose=False,
6725
+ **kwargs,
6726
+ )
6324
6727
 
6325
6728
  if inplace:
6326
6729
  # If inplace=True, add components back into the original data
6327
6730
  for col_idx in range(n_components):
6328
- data.loc[:,f"{colname_met}{col_idx+1}"] = reduced_df.iloc[:, col_idx]
6731
+ data.loc[:, f"{colname_met}{col_idx+1}"] = reduced_df.iloc[:, col_idx]
6329
6732
  # Add extra info for PCA/UMAP
6330
6733
  if method == "pca":
6331
6734
  for i in range(n_components):
6332
- data.loc[:,f"Explained Variance PC_{i+1}"] = reduced_df.loc[:,f"Explained Variance PC_{i+1}"]
6735
+ data.loc[:, f"Explained Variance PC_{i+1}"] = reduced_df.loc[
6736
+ :, f"Explained Variance PC_{i+1}"
6737
+ ]
6333
6738
  for i in range(n_components):
6334
- data.loc[:,f"Singular Values PC_{i+1}"] = reduced_df.loc[:,f"Singular Values PC_{i+1}"]
6335
- elif method == "umap":
6739
+ data.loc[:, f"Singular Values PC_{i+1}"] = reduced_df.loc[
6740
+ :, f"Singular Values PC_{i+1}"
6741
+ ]
6742
+ elif method == "umap":
6336
6743
  for i in range(n_components):
6337
- data.loc[:,f"UMAP_{i+1}"]=reduced_df.loc[:,f"UMAP_{i+1}"]
6338
- data.loc[:,"Embedding"] = reduced_df.loc[:,"Embedding"]
6339
- data.loc[:,"Trustworthiness"] = reduced_df.loc[:,"Trustworthiness"]
6340
-
6744
+ data.loc[:, f"UMAP_{i+1}"] = reduced_df.loc[:, f"UMAP_{i+1}"]
6745
+ data.loc[:, "Embedding"] = reduced_df.loc[:, "Embedding"]
6746
+ data.loc[:, "Trustworthiness"] = reduced_df.loc[:, "Trustworthiness"]
6747
+
6341
6748
  return None # No return when inplace=True
6342
6749
 
6343
- return reduced_df
6750
+ return reduced_df
6751
+
6752
+
6344
6753
  # example:
6345
6754
  # df_reducer(data=data_log, columns=markers, n_components=2)
6346
6755
 
6756
+
6347
6757
  def plot_cluster(
6348
6758
  data: pd.DataFrame,
6349
6759
  labels: np.ndarray,
@@ -6368,6 +6778,7 @@ def plot_cluster(
6368
6778
  import seaborn as sns
6369
6779
  from sklearn.metrics import silhouette_samples
6370
6780
  import matplotlib.pyplot as plt
6781
+
6371
6782
  if metrics is None:
6372
6783
  metrics = evaluate_cluster(data=data, labels=labels, true_labels=true_labels)
6373
6784
 
@@ -6597,10 +7008,10 @@ def use_pd(
6597
7008
  verbose=True,
6598
7009
  dir_json="/Users/macjianfeng/Dropbox/github/python/py2ls/py2ls/data/usages_pd.json",
6599
7010
  ):
6600
- default_settings = fload(dir_json, output='json')
7011
+ default_settings = fload(dir_json, output="json")
6601
7012
  valid_kinds = list(default_settings.keys())
6602
7013
  kind = strcmp(func_name, valid_kinds)[0]
6603
- usage=default_settings[kind]
7014
+ usage = default_settings[kind]
6604
7015
  if verbose:
6605
7016
  for i, i_ in enumerate(ssplit(usage, by=",")):
6606
7017
  i_ = i_.replace("=", "\t= ") + ","