py2ls 0.2.4.1__py3-none-any.whl → 0.2.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/bio.py +272 -0
- py2ls/data/usages_pd copy.json +1105 -0
- py2ls/data/usages_pd.json +1413 -52
- py2ls/fetch_update.py +45 -27
- py2ls/ips.py +362 -99
- py2ls/plot.py +71 -64
- {py2ls-0.2.4.1.dist-info → py2ls-0.2.4.2.dist-info}/METADATA +1 -1
- {py2ls-0.2.4.1.dist-info → py2ls-0.2.4.2.dist-info}/RECORD +9 -7
- {py2ls-0.2.4.1.dist-info → py2ls-0.2.4.2.dist-info}/WHEEL +0 -0
py2ls/ips.py
CHANGED
@@ -61,19 +61,31 @@ except NameError:
 
 def plt_font(dir_font: str = "/System/Library/Fonts/Hiragino Sans GB.ttc"):
     """
-    Add the Chinese font to the font manager
+    Add the Chinese (default) font to the font manager
     Args:
        dir_font (str, optional): _description_. Defaults to "/System/Library/Fonts/Hiragino Sans GB.ttc".
     """
     import matplotlib.pyplot as plt
     from matplotlib import font_manager
+    slashtype = "/" if 'mac' in get_os() else "\\"
+    if slashtype in dir_font:
+        font_manager.fontManager.addfont(dir_font)
+        fontname = os.path.basename(dir_font).split(".")[0]
+    else:
+        if "cn" in dir_font.lower() or "ch" in dir_font.lower():
+            fontname = "Hiragino Sans GB" # default Chinese font
+        else:
+            fontname = dir_font
 
-
-
-
-    plt.rcParams["font.sans-serif"] = [fontname_chinese]
-    plt.rcParams["font.family"] = "sans-serif"
+    plt.rcParams["font.sans-serif"] = [fontname]
+    # plt.rcParams["font.family"] = "sans-serif"
     plt.rcParams["axes.unicode_minus"] = False
+    fonts_in_system = font_manager.findSystemFonts(fontpaths=None, fontext="ttf")
+    fontname_in_system = [os.path.basename(i).split(".")[0] for i in fonts_in_system]
+    if fontname not in fontname_in_system:
+        print(f"Font '{fontname}' not found. Falling back to default.")
+        plt.rcParams["font.sans-serif"] = ["Arial"]
+    return fontname
 
 # set 'dir_save'
 if "dar" in sys.platform:
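Note: plt_font now registers a font file when given a path, falls back to "Hiragino Sans GB" for Chinese-looking names, checks the resolved name against the installed system fonts, and returns it. A minimal usage sketch (the font path and the plotting call are assumptions, not part of the diff):

    import matplotlib.pyplot as plt
    from py2ls.ips import plt_font

    fontname = plt_font("/System/Library/Fonts/Hiragino Sans GB.ttc")  # registers the font, returns its name
    plt.title("中文标题", fontname=fontname)  # the returned name can be reused explicitly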
@@ -526,6 +538,7 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=False, scorer="WR"
         if isinstance(s, str):
             return s.lower()
         elif isinstance(s, list):
+            s=[str(i) for i in s]# convert all to str
             return [elem.lower() for elem in s]
         return s
 
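Note: the added line matters because the lowercasing pass calls elem.lower() on every candidate, so a candidate list containing non-strings previously raised AttributeError. A sketch of what now works (assuming strcmp returns the best match first):

    from py2ls.ips import strcmp
    best = strcmp("42", [42, "41", "abc"])[0]  # int candidates are stringified instead of crashing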
@@ -1697,24 +1710,16 @@ def fload(fpath, kind=None, **kwargs):
     def load_csv(fpath, **kwargs):
         from pandas.errors import EmptyDataError
 
-        engine = kwargs.get("engine", "pyarrow")
-        kwargs.pop("engine", None)
-        sep = kwargs.get("sep", "\t")
-        kwargs.pop("sep", None)
-        index_col = kwargs.get("index_col", None)
-        kwargs.pop("index_col", None)
-        memory_map = kwargs.get("memory_map", False)
-        kwargs.pop("memory_map", None)
-        skipinitialspace = kwargs.get("skipinitialspace", True)
-        kwargs.pop("skipinitialspace", None)
-        encoding = kwargs.get("encoding", "utf-8")
-        kwargs.pop("encoding", None)
-        on_bad_lines = kwargs.get("on_bad_lines", "skip")
-        kwargs.pop("on_bad_lines", None)
-        comment = kwargs.get("comment", None)
-        kwargs.pop("comment", None)
-
+        engine = kwargs.pop("engine", "pyarrow")
+        sep = kwargs.pop("sep", "\t")
+        index_col = kwargs.pop("index_col", None)
+        memory_map = kwargs.pop("memory_map", False)
+        skipinitialspace = kwargs.pop("skipinitialspace", False)
+        encoding = kwargs.pop("encoding", "utf-8")
+        on_bad_lines = kwargs.pop("on_bad_lines", "skip")
+        comment = kwargs.pop("comment", None)
         fmt=kwargs.pop("fmt",False)
+        verbose=kwargs.pop("verbose",False)
         if verbose:
             print_pd_usage("read_csv", verbose=verbose)
             return
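Note: each kwargs.get(...) plus kwargs.pop(...) pair collapses into a single kwargs.pop(key, default), which returns the stored value (or the default) and removes the key in one step; skipinitialspace also flips its default from True to False, and verbose is now consumed from kwargs explicitly. The equivalence of the two patterns, as a standalone sketch:

    kwargs = {"encoding": "latin-1"}
    # old two-step pattern
    encoding = kwargs.get("encoding", "utf-8")
    kwargs.pop("encoding", None)

    # new one-step pattern, same result
    kwargs = {"encoding": "latin-1"}
    encoding = kwargs.pop("encoding", "utf-8")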
@@ -1800,7 +1805,7 @@ def fload(fpath, kind=None, **kwargs):
             separators = [",", "\t", ";", "|", " "]
             for sep in separators:
                 sep2show = sep if sep != "\t" else "\\t"
-
+                print(f'trying with: engine=pyarrow, sep="{sep2show}"')
                 try:
                     df = pd.read_csv(
                         fpath,
@@ -1819,13 +1824,13 @@ def fload(fpath, kind=None, **kwargs):
                 except:
                     pass
         else:
-            engines = ["c", "python"]
+            engines = [None,"c", "python"]
             for engine in engines:
-
+                separators = [",", "\t", ";", "|", " "]
                 for sep in separators:
                     try:
                         sep2show = sep if sep != "\t" else "\\t"
-
+                        print(f"trying with: engine={engine}, sep='{sep2show}'")
                         df = pd.read_csv(
                             fpath,
                             engine=engine,
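Note: putting None first in the engine list lets pandas choose its own parser before the explicit "c" and "python" engines are tried, and the separator list is now rebuilt inside the engine loop. A condensed sketch of the retry strategy (the function name and path are hypothetical):

    import pandas as pd

    def try_read(fpath):
        for engine in [None, "c", "python"]:        # None = let pandas pick the engine
            for sep in [",", "\t", ";", "|", " "]:
                try:
                    return pd.read_csv(fpath, engine=engine, sep=sep)
                except Exception:
                    pass                            # try the next combination
        raise ValueError("no engine/separator combination worked")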
@@ -2031,6 +2036,9 @@ def fload(fpath, kind=None, **kwargs):
     elif kind.lower() in img_types:
         print(f'Image ".{kind}" is loaded.')
         return load_img(fpath)
+    elif kind=="gz" and fpath.endswith(".soft.gz"):
+        import GEOparse
+        return GEOparse.get_GEO(filepath=fpath)
     elif kind.lower() in zip_types:
         keep = kwargs.get("keep", False)
         fpath_unzip = unzip(fpath)
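Note: GEOparse becomes a lazily imported, optional dependency; GEOparse.get_GEO(filepath=...) parses a local GEO SOFT archive into a GEO object. A usage sketch (the accession file name is hypothetical):

    import GEOparse

    gse = GEOparse.get_GEO(filepath="./GSE12345_family.soft.gz")
    print(gse.metadata.get("title"))  # series-level metadata parsed from the SOFT file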
@@ -2105,30 +2113,51 @@ def fload(fpath, kind=None, **kwargs):
 # docx_content = fload('sample.docx')
 
 
-def fupdate(fpath, content=None):
+def fupdate(fpath, content=None, how="head"):
     """
     Update a file by adding new content at the top and moving the old content to the bottom.
+    If the file is a JSON file, merge the new content with the old content.
+
     Parameters
     ----------
     fpath : str
         The file path where the content should be updated.
-    content : str, optional
-        The new content to add at the top of the file
+    content : str or dict, optional
+        The new content to add at the top of the file (for text) or merge (for JSON).
+        If not provided, the function will not add any new content.
+
     Notes
     -----
     - If the file at `fpath` does not exist, it will be created.
-    -
+    - For text files, the new content will be added at the top, followed by the old content.
+    - For JSON files, the new content will be merged with the existing JSON content.
     """
     content = content or ""
-    if os.path.exists(fpath):
-        with open(fpath, "r") as file:
-            old_content = file.read()
+    file_ext = os.path.splitext(fpath)[1]
+    how_s=["head", "tail","start","end","beginning", "stop",'last',"before"]
+    how = strcmp(how, how_s)[0]
+    print(how)
+    add_where = 'head' if how in ["head", "start","beginning", "before"] else "tail"
+    if "json" in file_ext.lower():
+        old_content=fload(fpath,kind='json') if os.path.exists(fpath) else {}
+        updated_content = {**content,**old_content} if add_where=="head" else {**old_content, **content} if isinstance(content, dict) else old_content
+        fsave(fpath,updated_content)
     else:
-        old_content = ""
+        # Handle text file
+        if os.path.exists(fpath):
+            with open(fpath, "r") as file:
+                old_content = file.read()
+        else:
+            old_content = ""
 
-    with open(fpath, "w") as file:
-        file.write(content + "\n")
-        file.write(old_content)
+        # Write new content at the top followed by old content
+        with open(fpath, "w") as file:
+            if add_where=="head":
+                file.write(content + "\n")
+                file.write(old_content)
+            else:
+                file.write(old_content)
+                file.write(content + "\n")
 
 
 def fappend(fpath, content=None):
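Note: the new how argument is fuzzy-matched through strcmp, so "head", "start", "beginning", and "before" prepend while the other aliases append; .json files are dict-merged instead, with the existing keys winning when writing at the head. A usage sketch (file names hypothetical):

    from py2ls.ips import fupdate

    fupdate("log.txt", "newest entry", how="head")       # prepend to a text file
    fupdate("log.txt", "oldest entry", how="tail")       # append instead
    fupdate("config.json", {"retries": 3}, how="head")   # merge into the existing JSON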
@@ -2710,12 +2739,14 @@ def mkdir_nest(fpath: str) -> str:
     Returns:
     - str: The path of the created directory.
     """
-
-    if os.path.isdir(fpath):
-        return fpath
+
 
     # Split the full path into directories
     f_slash = "/" if "mac" in get_os().lower() else "\\"
+    if os.path.isdir(fpath):
+        fpath =fpath+f_slash if not fpath.endswith(f_slash) else fpath
+        print(fpath)
+        return fpath
     dir_parts = fpath.split(f_slash)  # Split the path by the OS-specific separator
 
     # Start creating directories from the root to the desired path
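Note: instead of returning an existing directory path unchanged, mkdir_nest now appends the OS separator first, so the return value can be concatenated with a file name directly; mkdir gains the same guarantee further below. A sketch of the contract (assuming a POSIX system):

    d = mkdir_nest("/tmp/exp1")    # returns "/tmp/exp1/" once the directory exists
    fpath = d + "result.csv"       # no manual slash handling needed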
@@ -2744,34 +2775,27 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
     - str: The path of the created directory or an error message.
     """
 
-    rootdir = []
-    # Convert string to list
+    rootdir = []
     if chdir is None:
         return mkdir_nest(pardir)
     if isinstance(chdir, str):
-        chdir = [chdir]
-    # Subfoldername should be unique
+        chdir = [chdir]
     chdir = list(set(chdir))
     if isinstance(pardir, str):  # Dir_parents should be 'str' type
-        pardir = os.path.normpath(pardir)
-        # Get the slash type: "/" or "\"
-        stype = "/" if "/" in pardir else "\\"
+        pardir = os.path.normpath(pardir)
     if "mac" in get_os().lower() or "lin" in get_os().lower():
         stype = "/"
     elif "win" in get_os().lower():
         stype = "\\"
     else:
         stype = "/"
-
-    # Check if the parent directory exists and is a directory path
+
     if os.path.isdir(pardir):
         os.chdir(pardir)  # Set current path
         # Check if subdirectories are not empty
         if chdir:
-            chdir.sort()
-
-            for folder in chdir:
-                # Check if the subfolder already exists
+            chdir.sort()
+            for folder in chdir:
                 child_tmp = os.path.join(pardir, folder)
                 if not os.path.isdir(child_tmp):
                     os.mkdir("./" + folder)
@@ -2791,6 +2815,8 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
     # Dir is the main output, if only one dir, then str type is inconvenient
     if len(rootdir) == 1:
         rootdir = rootdir[0]
+        rootdir=rootdir+stype if not rootdir.endswith(stype) else rootdir
+        print(rootdir)
     return rootdir
 
 
@@ -2805,22 +2831,25 @@ def figsave(*args, dpi=300):
     dir_save = None
     fname = None
     img = None
+    f_slash = "/" if "mac" in get_os().lower() else "\\"
     for arg in args:
         if isinstance(arg, str):
-            if
+            if f_slash in arg:
                 dir_save = arg
-
+            else:
                 fname = arg
         elif isinstance(arg, (Image.Image, np.ndarray)):
             img = arg  # Store the PIL image if provided
 
-    f_slash = "/" if "mac" in get_os().lower() else "\\"
     if dir_save is None:
         dir_save="./"
+    print(dir_save)
+    # dir_save=dir_save+f_slash if not dir_save.endswith(f_slash) else dir_save
     dir_par = f_slash.join(dir_save.split(f_slash)[:-1])
     dir_ch = "".join(dir_save.split(f_slash)[-1:])
     if not dir_par.endswith(f_slash):
         dir_par += f_slash
+    print(dir_par)
     if fname is None:
         fname = dir_ch
     mkdir(dir_par)
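Note: figsave now computes the OS separator up front and uses it to classify string arguments: a string containing the separator is taken as the save directory, any other string as the file name. A sketch of the calling convention (paths hypothetical):

    from py2ls.ips import figsave

    figsave("/tmp/plots/", "fig1.pdf", dpi=300)  # the first string holds a slash, so it is the directory; the second is the file name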
@@ -4418,9 +4447,10 @@ def preview(var):
 
 # ! DataFrame
 def df_astype(
-    df: pd.DataFrame,
+    data: pd.DataFrame,
     columns: Optional[Union[str, List[str]]] = None,
     astype: str = "datetime",
+    skip_row:Union[str,list]=None,
     fmt: Optional[str] = None,
     inplace: bool = True,
     errors: str = "coerce",  # Can be "ignore", "raise", or "coerce"
@@ -4484,22 +4514,24 @@ def df_astype(
     ]
     # If inplace is False, make a copy of the DataFrame
     if not inplace:
-        df = df.copy()
+        data = data.copy()
+    if skip_row is not None:
+        data = data.drop(index=skip_row, errors='ignore')
+    # If columns is None, apply to all columns
+    if columns is None:
+        columns = data.columns.tolist()
     # correct the astype input
     if isinstance(astype,str):
         astype = strcmp(astype, astypes)[0]
-        print(f"converting
+        print(f"converting as type: {astype}")
     elif isinstance(astype,dict):
         for col, dtype in astype.items():
             dtype='date' if dtype=="day" else dtype
-
-        return
-    # If columns is None, apply to all columns
-    if columns is None:
-        columns = df.columns
+            data["col"]=data["col"].adtype(strcmp(dtype, astypes)[0])
+        return data if not inplace else None
 
     # Ensure columns is a list
-    if isinstance(columns,
+    if isinstance(columns, str):
         columns = [columns]
 
     # Convert specified columns
@@ -4519,72 +4551,74 @@ def df_astype(
                 kwargs.pop("errors", None)
                 # convert it as type: datetime
                 if isinstance(column, int):
-                    df.iloc[:, column] = pd.to_datetime(
-                        df.iloc[:, column], format=fmt, errors=errors, **kwargs
+                    data.iloc[:, column] = pd.to_datetime(
+                        data.iloc[:, column], format=fmt, errors=errors, **kwargs
                     )
                     # further convert:
                     if astype == "time":
-                        df.iloc[:, column] = df.iloc[:, column].dt.time
+                        data.iloc[:, column] = data.iloc[:, column].dt.time
                     elif astype == "month":
-                        df.iloc[:, column] = df.iloc[:, column].dt.month
+                        data.iloc[:, column] = data.iloc[:, column].dt.month
                     elif astype == "year":
-                        df.iloc[:, column] = df.iloc[:, column].dt.year
+                        data.iloc[:, column] = data.iloc[:, column].dt.year
                     elif astype == "date" or astype == "day":
-                        df.iloc[:, column] = df.iloc[:, column].dt.date
+                        data.iloc[:, column] = data.iloc[:, column].dt.date
                     elif astype == "hour":
-                        df.iloc[:, column] = df.iloc[:, column].dt.hour
+                        data.iloc[:, column] = data.iloc[:, column].dt.hour
                     elif astype == "minute":
-                        df.iloc[:, column] = df.iloc[:, column].dt.minute
+                        data.iloc[:, column] = data.iloc[:, column].dt.minute
                     elif astype == "second":
-                        df.iloc[:, column] = df.iloc[:, column].dt.second
+                        data.iloc[:, column] = data.iloc[:, column].dt.second
                     elif astype == "week":
-                        df.iloc[:, column] = df.iloc[:, column].dt.day_name()
+                        data.iloc[:, column] = data.iloc[:, column].dt.day_name()
                 else:
-                    df[column] = (
+                    data[column] = (
                         pd.to_datetime(
-                            df[column], format=fmt, errors=errors, **kwargs
+                            data[column], format=fmt, errors=errors, **kwargs
                         )
                         if fmt
-                        else pd.to_datetime(df[column], errors=errors, **kwargs)
+                        else pd.to_datetime(data[column], errors=errors, **kwargs)
                     )
                     # further convert:
                     if astype == "time":
-                        df[column] = df[column].dt.time
+                        data[column] = data[column].dt.time
                     elif astype == "month":
-                        df[column] = df[column].dt.month
+                        data[column] = data[column].dt.month
                     elif astype == "year":
-                        df[column] = df[column].dt.year
+                        data[column] = data[column].dt.year
                     elif astype == "date":
-                        df[column] = df[column].dt.date
+                        data[column] = data[column].dt.date
                     elif astype == "hour":
-                        df[column] = df[column].dt.hour
+                        data[column] = data[column].dt.hour
                     elif astype == "minute":
-                        df[column] = df[column].dt.minute
+                        data[column] = data[column].dt.minute
                    elif astype == "second":
-                        df[column] = df[column].dt.second
+                        data[column] = data[column].dt.second
                     elif astype == "week":
-                        df[column] = df[column].dt.day_name()
+                        data[column] = data[column].dt.day_name()
 
             elif astype == "numeric":
                 kwargs.pop("errors", None)
-                df[column] = pd.to_numeric(df[column], errors=errors, **kwargs)
+                data[column] = pd.to_numeric(data[column], errors=errors, **kwargs)
                 # print(f"Successfully converted '{column}' to numeric.")
             elif astype == "timedelta":
                 kwargs.pop("errors", None)
-                df[column] = pd.to_timedelta(df[column], errors=errors, **kwargs)
+                data[column] = pd.to_timedelta(data[column], errors=errors, **kwargs)
                 # print(f"Successfully converted '{column}' to timedelta.")
             else:
                 # Convert to other types (e.g., float, int)
-                df[column] = df[column].astype(astype)
+                data[column] = data[column].astype(astype)
                 # print(f"Successfully converted '{column}' to {astype}.")
         except Exception as e:
            print(f"Error converting '{column}' to {astype}: {e}")
-
-
-
+    try:
+        display(data.info()[:10])
+    except:
+        pass
+    return data
 
 
-# ! DataFrame
+# ! DataFrame
 def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
     """
     Sort a DataFrame by a specified column based on a custom order or by count.
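Note: df_astype is reworked around a data parameter, gains skip_row (rows dropped before conversion), applies to all columns when columns is None, and now returns the converted frame. A usage sketch (column names hypothetical):

    import pandas as pd
    from py2ls.ips import df_astype

    df = pd.DataFrame({"when": ["2024-01-02", "2024-03-04"], "n": ["1", "2"]})
    df_astype(df, columns="when", astype="date")    # converts in place by default
    df_astype(df, columns="n", astype="numeric")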
@@ -4601,7 +4635,7 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
     Returns:
     - Sorted DataFrame if inplace is False, otherwise None.
     """
-    if column not in df.columns:
+    if column not in data.columns:
         raise ValueError(f"Column '{column}' does not exist in the DataFrame.")
 
     if isinstance(by, str) and 'count' in by.lower():
@@ -4624,11 +4658,11 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
 
     try:
         if inplace:  # replace the original
-            df.sort_values(column, ascending=ascending, inplace=True, **kwargs)
+            data.sort_values(column, ascending=ascending, inplace=True, **kwargs)
             print(f"Successfully sorted DataFrame by '{column}'")
             return None
         else:
-            sorted_df = df.sort_values(column, ascending=ascending, **kwargs)
+            sorted_df = data.sort_values(column, ascending=ascending, **kwargs)
             print(f"Successfully sorted DataFrame by '{column}' using custom order.")
             return sorted_df
     except Exception as e:
@@ -4636,7 +4670,6 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
         return df
 
 
-
 # # Example usage:
 # # Sample DataFrame
 # data = {
@@ -4667,6 +4700,236 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
 # display(df_month)
 
 
+def df_merge(
+    df1: pd.DataFrame,
+    df2: pd.DataFrame,
+    use_index: bool = True,
+    columns: list = ["col_left", "col_right"],
+    how: str = "left",
+) -> pd.DataFrame:
+    """
+    Merges two DataFrames based on either the index or shared columns with matching data types.
+    usage:
+    #(1) if the index are the same
+    df_merged = df_merge(df1, df2, use_index=True(defalut), how='outer')
+    #(2) if there are shaed columns, then based on shared columns
+    df_merged = df_merge(df1, df2, how='outer')
+    #(3) if columns: then based on the specific columns
+    df_merged = df_merge(df1, df2, columns=["col_left", "col_right"],how='outer')
+    Parameters:
+    - df1 (pd.DataFrame): The first DataFrame.
+    - df2 (pd.DataFrame): The second DataFrame.
+    - use_index (bool): If True, first try to merge by index if they are comparable; otherwise, fall back to column-based merge.
+    - how (str): Type of merge to perform: 'inner', 'outer', 'left', or 'right'. Default is 'inner'.
+        'inner': only the rows that have matching values in both DataFrames (intersection)
+        'outer': keeps all rows from both DataFrames and fills in missing values with NaN
+        'left': keeps all rows from the left DataFrame and matches rows from the right DataFrame
+        'right': keeps all rows from the right DataFrame and matches rows from the left DataFrame, filling with NaN if there is no match.
+
+    Returns:
+    - pd.DataFrame: The merged DataFrame.
+    """
+
+    # 1. Check if indices are comparable (same length and types)
+    if use_index or df1.index.equals(df2.index):
+        print(f"Merging based on index using '{how}' join...")
+        df_merged = pd.merge(df1, df2, left_index=True, right_index=True, how=how)
+        return df_merged
+
+    # 2. Find common columns with the same dtype
+    common_columns = df1.columns.intersection(df2.columns)
+    shared_columns = []
+    for col in common_columns:
+        if df1[col].dtype == df2[col].dtype:
+            shared_columns.append(col)
+    if not isinstance(columns, list):
+        columns = [columns]
+    if len(columns) != 2:
+        raise ValueError(
+            "'columns':list shoule be a list: columns=['col_left','col_right']"
+        )
+    if all(columns):
+        print(f"Merging based on columns: {columns} using '{how}' join...")
+        df_merged = pd.merge(df1, df2, left_on=columns[0], right_on=columns[1], how=how)
+    elif shared_columns:
+        print(
+            f"Merging based on shared columns: {shared_columns} using '{how}' join..."
+        )
+        df_merged = pd.merge(df1, df2, on=shared_columns, how=how)
+    else:
+        raise ValueError(
+            "No common columns with matching data types to merge on, and indices are not comparable."
+        )
+    return df_merged
+
+def df_fillna(
+    data: pd.DataFrame,
+    method: str = "mean",
+    axis: int = 0,# column-wise
+    constant: float = None,
+    inplace: bool = True,
+) -> pd.DataFrame:
+    """
+    Fill missing values in a DataFrame using specified imputation method.
+
+    Parameters:
+    data (pd.DataFrame): The DataFrame to fill missing values.
+    method (str): The imputation method to use. Options are:
+        - 'mean': Replace missing values with the mean of the column.
+        - 'median': Replace missing values with the median of the column.
+        - 'most_frequent': Replace missing values with the most frequent value in the column.
+        - 'constant': Replace missing values with a constant value provided by the `constant` parameter.
+        - 'knn': Use K-Nearest Neighbors imputation.
+        - 'iterative': Use Iterative imputation.
+    axis (int): The axis along which to impute:
+        - 0: Impute column-wise (default).
+        - 1: Impute row-wise.
+    constant (float, optional): Constant value to use for filling NaNs if method is 'constant'.
+    inplace (bool): If True, modify the original DataFrame. If False, return a new DataFrame.
+
+    """
+
+    if data.empty:
+        raise ValueError("Input DataFrame is empty.")
+
+    # Validate method
+    methods = ["mean", "median", "most_frequent", "constant", "knn", "iterative"]
+    method = strcmp(method, methods)[0]
+
+    # If using constant method, ask for a constant value
+    if constant is not None:
+        method = "constant"
+        try:
+            constant = float(constant)
+        except ValueError:
+            raise ValueError("Constant value must be a number.")
+
+    # Initialize SimpleImputer with the chosen method
+    if method == "constant":
+        imputer = SimpleImputer(strategy=method, fill_value=constant)
+    elif method == "knn":
+        from sklearn.impute import KNNImputer
+
+        imputer = KNNImputer(n_neighbors=n_neighbors)
+    elif method == "iterative":
+        from sklearn.impute import IterativeImputer
+
+        imputer = IterativeImputer(max_iter=max_iter)
+    else:
+        from sklearn.impute import SimpleImputer
+
+        imputer = SimpleImputer(strategy=method)
+
+    # Fit and transform the data
+    if axis == 0:
+        # Impute column-wise
+        imputed_data = imputer.fit_transform(data)
+        imputed_data.shape
+    elif axis == 1:
+        # Impute row-wise
+        imputed_data = imputer.fit_transform(data.T)
+        imputed_data.shape
+    else:
+        raise ValueError("Invalid axis. Use 0 for columns or 1 for rows.")
+
+    df_filled = pd.DataFrame(
+        imputed_data if axis == 0 else imputed_data.T,
+        index=data.index,# if axis == 0 else data.columns,
+        columns=data.columns,# if axis == 0 else data.index,
+    )
+
+    if inplace:
+        data.update(df_filled)
+        return None # replace original
+    else:
+        return df_filled
+def df_scaler(
+    data: pd.DataFrame,
+    method="standard",
+    columns=None,  # default, select all numeric col/row
+    inplace=False,
+    verbose=False,  # show usage
+    axis=0,  # defalut column-wise
+    **kwargs,
+):
+    """
+    df_scaler(data, scaler="standard", inplace=False, axis=0, verbose=True)
+
+    Parameters:
+    - data: pandas DataFrame to be scaled.
+    - method: Scaler type ('standard', 'minmax', 'robust'). Default is 'standard'.
+    - columns: List of columns (for axis=0) or rows (for axis=1) to scale.
+        If None, all numeric columns/rows will be scaled.
+    - inplace: If True, modify the DataFrame in place. Otherwise, return a new DataFrame.
+    - axis: Axis along which to scale. 0 for column-wise, 1 for row-wise. Default is 0.
+    - verbose: If True, prints logs of the process.
+    - kwargs: Additional arguments to be passed to the scaler.
+    """
+    if verbose:
+        print('df_scaler(data, scaler="standard", inplace=False, axis=0, verbose=True)')
+
+    methods = ["standard", "minmax", "robust"]
+    method = strcmp(method, methods)[0]
+    if method == "standard":
+        from sklearn.preprocessing import StandardScaler
+
+        scaler = StandardScaler(**kwargs)
+    elif method == "minmax":
+        from sklearn.preprocessing import MinMaxScaler
+
+        scaler = MinMaxScaler(**kwargs)
+    elif method == "robust":
+        from sklearn.preprocessing import RobustScaler
+
+        scaler = RobustScaler(**kwargs)
+    if axis not in [0, 1]:
+        raise ValueError("Axis must be 0 (column-wise) or 1 (row-wise).")
+
+    if axis == 0:
+        # Column-wise scaling (default)
+        if columns is None:
+            columns = data.select_dtypes(include=["float64", "int64"]).columns.tolist()
+        non_numeric_columns = data.columns.difference(columns)
+        print(f"Scaling columns")
+
+        scaled_data = scaler.fit_transform(data[columns])
+
+        if inplace:
+            data[columns] = scaled_data
+            print("Original DataFrame modified in place (column-wise).")
+        else:
+            scaled_df = pd.concat(
+                [
+                    pd.DataFrame(scaled_data, columns=columns, index=data.index),
+                    data[non_numeric_columns],
+                ],
+                axis=1,
+            )
+            scaled_df = scaled_df[data.columns]  # Maintain column order
+            return scaled_df
+
+    elif axis == 1:
+        # Row-wise scaling
+        if columns is None:
+            columns = data.index.tolist()
+        numeric_rows = data.loc[columns].select_dtypes(include=["float64", "int64"])
+        if numeric_rows.empty:
+            raise ValueError("No numeric rows to scale.")
+
+        print(f"Scaling rows")
+
+        scaled_data = scaler.fit_transform(
+            numeric_rows.T
+        ).T  # Transpose for scaling and then back
+
+        if inplace:
+            data.loc[numeric_rows.index] = scaled_data
+            print("Original DataFrame modified in place (row-wise).")
+        else:
+            scaled_df = data.copy()
+            scaled_df.loc[numeric_rows.index] = scaled_data
+            return scaled_df
+
 def df_cluster(
     data: pd.DataFrame,
     columns: Optional[list] = None,
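Note: df_merge, df_fillna, and df_scaler are the bulk of this release. A combined sketch of how they compose (sklearn is required for the imputation and scaling paths; the frames are hypothetical):

    import pandas as pd
    from py2ls.ips import df_merge, df_fillna, df_scaler

    df1 = pd.DataFrame({"a": [1.0, None, 3.0]})
    df2 = pd.DataFrame({"b": [10.0, 20.0, 30.0]})

    merged = df_merge(df1, df2, use_index=True, how="outer")   # index-based join
    filled = df_fillna(merged, method="mean", inplace=False)   # column means replace NaN
    scaled = df_scaler(filled, method="minmax")                # returns a new min-max-scaled frame

As written in the diff, df_fillna's "knn" and "iterative" branches reference n_neighbors and max_iter that are not parameters, and the "constant" branch uses SimpleImputer before it is imported, so only the default strategies run as-is.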
@@ -5387,4 +5650,4 @@ def print_pd_usage(
             i_ = i_.replace("=", "\t= ") + ","
             print(i_) if i == 0 else print("\t", i_)
     else:
-        print(usage)
+        print(usage)