PyPI - py2ls - Versions diffs - 0.1.10.26__py3-none-any.whl → 0.1.10.27__py3-none-any.whl - Mend

py2ls 0.1.10.26__py3-none-any.whl → 0.1.10.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

py2ls/ips.py +388 -10
py2ls/plot.py +90 -0
{py2ls-0.1.10.26.dist-info → py2ls-0.1.10.27.dist-info}/METADATA +1 -1
{py2ls-0.1.10.26.dist-info → py2ls-0.1.10.27.dist-info}/RECORD +5 -5
{py2ls-0.1.10.26.dist-info → py2ls-0.1.10.27.dist-info}/WHEEL +0 -0

py2ls/ips.py CHANGED Viewed

@@ -46,7 +46,7 @@ from collections import Counter
 from fuzzywuzzy import fuzz, process
 from langdetect import detect
 from duckduckgo_search import DDGS
+from typing import List, Optional, Union
 from bs4 import BeautifulSoup
 from . import netfinder
@@ -1250,6 +1250,119 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
     return None
+def unzip(dir_path, output_dir=None):
+    """
+    Extract a compressed file or archive and return the path that was written.
+    Supported suffixes (matched case-sensitively): .tar.gz, .tar.bz2, .tar, .zip,
+    .7z and .rar are archives extracted into a directory; .gz, .bz2 and .xz are
+    single compressed files that are decompressed into one file.
+    Parameters:
+    dir_path (str): Path to the compressed file.
+    output_dir (str): Destination directory. If None, the destination is dir_path
+                      with its last extension removed ("a.zip" -> "a",
+                      "a.tar.gz" -> "a.tar"). For .gz/.bz2/.xz inputs the
+                      decompressed file is written inside output_dir when given.
+    Returns:
+    str: The extraction directory for archives, or the path of the decompressed
+         file for .gz/.bz2/.xz inputs.
+    Raises:
+    ValueError: If the suffix is not one of the supported formats.
+    FileNotFoundError: If dir_path is not an existing file.
+    Notes:
+    An existing destination is deleted and replaced, but only after the input has
+    been validated, and never when the destination contains the input file.
+    .7z needs the third-party package py7zr and .rar needs rarfile.
+    """
+    # Longer suffixes come first so "x.tar.gz" is not mistaken for a plain ".gz".
+    tar_modes = {".tar.gz": "r:gz", ".tar.bz2": "r:bz2", ".tar": "r"}
+    stream_suffixes = (".gz", ".bz2", ".xz")
+    other_archives = (".zip", ".7z", ".rar")
+    supported = (*tar_modes, *stream_suffixes, *other_archives)
+    suffix = next((s for s in supported if dir_path.endswith(s)), None)
+    # Validate BEFORE touching the filesystem so a bad call cannot destroy data.
+    if suffix is None:
+        raise ValueError(f"Unsupported file format: {os.path.splitext(dir_path)[1]}")
+    if not os.path.isfile(dir_path):
+        raise FileNotFoundError(f"No such file: {dir_path}")
+    stem = os.path.splitext(dir_path)[0]
+    target = stem if output_dir is None else output_dir
+    # Never wipe a destination that holds the archive being extracted.
+    source_abs = os.path.abspath(dir_path)
+    target_abs = os.path.abspath(target)
+    holds_source = source_abs.startswith(target_abs.rstrip(os.sep) + os.sep)
+    if not holds_source:
+        if os.path.isdir(target):
+            shutil.rmtree(target)  # replace a previous extraction directory
+        elif os.path.exists(target):
+            os.remove(target)  # replace a previous output file
+    # Single compressed files: stream-decompress into one output file.
+    if suffix in stream_suffixes:
+        if suffix == ".gz":
+            import gzip as codec
+        elif suffix == ".bz2":
+            import bz2 as codec
+        else:
+            import lzma as codec
+        if output_dir is None:
+            output_file = stem
+        else:
+            os.makedirs(output_dir, exist_ok=True)
+            output_file = os.path.join(output_dir, os.path.basename(stem))
+        with codec.open(dir_path, "rb") as src, open(output_file, "wb") as dst:
+            shutil.copyfileobj(src, dst)
+        return output_file
+    # Archives: extract every member into the target directory.
+    # NOTE(security): extractall() trusts member paths stored in the archive;
+    # only use this on archives from trusted sources.
+    if suffix in tar_modes:
+        import tarfile
+        with tarfile.open(dir_path, tar_modes[suffix]) as tar_ref:
+            tar_ref.extractall(target)
+    elif suffix == ".zip":
+        import zipfile
+        with zipfile.ZipFile(dir_path, "r") as zip_ref:
+            zip_ref.extractall(target)
+    elif suffix == ".7z":
+        import py7zr
+        with py7zr.SevenZipFile(dir_path, mode="r") as z:
+            z.extractall(path=target)
+    else:  # ".rar"
+        import rarfile
+        with rarfile.RarFile(dir_path) as rar_ref:
+            rar_ref.extractall(target)
+    return target
+# Example usage:
+# output_dir = unzip('data.tar.gz')
+# output_file = unzip('file.csv.gz')
+# output_dir_zip = unzip('archive.zip')
+# output_dir_7z = unzip('archive.7z')
 def fload(fpath, kind=None, **kwargs):
     """
     Load content from a file with specified file type.
@@ -1398,11 +1511,26 @@ def fload(fpath, kind=None, **kwargs):
         "pdf",
         "ipynb",
     ]
-    supported_types = [*doc_types, *img_types]
+    zip_types = ["gz", "zip", "7z", "tar", "tar.gz", "tar.bz2", "bz2", "xz", "rar"]
+    supported_types = [*doc_types, *img_types, *zip_types]
     if kind not in supported_types:
-        raise ValueError(
-            f"Error:\n{kind} is not in the supported list {supported_types}"
-        )
+        print(f'Error:\n"{kind}" is not in the supported list {supported_types}')
+    # if os.path.splitext(fpath)[1][1:].lower() in zip_types:
+    #     keep=kwargs.get("keep", False)
+    #     ifile=kwargs.get("ifile",(0,0))
+    #     kwargs.pop("keep",None)
+    #     kwargs.pop("ifile",None)
+    #     fpath_unzip=unzip(fpath)
+    #     if isinstance(fpath_unzip,list):
+    #         fpath_unzip=fpath_unzip[ifile[0]]
+    #     if os.path.isdir(fpath_unzip):
+    #         fpath_selected=listdir(fpath_unzip,kind=kind).fpath[ifile[1]]
+    #         fpath_unzip=fpath_selected
+    #     content_unzip=fload(fpath_unzip, **kwargs)
+    #     if not keep:
+    #         os.remove(fpath_unzip)
+    #     return content_unzip
     if kind == "docx":
         return load_docx(fpath)
     elif kind == "txt" or kind == "md":
@@ -1427,10 +1555,21 @@ def fload(fpath, kind=None, **kwargs):
     elif kind.lower() in img_types:
         print(f'Image ".{kind}" is loaded.')
         return load_img(fpath)
+    elif kind.lower() in zip_types:
+        keep = kwargs.get("keep", False)
+        fpath_unzip = unzip(fpath)
+        content_unzip = fload(fpath_unzip, **kwargs)
+        if not keep:
+            os.remove(fpath_unzip)
+        return content_unzip
     else:
-        raise ValueError(
-            f"Error:\n{kind} is not in the supported list {supported_types}"
-        )
+        try:
+            with open(fpath, "r") as f:
+                content = f.readlines()
+        except:
+            with open(fpath, "r") as f:
+                content = f.read()
+        return content
 # Example usage
@@ -2030,10 +2169,10 @@ def mkdir(*args, **kwargs):
         if isinstance(arg, (str, list)):
             if "/" in arg or "\\" in arg:
                 pardir = arg
-                print(f"pardir{pardir}")
+                print(f'pardir: "{pardir}"')
             else:
                 chdir = arg
-                print(f"chdir{chdir}")
+                print(f'chdir:"{chdir}"')
         elif isinstance(arg, bool):
             overwrite = arg
             print(overwrite)
@@ -2049,6 +2188,13 @@ def mkdir(*args, **kwargs):
         pardir = os.path.normpath(pardir)
     # Get the slash type: "/" or "\"
     stype = "/" if "/" in pardir else "\\"
+    if "mac" in get_os().lower() or "lin" in get_os().lower():
+        stype = "/"
+    elif "win" in get_os().lower():
+        stype = "\\"
+    else:
+        stype = "/"
     # Check if the parent directory exists and is a directory path
     if os.path.isdir(pardir):
         os.chdir(pardir)  # Set current path
@@ -4046,3 +4192,235 @@ def preview(var):
 # preview("# This is a Markdown header")
 # preview(pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]}))
 # preview({"key": "value", "numbers": [1, 2, 3]})
+# ! DataFrame
+def df_as_type(
+    df: pd.DataFrame,
+    columns: Optional[Union[str, List[str]]] = None,
+    astype: str = "datetime",
+    format: Optional[str] = None,
+    inplace: bool = False,
+    errors: str = "coerce",  # Can be "ignore", "raise", or "coerce"
+    **kwargs,
+) -> Optional[pd.DataFrame]:
+    """
+    Convert columns of a DataFrame to a target type.
+    Parameters:
+    - df: DataFrame containing the columns to convert.
+    - columns: A column name, an int column position, a list of either, or None
+      to convert every column.
+    - astype: Target type. It is first normalised with strcmp() against the list
+      of known names below (strcmp is defined elsewhere in this module; presumably
+      a closest-match lookup - confirm). 'datetime' parses with pd.to_datetime;
+      'time', 'date', 'year', 'month', 'hour', 'minute', 'second' parse as datetime
+      and then keep that component; 'week' parses as datetime and keeps the weekday
+      name (dt.day_name()); 'numeric' uses pd.to_numeric; 'timedelta' uses
+      pd.to_timedelta; anything else is passed to Series.astype.
+    - format: Optional datetime format, only used for the datetime family.
+    - inplace: If True modify df itself and return None; otherwise work on a copy.
+    - errors: Passed to pd.to_datetime / pd.to_numeric / pd.to_timedelta.
+    - **kwargs: Extra keyword arguments for those three pandas converters.
+    Returns:
+    - The converted copy when inplace=False, otherwise None.
+    Notes:
+    - A failure on one column is printed and the remaining columns are still
+      processed; no exception is propagated from the per-column conversion.
+    """
+    astypes = [
+        "datetime", "timedelta", "numeric",
+        "int", "int8", "int16", "int32", "int64",
+        "uint8", "uint16", "uint32", "uint64",
+        "float", "float16", "float32", "float64",
+        "complex", "complex64", "complex128",
+        "str", "string", "bool",
+        "datetime64", "datetime64[ns]", "timedelta64", "timedelta64[ns]",
+        "category", "object", "Sparse",
+        "hour", "minute", "second", "time", "week", "date", "month", "year",
+    ]
+    # Targets derived from a datetime: parse first, then reduce via the .dt accessor.
+    datetime_parts = {
+        "time": lambda s: s.dt.time,
+        "month": lambda s: s.dt.month,
+        "year": lambda s: s.dt.year,
+        "date": lambda s: s.dt.date,
+        "hour": lambda s: s.dt.hour,
+        "minute": lambda s: s.dt.minute,
+        "second": lambda s: s.dt.second,
+        "week": lambda s: s.dt.day_name(),
+    }
+    # correct the astype input
+    astype = strcmp(astype, astypes)[0]
+    print(f"converting {columns} as type: {astype}")
+    # If inplace is False, make a copy of the DataFrame
+    if not inplace:
+        df = df.copy()
+    # If columns is None, apply to all columns
+    if columns is None:
+        columns = df.columns
+    # Ensure columns is a list
+    if isinstance(columns, (str, int)):
+        columns = [columns]
+    for column in columns:
+        try:
+            if astype == "datetime" or astype in datetime_parts:
+                # An int selects the column by position. The result is written back
+                # through the column label because assigning with df.iloc[:, i] can
+                # keep the column's old dtype, which breaks the .dt step below.
+                if isinstance(column, int):
+                    source = df.iloc[:, column]
+                    label = df.columns[column]
+                else:
+                    source = df[column]
+                    label = column
+                to_datetime_kwargs = dict(kwargs, errors=errors)
+                if format:
+                    to_datetime_kwargs["format"] = format
+                converted = pd.to_datetime(source, **to_datetime_kwargs)
+                if astype in datetime_parts:
+                    converted = datetime_parts[astype](converted)
+                df[label] = converted
+            elif astype == "numeric":
+                df[column] = pd.to_numeric(df[column], errors=errors, **kwargs)
+            elif astype == "timedelta":
+                df[column] = pd.to_timedelta(df[column], errors=errors, **kwargs)
+            else:
+                # Convert to other types (e.g., float, int)
+                df[column] = df[column].astype(astype)
+        except Exception as e:
+            print(f"Error converting '{column}' to {astype}: {e}")
+    # Return the modified DataFrame if inplace is False
+    if not inplace:
+        return df
+    return None
+# ! DataFrame
+def df_sort_values(df, column, by=None, ascending=True, inplace=False, **kwargs):
+    """
+    Sort a DataFrame by one column, optionally following a custom value order.
+    Parameters:
+    - df: DataFrame to be sorted.
+    - column: The name of the column to sort by.
+    - by: List giving the custom order of the values in `column`. The column is
+          converted to an ordered Categorical with these categories, so values
+          that are not listed become NaN. If None, the column's natural order
+          is used and the column is left unconverted.
+    - ascending: Boolean or list of booleans, default True.
+                 Sort ascending vs. descending.
+    - inplace: If True, sort (and convert) `df` itself and return None.
+               If False, `df` is left untouched and a sorted copy is returned.
+    - **kwargs: Additional arguments to pass to sort_values.
+    Returns:
+    - Sorted DataFrame if inplace is False, otherwise None. If sorting fails the
+      error is printed and the original `df` is returned.
+    Raises:
+    - ValueError: If `column` is not in the DataFrame or `by` is neither None
+      nor a list.
+    """
+    if column not in df.columns:
+        raise ValueError(f"Column '{column}' does not exist in the DataFrame.")
+    if by is not None and not isinstance(by, list):
+        raise ValueError("`by` must be a list giving the custom order.")
+    # Work on a copy unless asked to modify the caller's frame; the Categorical
+    # conversion below would otherwise leak into `df` even with inplace=False.
+    target = df if inplace else df.copy()
+    try:
+        if by is not None:
+            # Convert the specified column to a categorical type with the custom order
+            target[column] = pd.Categorical(target[column], categories=by, ordered=True)
+        target.sort_values(column, ascending=ascending, inplace=True, **kwargs)
+    except Exception as e:
+        print(f"Error sorting DataFrame by '{column}': {e}")
+        return df
+    if inplace:
+        print(f"Successfully sorted DataFrame by '{column}'")
+        return None
+    print(f"Successfully sorted DataFrame by '{column}' using custom order.")
+    return target
+# # Example usage:
+# # Sample DataFrame
+# data = {
+#     "month": ["March", "January", "February", "April", "December"],
+#     "Amount": [200, 100, 150, 300, 250],
+# }
+# df_month = pd.DataFrame(data)
+# # Define the month order
+# month_order = [
+#     "January",
+#     "February",
+#     "March",
+#     "April",
+#     "May",
+#     "June",
+#     "July",
+#     "August",
+#     "September",
+#     "October",
+#     "November",
+#     "December",
+# ]
+# display(df_month)
+# sorted_df_month = df_sort_values(df_month, "month", month_order, ascending=True)
+# display(sorted_df_month)
+# df_sort_values(df_month, "month", month_order, ascending=True, inplace=True)
+# display(df_month)

py2ls/plot.py CHANGED Viewed

@@ -17,6 +17,96 @@ from .stats import *
 logging.getLogger("fontTools").setLevel(logging.WARNING)
+def df_corr(
+    df,
+    columns="all",
+    tri="u",
+    mask=True,
+    k=1,
+    annot=True,
+    cmap="coolwarm",
+    fmt=".2f",
+    cluster=False,  # New parameter for clustermap option
+    figsize=(10, 8),
+    row_cluster=True,  # Perform clustering on rows
+    col_cluster=True,  # Perform clustering on columns
+    dendrogram_ratio=(0.2, 0.1),  # Adjust size of dendrograms
+    cbar_pos=(0.02, 0.8, 0.03, 0.2),  # Adjust colorbar position
+    xticklabels=True,  # Show column labels
+    yticklabels=True,  # Show row labels
+    **kwargs,
+):
+    """
+    Plot the correlation matrix of a DataFrame as a heatmap or a clustermap.
+    Parameters:
+    - df: Source DataFrame.
+    - columns: "all" to use the float/int columns, otherwise a column selection
+      that is passed to df[...].
+    - tri: Which triangle stays visible when mask is True; any value containing
+      "u" shows the upper triangle, anything else the lower triangle.
+    - mask: If True hide the opposite triangle; if False show the full matrix.
+    - k: Diagonal offset of the hidden region. 1 (default) keeps the main
+      diagonal visible, 0 hides it as well.
+    - annot, cmap, fmt: Passed to seaborn.
+    - cluster: If True draw sns.clustermap, otherwise sns.heatmap.
+    - figsize: Figure size for either plot type.
+    - row_cluster, col_cluster, dendrogram_ratio, cbar_pos, xticklabels,
+      yticklabels: Only used by the clustermap.
+    - **kwargs: Extra keyword arguments for the seaborn call.
+    Returns:
+    - cluster=True: (row dendrogram Axes, column dendrogram Axes, heatmap Axes).
+    - cluster=False: the heatmap Axes.
+    """
+    # Select numeric columns or specific subset of columns
+    if columns == "all":
+        df_numeric = df.select_dtypes(include=[float, int])
+    else:
+        df_numeric = df[columns]
+    # Compute the correlation matrix
+    correlation_matrix = df_numeric.corr()
+    # Build the mask; True cells are hidden by seaborn.
+    if mask:
+        ones = np.ones_like(correlation_matrix, dtype=bool)
+        if "u" in tri.lower():
+            # Show the upper triangle: hide cells k or more rows BELOW the diagonal.
+            # (A positive offset here would also hide the diagonal and the first
+            # super-diagonal, leaving a 2x2 matrix completely blank.)
+            mask_array = np.tril(ones, k=-k)
+        else:
+            # Show the lower triangle: hide cells k or more columns ABOVE the diagonal.
+            mask_array = np.triu(ones, k=k)
+    else:
+        mask_array = None
+    # "clustermap" is not a seaborn argument; drop it if a caller passed it.
+    # (The named parameters above can never appear in kwargs.)
+    kwargs.pop("clustermap", None)
+    # Plot the heatmap or clustermap
+    if cluster:
+        cluster_obj = sns.clustermap(
+            correlation_matrix,
+            mask=mask_array,
+            annot=annot,
+            cmap=cmap,
+            fmt=fmt,
+            figsize=figsize,
+            row_cluster=row_cluster,
+            col_cluster=col_cluster,
+            dendrogram_ratio=dendrogram_ratio,
+            cbar_pos=cbar_pos,
+            xticklabels=xticklabels,
+            yticklabels=yticklabels,
+            **kwargs,  # Pass any additional arguments to sns.clustermap
+        )
+        return (
+            cluster_obj.ax_row_dendrogram,
+            cluster_obj.ax_col_dendrogram,
+            cluster_obj.ax_heatmap,
+        )
+    # Standard heatmap; honour the caller's figsize instead of a fixed (10, 8).
+    plt.figure(figsize=figsize)
+    ax = sns.heatmap(
+        correlation_matrix,
+        mask=mask_array,
+        annot=annot,
+        cmap=cmap,
+        fmt=fmt,
+        **kwargs,  # Pass any additional arguments to sns.heatmap
+    )
+    # Return the Axes object for further customization if needed
+    return ax
 def catplot(data, *args, **kwargs):
     """
     catplot(data, opt=None, ax=None)

{py2ls-0.1.10.26.dist-info → py2ls-0.1.10.27.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py2ls
-Version: 0.1.10.26
+Version: 0.1.10.27
 Summary: py(thon)2(too)ls
 Author: Jianfeng
 Author-email: Jianfeng.Liu0413@gmail.com

{py2ls-0.1.10.26.dist-info → py2ls-0.1.10.27.dist-info}/RECORD RENAMED Viewed

@@ -207,15 +207,15 @@ py2ls/doc.py,sha256=xN3g1OWfoaGUhikbJ0NqbN5eKy1VZVvWwRlhHMgyVEc,4243
 py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,2325
 py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
 py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
-py2ls/ips.py,sha256=nhQBp5J2QjodplBXMCVBW-7w7rVzG6IalqCuaJgf7Xc,147799
+py2ls/ips.py,sha256=TMmk9kwxY8KWFWLbNpTxD4nad95uMpXgSqke0S-gAMo,161096
 py2ls/netfinder.py,sha256=vgOOMhzwbjRuLWMAPyf_kh3HoOhsJ9dlA-tCkMf7kNU,55371
 py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
-py2ls/plot.py,sha256=yj-AfnYNr1ha_Y5EimTsUVSooFc36nE0KCQ8cP9_Trs,97601
+py2ls/plot.py,sha256=vkKwppGLjErM6s1L0JOhukX54XbfKXUl6ojhVztCBN4,100538
 py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
 py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
 py2ls/stats.py,sha256=fJmXQ9Lq460StOn-kfEljE97cySq7876HUPTnpB5hLs,38123
 py2ls/translator.py,sha256=zBeq4pYZeroqw3DT-5g7uHfVqKd-EQptT6LJ-Adi8JY,34244
 py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
-py2ls-0.1.10.26.dist-info/METADATA,sha256=jwKMA56onOk6i2BpDchh8dsg8HTzNDbODlNNLu61On4,20040
-py2ls-0.1.10.26.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-py2ls-0.1.10.26.dist-info/RECORD,,
+py2ls-0.1.10.27.dist-info/METADATA,sha256=vby3fz3bfQ7SzPfG49id_MWTInQE1VGqFRxRca7sahs,20040
+py2ls-0.1.10.27.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+py2ls-0.1.10.27.dist-info/RECORD,,

{py2ls-0.1.10.26.dist-info → py2ls-0.1.10.27.dist-info}/WHEEL RENAMED Viewed

File without changes

py2ls 0.1.10.26__py3-none-any.whl → 0.1.10.27__py3-none-any.whl

py2ls 0.1.10.26__py3-none-any.whl → 0.1.10.27__py3-none-any.whl