PyPI - py2ls - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

py2ls 0.2.1py3-none-any.whl → 0.2.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

py2ls/data/sns_info.json +74 -0
py2ls/data/usages_sns.json +25 -0
py2ls/ips.py +1204 -505
py2ls/plot.py +808 -30
py2ls/stats.py +18 -9
{py2ls-0.2.1.dist-info → py2ls-0.2.2.dist-info}/METADATA +1 -1
{py2ls-0.2.1.dist-info → py2ls-0.2.2.dist-info}/RECORD +8 -6
{py2ls-0.2.1.dist-info → py2ls-0.2.2.dist-info}/WHEEL +0 -0

py2ls/plot.py CHANGED Viewed

@@ -9,24 +9,97 @@ import matplotlib.ticker as tck
 from cycler import cycler
 import logging
 import os
+import re
-from .ips import fsave, fload, mkdir, listdir, figsave
+from .ips import fsave, fload, mkdir, listdir, figsave, strcmp, unique, get_os, ssplit
 from .stats import *
+from .netfinder import get_soup, fetch
 # Suppress INFO messages from fontTools
 logging.getLogger("fontTools").setLevel(logging.WARNING)
-def df_corr(
-    df,
-    columns="all",
+def update_sns_usages(
+    url="https://seaborn.pydata.org/generated/seaborn.swarmplot.html",
+    dir_save=None,
+):
+    """
+    Collect the usage string of seaborn plotting functions and store them as JSON.
+    The page at ``url`` is loaded, every ``a.reference.internal`` href on it is
+    collected, and hrefs that contain "plot" and do not start with
+    "seaborn.JointGrid", "seaborn.PairGrid" or "seaborn.objects" are kept.
+    Each kept href is opened under https://seaborn.pydata.org/generated/ and the
+    first ``dt`` entry fetched from that page is used as the usage string.
+    Parameters:
+    - url (str): seaborn documentation page used as the starting point.
+    - dir_save (str): target directory; when None a developer-machine default
+      is chosen depending on ``get_os()``.
+    Saves:
+    - ``usages_sns.json`` in ``dir_save``: {function name without the
+      "seaborn." prefix: usage string without its final character}.
+    Returns:
+    - None
+    """
+    if dir_save is None:
+        # Developer-machine defaults, selected by operating system.
+        dir_save = (
+            "/Users/macjianfeng/Dropbox/github/python/py2ls/py2ls/data/"
+            if "mac" in get_os()
+            else "Z:\\Jianfeng\\temp\\"
+        )
+    def first_dt_entry(page_url):
+        # First <dt> entry fetched from one documentation page.
+        return fetch(get_soup(page_url, driver="se"), where="dt")[0]
+    excluded_prefixes = ("seaborn.JointGrid", "seaborn.PairGrid", "seaborn.objects")
+    hrefs = fetch(
+        get_soup(url, driver="se"), where="a", get="href", class_="reference internal"
+    )
+    kept_hrefs = unique(
+        [
+            href
+            for href in hrefs
+            if not href.startswith(excluded_prefixes) and "plot" in href
+        ]
+    )
+    page_urls = ["https://seaborn.pydata.org/generated/" + href for href in kept_hrefs]
+    # Fetch every page before building the mapping.
+    signatures = [first_dt_entry(page_url) for page_url in page_urls]
+    # Key: text before the first "(" without "seaborn."; value: signature minus its last character.
+    dict_usage = {
+        ssplit(sig, by="(")[0].replace("seaborn.", ""): sig[:-1] for sig in signatures
+    }
+    # save to local
+    if not dir_save.endswith("/"):
+        dir_save += "/"
+    fsave(dir_save + "usages_sns.json", dict_usage)
+def heatmap(
+    data,
+    ax=None,
+    kind="corr",  #'corr','direct','pivot'
+    columns="all",  # pivot, default: coll numeric columns
+    index=None,  # pivot
+    values=None,  # pivot
     tri="u",
     mask=True,
     k=1,
     annot=True,
     cmap="coolwarm",
     fmt=".2f",
-    cluster=False,  # New parameter for clustermap option
+    cluster=False,
+    inplace=False,
     figsize=(10, 8),
     row_cluster=True,  # Perform clustering on rows
     col_cluster=True,  # Perform clustering on columns
@@ -36,24 +109,115 @@ def df_corr(
     yticklabels=True,  # Show row labels
     **kwargs,
 ):
+    if ax is None and not cluster:
+        ax = plt.gca()
     # Select numeric columns or specific subset of columns
     if columns == "all":
-        df_numeric = df.select_dtypes(include=[float, int])
+        df_numeric = data.select_dtypes(include=[float, int])
     else:
-        df_numeric = df[columns]
-    # Compute the correlation matrix
-    correlation_matrix = df_numeric.corr()
+        df_numeric = data[columns]
+    kinds = ["corr", "direct", "pivot"]
+    kind = strcmp(kind, kinds)[0]
+    if kind == "corr":
+        # Compute the correlation matrix
+        data4heatmap = df_numeric.corr()
+        # Generate mask for the upper triangle if mask is True
+        if mask:
+            if "u" in tri.lower():  # upper => np.tril
+                mask_array = np.tril(np.ones_like(data4heatmap, dtype=bool), k=k)
+            else:  # lower => np.triu
+                mask_array = np.triu(np.ones_like(data4heatmap, dtype=bool), k=k)
+        else:
+            mask_array = None
+        # Remove conflicting kwargs
+        kwargs.pop("mask", None)
+        kwargs.pop("annot", None)
+        kwargs.pop("cmap", None)
+        kwargs.pop("fmt", None)
+        kwargs.pop("clustermap", None)
+        kwargs.pop("row_cluster", None)
+        kwargs.pop("col_cluster", None)
+        kwargs.pop("dendrogram_ratio", None)
+        kwargs.pop("cbar_pos", None)
+        kwargs.pop("xticklabels", None)
+        kwargs.pop("col_cluster", None)
+        # Plot the heatmap or clustermap
+        if cluster:
+            # Create a clustermap
+            cluster_obj = sns.clustermap(
+                data4heatmap,
+                # ax=ax,
+                mask=mask_array,
+                annot=annot,
+                cmap=cmap,
+                fmt=fmt,
+                figsize=figsize,  # Figure size, adjusted for professional display
+                row_cluster=row_cluster,  # Perform clustering on rows
+                col_cluster=col_cluster,  # Perform clustering on columns
+                dendrogram_ratio=dendrogram_ratio,  # Adjust size of dendrograms
+                cbar_pos=cbar_pos,  # Adjust colorbar position
+                xticklabels=xticklabels,  # Show column labels
+                yticklabels=yticklabels,  # Show row labels
+                **kwargs,  # Pass any additional arguments to sns.clustermap
+            )
+            df_row_cluster = pd.DataFrame()
+            df_col_cluster = pd.DataFrame()
+            if row_cluster:
+                from scipy.cluster.hierarchy import linkage, fcluster
+                from scipy.spatial.distance import pdist
+                # Compute pairwise distances
+                distances = pdist(data, metric="euclidean")
+                # Perform hierarchical clustering
+                linkage_matrix = linkage(distances, method="average")
+                # Get cluster assignments based on the distance threshold
+                row_clusters_value = fcluster(
+                    linkage_matrix, t=1.5, criterion="distance"
+                )
+                df_row_cluster["row_cluster"] = row_clusters_value
+            if col_cluster:
+                col_distances = pdist(
+                    data4heatmap.T, metric="euclidean"
+                )  # Transpose for column clustering
+                col_linkage_matrix = linkage(col_distances, method="average")
+                col_clusters_value = fcluster(
+                    col_linkage_matrix, t=1.5, criterion="distance"
+                )
+                df_col_cluster = pd.DataFrame(
+                    {"Cluster": col_clusters_value}, index=data4heatmap.columns
+                )
-    # Generate mask for the upper triangle if mask is True
-    if mask:
-        if "u" in tri.lower():  # upper => np.tril
-            mask_array = np.tril(np.ones_like(correlation_matrix, dtype=bool), k=k)
-        else:  # lower => np.triu
-            mask_array = np.triu(np.ones_like(correlation_matrix, dtype=bool), k=k)
+            return (
+                cluster_obj.ax_row_dendrogram,
+                cluster_obj.ax_col_dendrogram,
+                cluster_obj.ax_heatmap,
+                df_row_cluster,
+                df_col_cluster,
+            )
+        else:
+            # Create a standard heatmap
+            ax = sns.heatmap(
+                data4heatmap,
+                ax=ax,
+                mask=mask_array,
+                annot=annot,
+                cmap=cmap,
+                fmt=fmt,
+                **kwargs,  # Pass any additional arguments to sns.heatmap
+            )
+            # Return the Axes object for further customization if needed
+            return ax
+    elif kind == "direct":
+        data4heatmap = df_numeric
+    elif kind == "pivot":
+        print('need 3 param: e.g., index="Task", columns="Model", values="Score"')
+        data4heatmap = data.pivot(index=index, columns=columns, values=values)
     else:
-        mask_array = None
+        print(f'"{kind}" is not supported')
     # Remove conflicting kwargs
     kwargs.pop("mask", None)
     kwargs.pop("annot", None)
@@ -72,8 +236,9 @@ def df_corr(
     if cluster:
         # Create a clustermap
         cluster_obj = sns.clustermap(
-            correlation_matrix,
-            mask=mask_array,
+            data4heatmap,
+            # ax=ax,
+            # mask=mask_array,
             annot=annot,
             cmap=cmap,
             fmt=fmt,
@@ -86,18 +251,43 @@ def df_corr(
             yticklabels=yticklabels,  # Show row labels
             **kwargs,  # Pass any additional arguments to sns.clustermap
         )
+        df_row_cluster = pd.DataFrame()
+        df_col_cluster = pd.DataFrame()
+        if row_cluster:
+            from scipy.cluster.hierarchy import linkage, fcluster
+            from scipy.spatial.distance import pdist
+            # Compute pairwise distances
+            distances = pdist(data, metric="euclidean")
+            # Perform hierarchical clustering
+            linkage_matrix = linkage(distances, method="average")
+            # Get cluster assignments based on the distance threshold
+            row_clusters_value = fcluster(linkage_matrix, t=1.5, criterion="distance")
+            df_row_cluster["row_cluster"] = row_clusters_value
+        if col_cluster:
+            col_distances = pdist(
+                data4heatmap.T, metric="euclidean"
+            )  # Transpose for column clustering
+            col_linkage_matrix = linkage(col_distances, method="average")
+            col_clusters_value = fcluster(
+                col_linkage_matrix, t=1.5, criterion="distance"
+            )
+            df_col_cluster = pd.DataFrame(
+                {"Cluster": col_clusters_value}, index=data4heatmap.columns
+            )
         return (
             cluster_obj.ax_row_dendrogram,
             cluster_obj.ax_col_dendrogram,
             cluster_obj.ax_heatmap,
+            df_row_cluster,
+            df_col_cluster,
         )
     else:
         # Create a standard heatmap
-        plt.figure(figsize=figsize)
         ax = sns.heatmap(
-            correlation_matrix,
-            mask=mask_array,
+            data4heatmap,
+            ax=ax,
             annot=annot,
             cmap=cmap,
             fmt=fmt,
@@ -107,6 +297,60 @@ def df_corr(
         return ax
+# !usage: py2ls.plot.heatmap()
+# penguins_clean = penguins.replace([np.inf, -np.inf], np.nan).dropna()
+# from py2ls import plot
+# _, axs = plt.subplots(2, 2, figsize=(10, 10))
+# # kind='pivot'
+# plot.heatmap(
+#     ax=axs[0][0],
+#     data=sns.load_dataset("glue"),
+#     kind="pi",
+#     index="Model",
+#     columns="Task",
+#     values="Score",
+#     fmt=".1f",
+#     cbar_kws=dict(shrink=1),
+#     annot_kws=dict(size=7),
+# )
+# # kind='direct'
+# plot.heatmap(
+#     ax=axs[0][1],
+#     data=sns.load_dataset("penguins").iloc[:10, 2:6],
+#     kind="direct",
+#     tri="lower",
+#     fmt=".1f",
+#     k=1,
+#     cbar_kws=dict(shrink=1),
+#     annot_kws=dict(size=7),
+# )
+# # kind='corr'
+# plot.heatmap(
+#     ax=axs[1][0],
+#     data=sns.load_dataset("penguins"),
+#     kind="corr",
+#     fmt=".1f",
+#     k=-1,
+#     cbar_kws=dict(shrink=1),
+#     annot_kws=dict(size=7),
+# )
+# # kind='direct', clustered (cluster=True)
+# plot.heatmap(
+#     ax=axs[1][1],
+#     data=penguins_clean.iloc[:15, :10],
+#     kind="direct",
+#     tri="lower",
+#     fmt=".1f",
+#     k=1,
+#     annot=False,
+#     cluster=True,
+#     cbar_kws=dict(shrink=1),
+#     annot_kws=dict(size=7),
+# )
 def catplot(data, *args, **kwargs):
     """
     catplot(data, opt=None, ax=None)
@@ -1524,6 +1768,10 @@ def figsets(*args, **kwargs):
                         alignment='left')
         )
     """
+    import matplotlib
+    matplotlib.rc("text", usetex=False)
     fig = plt.gcf()
     fontsize = 11
     fontname = "Arial"
@@ -1615,6 +1863,16 @@ def figsets(*args, **kwargs):
                 if isinstance(value, list):
                     loc = []
                     for i in value:
+                        ax.tick_params(
+                            axis="both",
+                            which="both",
+                            bottom=False,
+                            top=False,
+                            left=False,
+                            right=False,
+                            labelbottom=False,
+                            labelleft=False,
+                        )
                         if ("l" in i.lower()) and ("a" not in i.lower()):
                             ax.yaxis.set_ticks_position("left")
                         if "r" in i.lower():
@@ -1624,12 +1882,38 @@ def figsets(*args, **kwargs):
                         if "b" in i.lower():
                             ax.xaxis.set_ticks_position("bottom")
                         if i.lower() in ["a", "both", "all", "al", ":"]:
-                            ax.xaxis.set_ticks_position("both")
-                            ax.yaxis.set_ticks_position("both")
+                            ax.tick_params(
+                                axis="both",  # Apply to both axes
+                                which="both",  # Apply to both major and minor ticks
+                                bottom=True,  # Show ticks at the bottom
+                                top=True,  # Show ticks at the top
+                                left=True,  # Show ticks on the left
+                                right=True,  # Show ticks on the right
+                                labelbottom=True,  # Show labels at the bottom
+                                labelleft=True,  # Show labels on the left
+                            )
                         if i.lower() in ["xnone", "xoff", "none"]:
-                            ax.xaxis.set_ticks_position("none")
+                            ax.tick_params(
+                                axis="x",
+                                which="both",
+                                bottom=False,
+                                top=False,
+                                left=False,
+                                right=False,
+                                labelbottom=False,
+                                labelleft=False,
+                            )
                         if i.lower() in ["ynone", "yoff", "none"]:
-                            ax.yaxis.set_ticks_position("none")
+                            ax.tick_params(
+                                axis="y",
+                                which="both",
+                                bottom=False,
+                                top=False,
+                                left=False,
+                                right=False,
+                                labelbottom=False,
+                                labelleft=False,
+                            )
             # ticks / labels
             elif "x" in key.lower():
                 if value is None:
@@ -1674,6 +1958,10 @@ def figsets(*args, **kwargs):
         if "bo" in key in key:  # box setting, and ("p" in key or "l" in key):
             if isinstance(value, (str, list)):
+                # locations = ["left", "right", "top", "bottom"]
+                # for loc, spi in ax.spines.items():
+                #     if loc in locations:
+                #         spi.set_color("none")  # no spine
                 locations = []
                 for i in value:
                     if "l" in i.lower() and not "t" in i.lower():
@@ -1689,12 +1977,12 @@ def figsets(*args, **kwargs):
                             locations.append(x)
                             for x in ["left", "right", "top", "bottom"]
                         ]
-                for i in value:
-                    if i.lower() in "none":
-                        locations = []
+                if "none" in value:
+                    locations = []  # hide all
                 # check spines
                 for loc, spi in ax.spines.items():
                     if loc in locations:
+                        # spi.set_color("k")
                         spi.set_position(("outward", 0))
                     else:
                         spi.set_color("none")  # no spine
@@ -2527,3 +2815,493 @@ def thumbnail(dir_img_list: list, figsize=(10, 10), dpi=100, show=False, usage=F
     plt.tight_layout()
     if show:
         plt.show()
+def get_params_from_func_usage(function_signature):
+    """
+    Return the names that are directly followed by "=" in a usage string.
+    Names written immediately after "**" are not reported.
+    e.g., "f(data=None, *, x=None, **kwargs)" -> ["data", "x"]
+    """
+    keyword_regex = re.compile(r"(?<!\*\*)\b(\w+)=")
+    return [found.group(1) for found in keyword_regex.finditer(function_signature)]
+def plot_xy(
+    data: pd.DataFrame = None,
+    x=None,
+    y=None,
+    ax=None,
+    kind: str = None,  # Specify the kind of plot (one name or a list of names)
+    usage=False,
+    # kws_figsets:dict=None,
+    **kwargs,
+):
+    """
+    Draw one or several seaborn plots of ``data``, one after the other.
+    e.g., plot_xy(data=data_log, x="Component_1", y="Component_2", hue="Cluster", kind="scatter")
+    Parameters:
+        data (pd.DataFrame): DataFrame containing the data.
+        x (str): Column name for the x-axis.
+        y (str): Column name for the y-axis.
+        ax: Matplotlib axes object for the plot; ``plt.gca()`` when None.
+        kind (str or list of str): Plot kind(s). Every entry is matched with
+            ``strcmp`` against the keys of the packaged ``usages_sns.json``.
+            When None, only the usage help is printed.
+        usage (bool): If True, print the usage help instead of plotting.
+        **kwargs:
+            hue (str): Column name for the hue (color) grouping.
+            kws_<kind> (dict): options for a single kind, named after the kind
+                without its "plot" suffix (kws_scatter, kws_rug, kws_joint, ...).
+                An entry "hue" inside such a dict overrides the shared ``hue``.
+            any key containing "figset" (dict): forwarded to ``figsets``.
+            Everything else is shared by all kinds that have no ``kws_<kind>``.
+    Returns:
+        None when only the usage help is printed; ``(g, ax)`` when a
+        jointplot/lmplot/catplot/displot result was created; otherwise ``ax``.
+    """
+    # The JSON tables ship with the package (py2ls/data); resolve them relative
+    # to this module instead of relying on one developer's absolute path.
+    dir_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+    default_settings = fload(os.path.join(dir_data, "usages_sns.json"))
+    sns_info = pd.DataFrame(fload(os.path.join(dir_data, "sns_info.json")))
+    valid_kinds = list(default_settings.keys())
+    def describe(name):
+        # Last column of the first sns_info row whose "Functions" contains name;
+        # "" when no row matches, so a missing entry cannot abort after plotting.
+        hits = sns_info[sns_info["Functions"].str.contains(name)].iloc[:, -1].tolist()
+        return hits[0] if hits else ""
+    if kind is not None:
+        if isinstance(kind, str):
+            kind = [kind]
+        kind = [strcmp(i, valid_kinds)[0] for i in kind]
+    else:
+        usage = True
+    if usage:
+        if kind is not None:
+            for k in kind:
+                if k in valid_kinds:
+                    print(f"{k}:\n\t{default_settings[k]}")
+                    print(describe(k))
+                    print()
+        usage_str = """plot_xy(data=ranked_genes,
+        x="log2(fold_change)",
+        y="-log10(p-value)",
+        palette=get_color(3, cmap="coolwarm"),
+        kind=["scatter","rug"],
+        kws_rug=dict(height=0.2),
+        kws_scatter=dict(s=20, color=get_color(3)[2]),
+        usage=0)
+        """
+        print(f"currently support to plot:\n{valid_kinds}\n\nusage:\n{usage_str}")
+        return  # Do not plot, just print the usage
+    # Take the figsets options out of kwargs so they never reach seaborn.
+    kws_figsets = {}
+    key_figsets = next((key for key in kwargs if "figset" in key), None)
+    if key_figsets is not None:
+        kws_figsets = kwargs.pop(key_figsets)
+    def kws_of(name, explicit_hue=True):
+        # Return (hue, options) for one kind.
+        # "scatterplot" -> "kws_scatter", "catplot_sns" -> "kws_cat"
+        key = "kws_" + name.replace("_sns", "")[: -len("plot")]
+        if key in kwargs:
+            kws = dict(kwargs[key])  # copy: the caller's dict is left untouched
+        else:
+            # Shared options only; other kinds' kws_* dicts must not leak in.
+            kws = {k_: v_ for k_, v_ in kwargs.items() if not k_.startswith("kws_")}
+        if not explicit_hue:
+            # The plotting call does not pass hue itself; leave kws as given.
+            return None, kws
+        return kws.pop("hue", kwargs.get("hue", None)), kws
+    sns_with_col = [
+        "catplot",
+        "histplot",
+        "relplot",
+        "lmplot",
+        "pairplot",
+        "displot",
+        "kdeplot",
+    ]
+    grid_xy = ("jointplot", "lmplot", "catplot_sns")
+    axes_xy = (
+        "stripplot",
+        "swarmplot",
+        "boxplot",
+        "violinplot",
+        "boxenplot",
+        "pointplot",
+        "barplot",
+        "lineplot",
+    )
+    axes_x = ("histplot", "kdeplot", "ecdfplot", "rugplot", "countplot")
+    g = None
+    has_g = False
+    for k in kind:
+        # indicate 'col' features
+        col = kwargs.get("col", None)
+        if col is not None and k not in sns_with_col:
+            print(f"tips:\n'{k}' has no 'col' param, you could try with {sns_with_col}")
+        # (1) figure-level functions: they create their own figure
+        if k in grid_xy:
+            hue, kws = kws_of(k)
+            g = getattr(sns, k.replace("_sns", ""))(data=data, x=x, y=y, hue=hue, **kws)
+            has_g = True
+        elif k == "displot":
+            hue, kws = kws_of(k)
+            g = sns.displot(data=data, x=x, hue=hue, **kws)
+            has_g = True
+        # (2) axes-level functions
+        if ax is None:
+            ax = plt.gca()
+        if k == "catplot":
+            # py2ls' own catplot, not sns.catplot
+            _, kws = kws_of(k, explicit_hue=False)
+            g = catplot(data=data, x=x, y=y, ax=ax, **kws)
+            has_g = True
+        elif k == "scatterplot":
+            hue, kws = kws_of(k)
+            if "palette" not in kws:
+                kws["palette"] = (
+                    sns.color_palette("tab10", data[hue].nunique())
+                    if hue is not None
+                    else sns.color_palette("tab10")
+                )
+            kws.setdefault("s", 10)
+            kws.setdefault("alpha", 0.7)
+            ax = sns.scatterplot(ax=ax, data=data, x=x, y=y, hue=hue, **kws)
+        elif k in axes_xy:
+            hue, kws = kws_of(k)
+            ax = getattr(sns, k)(data=data, x=x, y=y, hue=hue, ax=ax, **kws)
+        elif k in axes_x:
+            hue, kws = kws_of(k)
+            ax = getattr(sns, k)(data=data, x=x, hue=hue, ax=ax, **kws)
+        elif k == "regplot":
+            _, kws = kws_of(k, explicit_hue=False)
+            ax = sns.regplot(data=data, x=x, y=y, ax=ax, **kws)
+        elif k == "residplot":
+            _, kws = kws_of(k, explicit_hue=False)
+            kws.setdefault("lowess", True)
+            ax = sns.residplot(data=data, x=x, y=y, ax=ax, **kws)
+        figsets(**kws_figsets)
+        print(k, " ⤵ ")
+        print(default_settings[k])
+        print("=>\t", describe(k))
+        print()
+    if has_g and ax is not None:
+        return g, ax
+    return ax
+def volcano(
+    data,
+    x,
+    y,
+    gene_col=None,
+    top_genes=5,
+    thr_x=np.log2(1.5),
+    thr_y=-np.log10(0.05),
+    colors=("#e70b0b", "#0d26e3", "#b8bbbe"),
+    s=20,
+    fill=True,  # plot filled scatter
+    facecolor="none",
+    edgecolor="none",
+    edgelinewidth=0.5,
+    alpha=0.8,
+    legend=False,
+    ax=None,
+    usage=False,
+    kws_arrow=None,
+    kws_text=None,
+    **kwargs,
+):
+    """
+    Generates a customizable scatter plot (e.g., volcano plot).
+    NOTE: a column named "color" is written into ``data`` in place.
+    Parameters:
+    -----------
+    data : pd.DataFrame
+        The DataFrame containing the data to plot.
+    x : str
+        Column name for x-axis values (e.g., log2FoldChange).
+    y : str
+        Column name for y-axis values (e.g., -log10(FDR)).
+    gene_col : str, optional
+        Column name for gene names. If provided, the top points are labelled. Default is None.
+    top_genes : int, optional
+        Number of points to label per colour group, ranked by y. Default is 5.
+    thr_x : float, optional
+        Threshold for x-axis values, applied as +thr_x and -thr_x. Default is np.log2(1.5).
+    thr_y : float, optional
+        Threshold for y-axis values (e.g., significance threshold). Default is -np.log10(0.05).
+    colors : tuple, optional
+        Three colours used in this order:
+        colors[0] for x < -thr_x and y > thr_y,
+        colors[1] for all remaining (neutral) points,
+        colors[2] for x > thr_x and y > thr_y.
+    s : int, optional
+        Size of points in the plot. Default is 20.
+    fill : bool, optional
+        True: points are filled with their colour and outlined with ``edgecolor``.
+        False: points are outlined with their colour and filled with ``facecolor``.
+    facecolor, edgecolor : optional
+        See ``fill``. Default is "none" for both.
+    edgelinewidth : float, optional
+        Passed to the scatter plot as ``linewidths``. Default is 0.5.
+    alpha : float, optional
+        Transparency of the points. Default is 0.8.
+    legend : bool, optional
+        Whether to show a legend. Default is False.
+    ax : matplotlib axes, optional
+        Axes to draw on; ``plt.gca()`` when None.
+    usage : bool, optional
+        If True, print an example call and return without plotting.
+    kws_arrow : dict, optional
+        Arrow options for the labels: style ("-"), color ("0.5"), lw (0.5),
+        shrinkA (5), shrinkB (5); other entries are added to ``arrowprops``.
+    kws_text : dict, optional
+        Label options: fontname ("Arial"), color ("k"), fontsize (10).
+        When ``color`` is a list, each label takes the colour of its side
+        (colors[2] for x > 0, colors[0] otherwise).
+    **kwargs :
+        A key containing "figset" (dict) is forwarded to ``figsets``; everything
+        else is forwarded to ``sns.scatterplot``.
+    """
+    usage_str = """
+    _, axs = plt.subplots(1, 1, figsize=(4, 5))
+    volcano(
+        ax=axs,
+        data=ranked_genes,
+        x="log2(fold_change)",
+        y="-log10(p-value)",
+        gene_col="ID_REF",
+        top_genes=6,
+        thr_x=np.log2(1.2),
+        # thr_y=-np.log10(0.05),
+        colors=("#00BFFF", "#9d9a9a", "#FF3030"),
+        fill=0,
+        alpha=1,
+        facecolor="none",
+        s=20,
+        edgelinewidth=0.5,
+        edgecolor="0.5",
+        kws_text=dict(fontsize=10, color="k"),
+        kws_arrow=dict(style="-", color="k", lw=0.5),
+        # usage=True,
+        figsets=dict(ylim=[0, 10], title="df"),
+    )
+    """
+    if usage:
+        print(usage_str)
+        return
+    # Take the figsets options out of kwargs so they never reach seaborn.
+    kws_figsets = {}
+    key_figsets = next((key for key in kwargs if "figset" in key), None)
+    if key_figsets is not None:
+        kws_figsets = kwargs.pop(key_figsets)
+    color_low, color_neutral, color_high = colors[0], colors[1], colors[2]
+    # Color-coding based on thresholds using np.where
+    data["color"] = np.where(
+        (data[x] > thr_x) & (data[y] > thr_y),
+        color_high,
+        np.where((data[x] < -thr_x) & (data[y] > thr_y), color_low, color_neutral),
+    )
+    # Points to label: drop the neutral ones, then keep the top_genes highest-y
+    # rows of each remaining colour group (also works when nothing passes).
+    sele_gene = (
+        data[data["color"] != color_neutral]
+        .sort_values(y, ascending=False)
+        .groupby("color")
+        .head(top_genes)
+    )
+    palette = {colors[0]: colors[0], colors[1]: colors[1], colors[2]: colors[2]}
+    # Plot setup
+    if ax is None:
+        ax = plt.gca()
+    # Handle fill parameter
+    if fill:
+        facecolors = data["color"]  # Fill with colors
+        edgecolors = edgecolor  # Set edgecolor
+    else:
+        facecolors = facecolor  # No fill: the point colour goes to the edge
+        edgecolors = data["color"]
+    ax = sns.scatterplot(
+        ax=ax,
+        data=data,
+        x=x,
+        y=y,
+        # hue="color",
+        palette=palette,
+        s=s,
+        linewidths=edgelinewidth,
+        color=facecolors,
+        edgecolor=edgecolors,
+        alpha=alpha,
+        legend=legend,
+        **kwargs,
+    )
+    # Add threshold lines on the axes that hold the scatter (not on "current" axes)
+    ax.axhline(y=thr_y, color="black", linestyle="--")
+    ax.axvline(x=-thr_x, color="black", linestyle="--")
+    ax.axvline(x=thr_x, color="black", linestyle="--")
+    # Add gene labels for selected significant points
+    if gene_col:
+        # Only needed for labelling, so plots without labels work without adjustText.
+        from adjustText import adjust_text
+        # Copies: the caller's dicts are not modified, and None means "defaults".
+        kws_text = dict(kws_text) if kws_text else {}
+        kws_arrow = dict(kws_arrow) if kws_arrow else {}
+        fontname = kws_text.pop("fontname", "Arial")
+        textcolor = kws_text.pop("color", "k")
+        fontsize = kws_text.pop("fontsize", 10)
+        texts = []
+        for gene_x, gene_y, gene_name in zip(
+            sele_gene[x], sele_gene[y], sele_gene[gene_col]
+        ):
+            if isinstance(textcolor, list):  # be consistent with the dot colors
+                label_color = color_high if gene_x > 0 else color_low
+            else:
+                label_color = textcolor
+            texts.append(
+                ax.text(
+                    x=gene_x,
+                    y=gene_y,
+                    s=gene_name,
+                    fontdict={
+                        "fontsize": fontsize,
+                        "color": label_color,
+                        "fontname": fontname,
+                    },
+                )
+            )
+        arrowstyles = [
+            "-",
+            "->",
+            "-[",
+            "|->",
+            "<-",
+            "<->",
+            "<|-",
+            "<|-|>",
+            "-|>",
+            "-[ ",
+            "fancy",
+            "simple",
+            "wedge",
+        ]
+        arrowstyle = kws_arrow.pop("style", "-")
+        arrowcolor = kws_arrow.pop("color", "0.5")
+        arrowlinewidth = kws_arrow.pop("lw", 0.5)
+        shrinkA = kws_arrow.pop("shrinkA", 5)
+        shrinkB = kws_arrow.pop("shrinkB", 5)
+        arrowstyle = strcmp(arrowstyle, arrowstyles)[0]
+        adjust_text(
+            texts,
+            ax=ax,
+            expand_text=(1.05, 1.2),
+            arrowprops=dict(
+                arrowstyle=arrowstyle,
+                color=arrowcolor,
+                lw=arrowlinewidth,
+                shrinkA=shrinkA,
+                shrinkB=shrinkB,
+                **kws_arrow,
+            ),
+        )
+    figsets(**kws_figsets)
+def sns_func_info(dir_save=None):
+    sns_info = {
+        "Functions": [
+            "relplot",
+            "scatterplot",
+            "lineplot",
+            "lmplot",
+            "catplot",
+            "stripplot",
+            "boxplot",
+            "violinplot",
+            "boxenplot",
+            "pointplot",
+            "barplot",
+            "countplot",
+            "displot",
+            "histplot",
+            "kdeplot",
+            "ecdfplot",
+            "rugplot",
+            "regplot",
+            "residplot",
+            "pairplot",
+            "jointplot",
+            "plotting_context",
+        ],
+        "Category": [
+            "relational",
+            "relational",
+            "relational",
+            "relational",
+            "categorical",
+            "categorical",
+            "categorical",
+            "categorical",
+            "categorical",
+            "categorical",
+            "categorical",
+            "categorical",
+            "distribution",
+            "distribution",
+            "distribution",
+            "distribution",
+            "distribution",
+            "regression",
+            "regression",
+            "grid-based(fig)",
+            "grid-based(fig)",
+            "context",
+        ],
+        "Detail": [
+            "A figure-level function for creating scatter plots and line plots. It combines the functionality of scatterplot and lineplot.",
+            "A function for creating scatter plots, useful for visualizing the relationship between two continuous variables.",
+            "A function for drawing line plots, often used to visualize trends over time or ordered categories.",
+            "A figure-level function for creating linear model plots, combining regression lines with scatter plots.",
+            "A figure-level function for creating categorical plots, which can display various types of plots like box plots, violin plots, and bar plots in one function.",
+            "A function for creating a scatter plot where one of the variables is categorical, helping visualize distribution along a categorical axis.",
+            "A function for creating box plots, which summarize the distribution of a continuous variable based on a categorical variable.",
+            "A function for creating violin plots, which combine box plots and KDEs to visualize the distribution of data.",
+            "A function for creating boxen plots, an enhanced version of box plots that better represent data distributions with more quantiles.",
+            "A function for creating point plots, which show the mean (or another estimator) of a variable for each level of a categorical variable.",
+            "A function for creating bar plots, which represent the mean (or other estimators) of a variable with bars, typically used with categorical data.",
+            "A function for creating count plots, which show the counts of observations in each categorical bin.",
+            "A figure-level function that creates distribution plots. It can visualize histograms, KDEs, and ECDFs, making it versatile for analyzing the distribution of data.",
+            "A function for creating histograms, useful for showing the frequency distribution of a continuous variable.",
+            "A function for creating kernel density estimate (KDE) plots, which visualize the probability density function of a continuous variable.",
+            "A function for creating empirical cumulative distribution function (ECDF) plots, which show the proportion of observations below a certain value.",
+            "A function that adds a rug plot to the axes, representing individual data points along an axis.",
+            "A function for creating regression plots, which fit and visualize a regression model on scatter data.",
+            "A function for creating residual plots, useful for diagnosing the fit of a regression model.",
+            "A figure-level function that creates a grid of scatter plots for each pair of variables in a dataset, often used for exploratory data analysis.",
+            "A figure-level function that combines scatter plots and histograms (or KDEs) to visualize the relationship between two variables and their distributions.",
+            "Not a plot itself, but a function that allows you to change the context (style and scaling) of your plots to fit different publication requirements or visual preferences.",
+        ],
+    }
+    if dir_save is None:
+        if "mac" in get_os():
+            dir_save = "/Users/macjianfeng/Dropbox/github/python/py2ls/py2ls/data/"
+        else:
+            dir_save = "Z:\\Jianfeng\\temp\\"
+    dir_save += "/" if not dir_save.endswith("/") else ""
+    fsave(
+        dir_save + "sns_info.json",
+        sns_info,
+    )

py2ls 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

py2ls 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl