PyPI - py2ls - Versions diffs - 0.2.4.4__py3-none-any.whl → 0.2.4.6__py3-none-any.whl - Mend

py2ls-0.2.4.4-py3-none-any.whl → py2ls-0.2.4.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

py2ls/.git/index +0 -0
py2ls/bio.py +959 -38
py2ls/ips.py +15 -6
py2ls/mol.py +289 -0
py2ls/plot.py +304 -109
{py2ls-0.2.4.4.dist-info → py2ls-0.2.4.6.dist-info}/METADATA +1 -1
{py2ls-0.2.4.4.dist-info → py2ls-0.2.4.6.dist-info}/RECORD +8 -7
{py2ls-0.2.4.4.dist-info → py2ls-0.2.4.6.dist-info}/WHEEL +0 -0

py2ls/bio.py CHANGED Viewed

@@ -324,7 +324,7 @@ def find_condition(data:pd.DataFrame, columns=["characteristics_ch1","title"]):
     # 详细看看每个信息的有哪些类, 其中有数字的, 要去除
     for col in columns:
         print(f"{"="*10} {col} {"="*10}")
-        display(ips.flatten([ips.ssplit(i, by="numer")[0] for i in data[col]]))
+        display(ips.flatten([ips.ssplit(i, by="numer")[0] for i in data[col]],verbose=False))
 def add_condition(
     data: pd.DataFrame,
@@ -581,7 +581,7 @@ def batch_effect(
     return df_corrected
 def get_common_genes(elment1, elment2):
-    common_genes=ips.shared(elment1, elment2)
+    common_genes=ips.shared(elment1, elment2,verbose=False)
     return common_genes
 def counts2expression(
@@ -667,7 +667,7 @@ def counts2expression(
             length.index=length.index.astype(str).str.strip()
             counts.columns = counts.columns.astype(str).str.strip()
-            shared_genes=ips.shared(length.index, counts.columns)
+            shared_genes=ips.shared(length.index, counts.columns,verbose=False)
             length=length.loc[shared_genes]
             counts=counts.loc[:,shared_genes]
             columns_org = counts.columns.tolist()
@@ -814,7 +814,11 @@ def counts_deseq(counts_sam_gene: pd.DataFrame,
     #     .reset_index()
     #     .rename(columns={"index": "gene"})
     # )
-    return dds, diff,stat_res
+    df_norm=pd.DataFrame(dds.layers['normed_counts'])
+    df_norm.index=counts_sam_gene.index
+    df_norm.columns=counts_sam_gene.columns
+    print("res[0]: dds\nres[1]:diff\nres[2]:stat_res\nres[3]:df_normalized")
+    return dds, diff, stat_res,df_norm
 def scope_genes(gene_list: list, scopes:str=None, fields: str = "symbol", species="human"):
     """
@@ -842,6 +846,7 @@ def scope_genes(gene_list: list, scopes:str=None, fields: str = "symbol", specie
 def get_enrichr(gene_symbol_list,
                 gene_sets:str,
+                download:bool = False,
                 species='Human',
                 dir_save="./",
                 plot_=False,
@@ -854,6 +859,7 @@ def get_enrichr(gene_symbol_list,
                 title=None,# 'KEGG'
                 cutoff=0.05,
                 cmap="coolwarm",
+                size=5,
                 **kwargs):
     """
     Note: Enrichr uses a list of Entrez gene symbols as input.
@@ -878,16 +884,22 @@ def get_enrichr(gene_symbol_list,
         lib_support_names = gp.get_library_name()
         # correct input gene_set name
         gene_sets_name=ips.strcmp(gene_sets,lib_support_names)[0]
         # download it
-        gene_sets = gp.get_library(name=gene_sets_name, organism=species)
-    print(f"gene_sets get ready: {gene_sets_name}")
+        if download:
+            gene_sets = gp.get_library(name=gene_sets_name, organism=species)
+        else:
+            gene_sets = gene_sets_name # 避免重复下载
+    print(f"\ngene_sets get ready: {gene_sets_name}")
     # gene symbols are uppercase
     gene_symbol_list=[str(i).upper() for i in gene_symbol_list]
     # # check how shared genes
-    if check_shared:
-        shared_genes=ips.shared(ips.flatten(gene_symbol_list,verbose=False), ips.flatten(gene_sets,verbose=False))
+    if check_shared and isinstance(gene_sets, dict):
+        shared_genes=ips.shared(ips.flatten(gene_symbol_list,verbose=False),
+                                ips.flatten(gene_sets,verbose=False),
+                                verbose=False)
     #! enrichr
     try:
@@ -903,13 +915,13 @@ def get_enrichr(gene_symbol_list,
         return None
     results_df = enr.results
-    print(f"got enrichr reslutls; shape: {results_df.shape}")
+    print(f"got enrichr reslutls; shape: {results_df.shape}\n")
     results_df["-log10(Adjusted P-value)"] = -np.log10(results_df["Adjusted P-value"])
     results_df.sort_values("-log10(Adjusted P-value)", inplace=True, ascending=False)
     if plot_:
         if palette is None:
-            palette=plot.get_color(n_top, cmap="coolwarm")[::-1]
+            palette=plot.get_color(n_top, cmap=cmap)[::-1]
         #! barplot
         if n_top<5:
             height_=4
@@ -921,11 +933,12 @@ def get_enrichr(gene_symbol_list,
             height_=7
         elif 15<=n_top<20:
             height_=8
-        elif 25<=n_top<30:
+        elif 20<=n_top<30:
             height_=9
         else:
             height_=int(n_top/3)
-        plt.figure(figsize=[5, height_])
+        plt.figure(figsize=[10, height_])
         ax1=plot.plotxy(
             data=results_df.head(n_top),
             kind="barplot",
@@ -935,18 +948,17 @@ def get_enrichr(gene_symbol_list,
             palette=palette,
             legend=None,
         )
+        plot.figsets(ax=ax1, **kws_figsets)
         if dir_save:
             ips.figsave(f"{dir_save} enr_barplot.pdf")
-        plot.figsets(ax=ax1, **kws_figsets)
         plt.show()
         #! dotplot
         cutoff_curr = cutoff
         step=0.05
         cutoff_stop = 0.5
-        while cutoff_curr <=cutoff_stop:
+        while cutoff_curr <= cutoff_stop:
             try:
-                print(kws_figsets)
                 if cutoff_curr!=cutoff:
                     plt.clf()
                 ax2 = gp.dotplot(enr.res2d,
@@ -957,7 +969,8 @@ def get_enrichr(gene_symbol_list,
                                 cmap=cmap,
                                 cutoff=cutoff_curr,
                                 top_term=n_top,
-                                figsize=[6, height_])
+                                size=size,
+                                figsize=[10, height_])
                 if len(ax2.collections)>=n_top:
                     print(f"cutoff={cutoff_curr} done! ")
                     break
@@ -975,7 +988,813 @@ def get_enrichr(gene_symbol_list,
     return results_df
+def plot_enrichr(results_df,
+                 kind="bar",# 'barplot', 'dotplot'
+                 cutoff=0.05,
+                 show_ring=False,
+                 xticklabels_rot=0,
+                 title=None,# 'KEGG'
+                 cmap="coolwarm",
+                 n_top=10,
+                 size=5,
+                 ax=None,
+                 **kwargs):
+    kws_figsets = {}
+    for k_arg, v_arg in kwargs.items():
+        if "figset" in k_arg:
+            kws_figsets = v_arg
+            kwargs.pop(k_arg, None)
+            break
+    if isinstance(cmap,str):
+        palette = plot.get_color(n_top, cmap=cmap)[::-1]
+    elif isinstance(cmap,list):
+        palette=cmap
+    if n_top < 5:
+        height_ = 3
+    elif 5 <= n_top < 10:
+        height_ = 3
+    elif 10 <= n_top < 15:
+        height_ = 3
+    elif 15 <= n_top < 20:
+        height_ =4
+    elif 20 <= n_top < 30:
+        height_ = 5
+    elif 30 <= n_top < 40:
+        height_ = int(n_top / 6)
+    else:
+        height_ = int(n_top / 8)
+    #! barplot
+    if 'bar' in kind.lower():
+        if ax is None:
+            _,ax=plt.subplots(1,1,figsize=[10, height_])
+        ax=plot.plotxy(
+            data=results_df.head(n_top),
+            kind="barplot",
+            x="-log10(Adjusted P-value)",
+            y="Term",
+            hue="Term",
+            palette=palette,
+            legend=None,
+        )
+        plot.figsets(ax=ax, **kws_figsets)
+        return ax,results_df
+    #! dotplot
+    elif 'dot' in kind.lower():
+        #! dotplot
+        cutoff_curr = cutoff
+        step=0.05
+        cutoff_stop = 0.5
+        while cutoff_curr <= cutoff_stop:
+            try:
+                if cutoff_curr!=cutoff:
+                    plt.clf()
+                ax = gp.dotplot(results_df,
+                                column="Adjusted P-value",
+                                show_ring=show_ring,
+                                xticklabels_rot=xticklabels_rot,
+                                title=title,
+                                cmap=cmap,
+                                cutoff=cutoff_curr,
+                                top_term=n_top,
+                                size=size,
+                                figsize=[10, height_])
+                if len(ax.collections)>=n_top:
+                    print(f"cutoff={cutoff_curr} done! ")
+                    break
+                if cutoff_curr==cutoff_stop:
+                    break
+                cutoff_curr+=step
+            except Exception as e:
+                cutoff_curr+=step
+                print(f"Warning: trying cutoff={cutoff_curr}, cutoff={cutoff_curr-step} failed: {e} ")
+        plot.figsets(ax=ax, **kws_figsets)
+        return ax,results_df
+    #! barplot with counts
+    elif 'count' in kind.lower():
+        if ax is None:
+            _,ax=plt.subplots(1,1,figsize=[10, height_])
+        # 从overlap中提取出个数
+        results_df["Count"] = results_df["Overlap"].apply(
+        lambda x: int(x.split("/")[0]) if isinstance(x, str) else x)
+        df_=results_df.sort_values(by="Count", ascending=False)
+        ax=plot.plotxy(
+            data=df_.head(n_top),
+            kind="barplot",
+            x="Count",
+            y="Term",
+            hue="Term",
+            palette=palette,
+            legend=None,
+            ax=ax
+        )
+        plot.figsets(ax=ax, **kws_figsets)
+        return ax,df_
+def plot_bp_cc_mf(
+    deg_gene_list,
+    gene_sets=[
+        "GO_Biological_Process_2023",
+        "GO_Cellular_Component_2023",
+        "GO_Molecular_Function_2023",
+    ],
+    species="human",
+    download=False,
+    n_top=10,
+    plot_=True,
+    ax=None,
+    palette=plot.get_color(3),
+    **kwargs,
+):
+    def res_enrichr_2_count(res_enrichr, n_top=10):
+        """把enrich resulst 提取出count,并排序"""
+        res_enrichr["Count"] = res_enrichr["Overlap"].apply(
+            lambda x: int(x.split("/")[0]) if isinstance(x, str) else x
+        )
+        res_enrichr.sort_values(by="Count", ascending=False, inplace=True)
+        return res_enrichr.head(n_top)#[["Term", "Count"]]
+    res_enrichr_BP = get_enrichr(
+        deg_gene_list, gene_sets[0], species=species, plot_=False,download=download
+    )
+    res_enrichr_CC = get_enrichr(
+        deg_gene_list, gene_sets[1], species=species, plot_=False,download=download
+    )
+    res_enrichr_MF = get_enrichr(
+        deg_gene_list, gene_sets[2], species=species, plot_=False,download=download
+    )
+    df_BP = res_enrichr_2_count(res_enrichr_BP, n_top=n_top)
+    df_BP["Ontology"] = ["BP"] * n_top
+    df_CC = res_enrichr_2_count(res_enrichr_CC, n_top=n_top)
+    df_CC["Ontology"] = ["CC"] * n_top
+    df_MF = res_enrichr_2_count(res_enrichr_MF, n_top=n_top)
+    df_MF["Ontology"] = ["MF"] * n_top
+    # 合并
+    df2plot = pd.concat([df_BP, df_CC, df_MF])
+    n_top=n_top*3
+    if n_top < 5:
+        height_ = 4
+    elif 5 <= n_top < 10:
+        height_ = 5
+    elif 10 <= n_top < 15:
+        height_ = 6
+    elif 15 <= n_top < 20:
+        height_ = 7
+    elif 20 <= n_top < 30:
+        height_ = 8
+    elif 30 <= n_top < 40:
+        height_ = int(n_top / 4)
+    else:
+        height_ = int(n_top / 5)
+    if ax is None:
+        _,ax=plt.subplots(1,1,figsize=[10, height_])
+    # 作图
+    display(df2plot)
+    if df2plot["Term"].tolist()[0].endswith(")"):
+        df2plot["Term"] = df2plot["Term"].apply(lambda x: x.split("(")[0][:-1])
+    if plot_:
+        ax = plot.plotxy(
+            data=df2plot,
+            x="Count",
+            y="Term",
+            hue="Ontology",
+            kind="bar",
+            palette=palette,
+            ax=ax,
+            **kwargs
+        )
+    return ax, df2plot
+def get_library_name(by=None, verbose=False):
+    lib_names=gp.get_library_name()
+    if by is None:
+        if verbose:
+            [print(i) for i in lib_names]
+        return lib_names
+    else:
+        return ips.flatten(ips.strcmp(by, lib_names, get_rank=True,verbose=verbose),verbose=verbose)
+def get_gsva(
+    data_gene_samples: pd.DataFrame,  # index(gene),columns(samples)
+    gene_sets: str,
+    species:str="Human",
+    dir_save:str="./",
+    plot_:bool=False,
+    n_top:int=30,
+    check_shared:bool=True,
+    cmap="coolwarm",
+    min_size=1,
+    max_size=1000,
+    kcdf="Gaussian",# 'Gaussian' for continuous data
+    method='gsva',
+    seed=1,
+    **kwargs,
+):
+    kws_figsets = {}
+    for k_arg, v_arg in kwargs.items():
+        if "figset" in k_arg:
+            kws_figsets = v_arg
+            kwargs.pop(k_arg, None)
+            break
+    species_org = species
+    # organism (str) – Select one from { ‘Human’, ‘Mouse’, ‘Yeast’, ‘Fly’, ‘Fish’, ‘Worm’ }
+    organisms = ["Human", "Mouse", "Yeast", "Fly", "Fish", "Worm"]
+    species = ips.strcmp(species, organisms)[0]
+    if species_org.lower() != species.lower():
+        print(f"species was corrected to {species}, becasue only support {organisms}")
+    if os.path.isfile(gene_sets):
+        gene_sets_name = os.path.basename(gene_sets)
+        gene_sets = ips.fload(gene_sets)
+    else:
+        lib_support_names = gp.get_library_name()
+        # correct input gene_set name
+        gene_sets_name = ips.strcmp(gene_sets, lib_support_names)[0]
+        # download it
+        gene_sets = gp.get_library(name=gene_sets_name, organism=species)
+    print(f"gene_sets get ready: {gene_sets_name}")
+    # gene symbols are uppercase
+    gene_symbol_list = [str(i).upper() for i in data_gene_samples.index]
+    data_gene_samples.index=gene_symbol_list
+    # display(data_gene_samples.head(3))
+    # # check how shared genes
+    if check_shared:
+        ips.shared(
+            ips.flatten(gene_symbol_list, verbose=False),
+            ips.flatten(gene_sets, verbose=False),
+            verbose=False
+        )
+    gsva_results = gp.gsva(
+        data=data_gene_samples,  #  matrix should have genes as rows and samples as columns
+        gene_sets=gene_sets,
+        outdir=None,
+        kcdf=kcdf,  # 'Gaussian' for continuous data
+        min_size=min_size,
+        method=method,
+        max_size=max_size,
+        verbose=True,
+        seed=seed,
+        # no_plot=False,
+    )
+    gsva_res = gsva_results.res2d.copy()
+    gsva_res["ES_abs"] = gsva_res["ES"].apply(np.abs)
+    gsva_res = gsva_res.sort_values(by="ES_abs", ascending=False)
+    gsva_res = (
+        gsva_res.drop_duplicates(subset="Term").drop(columns="ES_abs")
+        # .iloc[:80, :]
+        .reset_index(drop=True)
+    )
+    gsva_res = gsva_res.sort_values(by="ES", ascending=False)
+    if plot_:
+        if gsva_res.shape[0]>=2*n_top:
+            gsva_res_plot=pd.concat([gsva_res.head(n_top),gsva_res.tail(n_top)])
+        else:
+            gsva_res_plot = gsva_res
+        if isinstance(cmap,str):
+            palette = plot.get_color(n_top*2, cmap=cmap)[::-1]
+        elif isinstance(cmap,list):
+            if len(cmap)==2:
+                palette = [cmap[0]]*n_top+[cmap[1]]*n_top
+            else:
+                palette=cmap
+        # ! barplot
+        if n_top < 5:
+            height_ = 3
+        elif 5 <= n_top < 10:
+            height_ = 4
+        elif 10 <= n_top < 15:
+            height_ = 5
+        elif 15 <= n_top < 20:
+            height_ = 6
+        elif 20 <= n_top < 30:
+            height_ = 7
+        elif 30 <= n_top < 40:
+            height_ = int(n_top / 3.5)
+        else:
+            height_ = int(n_top / 3)
+        plt.figure(figsize=[10, height_])
+        ax2 = plot.plotxy(
+            data=gsva_res_plot,
+            x="ES",
+            y="Term",
+            hue="Term",
+            palette=palette,
+            kind=["bar"],
+            figsets=dict(yticklabel=[], ticksloc="b", boxloc="b", ylabel=None),
+        )
+        # 改变labels的位置
+        for i, bar in enumerate(ax2.patches):
+            term = gsva_res_plot.iloc[i]["Term"]
+            es_value = gsva_res_plot.iloc[i]["ES"]
+            # Positive ES values: Align y-labels to the left
+            if es_value > 0:
+                ax2.annotate(
+                    term,
+                    xy=(0, bar.get_y() + bar.get_height() / 2),
+                    xytext=(-5, 0),  # Move to the left
+                    textcoords="offset points",
+                    ha="right",
+                    va="center",  # Align labels to the right
+                    fontsize=10,
+                    color="black",
+                )
+            # Negative ES values: Align y-labels to the right
+            else:
+                ax2.annotate(
+                    term,
+                    xy=(0, bar.get_y() + bar.get_height() / 2),
+                    xytext=(5, 0),  # Move to the right
+                    textcoords="offset points",
+                    ha="left",
+                    va="center",  # Align labels to the left
+                    fontsize=10,
+                    color="black",
+                )
+        plot.figsets(ax=ax2, **kws_figsets)
+        if dir_save:
+            ips.figsave(dir_save + f"GSVA_{gene_sets_name}.pdf")
+        plt.show()
+    return gsva_res.reset_index(drop=True)
+def plot_gsva(gsva_res, # output from bio.get_gsva()
+              n_top=10,
+              ax=None,
+              x="ES",
+              y="Term",
+              hue="Term",
+              cmap="coolwarm",
+              **kwargs
+              ):
+    kws_figsets = {}
+    for k_arg, v_arg in kwargs.items():
+        if "figset" in k_arg:
+            kws_figsets = v_arg
+            kwargs.pop(k_arg, None)
+            break
+    # ! barplot
+    if n_top < 5:
+        height_ = 4
+    elif 5 <= n_top < 10:
+        height_ = 5
+    elif 10 <= n_top < 15:
+        height_ = 6
+    elif 15 <= n_top < 20:
+        height_ = 7
+    elif 20 <= n_top < 30:
+        height_ = 8
+    elif 30 <= n_top < 40:
+        height_ = int(n_top / 3.5)
+    else:
+        height_ = int(n_top / 3)
+    if ax is None:
+        _,ax=plt.subplots(1,1,figsize=[10, height_])
+    gsva_res = gsva_res.sort_values(by=x, ascending=False)
+    if gsva_res.shape[0]>=2*n_top:
+        gsva_res_plot=pd.concat([gsva_res.head(n_top),gsva_res.tail(n_top)])
+    else:
+        gsva_res_plot = gsva_res
+    if isinstance(cmap,str):
+        palette = plot.get_color(n_top*2, cmap=cmap)[::-1]
+    elif isinstance(cmap,list):
+        if len(cmap)==2:
+            palette = [cmap[0]]*n_top+[cmap[1]]*n_top
+        else:
+            palette=cmap
+    ax = plot.plotxy(
+        ax=ax,
+        data=gsva_res_plot,
+        x=x,
+        y=y,
+        hue=hue,
+        palette=palette,
+        kind=["bar"],
+        figsets=dict(yticklabel=[], ticksloc="b", boxloc="b", ylabel=None),
+    )
+    # 改变labels的位置
+    for i, bar in enumerate(ax.patches):
+        term = gsva_res_plot.iloc[i]["Term"]
+        es_value = gsva_res_plot.iloc[i]["ES"]
+        # Positive ES values: Align y-labels to the left
+        if es_value > 0:
+            ax.annotate(
+                term,
+                xy=(0, bar.get_y() + bar.get_height() / 2),
+                xytext=(-5, 0),  # Move to the left
+                textcoords="offset points",
+                ha="right",
+                va="center",  # Align labels to the right
+                fontsize=10,
+                color="black",
+            )
+        # Negative ES values: Align y-labels to the right
+        else:
+            ax.annotate(
+                term,
+                xy=(0, bar.get_y() + bar.get_height() / 2),
+                xytext=(5, 0),  # Move to the right
+                textcoords="offset points",
+                ha="left",
+                va="center",  # Align labels to the left
+                fontsize=10,
+                color="black",
+            )
+    plot.figsets(ax=ax, **kws_figsets)
+    return ax
+def get_prerank(
+    rnk: pd.DataFrame,
+    gene_sets: str,
+    download: bool = False,
+    species="Human",
+    threads=8,  # Number of CPU cores to use
+    permutation_num=1000,  # Number of permutations for significance
+    min_size=1,  # Minimum gene set size
+    max_size=2000,  # Maximum gene set size
+    seed=1,  # Seed for reproducibility
+    verbose=True,  # Verbosity
+    dir_save="./",
+    plot_=False,
+    size=5,
+    cutoff=0.25,
+    show_ring=False,
+    cmap="coolwarm",
+    check_shared=True,
+    **kwargs,
+):
+    """
+    Note: Enrichr uses a list of Entrez gene symbols as input.
+    """
+    kws_figsets = {}
+    for k_arg, v_arg in kwargs.items():
+        if "figset" in k_arg:
+            kws_figsets = v_arg
+            kwargs.pop(k_arg, None)
+            break
+    species_org = species
+    # organism (str) – Select one from { ‘Human’, ‘Mouse’, ‘Yeast’, ‘Fly’, ‘Fish’, ‘Worm’ }
+    organisms = ["Human", "Mouse", "Yeast", "Fly", "Fish", "Worm"]
+    species = ips.strcmp(species, organisms)[0]
+    if species_org.lower() != species.lower():
+        print(f"species was corrected to {species}, becasue only support {organisms}")
+    if os.path.isfile(gene_sets):
+        gene_sets_name = os.path.basename(gene_sets)
+        gene_sets = ips.fload(gene_sets)
+    else:
+        lib_support_names = gp.get_library_name()
+        # correct input gene_set name
+        gene_sets_name = ips.strcmp(gene_sets, lib_support_names)[0]
+        # download it
+        if download:
+            gene_sets = gp.get_library(name=gene_sets_name, organism=species)
+        else:
+            gene_sets = gene_sets_name  # 避免重复下载
+    print(f"\ngene_sets get ready: {gene_sets_name}")
+    #! prerank
+    try:
+        pre_res = gp.prerank(
+            rnk=rnk,
+            gene_sets=gene_sets,
+            threads=threads,  # Number of CPU cores to use
+            permutation_num=permutation_num,  # Number of permutations for significance
+            min_size=min_size,  # Minimum gene set size
+            max_size=max_size,  # Maximum gene set size
+            seed=seed,  # Seed for reproducibility
+            verbose=verbose,  # Verbosity
+        )
+    except ValueError as e:
+        print(f"\n{'!'*10}  Error  {'!'*10}\n{' '*4}{e}\n{'!'*10}  Error  {'!'*10}")
+        return None
+    df_prerank = pre_res.res2d
+    if plot_:
+        #! gseaplot
+        # # (1) easy way
+        # terms = df_prerank.Term
+        # axs = pre_res.plot(terms=terms[0])
+        # (2) # to make more control on the plot, use
+        terms = df_prerank.Term
+        axs = pre_res.plot(
+            terms=terms[:7],
+            # legend_kws={"loc": (1.2, 0)},  # set the legend loc
+            # show_ranking=True,  # whether to show the second yaxis
+            figsize=(3, 4),
+        )
+        ips.figsave(dir_save + f"prerank_gseaplot_{gene_sets}.pdf")
+        #!dotplot
+        from gseapy import dotplot
+        # to save your figure, make sure that ``ofname`` is not None
+        ax = dotplot(
+            df_prerank,
+            column="NOM p-val",  # FDR q-val",
+            cmap=cmap,
+            size=size,
+            figsize=(10, 5),
+            cutoff=cutoff,
+            show_ring=show_ring,
+        )
+        ips.figsave(dir_save + f"prerank_dotplot_{gene_sets}.pdf")
+        #! network plot
+        from gseapy import enrichment_map
+        import networkx as nx
+        for top_term in range(5, 50):
+            try:
+                # return two dataframe
+                nodes, edges = enrichment_map(
+                    df=df_prerank,
+                    columns="FDR q-val",
+                    cutoff=0.25,  # 0.25 when "FDR q-val"; 0.05 when "Nom p-value"
+                    top_term=top_term,
+                )
+                # build graph
+                G = nx.from_pandas_edgelist(
+                    edges,
+                    source="src_idx",
+                    target="targ_idx",
+                    edge_attr=["jaccard_coef", "overlap_coef", "overlap_genes"],
+                )
+                # to check if nodes.Hits_ratio or nodes.NES doesn’t match the number of nodes
+                if len(list(nodes.Hits_ratio)) == len(G.nodes):
+                    node_sizes = list(nodes.Hits_ratio * 1000)
+                else:
+                    raise ValueError(
+                        "The size of node_size list does not match the number of nodes in the graph."
+                    )
+                layout = "circular"
+                fig, ax = plt.subplots(figsize=(8, 8))
+                if layout == "spring":
+                    pos = nx.layout.spring_layout(G)
+                elif layout == "circular":
+                    pos = nx.layout.circular_layout(G)
+                elif layout == "shell":
+                    pos = nx.layout.shell_layout(G)
+                elif layout == "spectral":
+                    pos = nx.layout.spectral_layout(G)
+                # node_size = nx.get_node_attributes()
+                # draw node
+                nx.draw_networkx_nodes(
+                    G,
+                    pos=pos,
+                    cmap=plt.cm.RdYlBu,
+                    node_color=list(nodes.NES),
+                    node_size=list(nodes.Hits_ratio * 1000),
+                )
+                # draw node label
+                nx.draw_networkx_labels(
+                    G,
+                    pos=pos,
+                    labels=nodes.Term.to_dict(),
+                    font_size=8,
+                    verticalalignment="bottom",
+                )
+                # draw edge
+                edge_weight = nx.get_edge_attributes(G, "jaccard_coef").values()
+                nx.draw_networkx_edges(
+                    G,
+                    pos=pos,
+                    width=list(map(lambda x: x * 10, edge_weight)),
+                    edge_color="#CDDBD4",
+                )
+                ax.set_axis_off()
+                print(f"{gene_sets}(top_term={top_term})")
+                plot.figsets(title=f"{gene_sets}(top_term={top_term})")
+                ips.figsave(dir_save + f"prerank_network_{gene_sets}.pdf")
+                break
+            except:
+                print(f"not work {top_term}")
+    return df_prerank
+def plot_prerank(
+    results_df,
+    kind="bar",  # 'barplot', 'dotplot'
+    cutoff=0.25,
+    show_ring=False,
+    xticklabels_rot=0,
+    title=None,  # 'KEGG'
+    cmap="coolwarm",
+    n_top=10,
+    size=5, # when size is None in network, by "NES"
+    facecolor=None,# default by "NES"
+    linewidth=None,# default by "NES"
+    linecolor=None,# default by "NES"
+    linealpha=None, # default by "NES"
+    alpha=None,# default by "NES"
+    ax=None,
+    **kwargs,
+):
+    kws_figsets = {}
+    for k_arg, v_arg in kwargs.items():
+        if "figset" in k_arg:
+            kws_figsets = v_arg
+            kwargs.pop(k_arg, None)
+            break
+    if isinstance(cmap, str):
+        palette = plot.get_color(n_top, cmap=cmap)[::-1]
+    elif isinstance(cmap, list):
+        palette = cmap
+    if n_top < 5:
+        height_ = 4
+    elif 5 <= n_top < 10:
+        height_ = 5
+    elif 10 <= n_top < 15:
+        height_ = 6
+    elif 15 <= n_top < 20:
+        height_ = 7
+    elif 20 <= n_top < 30:
+        height_ = 8
+    elif 30 <= n_top < 40:
+        height_ = int(n_top / 5)
+    else:
+        height_ = int(n_top / 6)
+    results_df["-log10(Adjusted P-value)"]=results_df["FDR q-val"].apply(lambda x : -np.log10(x))
+    results_df["Count"] = results_df["Lead_genes"].apply(lambda x: len(x.split(";")))
+    #! barplot
+    if "bar" in kind.lower():
+        df_=results_df.sort_values(by="-log10(Adjusted P-value)",ascending=False)
+        if ax is None:
+            _, ax = plt.subplots(1, 1, figsize=[10, height_])
+        ax = plot.plotxy(
+            data=df_.head(n_top),
+            kind="barplot",
+            x="-log10(Adjusted P-value)",
+            y="Term",
+            hue="Term",
+            palette=palette,
+            legend=None,
+        )
+        plot.figsets(ax=ax, **kws_figsets)
+        return ax, df_
+    #! dotplot
+    elif "dot" in kind.lower():
+        #! dotplot
+        cutoff_curr = cutoff
+        step = 0.05
+        cutoff_stop = 0.5
+        while cutoff_curr <= cutoff_stop:
+            try:
+                if cutoff_curr != cutoff:
+                    plt.clf()
+                ax = gp.dotplot(
+                    results_df,
+                    column="NOM p-val",
+                    show_ring=show_ring,
+                    xticklabels_rot=xticklabels_rot,
+                    title=title,
+                    cmap=cmap,
+                    cutoff=cutoff_curr,
+                    top_term=n_top,
+                    size=size,
+                    figsize=[10, height_],
+                )
+                if len(ax.collections) >= n_top:
+                    print(f"cutoff={cutoff_curr} done! ")
+                    break
+                if cutoff_curr == cutoff_stop:
+                    break
+                cutoff_curr += step
+            except Exception as e:
+                cutoff_curr += step
+                print(
+                    f"Warning: trying cutoff={cutoff_curr}, cutoff={cutoff_curr-step} failed: {e} "
+                )
+        plot.figsets(ax=ax, **kws_figsets)
+        return ax, results_df
+    #! barplot with counts
+    elif "co" in kind.lower():
+        if ax is None:
+            _, ax = plt.subplots(1, 1, figsize=[10, height_])
+        # 从overlap中提取出个数
+        df_ = results_df.sort_values(by="Count", ascending=False)
+        ax = plot.plotxy(
+            data=df_.head(n_top),
+            kind="barplot",
+            x="Count",
+            y="Term",
+            hue="Term",
+            palette=palette,
+            legend=None,
+            ax=ax,
+            **kwargs,
+        )
+        plot.figsets(ax=ax, **kws_figsets)
+        return ax, df_
+    #! scatter with counts
+    elif "sca" in kind.lower():
+        if isinstance(cmap, str):
+            palette = plot.get_color(n_top, cmap=cmap)
+        elif isinstance(cmap, list):
+            palette = cmap
+        if ax is None:
+            _, ax = plt.subplots(1, 1, figsize=[10, height_])
+        # 从overlap中提取出个数
+        df_ = results_df.sort_values(by="Count", ascending=False)
+        ax = plot.plotxy(
+            data=df_.head(n_top),
+            kind="scatter",
+            x="Count",
+            y="Term",
+            hue="Count",
+            size="Count",
+            sizes=[10,50],
+            palette=palette,
+            legend=None,
+            ax=ax,
+            **kwargs,
+        )
+        plot.figsets(ax=ax, **kws_figsets)
+        return ax, df_
+    elif "net" in kind.lower():
+        #! network plot
+        from gseapy import enrichment_map
+        import networkx as nx
+        from matplotlib import cm
+        # try:
+        if cutoff>=1 or cutoff is None:
+            print(f"cutoff is {cutoff} => Without applying filter")
+            nodes, edges = enrichment_map(
+                df=results_df,
+                columns="NOM p-val",
+                cutoff=1.1,  # 0.25 when "FDR q-val"; 0.05 when "Nom p-value"
+                top_term=n_top,
+            )
+        else:
+            cutoff_curr = cutoff
+            step = 0.05
+            cutoff_stop = 1.0
+            while cutoff_curr <= cutoff_stop:
+                try:
+                    # return two dataframe
+                    nodes, edges = enrichment_map(
+                        df=results_df,
+                        columns="NOM p-val",
+                        cutoff=cutoff_curr,  # 0.25 when "FDR q-val"; 0.05 when "Nom p-value"
+                        top_term=n_top,
+                    )
+                    if nodes.shape[0] >= n_top:
+                        print(f"cutoff={cutoff_curr} done! ")
+                        break
+                    if cutoff_curr == cutoff_stop:
+                        break
+                    cutoff_curr += step
+                except Exception as e:
+                    cutoff_curr += step
+                    print(
+                        f"{e}: trying cutoff={cutoff_curr}"
+                    )
+        print("size: by 'NES'") if size is None else print("")
+        print("linewidth: by 'NES'") if linewidth is None else print("")
+        print("linecolor: by 'NES'") if linecolor is None else print("")
+        print("linealpha: by 'NES'") if linealpha is None else print("")
+        print("facecolor: by 'NES'")  if facecolor is None else print("")
+        print("alpha: by '-log10(Adjusted P-value)'")  if alpha is None else print("")
+        edges.sort_values(by="jaccard_coef", ascending=False,inplace=True)
+        colormap = cm.get_cmap(cmap)  # Get the 'coolwarm' colormap
+        G,ax=plot_ppi(
+            interactions=edges,
+            player1="src_name",
+            player2="targ_name",
+            weight="jaccard_coef",
+            size=[
+                    node["NES"] * 300 for _, node in nodes.iterrows()
+                ] if size is None else size,  #  size nodes by NES
+            facecolor=[colormap(node["NES"]) for _, node in nodes.iterrows()] if facecolor is None else facecolor,  # Color by FDR q-val
+            linewidth=[node["NES"] * 300 for _, node in nodes.iterrows()] if linewidth is None else linewidth,
+            linecolor=[node["NES"] * 300 for _, node in nodes.iterrows()] if linecolor is None else linecolor,
+            linealpha=[node["NES"] * 300 for _, node in nodes.iterrows()] if linealpha is None else linealpha,
+            alpha=[node["NES"] * 300 for _, node in nodes.iterrows()] if alpha is None else alpha,
+            **kwargs
+            )
+        # except Exception as e:
+        #     print(f"not work {n_top},{e}")
+        return ax, G, nodes, edges
 #! https://string-db.org/help/api/
 import pandas as pd
@@ -1104,16 +1923,22 @@ def plot_ppi(
     n_rank=[5, 10],  # Nodes in each rank for the concentric layout
     dist_node = 10,  # Distance between each rank of circles
     layout="degree",
-    size='auto',#700,
+    size=None,#700,
+    sizes=(50,500),# min and max of size
     facecolor="skyblue",
     cmap='coolwarm',
     edgecolor="k",
     edgelinewidth=1.5,
     alpha=.5,
+    alphas=(0.1, 1.0),# min and max of alpha
     marker="o",
     node_hideticks=True,
     linecolor="gray",
+    line_cmap='coolwarm',
     linewidth=1.5,
+    linewidths=(0.5,5),# min and max of linewidth
+    linealpha=1.0,
+    linealphas=(0.1,1.0),# min and max of linealpha
     linestyle="-",
     line_arrowstyle='-',
     fontsize=10,
@@ -1142,7 +1967,7 @@ def plot_ppi(
     for col in [player1, player2, weight]:
         if col not in interactions.columns:
             raise ValueError(f"Column '{col}' is missing from the interactions DataFrame.")
+    interactions.sort_values(by=[weight], inplace=True)
     # Initialize Pyvis network
     net = Network(height="750px", width="100%", bgcolor=bgcolor, font_color=fontcolor)
     net.force_atlas_2based(
@@ -1161,34 +1986,71 @@ def plot_ppi(
     G = nx.Graph()
     for _, row in interactions.iterrows():
         G.add_edge(row[player1], row[player2], weight=row[weight])
+    # G = nx.from_pandas_edgelist(interactions, source=player1, target=player2, edge_attr=weight)
     # Calculate node degrees
     degrees = dict(G.degree())
     norm = Normalize(vmin=min(degrees.values()), vmax=max(degrees.values()))
     colormap = cm.get_cmap(cmap)  # Look up the colormap named by `cmap` (default 'coolwarm')
+    if not ips.isa(facecolor, 'color'):
+        print("facecolor: based on degrees")
+        facecolor = [colormap(norm(deg)) for deg in degrees.values()]  # Use colormap
+    num_nodes = G.number_of_nodes()
+    #* size
     # Set properties based on degrees
     if not isinstance(size, (int,float,list)):
+        print("size: based on degrees")
         size = [deg * 50 for deg in degrees.values()]  # Scale sizes
-    if not ips.isa(facecolor, 'color'):
-        facecolor = [colormap(norm(deg)) for deg in degrees.values()]  # Use colormap
-    if size is None:
-        size = [700] * G.number_of_nodes()  # Default size for all nodes
-    elif isinstance(size, (int, float)):
-        size = [size] * G.number_of_nodes()  # If a scalar, apply to all nodes
-    # else:
-    #     size = size.tolist()  # Ensure size is a list
-    if len(size)>G.number_of_nodes():
-        size=size[:G.number_of_nodes()]
-    for node in G.nodes():
+    size = (size[:num_nodes] if len(size) > num_nodes else size) if isinstance(size, list) else [size] * num_nodes
+    if isinstance(size, list) and len(ips.flatten(size,verbose=False))!=1:
+        # Normalize sizes
+        min_size, max_size = sizes  # Use sizes tuple for min and max values
+        min_degree, max_degree = min(size), max(size)
+        if max_degree > min_degree:  # Avoid division by zero
+            size = [
+                min_size + (max_size - min_size) * (sz - min_degree) / (max_degree - min_degree)
+                for sz in size
+            ]
+        else:
+            # If all values are the same, set them to a default of the midpoint
+            size = [(min_size + max_size) / 2] * len(size)
+    #* facecolor
+    facecolor = (facecolor[:num_nodes] if len(facecolor) > num_nodes else facecolor) if isinstance(facecolor, list) else [facecolor] * num_nodes
+    # * facealpha
+    if isinstance(alpha, list):
+        alpha = (alpha[:num_nodes] if len(alpha) > num_nodes else alpha + [alpha[-1]] * (num_nodes - len(alpha)))
+        min_alphas, max_alphas = alphas  # Use alphas tuple for min and max values
+        if len(alpha) > 0:
+            # Normalize alpha based on the specified min and max
+            min_alpha, max_alpha = min(alpha), max(alpha)
+            if max_alpha > min_alpha:  # Avoid division by zero
+                alpha = [
+                    min_alphas + (max_alphas - min_alphas) * (ea - min_alpha) / (max_alpha - min_alpha)
+                    for ea in alpha
+                ]
+            else:
+                # If all alpha values are the same, set them to the average of min and max
+                alpha = [(min_alphas + max_alphas) / 2] * len(alpha)
+        else:
+            # Default to full opacity when the alpha list is empty
+            alpha = [1.0] * num_nodes
+    else:
+        # If alpha is a single value, repeat it for every node (it is not rescaled to `alphas`)
+        alpha = [alpha] * num_nodes
+    for i, node in enumerate(G.nodes()):
         net.add_node(
             node,
             label=node,
-            size=size[list(G.nodes()).index(node)] if isinstance(size,list) else size[0],
-            color=facecolor[list(G.nodes()).index(node)] if isinstance(facecolor,list) else facecolor,
+            size=size[i],
+            color=facecolor[i],
+            alpha=alpha[i],
             font={"size": fontsize, "color": fontcolor},
         )
+    print(f'nodes number: {i+1}')
     for edge in G.edges(data=True):
         net.add_edge(
@@ -1198,6 +2060,7 @@ def plot_ppi(
             color=edgecolor,
             width=edgelinewidth * edge[2]["weight"],
         )
     layouts = [
         "spring",
         "circular",
@@ -1209,7 +2072,8 @@ def plot_ppi(
         "degree"
     ]
     layout = ips.strcmp(layout, layouts)[0]
-    print(layout)
+    print(f"layout:{layout}, or select one in {layouts}")
     # Choose layout
     if layout == "spring":
         pos = nx.spring_layout(G, k=k_value)
@@ -1235,7 +2099,9 @@ def plot_ppi(
         # Calculate node degrees and sort nodes by degree
         degrees = dict(G.degree())
         sorted_nodes = sorted(degrees.items(), key=lambda x: x[1], reverse=True)
+        norm = Normalize(vmin=min(degrees.values()), vmax=max(degrees.values()))
+        colormap = cm.get_cmap(cmap)
         # Create positions for concentric circles based on n_layers and n_rank
         pos = {}
         n_layers=len(n_rank)+1 if n_layers is None else n_layers
@@ -1266,8 +2132,8 @@ def plot_ppi(
     # If ax is None, create a new figure and axes
     if ax is None:
-        fig, ax = plt.subplots(1,1,figsize=figsize)
+        fig, ax = plt.subplots(1,1,figsize=figsize)
     # Draw nodes, edges, and labels with customization options
     nx.draw_networkx_nodes(
         G,
@@ -1281,6 +2147,54 @@ def plot_ppi(
         hide_ticks=node_hideticks,
         node_shape=marker
     )
+    #* linewidth
+    if not isinstance(linewidth, list):
+        linewidth = [linewidth] * G.number_of_edges()
+    else:
+        linewidth = (linewidth[:G.number_of_edges()] if len(linewidth) > G.number_of_edges() else linewidth + [linewidth[-1]] * (G.number_of_edges() - len(linewidth)))
+        # Normalize linewidth if it is a list
+        if isinstance(linewidth, list):
+            min_linewidth, max_linewidth = min(linewidth), max(linewidth)
+            vmin, vmax = linewidths  # Use linewidths tuple for min and max values
+            if max_linewidth > min_linewidth:  # Avoid division by zero
+                # Scale between vmin and vmax
+                linewidth = [
+                    vmin + (vmax - vmin) * (lw - min_linewidth) / (max_linewidth - min_linewidth)
+                    for lw in linewidth
+                ]
+            else:
+                # If all values are the same, set them to a default of the midpoint
+                linewidth = [(vmin + vmax) / 2] * len(linewidth)
+        else:
+            # If linewidth is a single value, convert it to a list of that value
+            linewidth = [linewidth] * G.number_of_edges()
+    #* linecolor
+    if not isinstance(linecolor, str):
+        weights = [G[u][v]["weight"] for u, v in G.edges()]
+        norm = Normalize(vmin=min(weights), vmax=max(weights))
+        colormap = cm.get_cmap(line_cmap)
+        linecolor = [colormap(norm(weight)) for weight in weights]
+    else:
+        linecolor = [linecolor] * G.number_of_edges()
+    # * linealpha
+    if isinstance(linealpha, list):
+        linealpha = (linealpha[:G.number_of_edges()] if len(linealpha) > G.number_of_edges() else linealpha + [linealpha[-1]] * (G.number_of_edges() - len(linealpha)))
+        min_alpha, max_alpha = linealphas  # Use linealphas tuple for min and max values
+        if len(linealpha) > 0:
+            min_linealpha, max_linealpha = min(linealpha), max(linealpha)
+            if max_linealpha > min_linealpha:  # Avoid division by zero
+                linealpha = [
+                    min_alpha + (max_alpha - min_alpha) * (ea - min_linealpha) / (max_linealpha - min_linealpha)
+                    for ea in linealpha
+                ]
+            else:
+                linealpha = [(min_alpha + max_alpha) / 2] * len(linealpha)
+        else:
+            linealpha = [1.0] * G.number_of_edges() # if it was set incorrectly, fall back to 1.0
+    else:
+        linealpha = [linealpha] * G.number_of_edges()  # Convert to list if single value
     nx.draw_networkx_edges(
         G,
         pos,
@@ -1289,14 +2203,21 @@ def plot_ppi(
         width=linewidth,
         style=linestyle,
         arrowstyle=line_arrowstyle,
-        alpha=0.7
+        alpha=linealpha
     )
     nx.draw_networkx_labels(
         G, pos, ax=ax, font_size=fontsize, font_color=fontcolor,horizontalalignment=ha,verticalalignment=va
     )
     plot.figsets(ax=ax,**kws_figsets)
     ax.axis("off")
-    net.write_html(dir_save)
+    if dir_save:
+        if not os.path.basename(dir_save):
+            dir_save="_.html"
+        net.write_html(dir_save)
+        nx.write_graphml(G, dir_save.replace(".html",".graphml"))  # Export to GraphML
+        print(f"could be edited in Cytoscape \n{dir_save.replace(".html",".graphml")}")
+        ips.figsave(dir_save.replace(".html",".pdf"))
     return G,ax

py2ls 0.2.4.4__py3-none-any.whl → 0.2.4.6__py3-none-any.whl

py2ls 0.2.4.4py3-none-any.whl → 0.2.4.6py3-none-any.whl