PyPI - biopipen - Versions diffs - 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +290 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +4 -1
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.33.1"
1	+ __version__ = "0.34.0"

biopipen/core/filters.py CHANGED Viewed

@@ -6,9 +6,10 @@ import shlex
 from pathlib import Path
 from typing import Any, List, Mapping
-from argx import Namespace
+from argx import Namespace  # pyright: ignore[reportPrivateImportUsage]
 from liquid.filters.manager import FilterManager
-from pipen_report.filters import register_component, render_ui, _tag
+from yunpath import CloudPath
+from pipen_report.filters import register_component, _tag
 # from .defaults import BIOPIPEN_DIR
@@ -172,14 +173,14 @@ def r(
             return "TRUE"
         if obj.upper() == "FALSE":
             return "FALSE"
-        if obj.upper() == "NA" or obj.upper() == "NULL":
+        if obj.upper() == "NA" or obj.upper() == "NULL" or obj == "None":
             return obj.upper()
         if re.match(r"^\d+:\d+$", obj):
             return obj
         if obj.startswith("r:") or obj.startswith("R:"):
             return str(obj)[2:]
         return repr(str(obj))
-    if isinstance(obj, Path):
+    if isinstance(obj, (Path, CloudPath)):
         return repr(str(obj))
     if isinstance(obj, (list, tuple, set)):
         if any(isinstance(i, dict) for i in obj):
@@ -233,6 +234,11 @@ def source_r(path: str | Path, chdir: bool = False) -> str:
     In addition to generating `source(path)`, we also include the mtime for the script
     to trigger the job not cached when the script is updated.
+    If your process is used in a cloud environment, it is recommended to
+    use the `read` filter to load the script content instead of sourcing it using
+    the `source` function in R to avoid the path issue (path could be different
+    in different environments).
     Args:
         path: The path to the R script
@@ -248,98 +254,6 @@ def source_r(path: str | Path, chdir: bool = False) -> str:
     )
-@register_component("fgsea")
-def _render_fgsea(
-    cont: Mapping[str, Any],
-    job: Mapping[str, Any],
-    level: int,
-    na_arg: str = "10",
-) -> str:
-    """Render fgsea report"""
-    # cont["dir"] is required
-    n_pathways = int(na_arg)
-    pathways = []
-    with Path(cont["dir"]).joinpath("fgsea.txt").open() as f:
-        next(f)  # skip header
-        for line in f:
-            items = line.strip().split("\t")
-            pathways.append((items[0], items[-1]))
-            if len(pathways) >= n_pathways:
-                break
-    components = [
-        # Summary
-        {
-            "title": "Enrichment Analysis Summary",
-            "ui": "tabs",
-            "contents": [
-                {
-                    "title": "Plot",
-                    "ui": "flat",
-                    "contents": [
-                        {
-                            "kind": "descr",
-                            "content": (
-                                "This table presents a comprehensive summary of the "
-                                "top enriched pathways derived from the fgsea. "
-                                "Each row corresponds to a pathway, and the gene ranks "
-                                "are shown based on the ranking metric used in the "
-                                "analysis. The enrichment score, p-value, and adjusted "
-                                "p-value are also provided to assess the significance "
-                                "of the enrichment."
-                            )
-                        },
-                        {
-                            "kind": "image",
-                            "src": str(Path(cont["dir"]).joinpath("gsea_table.png")),
-                            "download": str(Path(cont["dir"]).joinpath("gsea_table.pdf"))
-                        }
-                    ],
-                },
-                {
-                    "title": "Table",
-                    "ui": "flat",
-                    "contents": [
-                        {
-                            "kind": "descr",
-                            "content": (
-                                "This plot represents the GSEA results for a specified "
-                                "gene set, illustrating the distribution and impact of "
-                                "the gene set along the ranked list of genes. "
-                                "The running enrichment score curve shows the "
-                                "cumulative enrichment score as genes from the input "
-                                "list are encountered. Positive peaks on the curve "
-                                "indicate regions where members of the gene set are "
-                                "predominantly found."
-                            )
-                        },
-                        {
-                            "kind": "table",
-                            "src": str(Path(cont["dir"]).joinpath("fgsea.txt")),
-                            "data": {"excluded": {"slug"}},
-                        }
-                    ],
-                },
-            ]
-        },
-        # Pathways
-        {
-            "title": f"Enriched Pathways (Top {n_pathways})",
-            "ui": "table_of_images",
-            "contents": [
-                {
-                    "src": str(Path(cont["dir"]) / f"fgsea_{slug}.png"),
-                    "download": str(Path(cont["dir"]) / f"fgsea_{slug}.pdf"),
-                    "title": pw,
-                }
-                for pw, slug in pathways
-            ]
-        },
-    ]
-    return render_ui(components, "accordion", job, level)  # type: ignore
 @register_component("pdf")
 def _render_pdf(
     cont: Mapping[str, Any],
@@ -367,90 +281,3 @@ def _render_gsea(
     """Render gsea report"""
     # cont["dir"] is required
     raise NotImplementedError()
-@register_component("enrichr")
-def _render_enrichr(
-    cont: Mapping[str, Any],
-    job: Mapping[str, Any],
-    level: int,
-) -> str:
-    """Render enrichr report"""
-    # cont["dir"] is required
-    dbs = [sumfile.stem[8:] for sumfile in Path(cont["dir"]).glob("Enrichr-*.txt")]
-    components = []
-    for db in dbs:
-        enrichr_plots = list(Path(cont["dir"]).glob(f"Enrichr-{db}.*.png"))
-        if len(enrichr_plots) == 0:
-            components.append(
-                {
-                    "title": db,
-                    "ui": "tabs",
-                    "contents": [
-                        {
-                            "title": "Error",
-                            "ui": "flat",
-                            "contents": [
-                                {
-                                    "kind": "descr",
-                                    "content": (
-                                        "The enrichment analysis results of the top "
-                                        "biological pathways associated with the input "
-                                        "gene set. Each bar represents a pathway, "
-                                        "with the length of the bar indicating the "
-                                        "number of input genes overlapping with genes "
-                                        "in that pathway. The color intensity of the "
-                                        "bars reflects the statistical significance of "
-                                        "the enrichment (p-value). "
-                                    )
-                                },
-                                {
-                                    "kind": "error",
-                                    "content": "No enriched terms found.",
-                                }
-                            ],
-                        },
-                    ],
-                }
-            )
-        else:
-            contents = []
-            for enrichr_plot in enrichr_plots:
-                plot_type = enrichr_plot.stem.split(".")[-1]
-                pdf = enrichr_plot.with_suffix(".pdf")
-                contents.append(
-                    {
-                        "src": str(enrichr_plot),
-                        "title": f"{plot_type.title()} Plot",
-                        "download": str(pdf),
-                    }
-                )
-            components.append(
-                {
-                    "title": db,
-                    "ui": "tabs",
-                    "contents": [
-                        {
-                            "title": "Plots",
-                            "ui": "table_of_images",
-                            "contents": contents,
-                        },
-                        {
-                            "title": "Table",
-                            "ui": "flat",
-                            "contents": [
-                                {
-                                    "kind": "table",
-                                    "src": str(
-                                        Path(cont["dir"]).joinpath(f"Enrichr-{db}.txt")
-                                    ),
-                                }
-                            ],
-                        },
-                    ],
-                }
-            )
-    return render_ui(components, "accordion", job, level)

biopipen/core/proc.py CHANGED Viewed

@@ -1,7 +1,9 @@
 """Provides a base class for the processes to subclass"""
-from diot import Diot
+from __future__ import annotations
+from diot import Diot  # type: ignore
 from liquid.defaults import SEARCH_PATHS
-from pipen import Proc as PipenProc
+from pipen import Proc as PipenProc  # type: ignore
 from pipen_filters.filters import FILTERS
 from .filters import filtermanager
@@ -23,7 +25,7 @@ class Proc(PipenProc):
     template_opts = {
         "globals": {**FILTERS, "biopipen_dir": str(BIOPIPEN_DIR)},
         "filters": {**FILTERS, **filtermanager.filters},
-        "search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
+        "search_paths": SEARCH_PATHS + [str(REPORT_DIR)],  # type: ignore
     }
     plugin_opts = {

biopipen/core/testing.py CHANGED Viewed

@@ -44,12 +44,19 @@ def get_pipeline(testfile, loglevel="debug", enable_report=False, **kwargs):
     """Get a pipeline for a test file"""
     name, workdir, outdir = _get_test_dirs(testfile, False)
     report_plugin_prefix = "+" if enable_report else "-"
+    plugins = kwargs.pop("plugins", [])
+    if any("report" in p for p in plugins if isinstance(p, str)):
+        raise ValueError(
+            "Do not pass `report` plugin to `get_pipeline(plugins=[...])`, "
+            "use `enable_report` instead."
+        )
+    plugins.append(f"{report_plugin_prefix}report")
     kws = {
         "name": name,
         "workdir": workdir,
         "outdir": outdir,
         "loglevel": loglevel,
-        "plugins": [f"{report_plugin_prefix}report"],
+        "plugins": plugins,
     }
     kws.update(kwargs)
     return Pipen(**kws)

biopipen/ns/bam.py CHANGED Viewed

@@ -4,6 +4,9 @@ from ..core.proc import Proc
 from ..core.config import config
+# +-------------------------------------------------------------------+
+# | CNV callers                                                       |
+# +-------------------------------------------------------------------+
 class CNVpytor(Proc):
     """Detect CNV using CNVpytor
@@ -26,15 +29,14 @@ class CNVpytor(Proc):
         binsizes: The binsizes
         snp: How to read snp data
         filters: The filters to filter the result
-            See - https://github.com/abyzovlab/CNVpytor/blob/master
-            /GettingStarted.md#predicting-cnv-regions
+            See - https://github.com/abyzovlab/CNVpytor/blob/master/GettingStarted.md#predicting-cnv-regions
         mask_snps: Whether mask 1000 Genome snps
         baf_nomask: Do not use P mask in BAF histograms
     Requires:
         cnvpytor:
            - check: {{proc.envs.cnvpytor}} --version
-    """
+    """  # noqa: E501
     input = "bamfile:file, snpfile:file"
     output = "outdir:dir:{{in.bamfile | stem}}.cnvpytor"
     lang = config.lang.python
@@ -150,7 +152,7 @@ class CNAClinic(Proc):
             A list of sample names
             A float number (0 < x <= 1), the fraction of samples to use
             An integer number (x > 1), the number of samples to use
-        binsize: Directly use this binsize for CNAClinic, in kbp.
+        binsize: Directly use this binsize for CNAClinic, in bp.
         genome: The genome assembly
         run_args: The arguments for CNAClinic::runSegmentation
         plot_args: The arguments for CNAClinic::plotSampleData
@@ -181,6 +183,9 @@ class CNAClinic(Proc):
     }
+# +-------------------------------------------------------------------+
+# | Bam processing tools                                              |
+# +-------------------------------------------------------------------+
 class BamSplitChroms(Proc):
     """Split bam file by chromosomes
@@ -368,3 +373,34 @@ class BamSort(Proc):
         "index": True,
     }
     script = "file://../scripts/bam/BamSort.py"
+class SamtoolsView(Proc):
+    """View bam file using samtools, mostly used for filtering
+    This is a wrapper for `samtools view` command.
+    It will create a new bam file with the same name as the input bam file.
+    Input:
+        bamfile: The bam file
+    Output:
+        outfile: The output bam file
+    Envs:
+        ncores: Number of cores to use
+        samtools: Path to samtools executable
+        index: Whether to index the output bam file
+            Requires the input bam file to be sorted.
+        <more>: Other arguments passed to the view tool
+            See `samtools view` or `sambamba view`.
+    """
+    input = "bamfile:file"
+    output = "outfile:file:{{in.bamfile | stem}}.bam"
+    lang = config.lang.python
+    envs = {
+        "ncores": config.misc.ncores,
+        "samtools": config.exe.samtools,
+        "index": True,
+    }
+    script = "file://../scripts/bam/SamtoolsView.py"

biopipen/ns/cnv.py CHANGED Viewed

@@ -150,7 +150,7 @@ class TMADScore(Proc):
         excl_chroms (list): The chromosomes to be excluded
     """
     input = "segfile:file"
-    output = "outfile:file:{{in.segfile | stem0}}.tmad.txt"
+    output = "outfile:file:{{in.segfile | stem}}.tmad.txt"
     lang = config.lang.rscript
     envs = {
         "chrom_col": "chrom",

biopipen/ns/cnvkit.py CHANGED Viewed

@@ -482,7 +482,7 @@ class CNVkitDiagram(Proc):
     }
     script = "file://../scripts/cnvkit/CNVkitDiagram.py"
     plugin_opts = {
-        "report": "file://../reports/cnvkit/CNVkitScatter.svelte",
+        "report": "file://../reports/cnvkit/CNVkitDiagram.svelte",
         "report_paging": 10,
     }

biopipen/ns/delim.py CHANGED Viewed

@@ -132,4 +132,4 @@ class SampleInfo(Proc):
     }
     lang = config.lang.rscript
     script = "file://../scripts/delim/SampleInfo.R"
-    plugin_opts = {"report": "file://../reports/delim/SampleInfo.svelte"}
+    plugin_opts = {"report": "file://../reports/common.svelte"}

biopipen/ns/gsea.py CHANGED Viewed

@@ -1,8 +1,10 @@
 """Gene set enrichment analysis"""
+from pipen.utils import mark
 from ..core.proc import Proc
 from ..core.config import config
+@mark(deprecated='[{proc.name}] is deprecated, use `FGSEA` instead.')
 class GSEA(Proc):
     """Gene set enrichment analysis
@@ -51,6 +53,7 @@ class GSEA(Proc):
     plugin_opts = {"report": "file://../reports/gsea/GSEA.svelte"}
+@mark(deprecated='[{proc.name}] is deprecated, use `FGSEA` directly.')
 class PreRank(Proc):
     """PreRank the genes for GSEA analysis
@@ -100,59 +103,82 @@ class PreRank(Proc):
 class FGSEA(Proc):
     """Gene set enrichment analysis using `fgsea`
-    Need `devtools::install_github("ctlab/fgsea")`
     Input:
-        infile: The expression file.
-            Either a tab-delimited matrix or an RDS file (on envs.inopts)
+        infile: The expression file (genes x samples).
+            Either a tab-delimited file.
         metafile: The meta data file, determining the class of the samples
-            Two columns are required
-            Sample: The unique sample id for each sample
-            `[Group]`: The groups/classes of the samples
-        gmtfile: The GMT file of reference gene sets
-        configfile: The configuration file in TOML format to specify some envs.
-            `clscol`: If not provided, will use `envs.clscol`
-            `classes`: Defines pos and neg labels. If not provided, use will
-            `envs.classes`.
+            Two columns are required. If column `Sample` is found, it will be used
+            as the samples; otherwise the first column should be the samples.
+            The other column should be the group/class of the samples, whose
+            name is specified by `envs.clscol`.
     Output:
-        outdir: The output directory
+        outdir: The output directory containing the results, including
+            the table and plots.
     Envs:
-        inopts: The options for `read.table()` to read the input file
-            If `rds` will use `readRDS()`
-        metaopts: The options for `read.table()` to read the meta file
-        method: The method to do the preranking.
-            Supported: `s2n(signal_to_noise)`, `abs_s2n(abs_signal_to_noise)`,
-            `t_test`, `ratio_of_classes`, `diff_of_classes` and
-            `log2_ratio_of_classes`.
+        ncores (type=int): Number of cores for parallelization
+            Passed to `nproc` of `fgseaMultilevel()`.
+        case: The case label for the positive class.
+        control: The control label for the negative class.
+            When there are only two classes in `in.metafile` at column `envs.clscol`,
+            either `case` or `control` can be specified and the other will be
+            automatically set to the other class.
+        gmtfile: The pathways in GMT format, with the gene names/ids in the same format as the seurat object.
+            One could also use a URL to a GMT file. For example, from <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/Pathways/>.
+        method (choice): The method to do the preranking.
+            - signal_to_noise: Signal to noise.
+                The larger the differences of the means (scaled by the standard deviations);
+                that is, the more distinct the gene expression is in each phenotype and the more the gene
+                acts as a "class marker".
+            - s2n: Alias of signal_to_noise.
+            - abs_signal_to_noise: The absolute value of signal_to_noise.
+            - abs_s2n: Alias of abs_signal_to_noise.
+            - t_test: T test.
+                Uses the difference of means scaled by the standard deviation and number of samples.
+            - ratio_of_classes: Also referred to as fold change.
+                Uses the ratio of class means to calculate fold change for natural scale data.
+            - diff_of_classes: Difference of class means.
+                Uses the difference of class means to calculate fold change for natural scale data.
+            - log2_ratio_of_classes: Log2 ratio of class means.
+                Uses the log2 ratio of class means to calculate fold change for natural scale data.
+                This is the recommended statistic for calculating fold change for log scale data.
         clscol: The column of metafile specifying the classes of the samples
-        classes: The classes to specify the pos and neg labels.
-            It could be a pair of labels (e.g. `["CASE", "CNTRL"]`), where
-            the first one is pos and second is neg. Or you can have multiple
-            pairs of labels (e.g. `[["CASE1", "CNTRL"], ["CASE2", "CNTRL"]]`)
-        top: Do gsea table and enrich plot for top N pathways. If it is < 1,
-            will apply it to `padj`
-        `<rest>`: Rest arguments for `fgsea()`
+            When `in.metafile` is not specified, it can also be specified as a list of
+            classes, in the same order as the samples in `in.infile`.
+        top (type=auto): Do gsea table and enrich plot for top N pathways.
+            If it is < 1, will apply it to `padj`, selecting pathways with `padj` < `top`.
+        eps (type=float): This parameter sets the boundary for calculating the p value.
+            See <https://rdrr.io/bioc/fgsea/man/fgseaMultilevel.html>
+        minsize (type=int): Minimal size of a gene set to test. All pathways below the threshold are excluded.
+        maxsize (type=int): Maximal size of a gene set to test. All pathways above the threshold are excluded.
+        rest (type=json;order=98): Rest arguments for [`fgsea()`](https://rdrr.io/bioc/fgsea/man/fgsea.html)
+            See also <https://rdrr.io/bioc/fgsea/man/fgseaMultilevel.html>
+        cases (type=json;order=99): If you have multiple cases, you can specify them here.
+            The keys are the names of the cases and the values are the above options except `mutaters`.
+            If some options are not specified, the default values specified above will be used.
+            If no cases are specified, the default case will be added with the name `GSEA`.
     Requires:
         bioconductor-fgsea:
             - check: {{proc.lang}} -e "library(fgsea)"
-    """
-    input = "infile:file, metafile:file, gmtfile:file, configfile:file"
+    """  # noqa: E501
+    input = "infile:file, metafile:file"
     output = "outdir:dir:{{in.infile | stem}}.fgsea"
     lang = config.lang.rscript
     envs = {
-        "inopts": {"header": True, "row.names": -1},
-        "metaopts": {"header": True, "row.names": -1},
-        "method": "s2n",
-        "clscol": None,
-        "classes": None,
-        "top": 20,
         "ncores": config.misc.ncores,
-        "minSize": 10,
-        "maxSize": 100,
+        "case": None,
+        "control": None,
+        "gmtfile": None,
+        "method": "signal_to_noise",
+        "clscol": None,
+        "top": 10,
         "eps": 0,
+        "minsize": 10,
+        "maxsize": 100,
+        "rest": {},
+        "cases": {},
     }
     script = "file://../scripts/gsea/FGSEA.R"
     plugin_opts = {"report": "file://../reports/gsea/FGSEA.svelte"}

biopipen/ns/misc.py CHANGED Viewed

@@ -106,3 +106,41 @@ class Shell(Proc):
     envs = {"cmd": "", "outdir": False}
     lang = config.lang.bash
     script = "file://../scripts/misc/Shell.sh"
+class Plot(Proc):
+    """Plot given data using plotthis package in R
+    Input:
+        datafile: The input data file in RDS or qs/qs2 format.
+            If it is not in RDS nor qs/qs2 format, read.table will be used
+            to read the data file with the options provided by `envs.read_opts`.
+    Output:
+        plotfile: The output plot file in PNG format
+    Envs:
+        fn: The plot function to use. Required.
+        devpars (ns): The device parameters for the plot.
+            - width: The width of the plot in pixels.
+            - height: The height of the plot in pixels.
+            - res: The resolution of the plot in DPI.
+        more_formats: The additional formats to save the plot in other than PNG.
+            The file will be saved in the same directory as the plotfile.
+        save_code: Whether to save the R code used for plotting.
+        read_opts: Options to read the data file.
+            If the data file is not in RDS nor qs/qs2 format, these options
+            will be passed to `read.table`.
+        <more>: Additional parameters to the plot function.
+    """
+    input = "datafile:file"
+    output = "plotfile:file:{{in.datafile | stem}}.png"
+    envs = {
+        "fn": None,
+        "devpars": {"res": 100},
+        "more_formats": [],
+        "save_code": False,
+        "read_opts": {},
+    }
+    lang = config.lang.rscript
+    script = "file://../scripts/misc/Plot.R"

biopipen/ns/plot.py CHANGED Viewed

@@ -1,8 +1,16 @@
 """Plotting data"""
+import warnings
 from ..core.proc import Proc
 from ..core.config import config
+warnings.warn(
+    "The `biopipen.ns.plot` module is deprecated and will be removed in the future. "
+    "Please use `biopipen.ns.misc.Plot` process instead.",
+    DeprecationWarning,
+)
 class VennDiagram(Proc):
     """Plot Venn diagram

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl