PyPI - biopipen - Versions diffs - 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.1-py3-none-any.whl → 0.34.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +290 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +4 -1
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/ns/scrna_metabolic_landscape.py CHANGED Viewed

@@ -1,9 +1,10 @@
 """Metabolic landscape analysis for scRNA-seq data"""
 from __future__ import annotations
 from pathlib import Path
 from typing import Type
-from diot import Diot
+from diot import Diot  # type: ignore
 from datar.tibble import tibble
 from pipen.utils import mark
 from pipen_args import ProcGroup
@@ -28,81 +29,76 @@ class MetabolicPathwayActivity(Proc):
     ![MetabolicPathwayActivity_violin](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_violin.png){: width="45%"}
+    Input:
+        sobjfile: The Seurat object file.
+            It should be loaded as a Seurat object
+    Output:
+        outdir: The output directory.
+            It will contain the pathway activity score files and plots.
     Envs:
-        ntimes (type=int): Number of times to do the permutation
+        ntimes (type=int): Number of permutations to estimate the p-values
         ncores (type=int;pgarg): Number of cores to use for parallelization
             Defaults to `ScrnaMetabolicLandscape.ncores`
-        heatmap_devpars (ns): Device parameters for the heatmap
-            - width (type=int): Width of the heatmap
-            - height (type=int): Height of the heatmap
-            - res (type=int): Resolution of the heatmap
-        violin_devpars (ns): Device parameters for the violin plot
-            - width (type=int): Width of the violin plot
-            - height (type=int): Height of the violin plot
-            - res (type=int): Resolution of the violin plot
         gmtfile (pgarg): The GMT file with the metabolic pathways.
             Defaults to `ScrnaMetabolicLandscape.gmtfile`
-        grouping (type=auto;pgarg;readonly): Defines the basic groups to
-            investigate the metabolic activity, typically the clusters.
-            Defaults to `ScrnaMetabolicLandscape.grouping`
-        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
-            names. For example, if we have `grouping_prefix = "cluster"` and
-            we have `1` and `2` in the `grouping` column, the groups
-            will be named as `cluster_1` and `cluster_2`.
-            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
-        subsetting (type=auto;pgarg;readonly): How do we subset the data. Other
-            columns in the metadata to do comparisons. For example,
-            `"TimePoint"` or `["TimePoint", "Response"]`.
-            Defaults to `ScrnaMetabolicLandscape.subsetting`
-        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
-            subset names.
-            For example, if we have `subsetting_prefix = "timepoint"` and
-            we have `pre` and `post` in the `subsetting` column, the subsets
-            will be named as `timepoint_pre` and `timepoint_post`.
-            If `subsetting` is a list, then this should also be a
-            same-length list. If a single string is given, it will be
-            repeated to a list with the same length as `subsetting`.
-            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
-    Requires:
-        r-scater:
-            - check: {{proc.lang}} <(echo "library(scater)")
-        r-reshape2:
-            - check: {{proc.lang}} <(echo "library(reshape2)")
-        r-rcolorbrewer:
-            - check: {{proc.lang}} <(echo "library(RColorBrewer)")
-        r-ggplot2:
-            - check: {{proc.lang}} <(echo "library(ggplot2)")
-        r-ggprism:
-            - check: {{proc.lang}} <(echo "library(ggprism)")
-        r-complexheatmap:
-            - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
-        r-parallel:
-            - check: {{proc.lang}} <(echo "library(parallel)")
+        subset_by (pgarg;readonly): Subset the data by the given column in the
+            metadata. For example, `Response`.
+            `NA` values will be removed in this column.
+            Defaults to `ScrnaMetabolicLandscape.subset_by`
+            If None, the data will not be subsetted.
+        group_by (pgarg;readonly): Group the data by the given column in the
+            metadata. For example, `cluster`.
+            Defaults to `ScrnaMetabolicLandscape.group_by`
+        plots (type=json): The plots to generate.
+            Names will be used as the prefix for the output files. Values will be
+            a dictionary with the following keys:
+            * `plot_type` is the type of plot to generate. One of `heatmap`,
+            `box`, `violin` or `merged_heatmap` (all subsets in one plot).
+            * `devpars` is a dictionary with the device parameters for the plot.
+            * Other arguments for `plotthis::Heatmap()`, `plotthis::BoxPlot()`
+            or `plotthis::ViolinPlot()`, depending on the `plot_type`.
+        cases (type=json): Multiple cases for the analysis.
+            If you only have one case, you can specify the parameters directly to
+            `envs.ntimes`, `envs.subset_by`, `envs.group_by`, `envs.group1`,
+            `envs.group2`, and `envs.plots`. The name of the case will be
+            `envs.subset_by`.
+            If you have multiple cases, you can specify the parameters for each case
+            in a dictionary. The keys will be the names of the cases and the values
+            will be dictionaries with the parameters for each case, where the values
+            will be inherited from `envs.ntimes`, `envs.subset_by`, `envs.group_by`,
+            `envs.group1`, `envs.group2`, and `envs.plots`.
     """  # noqa: E501
     input = "sobjfile:file"
     output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
     envs = {
         "ntimes": 5000,
         "ncores": config.misc.ncores,
-        "heatmap_devpars": {},
-        "violin_devpars": {},
         "gmtfile": None,
-        "grouping": None,
-        "grouping_prefix": "",
-        "subsetting": None,
-        "subsetting_prefix": "",
+        "subset_by": None,
+        "group_by": None,
+        "plots": {
+            "Pathway Activity (violin plot)": {
+                "plot_type": "violin",
+                "add_box": True,
+                "devpars": {"res": 100},
+            },
+            "Pathway Activity (heatmap)": {
+                "plot_type": "heatmap",
+                "devpars": {"res": 100},
+            },
+        },
+        "cases": {},
     }
     lang = config.lang.rscript
     script = (
-        "file://../scripts/"
-        "scrna_metabolic_landscape/MetabolicPathwayActivity.R"
+        "file://../scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R"
     )
     plugin_opts = {
-        "report": (
-            "file://../reports/"
-            "scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
-        )
+        "report":
+        "file://../reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
     }
@@ -113,11 +109,18 @@ class MetabolicFeatures(Proc):
     The enrichment analysis is done with [`fgsea`](https://bioconductor.org/packages/release/bioc/html/fgsea.html)
     package or the [`GSEA_R`](https://github.com/GSEA-MSigDB/GSEA_R) package.
+    Input:
+        sobjfile: The Seurat object file in rds.
+            It should be loaded as a Seurat object
+    Output:
+        outdir: The output directory.
+            It will contain the GSEA results and plots.
     Envs:
-        ncores (type=int;pgarg): Number of cores to use for parallelization.
-            Defaults to `ScrnaMetabolicLandscape.ncores`
-        fgsea (flag): Whether to do fast gsea analysis using `fgsea` package.
-            If `False`, the `GSEA_R` package will be used.
+        ncores (type=int;pgarg): Number of cores to use for parallelization for
+            the comparisons for each subset and group.
+            Defaults to `ScrnaMetabolicLandscape.ncores`.
         prerank_method (choice): Method to use for gene preranking.
             Signal to noise: the larger the differences of the means
             (scaled by the standard deviations); that is, the more distinct
@@ -143,142 +146,81 @@ class MetabolicFeatures(Proc):
             - ratio_of_classes: Also referred to as fold change
             - diff_of_classes: Difference of class means
             - log2_ratio_of_classes: Log2 ratio of class means
-        top (type=int): N top of enriched pathways to show
         gmtfile (pgarg): The GMT file with the metabolic pathways.
             Defaults to `ScrnaMetabolicLandscape.gmtfile`
-        grouping (type=auto;pgarg;readonly): Defines the basic groups to
-            investigate the metabolic activity.
-            Defaults to `ScrnaMetabolicLandscape.grouping`
-        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to
-            group names.
-            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
-        subsetting (type=auto;pgarg;readonly): How do we subset the data.
-            Another column(s) in the metadata.
-            Defaults to `ScrnaMetabolicLandscape.subsetting`
-        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
-            subset names.
-            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
-    Requires:
-        r-parallel:
-            - check: {{proc.lang}} <(echo "library(parallel)")
-        r-fgsea:
-            - check: {{proc.lang}} <(echo "library(fgsea)")
+        subset_by (pgarg;readonly): Subset the data by the given column in the
+            metadata. For example, `Response`.
+            `NA` values will be removed in this column.
+            Defaults to `ScrnaMetabolicLandscape.subset_by`
+            If None, the data will not be subsetted.
+        group_by (pgarg;readonly): Group the data by the given column in the
+            metadata. For example, `cluster`.
+            Defaults to `ScrnaMetabolicLandscape.group_by`
+        comparisons (type=list): The comparison groups to use for the analysis.
+            If not provided, each group in the `group_by` column will be used
+            to compare with the other groups.
+            If a single group is provided as an element, it will be used to
+            compare with all the other groups.
+            For example, if we have `group_by = "cluster"` and we have
+            `1`, `2` and `3` in the `group_by` column, we could have
+            `comparisons = ["1", "2"]`, which will compare the group `1` with groups
+            `2` and `3`, and the group `2` with groups `1` and `3`. We could also
+            have `comparisons = ["1,2", "1,3"]`, which will compare the group `1` with
+            group `2` and group `1` with group `3`.
+        fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
+            For example, `{"minSize": 15, "maxSize": 500}`.
+            See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
+        plots (type=json): The plots to generate.
+            Names will be used as the title for the plot. Values will be the arguments
+            passed to `biopipen.utils::VizGSEA()` function.
+            See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
+            A key `level` is supported to specify the level of the plot.
+            Possible values are `case`, which includes all subsets and groups in the
+            case; `subset`, which includes all groups in the subset; otherwise, it
+            will plot for the groups.
+            For `case`/`subset` level plots, only the "dot" `plot_type` is supported
+            for now; the values will be passed to `plotthis::DotPlot()`.
+        cases (type=json): Multiple cases for the analysis.
+            If you only have one case, you can specify the parameters directly to
+            `envs.prerank_method`, `envs.subset_by`, `envs.group_by`,
+            `envs.comparisons`, `envs.fgsea_args` and `envs.plots`.
+            The name of this default case will be `envs.subset_by`.
+            If you have multiple cases, you can specify the parameters for each case
+            in a dictionary. The keys will be the names of the cases and the values
+            will be dictionaries with the parameters for each case, where the values
+            will be inherited from `envs.prerank_method`,
+            `envs.subset_by`, `envs.group_by`, `envs.comparisons`, `envs.fgsea_args`
+            and `envs.plots`.
     """  # noqa: E501
     input = "sobjfile:file"
     output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
     lang = config.lang.rscript
     envs = {
         "ncores": config.misc.ncores,
-        "fgsea": True,
         "prerank_method": "signal_to_noise",
-        "top": 10,
         "gmtfile": None,
-        "grouping": None,
-        "grouping_prefix": "",
-        "subsetting": None,
-        "subsetting_prefix": "",
-    }
-    script = (
-        "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
-    )
-    plugin_opts = {
-        "report": (
-            "file://../reports/"
-            "scrna_metabolic_landscape/MetabolicFeatures.svelte"
-        )
-    }
-class MetabolicFeaturesIntraSubset(Proc):
-    """Intra-subset metabolic features - Enrichment analysis in details
-    Similar to the [`MetabolicFeatures`](!!#biopipennsscrna_metabolic_landscapemetabolicfeatures)
-    process, this process performs enrichment analysis for the metabolic pathways for
-    each subset in each group, instead of each group in each subset.
-    Envs:
-        ncores (type=int; pgarg): Number of cores to use for parallelization
-            Defaults to `ScrnaMetabolicLandscape.ncores`
-        fgsea (flag): Whether to do fast gsea analysis
-        prerank_method (choice): Method to use for gene preranking
-            Signal to noise: the larger the differences of the means
-            (scaled by the standard deviations); that is, the more distinct
-            the gene expression is in each phenotype and the more the gene
-            acts as a “class marker.”.
-            Absolute signal to noise: the absolute value of the signal to
-            noise.
-            T test: Uses the difference of means scaled by the standard
-            deviation and number of samples.
-            Ratio of classes: Uses the ratio of class means to calculate
-            fold change for natural scale data.
-            Diff of classes: Uses the difference of class means to calculate
-            fold change for natural scale data
-            Log2 ratio of classes: Uses the log2 ratio of class means to
-            calculate fold change for natural scale data. This is the
-            recommended statistic for calculating fold change for log scale
-            data.
-            - signal_to_noise: Signal to noise
-            - s2n: Alias of signal_to_noise
-            - abs_signal_to_noise: absolute signal to noise
-            - abs_s2n: Alias of abs_signal_to_noise
-            - t_test: T test
-            - ratio_of_classes: Also referred to as fold change
-            - diff_of_classes: Difference of class means
-            - log2_ratio_of_classes: Log2 ratio of class means
-        top (type=int): N top of enriched pathways to show
-        gmtfile (pgarg): The GMT file with the metabolic pathways.
-            Defaults to `ScrnaMetabolicLandscape.gmtfile`
-        grouping (type=auto;pgarg;readonly): Defines the basic groups to
-            investigate the metabolic activity.
-            Defaults to `ScrnaMetabolicLandscape.grouping`
-        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
-            names.
-            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
-        subsetting (type=auto;pgarg;readonly): How do we subset the data.
-            Another column(s) in the metadata.
-            Defaults to `ScrnaMetabolicLandscape.subsetting`
-        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
-            subset names.
-            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
-        subsetting_comparison (type=json;pgarg;readonly): How do we compare the
-            subsets.
-            Defaults to `ScrnaMetabolicLandscape.subsetting_comparison`
-    Requires:
-        r-parallel:
-            - check: {{proc.lang}} <(echo "library(parallel)")
-        r-scater:
-            - check: {{proc.lang}} <(echo "library(scater)")
-        r-fgsea:
-            - check: {{proc.lang}} <(echo "library(fgsea)")
-    """  # noqa: E501
-    input = "sobjfile:file"
-    output = (
-        "outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
-    )
-    lang = config.lang.rscript
-    envs = {
-        "ncores": config.misc.ncores,
-        "gmtfile": None,
-        "fgsea": True,
-        "prerank_method": "signal_to_noise",
-        "top": 10,
-        "grouping": None,
-        "grouping_prefix": "",
-        "subsetting": None,
-        "subsetting_prefix": "",
-        "subsetting_comparison": {},
+        "subset_by": None,
+        "group_by": None,
+        "comparisons": [],
+        "fgsea_args": {},
+        "plots": {
+            "Summary Plot": {
+                "plot_type": "summary",
+                "top_term": 10,
+                "devpars": {"res": 100},
+            },
+            "Enrichment Plots": {
+                "plot_type": "gsea",
+                "top_term": 10,
+                "devpars": {"res": 100},
+            },
+        },
+        "cases": {},
     }
-    script = (
-        "file://../scripts/scrna_metabolic_landscape/"
-        "MetabolicFeaturesIntraSubset.R"
-    )
+    script = "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
     plugin_opts = {
-        "report": (
-            "file://../reports/scrna_metabolic_landscape/"
-            "MetabolicFeaturesIntraSubset.svelte"
-        )
+        "report": "file://../reports/scrna_metabolic_landscape/MetabolicFeatures.svelte"
     }
@@ -296,7 +238,6 @@ class MetabolicPathwayHeterogeneity(Proc):
     ![MetabolicPathwayHeterogeneity](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayHeterogeneity.png)
     Envs:
         gmtfile (pgarg): The GMT file with the metabolic pathways.
             Defaults to `ScrnaMetabolicLandscape.gmtfile`
@@ -305,43 +246,33 @@ class MetabolicPathwayHeterogeneity(Proc):
             the enriched pathways
         ncores (type=int;pgarg): Number of cores to use for parallelization
             Defaults to `ScrnaMetabolicLandscape.ncores`
-        bubble_devpars (ns): The devpars for the bubble plot
-            - width (type=int): The width of the plot
-            - height (type=int): The height of the plot
-            - res (type=int): The resolution of the plot
-        grouping (type=auto;pgarg;readonly): Defines the basic groups to
-            investigate the metabolic activity.
-            Defaults to `ScrnaMetabolicLandscape.grouping`
-        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
-            names.
-            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
-        subsetting (type=auto;pgarg;readonly): How do we subset the data.
-            Another column(s) in the metadata.
-            Defaults to `ScrnaMetabolicLandscape.subsetting`
-        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
-            subset names.
-            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
-    Requires:
-        r-gtools:
-            - check: {{proc.lang}} <(echo "library(gtools)")
-        r-ggplot2:
-            - check: {{proc.lang}} <(echo "library(ggplot2)")
-        r-ggprism:
-            - check: {{proc.lang}} <(echo "library(ggprism)")
-        r-parallel:
-            - check: {{proc.lang}} <(echo "library(parallel)")
-        r-dplyr:
-            - check: {{proc.lang}} <(echo "library(dplyr)")
-        r-tibble:
-            - check: {{proc.lang}} <(echo "library(tibble)")
-        r-enrichr:
-            - check: {{proc.lang}} <(echo "library(enrichR)")
-        r-data.table:
-            - check: {{proc.lang}} <(echo "library(data.table)")
-        r-fgsea:
-            - check: {{proc.lang}} <(echo "library(fgsea)")
+        subset_by (pgarg;readonly): Subset the data by the given column in the
+            metadata. For example, `Response`.
+            `NA` values will be removed in this column.
+            Defaults to `ScrnaMetabolicLandscape.subset_by`
+            If None, the data will not be subsetted.
+        group_by (pgarg;readonly): Group the data by the given column in the
+            metadata. For example, `cluster`.
+            Defaults to `ScrnaMetabolicLandscape.group_by`
+        fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
+            For example, `{"minSize": 15, "maxSize": 500}`.
+            See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
+        plots (type=json): The plots to generate.
+            Names will be used as the title for the plot. Values will be the arguments
+            passed to `biopipen.utils::VizGSEA()` function.
+            See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
+        cases (type=json): Multiple cases for the analysis.
+            If you only have one case, you can specify the parameters directly to
+            `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`, `envs.plots`,
+            `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
+            The name of this default case will be `envs.subset_by`.
+            If you have multiple cases, you can specify the parameters for each case
+            in a dictionary. The keys will be the names of the cases and the values
+            will be dictionaries with the parameters for each case, where the values
+            will be inherited from `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`,
+            `envs.plots`, `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
     """  # noqa: E501
     input = "sobjfile:file"
     output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
     lang = config.lang.rscript
@@ -350,11 +281,16 @@ class MetabolicPathwayHeterogeneity(Proc):
         "select_pcs": 0.8,
         "pathway_pval_cutoff": 0.01,
         "ncores": config.misc.ncores,
-        "bubble_devpars": {},
-        "grouping": None,
-        "grouping_prefix": "",
-        "subsetting": None,
-        "subsetting_prefix": "",
+        "subset_by": None,
+        "group_by": None,
+        "fgsea_args": {"scoreType": "std", "nproc": 1},
+        "plots": {
+            "Pathway Heterogeneity": {
+                "plot_type": "dot",
+                "devpars": {"res": 100},
+            },
+        },
+        "cases": {},
     }
     script = (
         "file://../scripts/scrna_metabolic_landscape/"
@@ -399,49 +335,19 @@ class ScrnaMetabolicLandscape(ProcGroup):
             dependent on other processes, this option will be used to determine
             whether the input is a seurat object or not.
         noimpute (flag): Whether to skip imputation for the dropouts.
-            If False, the values will be left as is.
+            If True, the values will be left as is.
         gmtfile: The GMT file with the metabolic pathways. The gene names should
             match the gene names in the gene list in RNAData or
             the Seurat object.
             You can also provide a URL to the GMT file.
             For example, from
             <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/>.
-        grouping: defines the basic groups to investigate the metabolic activity
-            Typically the clusters.
-        grouping_prefix: Working as a prefix to group names
-            For example, if we have `grouping_prefix = "cluster"` and
-            we have `1` and `2` in the `grouping` column, the groups
-            will be named as `cluster_1` and `cluster_2`
-        subsetting (type=auto): How do we subset the data. Other columns in the
-            metadata to do comparisons. For example, `"TimePoint"` or
-            `["TimePoint", "Response"]`
-        subsetting_prefix (type=auto): Working as a prefix to subset names
-            For example, if we have `subsetting_prefix = "timepoint"` and
-            we have `pre` and `post` in the `subsetting` column, the subsets
-            will be named as `timepoint_pre` and `timepoint_post`
-            If `subsetting` is a list, then this should also be a same-length
-            list. If a single string is given, it will be repeated to a list
-            with the same length as `subsetting`
-        subsetting_comparison (type=json): What kind of comparisons are we
-            doing to compare cells from different subsets.
-            It should be dict with keys as the names of the comparisons and
-            values as the 2 comparison groups from the `subsetting` column.
-            For example, if we have `pre` and `post` in the `subsetting` column,
-            we could have
-            `subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
-            The second group will be the control group in the comparison.
-            If we also have `1`, `2` and `3` in the `grouping` column,
-            by default, the comparisons are done within each subset for
-            each group. For example, for group `1`, groups `2` and `3`
-            will be used as control, and for group `2`, groups `1` and `3`
-            will be used as control, and for group `3`, groups `1` and `2`
-            will be used as control. It is similar to `Seurat::FindMarkers`
-            procedure. With this option, the comparisons are also done to
-            compare cells from different subsets within each group. With the
-            example above, we will have `pre_vs_post` comparisons within
-            each group.
-            If `subsetting` is a list, this must be a list of dicts with the
-            same length.
+        subset_by (pgarg;readonly): Subset the data by the given column in the
+            metadata. For example, `Response`.
+            `NA` values will be removed in this column.
+            If None, the data will not be subsetted.
+        group_by (pgarg;readonly): Group the data by the given column in the
+            metadata. For example, `cluster`.
         mutaters (type=json): Add new columns to the metadata for
             grouping/subsetting.
             They are passed to `sobj@meta.data |> mutate(...)`. For example,
@@ -451,65 +357,25 @@ class ScrnaMetabolicLandscape(ProcGroup):
         ncores (type=int): Number of cores to use for parallelization for
             each process
     """
     DEFAULTS = Diot(
         metafile=None,
         is_seurat=None,
         gmtfile=None,
-        grouping=None,
-        grouping_prefix="",
-        subsetting=None,
-        subsetting_prefix=None,
-        subsetting_comparison={},
         mutaters=None,
-        noimpute=False,
+        noimpute=True,
         ncores=config.misc.ncores,
+        subset_by=None,
+        group_by=None,
     )
     def post_init(self):
         """Load runtime processes"""
         if self.opts.metafile:
             suffix = Path(self.opts.metafile).suffix
-            self.opts.is_seurat = suffix in (".rds", ".RDS")
-        # Make sure the grouping is a list
-        if self.opts.subsetting and not isinstance(self.opts.subsetting, list):
-            self.opts.subsetting = [self.opts.subsetting]
-        # Make sure the grouping is a list with the same length as subsetting
-        if (
-            self.opts.subsetting
-            and not isinstance(self.opts.subsetting_prefix, list)
-        ):
-            self.opts.subsetting_prefix = [
-                self.opts.subsetting_prefix
-            ] * len(self.opts.subsetting)
-        # Make sure the lengths of subsetting and subsetting_comparison the same
-        if self.opts.subsetting:
-            if len(self.opts.subsetting) == 1 and isinstance(
-                self.opts.subsetting_comparison, dict
-            ):
-                self.opts.subsetting_comparison = [
-                    self.opts.subsetting_comparison
-                ]
-            if len(self.opts.subsetting) > 1 and not isinstance(
-                self.opts.subsetting_comparison, list
-            ):
-                raise ValueError(
-                    "The length of `subsetting` is larger than 1, "
-                    "but `subsetting_comparison` is not a list of dicts."
-                )
-            if len(self.opts.subsetting) != len(
-                self.opts.subsetting_comparison
-            ):
-                raise ValueError(
-                    "The length of `subsetting` and `subsetting_comparison` "
-                    "are not the same"
-                )
-    @ProcGroup.add_proc
+            self.opts.is_seurat = suffix in (".rds", ".RDS", ".qs", ".qs2")
+    @ProcGroup.add_proc  # type: ignore
     def p_input(self) -> Type[Proc]:
         """Build MetabolicInput process"""
         from .misc import File2Proc
@@ -527,8 +393,8 @@ class ScrnaMetabolicLandscape(ProcGroup):
         return MetabolicInput
-    @ProcGroup.add_proc
-    def p_preparing(self) -> Type[Proc]:
+    @ProcGroup.add_proc  # type: ignore
+    def p_preparing(self) -> Type[Proc] | None:
         """Build SeuratPreparing process"""
         if self.opts.is_seurat:
             return None
@@ -540,11 +406,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
         return MetabolicSeuratPreparing
-    @ProcGroup.add_proc
+    @ProcGroup.add_proc  # type: ignore
     def p_clustering(self) -> Type[Proc]:
         """Build SeuratClustering process"""
         if self.opts.is_seurat:
-            return self.p_input
+            return self.p_input  # type: ignore
         from .scrna import SeuratClustering
@@ -553,11 +419,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
         return MetabolicSeuratClustering
-    @ProcGroup.add_proc
+    @ProcGroup.add_proc  # type: ignore
     def p_mutater(self) -> Type[Proc]:
         """Build SeuratMetadataMutater process"""
         if not self.opts.mutaters:
-            return self.p_clustering
+            return self.p_clustering  # type: ignore
         from .scrna import SeuratMetadataMutater
@@ -571,97 +437,72 @@ class ScrnaMetabolicLandscape(ProcGroup):
         return MetabolicSeuratMetadataMutater
-    @ProcGroup.add_proc
+    @ProcGroup.add_proc  # type: ignore
     def p_expr_impute(self) -> Type[Proc]:
         """Build ExprImputation process"""
         if self.opts.noimpute:
-            return self.p_mutater
+            return self.p_mutater  # type: ignore
         from .scrna import ExprImputation
-        @annotate.format_doc(indent=3)
+        @annotate.format_doc(indent=3)  # type: ignore
         class MetabolicExprImputation(ExprImputation):
             """{{Summary}}
             You can turn off the imputation by setting the `noimpute` option
             of the process group to `True`.
             """
             requires = self.p_mutater
         return MetabolicExprImputation
-    @ProcGroup.add_proc
+    @ProcGroup.add_proc  # type: ignore
     def p_pathway_activity(self) -> Type[Proc]:
         """Build MetabolicPathwayActivity process"""
-        return Proc.from_proc(
+        return Proc.from_proc(  # type: ignore
             MetabolicPathwayActivity,
             "MetabolicPathwayActivity",
-            requires=self.p_expr_impute,
+            requires=self.p_expr_impute,  # type: ignore
             order=-1,
+            envs_depth=5,
             envs={
                 "ncores": self.opts.ncores,
                 "gmtfile": self.opts.gmtfile,
-                "grouping": self.opts.grouping,
-                "grouping_prefix": self.opts.grouping_prefix,
-                "subsetting": self.opts.subsetting,
-                "subsetting_prefix": self.opts.subsetting_prefix,
+                "group_by": self.opts.group_by,
+                "subset_by": self.opts.subset_by,
             },
         )
-    @ProcGroup.add_proc
+    @ProcGroup.add_proc  # type: ignore
     def p_pathway_heterogeneity(self) -> Type[Proc]:
         """Build MetabolicPathwayHeterogeneity process"""
-        return Proc.from_proc(
+        return Proc.from_proc(  # type: ignore
             MetabolicPathwayHeterogeneity,
             "MetabolicPathwayHeterogeneity",
-            requires=self.p_expr_impute,
+            requires=self.p_mutater,  # type: ignore
+            envs_depth=5,
             envs={
                 "ncores": self.opts.ncores,
                 "gmtfile": self.opts.gmtfile,
-                "grouping": self.opts.grouping,
-                "grouping_prefix": self.opts.grouping_prefix,
-                "subsetting": self.opts.subsetting,
-                "subsetting_prefix": self.opts.subsetting_prefix,
+                "group_by": self.opts.group_by,
+                "subset_by": self.opts.subset_by,
             },
         )
-    @ProcGroup.add_proc
+    @ProcGroup.add_proc  # type: ignore
     def p_features(self) -> Type[Proc]:
         """Build MetabolicFeatures process"""
-        return Proc.from_proc(
+        return Proc.from_proc(  # type: ignore
             MetabolicFeatures,
             "MetabolicFeatures",
-            requires=self.p_expr_impute,
-            envs={
-                "ncores": self.opts.ncores,
-                "gmtfile": self.opts.gmtfile,
-                "grouping": self.opts.grouping,
-                "grouping_prefix": self.opts.grouping_prefix,
-                "subsetting": self.opts.subsetting,
-                "subsetting_prefix": self.opts.subsetting_prefix,
-            },
-        )
-    @ProcGroup.add_proc
-    def p_features_intra_subset(self) -> Type[Proc]:
-        """Build MetabolicFeaturesIntraSubset process"""
-        if self.opts.subsetting_comparison and not self.opts.subsetting:
-            raise ValueError(
-                "Cannot use `subsetting_comparison` without `subsetting`."
-            )
-        return Proc.from_proc(
-            MetabolicFeaturesIntraSubset,
-            "MetabolicFeaturesIntraSubset",
-            requires=self.p_expr_impute,
+            requires=self.p_expr_impute,  # type: ignore
+            envs_depth=5,
             envs={
                 "ncores": self.opts.ncores,
                 "gmtfile": self.opts.gmtfile,
-                "grouping": self.opts.grouping,
-                "grouping_prefix": self.opts.grouping_prefix,
-                "subsetting": self.opts.subsetting,
-                "subsetting_prefix": self.opts.subsetting_prefix,
-                "subsetting_comparison": self.opts.subsetting_comparison,
+                "group_by": self.opts.group_by,
+                "subset_by": self.opts.subset_by,
             },
         )

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.1py3-none-any.whl → 0.34.0py3-none-any.whl