PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0py3-none-any.whl → 0.34.26py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/scripts/scrna/TopExpressingGenes.R CHANGED Viewed

@@ -1,175 +1,209 @@
-source("{{biopipen_dir}}/utils/misc.R")
 library(Seurat)
-library(tibble)
-library(enrichR)
 library(rlang)
 library(dplyr)
-setEnrichrSite("Enrichr")
+library(tidyselect)
+library(biopipen.utils)
 srtfile <- {{in.srtobj | r}}
 outdir <- {{out.outdir | r}}
+joboutdir <- {{job.outdir | r}}
 mutaters <- {{ envs.mutaters | r }}
 ident <- {{ envs.ident | r }}
-group.by <- {{ envs["group-by"] | r }}  # nolint
+group_by <- {{ envs.group_by | default: envs["group-by"] | default: None | r }}  # nolint
 each <- {{ envs.each | r }}
-prefix_each <- {{ envs.prefix_each | r }}
-section <- {{ envs.section | r }}
 dbs <- {{ envs.dbs | r }}
 n <- {{ envs.n | r }}
+enrich_style <- {{ envs.enrich_style | r }}
+sset <- {{ envs.subset | r }}
+enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
+enrich_plots <- {{ envs.enrich_plots | r }}
 cases <- {{ envs.cases | r: todot = "-" }}  # nolint
 set.seed(8525)
+log <- get_logger()
+reporter <- get_reporter()
-print("- Loading Seurat object ...")
-srtobj <- readRDS(srtfile)
+log$info("Reading Seurat object ...")
+srtobj <- read_obj(srtfile)
+assay <- DefaultAssay(srtobj)
-print("- Mutate meta data if needed ...")
-if (!is.null(mutaters) && length(mutaters)) {
+if (!is.null(mutaters) && length(mutaters) > 0) {
+    log$info("Mutating meta data ...")
     srtobj@meta.data <- srtobj@meta.data %>%
         mutate(!!!lapply(mutaters, parse_expr))
 }
-print("- Expanding cases ...")
-if (is.null(cases) || length(cases) == 0) {
-    cases <- list(
-        DEFAULT = list(
-            ident = ident,
-            group.by = group.by,
-            each = each,
-            prefix_each = prefix_each,
-            section = section,
-            dbs = dbs,
-            n = n
-        )
-    )
-} else {
-    cases <- lapply(cases, function(cs) {
-        list_setdefault(
-            cs,
-            ident = ident,
-            group.by = group.by,
-            each = each,
-            prefix_each = prefix_each,
-            section = section,
-            dbs = dbs,
-            n = n
+enrich_plots <- lapply(enrich_plots, function(x) {
+    list_update(enrich_plots_defaults, x)
+})
+defaults <- list(
+    ident = ident,
+    group_by = group_by,
+    each = each,
+    dbs = dbs,
+    n = n,
+    enrich_style = enrich_style,
+    enrich_plots = enrich_plots,
+    enrich_plots_defaults = enrich_plots_defaults,
+    subset = sset
+)
+cases <- expand_cases(cases, defaults, default_case = "Top Expressing Genes", post = function(name, case) {
+    outcases <- list()
+    if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
+        case$enrich_plots <- lapply(
+            case$enrich_plots,
+            function(x) { list_update(case$enrich_plots_defaults, x) }
         )
-    })
-}
+        case$enrich_plots_defaults <- NULL
-# Expand each and ident
-newcases <- list()
-for (name in names(cases)) {  # nolint
-    case <- cases[[name]]
-    if (is.null(case$each) && !is.null(case$ident)) {
-        newcases[[paste0(case$section, ":", name)]] <- case
-    } else if (is.null(case$each)) {
-        idents <- srtobj@meta.data %>%
-            pull(case$group.by) %>%
-            unique() %>%
-            na.omit()
-        for (ident in idents) {
-            key <- paste0(name, ":", ident)
-            newcases[[key]] <- case
-            newcases[[key]]$ident <- ident
-        }
+        outcases[[name]] <- case
     } else {
-        eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
+        eachs <- if (!is.null(case$subset)) {
+            srtobj@meta.data %>%
+                filter(!!parse_expr(case$subset)) %>%
+                pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
+        } else {
+            srtobj@meta.data %>%
+                pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
+        }
+        if (length(cases) == 0 && name == "Top Expressing Genes") {
+            name <- case$each
+        }
         for (each in eachs) {
-            by <- make.names(paste0(".", name, "_", each))
-            srtobj@meta.data <- srtobj@meta.data %>% mutate(
-                !!sym(by) := if_else(
-                    !!sym(case$each) == each,
-                    !!sym(case$group.by),
-                    NA
-                )
-            )
-            if (is.null(case$ident)) {
-                idents <- srtobj@meta.data %>%
-                    pull(case$group.by) %>%
-                    unique() %>%
-                    na.omit()
-                for (ident in idents) {
-                    kname <- if (name == "DEFAULT") "" else paste0("-", name)
-                    key <- paste0(each, kname, ":", ident)
-                    if (case$prefix_each) {
-                        key <- paste0(case$each, "-", key)
-                    }
-                    newcases[[key]] <- case
-                    newcases[[key]]$ident <- ident
-                    newcases[[key]]$group.by <- by  # nolint
-                }
+            newname <- paste0(name, " - ", each)
+            newcase <- case
+            newcase$each_name <- case$each
+            newcase$each <- each
+            if (!is.null(case$subset)) {
+                newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
             } else {
-                key <- paste0(case$each, ":", each)
-                if (name != "DEFAULT") {
-                    key <- paste0(key, " - ", name)
-                }
-                newcases[[key]] <- case
+                newcase$subset <- paste0(bQuote(case$each), " == '", each, "'")
             }
+            newcase$enrich_plots <- lapply(
+                case$enrich_plots,
+                function(x) { list_update(case$enrich_plots_defaults, x) }
+            )
+            newcase$enrich_plots_defaults <- NULL
+            outcases[[newname]] <- newcase
         }
     }
-}
-cases <- newcases
-do_enrich <- function(expr, odir) {
-    print("  Saving expressions ...")
-    write.table(
-        expr %>% as.data.frame() %>% rownames_to_column("Gene"),
-        file.path(odir, "expr.txt"),
-        sep = "\t",
-        row.names = TRUE,
-        col.names = TRUE,
-        quote = FALSE
+    outcases
+})
+log$info("Running cases ...")
+process_markers <- function(markers, info, case) {
+    # Save markers
+    write.table(markers, file.path(info$prefix, "top_genes.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
+    reporter$add2(
+        list(
+            name = "Table",
+            contents = list(
+                list(kind = "descr", content = "Showing top expressing genes ordered by their expression descendingly."),
+                list(kind = "table", src = file.path(info$prefix, "top_genes.tsv"), data = list(nrows = 100))
+            )
+        ),
+        hs = c(info$section, info$name),
+        hs2 = paste0("Top Genes"),
+        ui = "tabs"
     )
-    write.table(
-        expr %>% as.data.frame() %>% rownames_to_column("Gene") %>% head(n),
-        file.path(odir, "exprn.txt"),
-        sep = "\t",
-        row.names = TRUE,
-        col.names = TRUE,
-        quote = FALSE
+    enrich <- RunEnrichment(
+        markers$gene,
+        dbs = case$dbs, style = case$enrich_style)
+    write.table(enrich, file.path(info$prefix, "enrich.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
+    reporter$add2(
+        list(
+            name = "Table",
+            contents = list(list(kind = "table", src = file.path(info$prefix, "enrich.tsv"), data = list(nrows = 100)))
+        ),
+        hs = c(info$section, info$name),
+        hs2 = "Enrichment Analysis",
+        ui = "tabs"
     )
-    print("  Running enrichment ...")
-    enriched <- enrichr(rownames(head(expr, n)), dbs)  # nolint
-    for (db in dbs) {
-        write.table(
-            enriched[[db]],
-            file.path(odir, paste0("Enrichr-", db, ".txt")),
-            sep = "\t",
-            row.names = FALSE,
-            col.names = TRUE,
-            quote = FALSE
-        )
-        png(
-            file.path(odir, paste0("Enrichr-", db, ".png")),
-            res = 100, height = 1000, width = 1000
-        )
-        print(plotEnrich(enriched[[db]], showTerms = 20, title = db))  # nolint
-        dev.off()
+    # Visualize enriched terms
+    if (length(case$enrich_plots) > 0) {
+        for (db in case$dbs) {
+            plots <- list()
+            for (plotname in names(case$enrich_plots)) {
+                plotargs <- case$enrich_plots[[plotname]]
+                plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
+                p <- do_call(VizEnrichment, plotargs)
+                outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
+                if (plotargs$plot_type == "bar") {
+                    attr(p, "height") <- attr(p, "height") / 1.5
+                }
+                save_plot(p, outprefix, plotargs$devpars, formats = "png")
+                plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
+            }
+            reporter$add2(
+                list(name = db, contents = plots),
+                hs = c(info$section, info$name),
+                hs2 = "Enrichment Analysis",
+                ui = "tabs"
+            )
+        }
     }
 }
-do_case <- function(casename) {
-    print(paste("- Running for case:", casename))
-    case <- cases[[casename]]
-    parts <- unlist(strsplit(casename, ":"))
-    section <- parts[1]
-    casename <- paste(parts[-1], collapse = ":")
-    print("  Calculating average expression ...")
+run_case <- function(name) {
+    log$info("Case: {name} ...")
+    case <- cases[[name]]
+    log$info("- Subsetting cells and calculating average expression ...")
+    if (!is.null(case$subset)) {
+        subobj <- filter(srtobj, !!parse_expr(case$subset))
+    } else {
+        subobj <- srtobj
+    }
+    case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
+    if (is.null(case$ident)) {
+        case$ident <- as.character(unique(subobj@meta.data[[case$group_by]]))
+    }
     avgexpr <- AverageExpression(
-        srtobj,
-        group.by = case$group.by
-    )$RNA[, case$ident, drop = FALSE]
-    avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
+        subobj,
+        group_by = case$group_by,
+        assays = assay
+    )[[assay]]
+    # https://github.com/satijalab/seurat/issues/7893
+    colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group_by]]))
+    avgexpr <- avgexpr[, case$ident, drop = FALSE]
-    odir <- file.path(outdir, section, casename)
-    dir.create(odir, recursive = TRUE, showWarnings = FALSE)
+    for (idt in case$ident) {
+        log$info("- Processing {idt} ...")
+        info <- case_info(paste0(name, "::", idt), outdir, create = TRUE)
+        expr <- avgexpr[, idt, drop = FALSE]
+        expr <- expr[order(expr, decreasing = TRUE), , drop = FALSE]
+        expr <- expr[1:min(case$n, nrow(expr)), , drop = FALSE]
+        expr <- as.data.frame(expr)
+        expr$gene <- rownames(expr)
+        colnames(expr) <- c("avg_expr", "gene")
+        expr <- expr[, c("gene", "avg_expr"), drop = FALSE]
-    do_enrich(avgexpr, odir)
+        log$info("  Performing enrichment analysis ...")
+        process_markers(expr, info, case = list(
+            ident = idt,
+            dbs = case$dbs,
+            enrich_style = case$enrich_style,
+            enrich_plots = case$enrich_plots
+        ))
+    }
+    invisible()
 }
-sapply(sort(names(cases)), do_case)
+sapply(names(cases), run_case)
+reporter$save(joboutdir)

biopipen/scripts/scrna/celltypist-wrapper.py ADDED Viewed

@@ -0,0 +1,195 @@
+from argparse import ArgumentParser
+from typing import Union
+import numpy as np
+import pandas as pd
+import scanpy as sc
+import celltypist
+from celltypist.classifier import logger, AnnData, Model, Classifier
+parser = ArgumentParser(description="Run CellTypist")  # built at import; parsed only under the __main__ guard
+parser.add_argument(
+    "-i", "--input", required=True, help="Input H5AD file with AnnData object"
+)
+parser.add_argument("-o", "--output", required=True, help="Output file")  # ".h5ad" suffix -> AnnData file, else TSV of obs
+parser.add_argument("-m", "--model", required=True, help="Model file")
+parser.add_argument(
+    "-v", "--majority_voting", action="store_true", help="Majority voting"
+)  # boolean flag: False unless given
+parser.add_argument(
+    "-c",
+    "--over_clustering",
+    required=False,
+    default=None,
+    help="Over clustering. Error if the column does not exist.",
+)  # names an adata.obs column; its existence is checked before annotating
+def classifier_init(
+    self, filename="", model="", transpose=False, gene_file=None, cell_file=None
+):  # assigned to Classifier.__init__ under the __main__ guard of this script
+    """Celltypist check if adata is in the range of log1p normalized data to 10000
+    counts per cell. Otherwise it will use the raw data if available. However, in
+    some cases, the raw data has invalid feature names (var_names) which causes errors.
+    Here we check if the feature names of raw data is valid with intersection with
+    model features, if not, we will use the adata.X instead of adata.raw.X
+    """
+    if isinstance(model, str):  # a str model is resolved through Model.load
+        model = Model.load(model)
+    self.model = model
+    if not filename:  # no input: only self.model is set before returning
+        logger.warn("📭 No input file provided to the classifier")  # NOTE(review): if logger is a stdlib Logger, .warn is the deprecated alias of .warning
+        return
+    if isinstance(filename, str):  # self.filename is set for path input only, not for in-memory AnnData
+        self.filename = filename
+        logger.info(f"📁 Input file is '{self.filename}'")
+        logger.info("⏳ Loading data")
+    if isinstance(filename, str) and filename.endswith(
+        (".csv", ".txt", ".tsv", ".tab", ".mtx", ".mtx.gz")
+    ):  # branch 1: text / mtx input, normalized below by this function
+        self.adata = sc.read(self.filename)
+        if transpose:
+            self.adata = self.adata.transpose()
+        if self.filename.endswith((".mtx", ".mtx.gz")):  # mtx input needs both name files
+            if (gene_file is None) or (cell_file is None):
+                raise FileNotFoundError(
+                    "🛑 Missing `gene_file` and/or `cell_file`. Please provide both "
+                    "arguments together with the input mtx file"
+                )
+            genes_mtx = pd.read_csv(gene_file, header=None)[0].values  # first column only
+            cells_mtx = pd.read_csv(cell_file, header=None)[0].values  # first column only
+            if len(genes_mtx) != self.adata.n_vars:
+                raise ValueError(
+                    f"🛑 The number of genes in {gene_file} does not match the number "
+                    f"of genes in {self.filename}"
+                )
+            if len(cells_mtx) != self.adata.n_obs:
+                raise ValueError(
+                    f"🛑 The number of cells in {cell_file} does not match the number "
+                    f"of cells in {self.filename}"
+                )
+            self.adata.var_names = genes_mtx
+            self.adata.obs_names = cells_mtx
+        if not float(self.adata.X[:1000].max()).is_integer():  # cheap raw-count check: max of the first 1000 rows only
+            logger.warn(
+                "⚠️ Warning: the input file seems not a raw count matrix. The "
+                "prediction result may not be accurate"
+            )
+        if (
+            (self.adata.n_vars >= 100000)  # very many columns
+            or (len(self.adata.var_names[0]) >= 30)  # first column name is 30+ characters
+            or (
+                len(
+                    self.adata.obs_names.intersection(
+                        ["GAPDH", "ACTB", "CALM1", "PTPRC", "MALAT1"]
+                    )
+                )
+                >= 1
+            )  # a row is named like one of these gene symbols
+        ):  # any one of the three heuristics triggers the transpose
+            logger.warn(
+                "⚠️ The input matrix is detected to be a gene-by-cell matrix, will "
+                "transpose it"
+            )
+            self.adata = self.adata.transpose()
+        self.adata.var_names_make_unique()
+        sc.pp.normalize_total(self.adata, target_sum=1e4)  # scale to 1e4 total, then log1p on the next line
+        sc.pp.log1p(self.adata)
+        self.indata = self.adata.X
+        self.indata_genes = self.adata.var_names
+        self.indata_names = self.adata.obs_names
+    elif isinstance(filename, AnnData) or (
+        isinstance(filename, str) and filename.endswith(".h5ad")
+    ):  # branch 2: AnnData (in memory or .h5ad); no normalization is applied here
+        self.adata = sc.read(filename) if isinstance(filename, str) else filename
+        self.adata.var_names_make_unique()
+        # When to use raw.X?
+        # 1. if adata.raw exists
+        # 2. if adata.raw.var_names has intersection with model genes
+        # 3. if adata.X is not in the expected range
+        use_raw = self.adata.raw and (  # all three conditions listed above must hold
+            self.adata.X[:1000].min() < 0 or self.adata.X[:1000].max() > 9.22  # 9.22 is just above log1p(1e4) ~ 9.21; presumably the upper bound — confirm
+        ) and np.isin(
+            self.adata.raw.var_names, self.model.classifier.features
+        ).sum() > 0
+        if use_raw:
+            if not self.adata.raw:  # NOTE(review): unreachable, use_raw is only truthy when self.adata.raw is
+                raise ValueError(
+                    "🛑 Invalid expression matrix in `.X`, expect log1p normalized "
+                    "expression to 10000 counts per cell"
+                )
+            elif (self.adata.raw.X[:1000].min() < 0) or (
+                self.adata.raw.X[:1000].max() > 9.22
+            ):  # raw.X fails the same range check as .X
+                raise ValueError(
+                    "🛑 Invalid expression matrix in both `.X` and `.raw.X`, expect "
+                    "log1p normalized expression to 10000 counts per cell"
+                )
+            else:
+                logger.info(
+                    "👀 Invalid expression matrix in `.X`, expect log1p normalized "
+                    "expression to 10000 counts per cell; will use `.raw.X` instead"
+                )
+                self.indata = self.adata.raw.X
+                self.indata_genes = self.adata.raw.var_names
+                self.indata_names = self.adata.raw.obs_names
+        else:  # also taken when .X is out of range but raw is missing or shares no genes with the model
+            self.indata = self.adata.X
+            self.indata_genes = self.adata.var_names
+            self.indata_names = self.adata.obs_names
+        if np.abs(np.expm1(self.indata[0]).sum() - 10000) > 1:  # first row only: undoing log1p should sum to ~10000; warns, does not raise
+            logger.warn(
+                "⚠️ Warning: invalid expression matrix, expect ALL genes and log1p "
+                "normalized expression to 10000 counts per cell. The prediction result "
+                "may not be accurate"
+            )
+    else:  # neither a supported path suffix nor an AnnData
+        raise ValueError(
+            "🛑 Invalid input. Supported types: .csv, .txt, .tsv, .tab, .mtx, .mtx.gz "
+            "and .h5ad, or AnnData loaded in memory"
+        )
+    logger.info(
+        f"🔬 Input data has {self.indata.shape[0]} cells and {len(self.indata_genes)} "
+        "genes"
+    )
+if __name__ == "__main__":  # script entry point; nothing below runs on import
+    Classifier.__init__ = classifier_init  # type: ignore  # monkey-patch; presumably picked up by celltypist.annotate below — confirm
+    args = parser.parse_args()
+    adata = sc.read_h5ad(args.input)
+    over_clustering = args.over_clustering
+    if over_clustering and over_clustering not in adata.obs.columns:  # fail early on a missing obs column
+        raise ValueError(
+            f"Over clustering column '{over_clustering}' not found in AnnData object."
+        )
+    if "neighbors" in adata.uns and "params" in adata.uns["neighbors"]:
+        adata.uns["neighbors"]["params"].setdefault("n_neighbors", 15)  # only fills a missing key; presumably read downstream — confirm
+    annotated = celltypist.annotate(
+        adata,
+        model=args.model,
+        majority_voting=args.majority_voting,
+        over_clustering=over_clustering,
+    )
+    out_adata = annotated.to_adata()
+    # leave as is
+    # if over_clustering and args.majority_voting:
+    #     # rename majority_voting column to over_clustering
+    #     out_adata.obs[over_clustering] = out_adata.obs["majority_voting"]
+    if args.output.endswith(".h5ad"):  # AnnData output
+        try:
+            out_adata._raw._var.rename(  # type: ignore
+                columns={"_index": "features"}, inplace=True
+            )
+            del out_adata.raw  # NOTE(review): raw is dropped right after the rename, so the rename looks discarded — confirm
+        except (KeyError, AttributeError):  # best effort: the raw cleanup is skipped on these errors
+            pass
+        out_adata.write(args.output)
+    else:  # any other suffix: write only the obs table, tab-separated, with its index
+        out_adata.obs.to_csv(args.output, sep="\t", index=True)

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0py3-none-any.whl → 0.34.26py3-none-any.whl