biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +328 -292
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +481 -215
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +231 -76
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +6 -5
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/ScFGSEA.svelte +0 -16
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,13 +1,8 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(Seurat)
|
|
4
|
-
library(tibble)
|
|
5
|
-
library(enrichR)
|
|
6
2
|
library(rlang)
|
|
7
3
|
library(dplyr)
|
|
8
|
-
library(
|
|
9
|
-
|
|
10
|
-
setEnrichrSite("Enrichr")
|
|
4
|
+
library(tidyselect)
|
|
5
|
+
library(biopipen.utils)
|
|
11
6
|
|
|
12
7
|
srtfile <- {{in.srtobj | r}}
|
|
13
8
|
outdir <- {{out.outdir | r}}
|
|
@@ -16,238 +11,202 @@ mutaters <- {{ envs.mutaters | r }}
|
|
|
16
11
|
ident <- {{ envs.ident | r }}
|
|
17
12
|
group.by <- {{ envs["group-by"] | r }} # nolint
|
|
18
13
|
each <- {{ envs.each | r }}
|
|
19
|
-
prefix_each <- {{ envs.prefix_each | r }}
|
|
20
|
-
section <- {{ envs.section | r }}
|
|
21
14
|
dbs <- {{ envs.dbs | r }}
|
|
22
15
|
n <- {{ envs.n | r }}
|
|
16
|
+
enrich_style <- {{ envs.enrich_style | r }}
|
|
23
17
|
sset <- {{ envs.subset | r }}
|
|
18
|
+
enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
|
|
19
|
+
enrich_plots <- {{ envs.enrich_plots | r }}
|
|
24
20
|
cases <- {{ envs.cases | r: todot = "-" }} # nolint
|
|
25
21
|
|
|
26
22
|
set.seed(8525)
|
|
23
|
+
log <- get_logger()
|
|
24
|
+
reporter <- get_reporter()
|
|
27
25
|
|
|
28
|
-
|
|
29
|
-
srtobj <-
|
|
26
|
+
log$info("Reading Seurat object ...")
|
|
27
|
+
srtobj <- read_obj(srtfile)
|
|
28
|
+
if (!"Identity" %in% colnames(srtobj@meta.data)) {
|
|
29
|
+
srtobj@meta.data$Identity <- Idents(srtobj)
|
|
30
|
+
}
|
|
30
31
|
assay <- DefaultAssay(srtobj)
|
|
31
32
|
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
34
|
+
log$info("Mutating meta data ...")
|
|
34
35
|
srtobj@meta.data <- srtobj@meta.data %>%
|
|
35
36
|
mutate(!!!lapply(mutaters, parse_expr))
|
|
36
37
|
}
|
|
37
38
|
|
|
39
|
+
enrich_plots <- lapply(enrich_plots, function(x) {
|
|
40
|
+
list_update(enrich_plots_defaults, x)
|
|
41
|
+
})
|
|
38
42
|
defaults <- list(
|
|
39
43
|
ident = ident,
|
|
40
44
|
group.by = group.by,
|
|
41
45
|
each = each,
|
|
42
|
-
prefix_each = prefix_each,
|
|
43
|
-
section = section,
|
|
44
46
|
dbs = dbs,
|
|
45
47
|
n = n,
|
|
48
|
+
enrich_style = enrich_style,
|
|
49
|
+
enrich_plots = enrich_plots,
|
|
50
|
+
enrich_plots_defaults = enrich_plots_defaults,
|
|
46
51
|
subset = sset
|
|
47
52
|
)
|
|
48
53
|
|
|
49
|
-
|
|
54
|
+
cases <- expand_cases(cases, defaults, default_case = "Top Expressing Genes", post = function(name, case) {
|
|
50
55
|
outcases <- list()
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
outcases[[name]] <- case
|
|
58
|
-
} else {
|
|
59
|
-
outcases[[paste0(case$section, "::", name)]] <- case
|
|
60
|
-
}
|
|
61
|
-
} else if (no_each) { # no_ident
|
|
62
|
-
# expanding idents
|
|
63
|
-
if (has_section) {
|
|
64
|
-
log_warn(" Ignoring `section` in case `{name}` when no `ident` is set.")
|
|
65
|
-
case$section <- NULL
|
|
66
|
-
}
|
|
67
|
-
if (!is.null(case$subset)) {
|
|
68
|
-
idents <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
|
|
69
|
-
pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
|
|
70
|
-
} else {
|
|
71
|
-
idents <- srtobj@meta.data %>%
|
|
72
|
-
pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
|
|
73
|
-
}
|
|
56
|
+
if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
|
|
57
|
+
case$enrich_plots <- lapply(
|
|
58
|
+
case$enrich_plots,
|
|
59
|
+
function(x) { list_update(case$enrich_plots_defaults, x) }
|
|
60
|
+
)
|
|
61
|
+
case$enrich_plots_defaults <- NULL
|
|
74
62
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
}
|
|
85
|
-
# expanding eachs
|
|
86
|
-
if (has_section) {
|
|
87
|
-
log_warn(" Ignoring `section` in case `{name}` when `each` is set.")
|
|
88
|
-
case$section <- NULL
|
|
63
|
+
outcases[[name]] <- case
|
|
64
|
+
} else {
|
|
65
|
+
eachs <- if (!is.null(case$subset)) {
|
|
66
|
+
srtobj@meta.data %>%
|
|
67
|
+
filter(!!parse_expr(case$subset)) %>%
|
|
68
|
+
pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
|
|
69
|
+
} else {
|
|
70
|
+
srtobj@meta.data %>%
|
|
71
|
+
pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
|
|
89
72
|
}
|
|
90
73
|
|
|
91
|
-
if (
|
|
92
|
-
|
|
93
|
-
pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
|
|
94
|
-
} else {
|
|
95
|
-
eachs <- srtobj@meta.data %>%
|
|
96
|
-
pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
|
|
74
|
+
if (length(cases) == 0 && name == "Top Expressing Genes") {
|
|
75
|
+
name <- case$each
|
|
97
76
|
}
|
|
98
77
|
|
|
99
78
|
for (each in eachs) {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
!!sym(case$group.by),
|
|
105
|
-
NA
|
|
106
|
-
)
|
|
107
|
-
)
|
|
79
|
+
newname <- paste0(name, " - ", each)
|
|
80
|
+
newcase <- case
|
|
81
|
+
newcase$each_name <- case$each
|
|
82
|
+
newcase$each <- each
|
|
108
83
|
|
|
109
|
-
if (
|
|
110
|
-
|
|
84
|
+
if (!is.null(case$subset)) {
|
|
85
|
+
newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
|
|
111
86
|
} else {
|
|
112
|
-
|
|
87
|
+
newcase$subset <- paste0(bQuote(case$each), " == '", each, "'")
|
|
113
88
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
89
|
+
|
|
90
|
+
newcase$enrich_plots <- lapply(
|
|
91
|
+
case$enrich_plots,
|
|
92
|
+
function(x) { list_update(case$enrich_plots_defaults, x) }
|
|
93
|
+
)
|
|
94
|
+
newcase$enrich_plots_defaults <- NULL
|
|
95
|
+
|
|
96
|
+
outcases[[newname]] <- newcase
|
|
117
97
|
}
|
|
118
98
|
}
|
|
119
|
-
outcases
|
|
120
|
-
}
|
|
121
99
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
sep = "\t",
|
|
142
|
-
row.names = TRUE,
|
|
143
|
-
col.names = TRUE,
|
|
144
|
-
quote = FALSE
|
|
100
|
+
outcases
|
|
101
|
+
})
|
|
102
|
+
|
|
103
|
+
log$info("Running cases ...")
|
|
104
|
+
|
|
105
|
+
process_markers <- function(markers, info, case) {
|
|
106
|
+
# Save markers
|
|
107
|
+
write.table(markers, file.path(info$prefix, "top_genes.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
|
|
108
|
+
reporter$add2(
|
|
109
|
+
list(
|
|
110
|
+
name = "Table",
|
|
111
|
+
contents = list(
|
|
112
|
+
list(kind = "descr", content = "Showing top expressing genes ordered by their expression descendingly."),
|
|
113
|
+
list(kind = "table", src = file.path(info$prefix, "top_genes.tsv"), data = list(nrows = 100))
|
|
114
|
+
)
|
|
115
|
+
),
|
|
116
|
+
hs = c(info$section, info$name),
|
|
117
|
+
hs2 = paste0("Top Genes"),
|
|
118
|
+
ui = "tabs"
|
|
145
119
|
)
|
|
146
120
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
)
|
|
121
|
+
enrich <- RunEnrichment(
|
|
122
|
+
markers$gene,
|
|
123
|
+
dbs = case$dbs, style = case$enrich_style)
|
|
124
|
+
|
|
125
|
+
write.table(enrich, file.path(info$prefix, "enrich.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
|
|
126
|
+
reporter$add2(
|
|
127
|
+
list(
|
|
128
|
+
name = "Table",
|
|
129
|
+
contents = list(list(kind = "table", src = file.path(info$prefix, "enrich.tsv"), data = list(nrows = 100)))
|
|
130
|
+
),
|
|
131
|
+
hs = c(info$section, info$name),
|
|
132
|
+
hs2 = "Enrichment Analysis",
|
|
133
|
+
ui = "tabs"
|
|
134
|
+
)
|
|
158
135
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
136
|
+
# Visualize enriched terms
|
|
137
|
+
if (length(case$enrich_plots) > 0) {
|
|
138
|
+
for (db in case$dbs) {
|
|
139
|
+
plots <- list()
|
|
140
|
+
for (plotname in names(case$enrich_plots)) {
|
|
141
|
+
plotargs <- case$enrich_plots[[plotname]]
|
|
142
|
+
plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
|
|
143
|
+
|
|
144
|
+
p <- do_call(VizEnrichment, plotargs)
|
|
145
|
+
|
|
146
|
+
outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
|
|
147
|
+
if (plotargs$plot_type == "bar") {
|
|
148
|
+
attr(p, "height") <- attr(p, "height") / 1.5
|
|
149
|
+
}
|
|
150
|
+
save_plot(p, outprefix, plotargs$devpars, formats = "png")
|
|
151
|
+
plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
|
|
152
|
+
}
|
|
153
|
+
reporter$add2(
|
|
154
|
+
list(name = db, contents = plots),
|
|
155
|
+
hs = c(info$section, info$name),
|
|
156
|
+
hs2 = "Enrichment Analysis",
|
|
157
|
+
ui = "tabs"
|
|
158
|
+
)
|
|
162
159
|
}
|
|
163
|
-
|
|
164
|
-
enrich_p <- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
|
|
165
|
-
theme_prism()
|
|
166
|
-
enrich_plot <- file.path(odir, paste0("Enrichr-", db, ".png"))
|
|
167
|
-
png(enrich_plot, res = 100, height = 1000, width = 1000)
|
|
168
|
-
print(enrich_p)
|
|
169
|
-
dev.off()
|
|
170
|
-
|
|
171
|
-
enrich_plot_pdf <- file.path(odir, paste0("Enrichr-", db, ".pdf"))
|
|
172
|
-
pdf(enrich_plot_pdf, height = 10, width = 10)
|
|
173
|
-
print(enrich_p)
|
|
174
|
-
dev.off()
|
|
175
160
|
}
|
|
176
161
|
}
|
|
177
162
|
|
|
178
|
-
do_case <- function(casename) {
|
|
179
|
-
log_info("- Running for case: {casename} ...")
|
|
180
|
-
case <- cases[[casename]]
|
|
181
|
-
info <- casename_info(casename, cases, outdir, create = TRUE)
|
|
182
163
|
|
|
183
|
-
|
|
164
|
+
run_case <- function(name) {
|
|
165
|
+
log$info("Case: {name} ...")
|
|
166
|
+
case <- cases[[name]]
|
|
167
|
+
|
|
168
|
+
log$info("- Subsetting cells and calculating average expression ...")
|
|
184
169
|
if (!is.null(case$subset)) {
|
|
185
|
-
|
|
186
|
-
sobj <- subset(srtobj, !!parse_expr(case$subset))
|
|
187
|
-
}, error = function(e) {
|
|
188
|
-
log_warn(" No cells found for the subset, skipping ...")
|
|
189
|
-
})
|
|
170
|
+
subobj <- filter(srtobj, !!parse_expr(case$subset))
|
|
190
171
|
} else {
|
|
191
|
-
|
|
172
|
+
subobj <- srtobj
|
|
173
|
+
}
|
|
174
|
+
case$group.by <- case$group.by %||% "Identity"
|
|
175
|
+
if (is.null(case$ident)) {
|
|
176
|
+
case$ident <- as.character(unique(subobj@meta.data[[case$group.by]]))
|
|
192
177
|
}
|
|
193
178
|
avgexpr <- AverageExpression(
|
|
194
|
-
|
|
179
|
+
subobj,
|
|
195
180
|
group.by = case$group.by,
|
|
196
181
|
assays = assay
|
|
197
182
|
)[[assay]]
|
|
198
183
|
# https://github.com/satijalab/seurat/issues/7893
|
|
199
|
-
colnames(avgexpr) <- as.character(unique(
|
|
184
|
+
colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group.by]]))
|
|
200
185
|
avgexpr <- avgexpr[, case$ident, drop = FALSE]
|
|
201
|
-
avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
|
|
202
186
|
|
|
203
|
-
|
|
187
|
+
for (idt in case$ident) {
|
|
188
|
+
log$info("- Processing {idt} ...")
|
|
189
|
+
info <- case_info(paste0(name, "::", idt), outdir, create = TRUE)
|
|
190
|
+
expr <- avgexpr[, idt, drop = FALSE]
|
|
191
|
+
expr <- expr[order(expr, decreasing = TRUE), , drop = FALSE]
|
|
192
|
+
expr <- expr[1:min(case$n, nrow(expr)), , drop = FALSE]
|
|
193
|
+
expr <- as.data.frame(expr)
|
|
194
|
+
expr$gene <- rownames(expr)
|
|
195
|
+
colnames(expr) <- c("avg_expr", "gene")
|
|
196
|
+
expr <- expr[, c("gene", "avg_expr"), drop = FALSE]
|
|
197
|
+
|
|
198
|
+
log$info(" Performing enrichment analysis ...")
|
|
199
|
+
process_markers(expr, info, case = list(
|
|
200
|
+
ident = idt,
|
|
201
|
+
dbs = case$dbs,
|
|
202
|
+
enrich_style = case$enrich_style,
|
|
203
|
+
enrich_plots = case$enrich_plots
|
|
204
|
+
))
|
|
205
|
+
}
|
|
204
206
|
|
|
205
|
-
|
|
207
|
+
invisible()
|
|
206
208
|
}
|
|
207
209
|
|
|
208
|
-
|
|
209
|
-
log_debug(" Adding case report ...")
|
|
210
|
-
h1 = info$h1
|
|
211
|
-
h2 = info$h2
|
|
212
|
-
|
|
213
|
-
if (!is.null(info$error)) {
|
|
214
|
-
add_report(
|
|
215
|
-
list(
|
|
216
|
-
kind = "descr",
|
|
217
|
-
content = paste0("Top ", n, " expressing genes")
|
|
218
|
-
),
|
|
219
|
-
list(kind = "error", content = info$error),
|
|
220
|
-
h1 = h1,
|
|
221
|
-
h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
|
|
222
|
-
h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
|
|
223
|
-
)
|
|
224
|
-
} else {
|
|
225
|
-
add_report(
|
|
226
|
-
list(
|
|
227
|
-
kind = "descr",
|
|
228
|
-
content = paste0("Top ", n, " expressing genes")
|
|
229
|
-
),
|
|
230
|
-
list(
|
|
231
|
-
kind = "table",
|
|
232
|
-
src = file.path(info$casedir, "exprn.txt")
|
|
233
|
-
),
|
|
234
|
-
h1 = h1,
|
|
235
|
-
h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
|
|
236
|
-
h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
|
|
237
|
-
)
|
|
238
|
-
|
|
239
|
-
add_report(
|
|
240
|
-
list(
|
|
241
|
-
kind = "descr",
|
|
242
|
-
content = paste0("Enrichment analysis for the top ", n, " expressing genes")
|
|
243
|
-
),
|
|
244
|
-
list(kind = "enrichr", dir = info$casedir),
|
|
245
|
-
h1 = h1,
|
|
246
|
-
h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
|
|
247
|
-
h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
|
|
248
|
-
)
|
|
249
|
-
}
|
|
250
|
-
}
|
|
210
|
+
sapply(names(cases), run_case)
|
|
251
211
|
|
|
252
|
-
|
|
253
|
-
save_report(joboutdir)
|
|
212
|
+
reporter$save(joboutdir)
|
|
@@ -12,8 +12,9 @@ parser.add_argument(
|
|
|
12
12
|
parser.add_argument(
|
|
13
13
|
"-c",
|
|
14
14
|
"--over_clustering",
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
required=False,
|
|
16
|
+
default=None,
|
|
17
|
+
help="Over clustering. Error if the column does not exist.",
|
|
17
18
|
)
|
|
18
19
|
|
|
19
20
|
|
|
@@ -25,8 +26,9 @@ if __name__ == "__main__":
|
|
|
25
26
|
adata = sc.read_h5ad(args.input)
|
|
26
27
|
over_clustering = args.over_clustering
|
|
27
28
|
if over_clustering and over_clustering not in adata.obs.columns:
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
raise ValueError(
|
|
30
|
+
f"Over clustering column '{over_clustering}' not found in AnnData object."
|
|
31
|
+
)
|
|
30
32
|
|
|
31
33
|
annotated = celltypist.annotate(
|
|
32
34
|
adata,
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Convert Seurat objects to AnnData format back and forth.
|
|
2
|
+
|
|
3
|
+
Requires R with the packages Seurat, SeuratDisk and biopipen.utils.R installed.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def convert_seurat_to_anndata(
    input_file,
    output_file,
    assay=None,
    subset=None,
    rscript="Rscript",
):
    """Convert a Seurat object to AnnData format.

    Writes a small R script that loads the ``biopipen.utils`` R package and
    calls ``ConvertSeuratToAnnData()``, runs it with ``rscript``, and removes
    the script afterwards.

    Args:
        input_file (str): Path to the input Seurat RDS or qs/qs2 file.
        output_file (str): Path to the output AnnData H5AD file.
        assay (str): Name of the assay to use in the Seurat object.
            A falsy value is passed to R as ``NULL``.
        subset (str): An R expression to subset the Seurat object to convert.
            A falsy value is passed to R as ``NULL``.
        rscript (str): Name of, or path to, the Rscript executable.
    """
    import json
    import os
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_literal(value):
        # A JSON string literal is double-quoted and escapes backslashes,
        # double quotes and control characters with sequences that R's parser
        # also accepts, so paths such as C:\data or names containing quotes
        # no longer corrupt the generated script.
        return json.dumps(value, ensure_ascii=False)

    script = "\n".join(
        [
            "library(biopipen.utils)",
            "",
            f"assay <- {r_literal(assay) if assay else 'NULL'}",
            f"subset <- {r_literal(subset) if subset else 'NULL'}",
            "",
            "ConvertSeuratToAnnData(",
            f"    {r_literal(str(input_file))}, {r_literal(str(output_file))},",
            "    assay = assay, subset = subset",
            ")",
            "",
        ]
    )

    # delete=False so the file survives the `with` block and can be read by
    # the Rscript process; it is removed explicitly below.
    with NamedTemporaryFile(
        "w", suffix=".R", delete=False, encoding="utf-8"
    ) as temp_script:
        temp_script_path = temp_script.name
        temp_script.write(script)

    try:
        # NOTE(review): assumes run_command(..., fg=True) blocks until the
        # process exits - confirm against biopipen.utils.misc.run_command.
        run_command([rscript, temp_script_path], fg=True)
    finally:
        # Best-effort cleanup: a failure to remove the temporary script must
        # not mask the outcome of the conversion itself.
        try:
            os.unlink(temp_script_path)
        except OSError:
            pass
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def convert_anndata_to_seurat(
    input_file,
    output_file,
    assay=None,
    rscript="Rscript",
):
    """Convert an AnnData object to Seurat format.

    Writes a small R script that loads the ``biopipen.utils`` R package and
    calls ``ConvertAnnDataToSeurat()``, runs it with ``rscript``, and removes
    the script afterwards.

    Args:
        input_file (str): Path to the input AnnData H5AD file.
        output_file (str): Path to the output Seurat RDS or qs/qs2 file.
        assay (str): Name of the assay to use in the Seurat object.
            A falsy value is passed to R as ``NULL``.
        rscript (str): Name of, or path to, the Rscript executable.
    """
    import json
    import os
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_literal(value):
        # A JSON string literal is double-quoted and escapes backslashes,
        # double quotes and control characters with sequences that R's parser
        # also accepts, so paths such as C:\data or names containing quotes
        # no longer corrupt the generated script.
        return json.dumps(value, ensure_ascii=False)

    script = "\n".join(
        [
            "library(biopipen.utils)",
            "",
            f"assay <- {r_literal(assay) if assay else 'NULL'}",
            "",
            "ConvertAnnDataToSeurat(",
            f"    {r_literal(str(input_file))}, {r_literal(str(output_file))},",
            "    assay = assay",
            ")",
            "",
        ]
    )

    # delete=False so the file survives the `with` block and can be read by
    # the Rscript process; it is removed explicitly below.
    with NamedTemporaryFile(
        "w", suffix=".R", delete=False, encoding="utf-8"
    ) as temp_script:
        temp_script_path = temp_script.name
        temp_script.write(script)

    try:
        # NOTE(review): assumes run_command(..., fg=True) blocks until the
        # process exits - confirm against biopipen.utils.misc.run_command.
        run_command([rscript, temp_script_path], fg=True)
    finally:
        # Best-effort cleanup: a failure to remove the temporary script must
        # not mask the outcome of the conversion itself.
        try:
            os.unlink(temp_script_path)
        except OSError:
            pass
|