biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,13 +1,8 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(Seurat)
|
|
4
|
-
library(tibble)
|
|
5
|
-
library(enrichR)
|
|
6
2
|
library(rlang)
|
|
7
3
|
library(dplyr)
|
|
8
|
-
library(
|
|
9
|
-
|
|
10
|
-
setEnrichrSite("Enrichr")
|
|
4
|
+
library(tidyselect)
|
|
5
|
+
library(biopipen.utils)
|
|
11
6
|
|
|
12
7
|
srtfile <- {{in.srtobj | r}}
|
|
13
8
|
outdir <- {{out.outdir | r}}
|
|
@@ -16,238 +11,200 @@ mutaters <- {{ envs.mutaters | r }}
|
|
|
16
11
|
ident <- {{ envs.ident | r }}
|
|
17
12
|
group.by <- {{ envs["group-by"] | r }} # nolint
|
|
18
13
|
each <- {{ envs.each | r }}
|
|
19
|
-
prefix_each <- {{ envs.prefix_each | r }}
|
|
20
|
-
section <- {{ envs.section | r }}
|
|
21
14
|
dbs <- {{ envs.dbs | r }}
|
|
22
15
|
n <- {{ envs.n | r }}
|
|
16
|
+
enrich_style <- {{ envs.enrich_style | r }}
|
|
23
17
|
sset <- {{ envs.subset | r }}
|
|
18
|
+
enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
|
|
19
|
+
enrich_plots <- {{ envs.enrich_plots | r }}
|
|
24
20
|
cases <- {{ envs.cases | r: todot = "-" }} # nolint
|
|
25
21
|
|
|
26
22
|
set.seed(8525)
|
|
23
|
+
log <- get_logger()
|
|
24
|
+
reporter <- get_reporter()
|
|
27
25
|
|
|
28
|
-
|
|
29
|
-
srtobj <-
|
|
26
|
+
log$info("Reading Seurat object ...")
|
|
27
|
+
srtobj <- read_obj(srtfile)
|
|
28
|
+
if (!"Identity" %in% colnames(srtobj@meta.data)) {
|
|
29
|
+
srtobj@meta.data$Identity <- Idents(srtobj)
|
|
30
|
+
}
|
|
30
31
|
assay <- DefaultAssay(srtobj)
|
|
31
32
|
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
34
|
+
log$info("Mutating meta data ...")
|
|
34
35
|
srtobj@meta.data <- srtobj@meta.data %>%
|
|
35
36
|
mutate(!!!lapply(mutaters, parse_expr))
|
|
36
37
|
}
|
|
37
38
|
|
|
39
|
+
enrich_plots <- lapply(enrich_plots, function(x) {
|
|
40
|
+
list_update(enrich_plots_defaults, x)
|
|
41
|
+
})
|
|
38
42
|
defaults <- list(
|
|
39
43
|
ident = ident,
|
|
40
44
|
group.by = group.by,
|
|
41
45
|
each = each,
|
|
42
|
-
prefix_each = prefix_each,
|
|
43
|
-
section = section,
|
|
44
46
|
dbs = dbs,
|
|
45
47
|
n = n,
|
|
48
|
+
enrich_style = enrich_style,
|
|
49
|
+
enrich_plots = enrich_plots,
|
|
50
|
+
enrich_plots_defaults = enrich_plots_defaults,
|
|
46
51
|
subset = sset
|
|
47
52
|
)
|
|
48
53
|
|
|
49
|
-
|
|
54
|
+
cases <- expand_cases(cases, defaults, default_case = "Top Expressing Genes", post = function(name, case) {
    # Post-processing callback handed to expand_cases().
    #
    # name: name of the case being processed
    # case: list with the settings of the case (`each`, `subset`, `enrich_plots`,
    #       `enrich_plots_defaults`, ...)
    #
    # Returns a named list of cases: the case itself when `each` is not set,
    # otherwise one case per distinct non-NA value of the `each` meta.data column.

    # Complete every enrichment plot with the case-level plot defaults; the
    # defaults are not needed afterwards, so they are dropped from the case.
    case$enrich_plots <- lapply(
        case$enrich_plots,
        function(x) { list_update(case$enrich_plots_defaults, x) }
    )
    case$enrich_plots_defaults <- NULL

    # Look at the `each` of this case (not the script-level default `each`), so
    # that a case-level `each = FALSE` or a case-level column name is honoured.
    no_each <- is.null(case$each) || isFALSE(case$each) ||
        is.na(case$each) || nchar(case$each) == 0

    outcases <- list()
    if (no_each) {
        outcases[[name]] <- case
        return(outcases)
    }

    # Distinct values of the `each` column, restricted to the subset if given
    meta <- srtobj@meta.data
    if (!is.null(case$subset)) {
        meta <- meta %>% filter(!!parse_expr(case$subset))
    }
    eachs <- meta %>% pull(case$each) %>% na.omit() %>% unique() %>% as.vector()

    # `cases` on the right-hand side is still the list given in the
    # configuration: when it is empty and only the default case exists, the
    # expanded cases are named after the `each` column instead.
    if (length(cases) == 0 && name == "Top Expressing Genes") {
        name <- case$each
    }

    for (each_value in eachs) {
        newcase <- case
        newcase$each_name <- case$each
        newcase$each <- each_value

        each_filter <- paste0(bQuote(case$each), " == '", each_value, "'")
        if (is.null(case$subset)) {
            newcase$subset <- each_filter
        } else {
            # Parenthesize the user subset: `&` binds tighter than `|` in R, so
            # "a | b" followed by " & <filter>" would otherwise mean "a | (b & <filter>)".
            newcase$subset <- paste0("(", case$subset, ") & ", each_filter)
        }

        outcases[[paste0(name, " - ", each_value)]] <- newcase
    }

    outcases
})
|
|
102
|
+
|
|
103
|
+
log$info("Running cases ...")
|
|
104
|
+
|
|
105
|
+
process_markers <- function(markers, info, case) {
    # Save the top-gene table, run the enrichment analysis on its genes and
    # register the resulting tables and plots with the report.
    #
    # markers: table with a `gene` column; the genes are passed to RunEnrichment()
    # info: case information; `prefix` is used as the output directory,
    #       `section` and `name` as the report headings
    # case: list with `dbs`, `enrich_style` and `enrich_plots`
    headings <- c(info$section, info$name)
    genes_file <- file.path(info$prefix, "top_genes.tsv")
    enrich_file <- file.path(info$prefix, "enrich.tsv")

    # Top genes: table on disk + entry in the report
    write.table(markers, genes_file, sep = "\t", quote = FALSE, row.names = FALSE)
    genes_tab <- list(
        name = "Table",
        contents = list(
            list(kind = "descr", content = "Showing top expressing genes ordered by their expression descendingly."),
            list(kind = "table", src = genes_file, data = list(nrows = 100))
        )
    )
    reporter$add2(genes_tab, hs = headings, hs2 = "Top Genes", ui = "tabs")

    # Enrichment: table on disk + entry in the report
    enrich <- RunEnrichment(markers$gene, dbs = case$dbs, style = case$enrich_style)
    write.table(enrich, enrich_file, sep = "\t", quote = FALSE, row.names = FALSE)
    enrich_tab <- list(
        name = "Table",
        contents = list(list(kind = "table", src = enrich_file, data = list(nrows = 100)))
    )
    reporter$add2(enrich_tab, hs = headings, hs2 = "Enrichment Analysis", ui = "tabs")

    # Nothing to visualize when no enrichment plots are configured
    if (length(case$enrich_plots) == 0) {
        return(invisible(NULL))
    }

    # One report tab per database, holding one image per configured plot
    for (db in case$dbs) {
        images <- list()
        for (plot_key in names(case$enrich_plots)) {
            args <- case$enrich_plots[[plot_key]]
            args$data <- enrich[enrich$Database == db, , drop = FALSE]
            fig <- do_call(VizEnrichment, args)

            fig_prefix <- file.path(
                info$prefix,
                paste0("enrich.", slugify(db), ".", slugify(plot_key))
            )
            # Shrink the height attribute carried by the plot before saving
            attr(fig, "height") <- attr(fig, "height") / 1.5
            save_plot(fig, fig_prefix, args$devpars, formats = "png")
            images[[length(images) + 1]] <- reporter$image(fig_prefix, c(), FALSE)
        }
        reporter$add2(
            list(name = db, contents = images),
            hs = headings,
            hs2 = "Enrichment Analysis",
            ui = "tabs"
        )
    }
}
|
|
177
160
|
|
|
178
|
-
do_case <- function(casename) {
|
|
179
|
-
log_info("- Running for case: {casename} ...")
|
|
180
|
-
case <- cases[[casename]]
|
|
181
|
-
info <- casename_info(casename, cases, outdir, create = TRUE)
|
|
182
161
|
|
|
183
|
-
|
|
162
|
+
run_case <- function(name) {
    # Run a single case: average the expression per group, then, for every
    # identity of the case, take the top `n` genes by average expression and
    # hand them to process_markers() for saving, enrichment and reporting.
    #
    # name: name of the case, used to look the case up in `cases`
    log$info("Case: {name} ...")
    case <- cases[[name]]

    log$info("- Subsetting cells and calculating average expression ...")
    subobj <- if (is.null(case$subset)) {
        srtobj
    } else {
        filter(srtobj, !!parse_expr(case$subset))
    }
    case$group.by <- case$group.by %||% "Identity"
    groups <- as.character(unique(subobj@meta.data[[case$group.by]]))
    if (is.null(case$ident)) {
        # No identities requested: use all groups present in the (subsetted) object
        case$ident <- groups
    }
    avgexpr <- AverageExpression(
        subobj,
        group.by = case$group.by,
        assays = assay
    )[[assay]]
    # https://github.com/satijalab/seurat/issues/7893
    # NOTE(review): restoring the labels this way assumes the columns returned by
    # AverageExpression() are in the order of first appearance of the groups in
    # the meta data - confirm, otherwise columns end up with the wrong label.
    colnames(avgexpr) <- groups
    avgexpr <- avgexpr[, case$ident, drop = FALSE]

    for (idt in case$ident) {
        log$info("- Processing {idt} ...")
        info <- case_info(paste0(name, "::", idt), outdir, create = TRUE)
        expr <- avgexpr[, idt, drop = FALSE]
        expr <- expr[order(expr, decreasing = TRUE), , drop = FALSE]
        # seq_len() rather than 1:min(...): with `n = 0` or no rows, `1:0`
        # is c(1, 0) and would select the first row instead of none.
        expr <- expr[seq_len(min(case$n, nrow(expr))), , drop = FALSE]
        expr <- as.data.frame(expr)
        expr$gene <- rownames(expr)
        colnames(expr) <- c("avg_expr", "gene")
        expr <- expr[, c("gene", "avg_expr"), drop = FALSE]

        log$info("    Performing enrichment analysis ...")
        process_markers(expr, info, case = list(
            ident = idt,
            dbs = case$dbs,
            enrich_style = case$enrich_style,
            enrich_plots = case$enrich_plots
        ))
    }

    invisible()
}
|
|
207
207
|
|
|
208
|
-
|
|
209
|
-
log_debug(" Adding case report ...")
|
|
210
|
-
h1 = info$h1
|
|
211
|
-
h2 = info$h2
|
|
212
|
-
|
|
213
|
-
if (!is.null(info$error)) {
|
|
214
|
-
add_report(
|
|
215
|
-
list(
|
|
216
|
-
kind = "descr",
|
|
217
|
-
content = paste0("Top ", n, " expressing genes")
|
|
218
|
-
),
|
|
219
|
-
list(kind = "error", content = info$error),
|
|
220
|
-
h1 = h1,
|
|
221
|
-
h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
|
|
222
|
-
h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
|
|
223
|
-
)
|
|
224
|
-
} else {
|
|
225
|
-
add_report(
|
|
226
|
-
list(
|
|
227
|
-
kind = "descr",
|
|
228
|
-
content = paste0("Top ", n, " expressing genes")
|
|
229
|
-
),
|
|
230
|
-
list(
|
|
231
|
-
kind = "table",
|
|
232
|
-
src = file.path(info$casedir, "exprn.txt")
|
|
233
|
-
),
|
|
234
|
-
h1 = h1,
|
|
235
|
-
h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
|
|
236
|
-
h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
|
|
237
|
-
)
|
|
238
|
-
|
|
239
|
-
add_report(
|
|
240
|
-
list(
|
|
241
|
-
kind = "descr",
|
|
242
|
-
content = paste0("Enrichment analysis for the top ", n, " expressing genes")
|
|
243
|
-
),
|
|
244
|
-
list(kind = "enrichr", dir = info$casedir),
|
|
245
|
-
h1 = h1,
|
|
246
|
-
h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
|
|
247
|
-
h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
|
|
248
|
-
)
|
|
249
|
-
}
|
|
250
|
-
}
|
|
208
|
+
sapply(names(cases), run_case)
|
|
251
209
|
|
|
252
|
-
|
|
253
|
-
save_report(joboutdir)
|
|
210
|
+
reporter$save(joboutdir)
|
|
@@ -12,8 +12,9 @@ parser.add_argument(
|
|
|
12
12
|
parser.add_argument(
|
|
13
13
|
"-c",
|
|
14
14
|
"--over_clustering",
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
required=False,
|
|
16
|
+
default=None,
|
|
17
|
+
help="Over clustering. Error if the column does not exist.",
|
|
17
18
|
)
|
|
18
19
|
|
|
19
20
|
|
|
@@ -25,8 +26,9 @@ if __name__ == "__main__":
|
|
|
25
26
|
adata = sc.read_h5ad(args.input)
|
|
26
27
|
over_clustering = args.over_clustering
|
|
27
28
|
if over_clustering and over_clustering not in adata.obs.columns:
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
raise ValueError(
|
|
30
|
+
f"Over clustering column '{over_clustering}' not found in AnnData object."
|
|
31
|
+
)
|
|
30
32
|
|
|
31
33
|
annotated = celltypist.annotate(
|
|
32
34
|
adata,
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Convert Seurat objects to AnnData format back and forth.
|
|
2
|
+
|
|
3
|
+
Need R and R packages Seurat, SeuratDisk and biopipen.utils.R installed.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def convert_seurat_to_anndata(
    input_file,
    output_file,
    assay=None,
    subset=None,
    rscript="Rscript",
):
    """Convert Seurat object to AnnData format.

    An R script that loads the R package ``biopipen.utils`` and calls
    ``ConvertSeuratToAnnData()`` is written to a temporary file, run with
    ``rscript`` and removed again afterwards.

    Args:
        input_file (str): Path to the input Seurat RDS or qs/qs2 file.
        output_file (str): Path to the output AnnData H5AD file.
        assay (str): Name of the assay to use in the Seurat object.
            A falsy value is passed to R as ``NULL``.
        subset (str): An R expression to subset the Seurat object to convert.
            It is handed to R as a string; a falsy value becomes ``NULL``.
        rscript (str): Rscript executable; used as the first element of the
            command that runs the generated script.
    """
    import json
    import os
    from contextlib import suppress
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_string(value):
        """Render `value` as a double-quoted R string literal."""
        # The escapes produced by json.dumps (\", \\, \n, \uXXXX, ...) are also
        # valid in R string literals, so quotes and backslashes in paths or
        # expressions no longer break the generated script.
        return json.dumps(str(value), ensure_ascii=False)

    script = f"""
    library(biopipen.utils)

    assay <- {r_string(assay) if assay else 'NULL'}
    subset <- {r_string(subset) if subset else 'NULL'}

    ConvertSeuratToAnnData(
        {r_string(input_file)}, {r_string(output_file)}, assay = assay, subset = subset
    )
    """

    # Save the script to a temporary file. delete=False keeps it on disk after
    # the handle is closed so that Rscript can read it.
    with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
        temp_script.write(script.encode("utf-8"))
        temp_script_path = temp_script.name

    # Run the R script using the provided Rscript command.
    # NOTE(review): the cleanup assumes run_command(..., fg=True) only returns
    # once R has finished - confirm against biopipen.utils.misc.
    try:
        run_command([rscript, temp_script_path], fg=True)
    finally:
        # Best-effort removal so temporary scripts do not pile up
        with suppress(OSError):
            os.unlink(temp_script_path)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def convert_anndata_to_seurat(
    input_file,
    output_file,
    assay=None,
    rscript="Rscript",
):
    """Convert AnnData object to Seurat format.

    An R script that loads the R package ``biopipen.utils`` and calls
    ``ConvertAnnDataToSeurat()`` is written to a temporary file, run with
    ``rscript`` and removed again afterwards.

    Args:
        input_file (str): Path to the input AnnData H5AD file.
        output_file (str): Path to the output Seurat RDS or qs/qs2 file.
        assay (str): Name of the assay to use in the Seurat object.
            A falsy value is passed to R as ``NULL``.
        rscript (str): Rscript executable; used as the first element of the
            command that runs the generated script.
    """
    import json
    import os
    from contextlib import suppress
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_string(value):
        """Render `value` as a double-quoted R string literal."""
        # The escapes produced by json.dumps (\", \\, \n, \uXXXX, ...) are also
        # valid in R string literals, so quotes and backslashes in paths no
        # longer break the generated script.
        return json.dumps(str(value), ensure_ascii=False)

    script = f"""
    library(biopipen.utils)

    assay <- {r_string(assay) if assay else 'NULL'}

    ConvertAnnDataToSeurat(
        {r_string(input_file)}, {r_string(output_file)}, assay = assay
    )
    """

    # Save the script to a temporary file. delete=False keeps it on disk after
    # the handle is closed so that Rscript can read it.
    with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
        temp_script.write(script.encode("utf-8"))
        temp_script_path = temp_script.name

    # Run the R script using the provided Rscript command.
    # NOTE(review): the cleanup assumes run_command(..., fg=True) only returns
    # once R has finished - confirm against biopipen.utils.misc.
    try:
        run_command([rscript, temp_script_path], fg=True)
    finally:
        # Best-effort removal so temporary scripts do not pile up
        with suppress(OSError):
            os.unlink(temp_script_path)
|