biopipen-0.21.2-py3-none-any.whl → biopipen-0.22.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +142 -0
- biopipen/ns/scrna.py +19 -1
- biopipen/ns/tcr.py +30 -10
- biopipen/reports/delim/SampleInfo.svelte +2 -22
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna/ScFGSEA.svelte +4 -23
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
- biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
- biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -168
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/scripts/delim/SampleInfo.R +41 -7
- biopipen/scripts/scrna/CellsDistribution.R +127 -16
- biopipen/scripts/scrna/MarkersFinder.R +245 -100
- biopipen/scripts/scrna/MetaMarkers.R +163 -82
- biopipen/scripts/scrna/RadarPlots.R +163 -110
- biopipen/scripts/scrna/ScFGSEA.R +51 -11
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
- biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
- biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
- biopipen/scripts/scrna/SeuratClustering.R +73 -26
- biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
- biopipen/scripts/scrna/SeuratPreparing.R +93 -19
- biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
- biopipen/scripts/tcr/Attach2Seurat.R +2 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
- biopipen/scripts/tcr/CloneResidency.R +114 -34
- biopipen/scripts/tcr/Immunarch-basic.R +18 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
- biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
- biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
- biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
- biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
- biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
- biopipen/scripts/tcr/Immunarch.R +7 -0
- biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
- biopipen/scripts/tcr/TCRClusterStats.R +124 -11
- biopipen/scripts/tcr/TCRClustering.R +8 -9
- biopipen/scripts/tcr/TESSA.R +66 -41
- biopipen/utils/misc.R +96 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
|
@@ -5,20 +5,6 @@ features = {{envs.features | r: todot="-", skip=1}}
|
|
|
5
5
|
|
|
6
6
|
odir = file.path(outdir, "features")
|
|
7
7
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
|
-
report_toc_file = file.path(odir, "report_toc.json")
|
|
9
|
-
# Section => list(
|
|
10
|
-
# list(name?, kind, file),
|
|
11
|
-
# ...
|
|
12
|
-
# )
|
|
13
|
-
report_toc = list()
|
|
14
|
-
|
|
15
|
-
.add_toc = function(section, toc) {
|
|
16
|
-
if (section %in% names(report_toc)) {
|
|
17
|
-
report_toc[[section]][[length(report_toc[[section]]) + 1]] <<- toc
|
|
18
|
-
} else {
|
|
19
|
-
report_toc[[section]] <<- list(toc)
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
8
|
|
|
23
9
|
.get_features = function(features) {
|
|
24
10
|
if (is.null(features)) { features = 20 }
|
|
@@ -50,11 +36,9 @@ report_toc = list()
|
|
|
50
36
|
}
|
|
51
37
|
|
|
52
38
|
do_one_features = function(name) {
|
|
53
|
-
|
|
39
|
+
log_info("Doing features for: {name}")
|
|
54
40
|
|
|
55
41
|
case = list_update(features_defaults, features[[name]])
|
|
56
|
-
toc = list()
|
|
57
|
-
if (!is.null(case$section)) { toc$name = name }
|
|
58
42
|
case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
|
|
59
43
|
excluded_args = c(
|
|
60
44
|
"section",
|
|
@@ -65,30 +49,40 @@ do_one_features = function(name) {
|
|
|
65
49
|
"kind"
|
|
66
50
|
)
|
|
67
51
|
|
|
52
|
+
if (is.character(case$subset)) {
|
|
53
|
+
case$object = srtobj %>% filter(!!rlang::parse_expr(case$subset))
|
|
54
|
+
} else {
|
|
55
|
+
case$object = srtobj
|
|
56
|
+
}
|
|
57
|
+
if (!is.null(case$ident)) {
|
|
58
|
+
Idents(case$object) = case$ident
|
|
59
|
+
}
|
|
60
|
+
n_uidents = length(unique(Idents(case$object)))
|
|
61
|
+
|
|
68
62
|
fn = NULL
|
|
69
63
|
default_devpars = NULL
|
|
70
64
|
if ("ridge" %in% case$kind) {
|
|
71
65
|
case$kind = "ridge"
|
|
72
66
|
if (is.null(case$cols)) {
|
|
73
|
-
case$cols =
|
|
67
|
+
case$cols = pal_biopipen()(32)
|
|
74
68
|
}
|
|
75
69
|
excluded_args = c(excluded_args, "split.by")
|
|
76
70
|
fn = RidgePlot
|
|
77
|
-
default_devpars = function(features, ncol
|
|
71
|
+
default_devpars = function(features, ncol) {
|
|
78
72
|
if (is.null(ncol)) { ncol = 1 }
|
|
79
73
|
list(
|
|
80
74
|
width = 400 * ncol,
|
|
81
|
-
height = ceiling(length(features) / ncol) * ifelse(
|
|
75
|
+
height = ceiling(length(features) / ncol) * ifelse(n_uidents < 10, 300, 400),
|
|
82
76
|
res = 100
|
|
83
77
|
)
|
|
84
78
|
}
|
|
85
79
|
} else if ("vln" %in% case$kind || "violin" %in% case$kind) {
|
|
86
80
|
case$kind = "violin"
|
|
87
81
|
if (is.null(case$cols)) {
|
|
88
|
-
case$cols =
|
|
82
|
+
case$cols = pal_biopipen()(n_uidents)
|
|
89
83
|
}
|
|
90
84
|
fn = VlnPlot
|
|
91
|
-
default_devpars = function(features, ncol
|
|
85
|
+
default_devpars = function(features, ncol) {
|
|
92
86
|
if (is.null(ncol)) { ncol = 1 }
|
|
93
87
|
list(
|
|
94
88
|
width = 400 * ncol,
|
|
@@ -99,12 +93,12 @@ do_one_features = function(name) {
|
|
|
99
93
|
} else if ("feature" %in% case$kind) {
|
|
100
94
|
case$kind = "feature"
|
|
101
95
|
if (is.null(case$cols)) {
|
|
102
|
-
case$cols = c("lightgrey",
|
|
96
|
+
case$cols = c("lightgrey", pal_biopipen()(1))
|
|
103
97
|
}
|
|
104
98
|
excluded_args = c(excluded_args, "group.by", "assay")
|
|
105
99
|
case$shape.by = case$group.by
|
|
106
100
|
fn = FeaturePlot
|
|
107
|
-
default_devpars = function(features, ncol
|
|
101
|
+
default_devpars = function(features, ncol) {
|
|
108
102
|
if (is.null(ncol)) { ncol = 1 }
|
|
109
103
|
list(
|
|
110
104
|
width = 400 * ncol,
|
|
@@ -115,16 +109,16 @@ do_one_features = function(name) {
|
|
|
115
109
|
} else if ("dot" %in% case$kind) {
|
|
116
110
|
case$kind = "dot"
|
|
117
111
|
if (is.null(case$cols)) {
|
|
118
|
-
case$cols = c("lightgrey",
|
|
112
|
+
case$cols = c("lightgrey", pal_biopipen()(1))
|
|
119
113
|
}
|
|
120
114
|
if (is.null(case$plus)) {
|
|
121
115
|
case$plus = 'theme_prism(axis_text_angle=90)'
|
|
122
116
|
}
|
|
123
117
|
excluded_args = c(excluded_args, "slot", "ncol")
|
|
124
118
|
fn = DotPlot
|
|
125
|
-
default_devpars = function(features, ncol
|
|
119
|
+
default_devpars = function(features, ncol) {
|
|
126
120
|
list(
|
|
127
|
-
height = max(
|
|
121
|
+
height = max(n_uidents * 80 + 150, 420),
|
|
128
122
|
width = length(features) * 50 + 150,
|
|
129
123
|
res = 100
|
|
130
124
|
)
|
|
@@ -133,20 +127,20 @@ do_one_features = function(name) {
|
|
|
133
127
|
case$kind = "heatmap"
|
|
134
128
|
case = list_update(
|
|
135
129
|
list(
|
|
136
|
-
group.colors =
|
|
130
|
+
group.colors = pal_biopipen()(n_uidents),
|
|
137
131
|
size = 3.5,
|
|
138
132
|
group.bar.height = 0.01
|
|
139
133
|
),
|
|
140
134
|
case
|
|
141
135
|
)
|
|
142
136
|
if (is.null(case$plus)) {
|
|
143
|
-
case$plus = 'scale_fill_gradientn(colors = c("lightgrey",
|
|
137
|
+
case$plus = 'scale_fill_gradientn(colors = c("lightgrey", pal_biopipen()(1)), na.value = "white")'
|
|
144
138
|
}
|
|
145
139
|
excluded_args = c(excluded_args, "group.by", "split.by", "downsample", "ncol")
|
|
146
140
|
fn = DoHeatmap
|
|
147
|
-
default_devpars = function(features, ncol
|
|
141
|
+
default_devpars = function(features, ncol) {
|
|
148
142
|
list(
|
|
149
|
-
width =
|
|
143
|
+
width = n_uidents * 60 + 150,
|
|
150
144
|
height = length(features) * 40 + 150,
|
|
151
145
|
res = 100
|
|
152
146
|
)
|
|
@@ -160,7 +154,7 @@ do_one_features = function(name) {
|
|
|
160
154
|
case$slot = "data"
|
|
161
155
|
}
|
|
162
156
|
} else {
|
|
163
|
-
stop("Unknown kind of plot")
|
|
157
|
+
stop(paste0("Unknown kind of plot: ", case$kind))
|
|
164
158
|
}
|
|
165
159
|
|
|
166
160
|
for (arg in excluded_args) {
|
|
@@ -168,33 +162,34 @@ do_one_features = function(name) {
|
|
|
168
162
|
case[[arg]] = NULL
|
|
169
163
|
}
|
|
170
164
|
|
|
171
|
-
if (is.character(subset)) {
|
|
172
|
-
case$object = srtobj %>% filter(!!rlang::parse_expr(subset))
|
|
173
|
-
} else {
|
|
174
|
-
case$object = srtobj
|
|
175
|
-
}
|
|
176
|
-
if (!is.null(ident)) {
|
|
177
|
-
Idents(case$object) = ident
|
|
178
|
-
}
|
|
179
165
|
case$features = .get_features(case$features)
|
|
180
166
|
if (!is.null(case$ncol)) {
|
|
181
167
|
case$ncol = min(case$ncol, length(case$features))
|
|
182
168
|
}
|
|
183
169
|
|
|
184
|
-
toc$kind = kind
|
|
185
170
|
if (kind == "table") {
|
|
186
171
|
expr = do_call(fn, case)$RNA %>%
|
|
187
172
|
as.data.frame() %>%
|
|
188
173
|
rownames_to_column("Feature") %>%
|
|
189
174
|
select(Feature, everything())
|
|
190
175
|
|
|
191
|
-
|
|
192
|
-
write.table(expr, file.path(odir,
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
176
|
+
exprfile = paste0(slugify(name), ".txt")
|
|
177
|
+
write.table(expr, file.path(odir, exprfile), sep="\t", quote=FALSE, row.names=FALSE)
|
|
178
|
+
|
|
179
|
+
add_report(
|
|
180
|
+
list(
|
|
181
|
+
kind = "descr",
|
|
182
|
+
content = paste0("Table of expression value for selected features, by ", ident)
|
|
183
|
+
),
|
|
184
|
+
list(
|
|
185
|
+
kind = "table",
|
|
186
|
+
src = exprfile
|
|
187
|
+
),
|
|
188
|
+
h1 = ifelse(is.null(case$section), name, case$section),
|
|
189
|
+
h2 = ifelse(is.null(case$section), "#", name)
|
|
197
190
|
)
|
|
191
|
+
} else {
|
|
192
|
+
devpars = list_update(default_devpars(case$features, case$ncol), devpars)
|
|
198
193
|
if (kind == "heatmap") {
|
|
199
194
|
if (!exists("downsample") || is.null(downsample)) {
|
|
200
195
|
downsample = "average"
|
|
@@ -202,9 +197,9 @@ do_one_features = function(name) {
|
|
|
202
197
|
if (downsample %in% c("average", "mean")) {
|
|
203
198
|
case$object = AverageExpression(case$object, return.seurat = TRUE)
|
|
204
199
|
} else if (is.integer(downsample)) {
|
|
205
|
-
case$object = subset(case$object, downsample = downsample)
|
|
200
|
+
case$object = base::subset(case$object, downsample = downsample)
|
|
206
201
|
} else {
|
|
207
|
-
stop("Unknown downsample method
|
|
202
|
+
stop(paste0("Unknown downsample method: ", downsample))
|
|
208
203
|
}
|
|
209
204
|
}
|
|
210
205
|
p = do_call(fn, case)
|
|
@@ -213,8 +208,7 @@ do_one_features = function(name) {
|
|
|
213
208
|
p = p + eval(parse(text = pls))
|
|
214
209
|
}
|
|
215
210
|
}
|
|
216
|
-
figfile = file.path(odir, paste0(slugify(name), ".", kind, ".png"))
|
|
217
|
-
toc$file = basename(figfile)
|
|
211
|
+
figfile = file.path(odir, paste0(slugify(name), ".", slugify(case$kind), ".png"))
|
|
218
212
|
png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
|
|
219
213
|
tryCatch({
|
|
220
214
|
print(p)
|
|
@@ -229,9 +223,20 @@ do_one_features = function(name) {
|
|
|
229
223
|
)
|
|
230
224
|
})
|
|
231
225
|
dev.off()
|
|
226
|
+
|
|
227
|
+
add_report(
|
|
228
|
+
list(
|
|
229
|
+
kind = "descr",
|
|
230
|
+
content = paste0(kind, "plots for selected features, by ", ident)
|
|
231
|
+
),
|
|
232
|
+
list(
|
|
233
|
+
kind = "image",
|
|
234
|
+
src = figfile
|
|
235
|
+
),
|
|
236
|
+
h1 = ifelse(is.null(section), name, section),
|
|
237
|
+
h2 = ifelse(is.null(section), "#", name)
|
|
238
|
+
)
|
|
232
239
|
}
|
|
233
|
-
.add_toc(if (is.null(section)) name else section, toc)
|
|
234
240
|
}
|
|
235
241
|
|
|
236
242
|
sapply(names(features), do_one_features)
|
|
237
|
-
.save_toc()
|
|
@@ -5,22 +5,9 @@ stats = {{envs.stats | r: todot="-", skip=1}}
|
|
|
5
5
|
|
|
6
6
|
odir = file.path(outdir, "stats")
|
|
7
7
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
|
-
report_toc_file = file.path(odir, "report_toc.json")
|
|
9
|
-
# Realname => {bar: ..., pie: ..., table: ...}
|
|
10
|
-
report_toc = list()
|
|
11
|
-
|
|
12
|
-
.add_toc = function(name, toc) {
|
|
13
|
-
report_toc[[name]] <<- toc
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
.save_toc = function() {
|
|
17
|
-
writeLines(toJSON(report_toc, pretty = TRUE, auto_unbox = TRUE), report_toc_file)
|
|
18
|
-
}
|
|
19
8
|
|
|
20
9
|
do_one_stats = function(name) {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
toc = list()
|
|
10
|
+
log_info("Doing stats for: {name}")
|
|
24
11
|
|
|
25
12
|
case = list_update(stats_defaults, stats[[name]])
|
|
26
13
|
case$devpars = list_update(stats_defaults$devpars, case$devpars)
|
|
@@ -45,7 +32,6 @@ do_one_stats = function(name) {
|
|
|
45
32
|
mutate(.frac = .n / sum(.n))
|
|
46
33
|
|
|
47
34
|
if (isTRUE(case$table)) {
|
|
48
|
-
toc$table = basename(tablefile)
|
|
49
35
|
write.table(df_cells, tablefile, sep="\t", quote=FALSE, row.names=FALSE)
|
|
50
36
|
}
|
|
51
37
|
if (isTRUE(case$pie)) {
|
|
@@ -54,7 +40,7 @@ do_one_stats = function(name) {
|
|
|
54
40
|
ggplot(aes(x="", y=.n, fill=!!sym(case$ident))) +
|
|
55
41
|
geom_bar(stat="identity", width=1, alpha=.8, position = position_stack(reverse = TRUE)) +
|
|
56
42
|
coord_polar("y", start=0) +
|
|
57
|
-
|
|
43
|
+
scale_fill_biopipen() +
|
|
58
44
|
guides(fill = guide_legend(title = case$ident)) +
|
|
59
45
|
theme_void() +
|
|
60
46
|
geom_label(
|
|
@@ -72,7 +58,6 @@ do_one_stats = function(name) {
|
|
|
72
58
|
p_pie = p_pie + facet_wrap(case$split.by)
|
|
73
59
|
}
|
|
74
60
|
|
|
75
|
-
toc$pie = basename(piefile)
|
|
76
61
|
png(piefile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
|
|
77
62
|
print(p_pie)
|
|
78
63
|
dev.off()
|
|
@@ -89,20 +74,53 @@ do_one_stats = function(name) {
|
|
|
89
74
|
)) +
|
|
90
75
|
geom_bar(stat="identity", position=bar_position, alpha=.8) +
|
|
91
76
|
theme_prism(axis_text_angle = 90) +
|
|
92
|
-
|
|
77
|
+
scale_fill_biopipen() +
|
|
93
78
|
ylab(ifelse(isTRUE(case$frac), "Fraction of cells", "Number of cells"))
|
|
94
79
|
|
|
95
80
|
if (!is.null(case$split.by)) {
|
|
96
81
|
p = p + facet_wrap(case$split.by)
|
|
97
82
|
}
|
|
98
83
|
|
|
99
|
-
toc$bar = basename(figfile)
|
|
100
84
|
png(figfile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
|
|
101
85
|
print(p)
|
|
102
86
|
dev.off()
|
|
103
87
|
|
|
104
|
-
|
|
88
|
+
add_report(
|
|
89
|
+
list(
|
|
90
|
+
kind = "descr",
|
|
91
|
+
content = paste0(
|
|
92
|
+
"Plots showing the ",
|
|
93
|
+
ifelse(isTRUE(case$frac), "number/faction", "number"),
|
|
94
|
+
" of cells per cluster",
|
|
95
|
+
ifelse(
|
|
96
|
+
is.null(case$group.by),
|
|
97
|
+
"",
|
|
98
|
+
paste0(", by ", paste0(case$group.by, collapse = ", "))
|
|
99
|
+
)
|
|
100
|
+
)
|
|
101
|
+
),
|
|
102
|
+
h1 = name
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
add_report(
|
|
106
|
+
list(
|
|
107
|
+
name = "Bar Plot",
|
|
108
|
+
contents = list(list(kind = "image", src = figfile))
|
|
109
|
+
),
|
|
110
|
+
h1 = name,
|
|
111
|
+
ui = "tabs"
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
if (isTRUE(case$pie)) {
|
|
115
|
+
add_report(
|
|
116
|
+
list(
|
|
117
|
+
name = "Pie Chart",
|
|
118
|
+
contents = list(list(kind = "image", src = piefile))
|
|
119
|
+
),
|
|
120
|
+
h1 = name,
|
|
121
|
+
ui = "tabs"
|
|
122
|
+
)
|
|
123
|
+
}
|
|
105
124
|
}
|
|
106
125
|
|
|
107
126
|
sapply(names(stats), do_one_stats)
|
|
108
|
-
.save_toc()
|
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
2
|
source("{{biopipen_dir}}/utils/plot.R")
|
|
3
|
-
library(jsonlite)
|
|
4
3
|
library(slugify)
|
|
5
4
|
library(Seurat)
|
|
6
5
|
library(rlang)
|
|
7
6
|
library(dplyr)
|
|
8
7
|
library(tibble)
|
|
9
8
|
library(ggprism)
|
|
10
|
-
library(ggsci)
|
|
11
9
|
library(ggrepel)
|
|
12
10
|
library(tidyseurat)
|
|
13
11
|
|
|
14
12
|
srtfile = {{in.srtobj | r}}
|
|
15
13
|
outdir = {{out.outdir | r}}
|
|
14
|
+
joboutdir = {{job.outdir | r}}
|
|
16
15
|
|
|
16
|
+
log_info("Loading Seurat object ...")
|
|
17
17
|
srtobj = readRDS(srtfile)
|
|
18
18
|
|
|
19
19
|
{% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-stats.R" %}
|
|
20
20
|
{% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-features.R" %}
|
|
21
21
|
{% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-dimplots.R" %}
|
|
22
|
+
|
|
23
|
+
save_report(joboutdir)
|
|
@@ -4,11 +4,13 @@ library(Seurat)
|
|
|
4
4
|
library(future)
|
|
5
5
|
library(tidyr)
|
|
6
6
|
library(dplyr)
|
|
7
|
+
library(digest)
|
|
7
8
|
|
|
8
9
|
set.seed(8525)
|
|
9
10
|
|
|
10
11
|
srtfile = {{in.srtobj | quote}}
|
|
11
12
|
rdsfile = {{out.rdsfile | quote}}
|
|
13
|
+
joboutdir = {{job.outdir | quote}}
|
|
12
14
|
envs = {{envs | r: todot="-"}}
|
|
13
15
|
|
|
14
16
|
options(future.globals.maxSize = 80000 * 1024^2)
|
|
@@ -26,7 +28,46 @@ envs$IntegrateData = .expand_dims(envs$IntegrateData)
|
|
|
26
28
|
envs$RunUMAP = .expand_dims(envs$RunUMAP)
|
|
27
29
|
envs$FindNeighbors = .expand_dims(envs$FindNeighbors)
|
|
28
30
|
|
|
31
|
+
log_info("Reading Seurat object ...")
|
|
29
32
|
sobj = readRDS(srtfile)
|
|
33
|
+
|
|
34
|
+
if (isTRUE(envs$cache)) {
|
|
35
|
+
envs$cache = joboutdir
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
if (is.character(envs$cache) && nchar(envs$cache) > 0) {
|
|
39
|
+
log_info("Obtainning the signature ...")
|
|
40
|
+
envs2 = envs
|
|
41
|
+
envs2$ncores <- NULL
|
|
42
|
+
sig = c(
|
|
43
|
+
capture.output(str(sobj)),
|
|
44
|
+
"\n\n-------------------\n\n",
|
|
45
|
+
capture.output(str(envs2)),
|
|
46
|
+
"\n"
|
|
47
|
+
)
|
|
48
|
+
digested_sig = digest::digest(sig, algo = "md5")
|
|
49
|
+
cached_file = file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
|
|
50
|
+
if (file.exists(cached_file)) {
|
|
51
|
+
log_info("Using cached results {cached_file}")
|
|
52
|
+
# copy cached file to rdsfile
|
|
53
|
+
file.copy(cached_file, rdsfile, copy.date = TRUE)
|
|
54
|
+
quit()
|
|
55
|
+
} else {
|
|
56
|
+
log_info("Cached results not found, logging the current and cached signatures.")
|
|
57
|
+
log_info("- Current signature:")
|
|
58
|
+
print(sig)
|
|
59
|
+
sigfiles = Sys.glob(file.path(envs$cache, "*.signature.txt"))
|
|
60
|
+
for (sigfile in sigfiles) {
|
|
61
|
+
log_info("- Found cached signature file: {sigfile}")
|
|
62
|
+
cached_sig = readLines(sigfile)
|
|
63
|
+
log_info("- Cached signature:")
|
|
64
|
+
print(cached_sig)
|
|
65
|
+
}
|
|
66
|
+
writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
|
|
30
71
|
obj_list = SplitObject(sobj, split.by = "Sample")
|
|
31
72
|
rm(sobj)
|
|
32
73
|
|
|
@@ -51,27 +92,28 @@ if (!is.null(envs$FindIntegrationAnchors$reference)) {
|
|
|
51
92
|
# ############################
|
|
52
93
|
# Using SCT
|
|
53
94
|
# https://satijalab.org/seurat/articles/integration_rpca.html#performing-integration-on-datasets-normalized-with-sctransform-1
|
|
54
|
-
|
|
95
|
+
log_info("########## Using SCT route ##########")
|
|
96
|
+
log_info("Performing SCTransform on each sample ...")
|
|
55
97
|
obj_list <- lapply(X = obj_list, FUN = function(x) {
|
|
56
|
-
|
|
98
|
+
log_info("- On sample: {x@meta.data$Sample[1]} ...")
|
|
57
99
|
# # Needed?
|
|
58
100
|
# DefaultAssay(x) <- "RNA"
|
|
59
101
|
args = list_update(envs$SCTransform, list(object = x))
|
|
60
102
|
do_call(SCTransform, args)
|
|
61
103
|
})
|
|
62
104
|
|
|
63
|
-
|
|
105
|
+
log_info("Running SelectIntegrationFeatures ...")
|
|
64
106
|
envs$SelectIntegrationFeatures$object.list = obj_list
|
|
65
107
|
features = do_call(SelectIntegrationFeatures, envs$SelectIntegrationFeatures)
|
|
66
108
|
|
|
67
|
-
|
|
109
|
+
log_info("Running PrepSCTIntegration ...")
|
|
68
110
|
envs$PrepSCTIntegration$object.list = obj_list
|
|
69
111
|
envs$PrepSCTIntegration$anchor.features = features
|
|
70
112
|
obj_list = do_call(PrepSCTIntegration, envs$PrepSCTIntegration)
|
|
71
113
|
|
|
72
|
-
|
|
114
|
+
log_info("Running PCA on each sample ...")
|
|
73
115
|
obj_list = lapply(X = obj_list, FUN = function(x) {
|
|
74
|
-
|
|
116
|
+
log_info("- On sample: {x@meta.data$Sample[1]} ...")
|
|
75
117
|
npcs = if (is.null(envs$RunPCA1$npcs)) 50 else envs$RunPCA1$npcs
|
|
76
118
|
args = list_setdefault(
|
|
77
119
|
envs$RunPCA1,
|
|
@@ -83,11 +125,11 @@ obj_list = lapply(X = obj_list, FUN = function(x) {
|
|
|
83
125
|
do_call(RunPCA, args)
|
|
84
126
|
})
|
|
85
127
|
|
|
86
|
-
|
|
128
|
+
log_info("Running FindIntegrationAnchors ...")
|
|
87
129
|
if (!is.null(envs$FindIntegrationAnchors$reference)) {
|
|
88
|
-
|
|
130
|
+
log_info(
|
|
89
131
|
paste(
|
|
90
|
-
"
|
|
132
|
+
"- Using samples as reference:",
|
|
91
133
|
paste(envs$FindIntegrationAnchors$reference, collapse = ", ")
|
|
92
134
|
)
|
|
93
135
|
)
|
|
@@ -106,7 +148,7 @@ fia_args$dims = 1:min(min_dim, max(fia_args$dims))
|
|
|
106
148
|
fia_args$k.score = min(30, min_dim - 1)
|
|
107
149
|
anchors = do_call(FindIntegrationAnchors, fia_args)
|
|
108
150
|
|
|
109
|
-
|
|
151
|
+
log_info("Running IntegrateData ...")
|
|
110
152
|
envs$IntegrateData$anchorset = anchors
|
|
111
153
|
id_args = list_setdefault(
|
|
112
154
|
envs$IntegrateData,
|
|
@@ -139,9 +181,10 @@ tryCatch({
|
|
|
139
181
|
# ############################
|
|
140
182
|
# Using rpca
|
|
141
183
|
# https://satijalab.org/seurat/articles/integration_rpca.html
|
|
142
|
-
|
|
184
|
+
log_info("########## Using rpca route ##########")
|
|
185
|
+
log_info("Performing NormalizeData + FindVariableFeatures on each sample ...")
|
|
143
186
|
obj_list <- lapply(X = obj_list, FUN = function(x) {
|
|
144
|
-
|
|
187
|
+
log_info("- On sample: {x@meta.data$Sample[1]} ...")
|
|
145
188
|
DefaultAssay(x) <- "RNA"
|
|
146
189
|
args = list_update(envs$NormalizeData, list(object = x))
|
|
147
190
|
x <- do_call(NormalizeData, args)
|
|
@@ -150,14 +193,13 @@ obj_list <- lapply(X = obj_list, FUN = function(x) {
|
|
|
150
193
|
do_call(FindVariableFeatures, args)
|
|
151
194
|
})
|
|
152
195
|
|
|
153
|
-
|
|
154
|
-
print("- Running SelectIntegrationFeatures ...")
|
|
196
|
+
log_info("Running SelectIntegrationFeatures ...")
|
|
155
197
|
envs$SelectIntegrationFeatures$object.list = obj_list
|
|
156
198
|
features = do_call(SelectIntegrationFeatures, envs$SelectIntegrationFeatures)
|
|
157
199
|
|
|
158
|
-
|
|
200
|
+
log_info("Running ScaleData + RunPCA on each sample ...")
|
|
159
201
|
obj_list <- lapply(X = obj_list, FUN = function(x) {
|
|
160
|
-
|
|
202
|
+
log_info("- On sample: {x@meta.data$Sample[1]} ...")
|
|
161
203
|
args = list_setdefault(envs$ScaleData1, object = x, features = features)
|
|
162
204
|
x <- do_call(ScaleData, args)
|
|
163
205
|
|
|
@@ -172,11 +214,11 @@ obj_list <- lapply(X = obj_list, FUN = function(x) {
|
|
|
172
214
|
do_call(RunPCA, args)
|
|
173
215
|
})
|
|
174
216
|
|
|
175
|
-
|
|
217
|
+
log_info("Running FindIntegrationAnchors ...")
|
|
176
218
|
if (!is.null(envs$FindIntegrationAnchors$reference)) {
|
|
177
|
-
|
|
219
|
+
log_info(
|
|
178
220
|
paste(
|
|
179
|
-
"
|
|
221
|
+
"- Using samples as reference:",
|
|
180
222
|
paste(envs$FindIntegrationAnchors$reference, collapse = ", ")
|
|
181
223
|
)
|
|
182
224
|
)
|
|
@@ -194,7 +236,7 @@ fia_args$dims = 1:min(min_dim, max(fia_args$dims))
|
|
|
194
236
|
fia_args$k.score = min(30, min_dim - 1)
|
|
195
237
|
anchors = do_call(FindIntegrationAnchors, fia_args)
|
|
196
238
|
|
|
197
|
-
|
|
239
|
+
log_info("Running IntegrateData ...")
|
|
198
240
|
envs$IntegrateData$anchorset = anchors
|
|
199
241
|
id_args = list_setdefault(envs$IntegrateData, dims = 1:30)
|
|
200
242
|
id_args$dims = 1:min(min_dim, max(id_args$dims))
|
|
@@ -207,7 +249,7 @@ obj_list = do_call(ScaleData, envs$ScaleData)
|
|
|
207
249
|
|
|
208
250
|
{%- endif %}
|
|
209
251
|
|
|
210
|
-
|
|
252
|
+
log_info("Running RunPCA ...")
|
|
211
253
|
pca_args = list_setdefault(
|
|
212
254
|
envs$RunPCA,
|
|
213
255
|
object = obj_list,
|
|
@@ -216,7 +258,7 @@ pca_args = list_setdefault(
|
|
|
216
258
|
pca_args$npcs = min(pca_args$npcs, ncol(obj_list) - 1)
|
|
217
259
|
obj_list = do_call(RunPCA, pca_args)
|
|
218
260
|
|
|
219
|
-
|
|
261
|
+
log_info("Running RunUMAP ...")
|
|
220
262
|
umap_args = list_setdefault(
|
|
221
263
|
envs$RunUMAP,
|
|
222
264
|
object = obj_list,
|
|
@@ -225,16 +267,21 @@ umap_args = list_setdefault(
|
|
|
225
267
|
umap_args$dims = 1:min(max(umap_args$dims), ncol(obj_list) - 1)
|
|
226
268
|
obj_list = do_call(RunUMAP, umap_args)
|
|
227
269
|
|
|
228
|
-
|
|
270
|
+
log_info("Running FindNeighbors ...")
|
|
229
271
|
envs$FindNeighbors$object = obj_list
|
|
230
272
|
obj_list = do_call(FindNeighbors, envs$FindNeighbors)
|
|
231
273
|
|
|
232
|
-
|
|
274
|
+
log_info("Running FindClusters ...")
|
|
233
275
|
envs$FindClusters$object = obj_list
|
|
234
276
|
obj_list = do_call(FindClusters, envs$FindClusters)
|
|
235
277
|
|
|
236
278
|
nclusters = length(unique(Idents(obj_list)))
|
|
237
|
-
|
|
279
|
+
log_info("Identified {nclusters} clusters.")
|
|
238
280
|
|
|
239
|
-
|
|
281
|
+
log_info("Saving results ...")
|
|
240
282
|
saveRDS(obj_list, file = rdsfile)
|
|
283
|
+
|
|
284
|
+
if (is.character(envs$cache) && nchar(envs$cache) > 0) {
|
|
285
|
+
log_info("Caching results ...")
|
|
286
|
+
file.copy(rdsfile, cached_file, overwrite = TRUE)
|
|
287
|
+
}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
1
2
|
source("{{biopipen_dir}}/utils/mutate_helpers.R")
|
|
3
|
+
|
|
2
4
|
library(rlang)
|
|
3
5
|
library(tibble)
|
|
4
6
|
library(dplyr)
|
|
@@ -14,7 +16,17 @@ metadata = srt@meta.data
|
|
|
14
16
|
|
|
15
17
|
if (!is.null(metafile)) {
|
|
16
18
|
mdata = read.table(metafile, header=TRUE, row.names=1, sep="\t", check.names=FALSE)
|
|
17
|
-
|
|
19
|
+
ov_cols = intersect(colnames(metadata), colnames(mdata))
|
|
20
|
+
if (length(ov_cols) > 0) {
|
|
21
|
+
log_warn(paste0(
|
|
22
|
+
"The following columns are already present in Seurat object and will be ignored: ",
|
|
23
|
+
paste(ov_cols, collapse=', ')
|
|
24
|
+
))
|
|
25
|
+
}
|
|
26
|
+
metadata = cbind(
|
|
27
|
+
metadata,
|
|
28
|
+
mdata[rownames(metadata), setdiff(colnames(mdata), ov_cols), drop=FALSE]
|
|
29
|
+
)
|
|
18
30
|
}
|
|
19
31
|
|
|
20
32
|
expr = list()
|