biopipen 0.17.6__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (31) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +36 -23
  3. biopipen/ns/delim.py +1 -1
  4. biopipen/ns/scrna.py +132 -49
  5. biopipen/ns/tcr.py +62 -0
  6. biopipen/reports/scrna/MarkersFinder.svelte +30 -8
  7. biopipen/reports/scrna/SeuratClusterStats.svelte +64 -109
  8. biopipen/reports/tcr/TESSA.svelte +43 -0
  9. biopipen/scripts/delim/SampleInfo.R +18 -15
  10. biopipen/scripts/scrna/MarkersFinder.R +58 -2
  11. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +40 -0
  12. biopipen/scripts/scrna/SeuratClusterStats-features.R +236 -0
  13. biopipen/scripts/scrna/SeuratClusterStats-stats.R +105 -0
  14. biopipen/scripts/scrna/SeuratClusterStats.R +7 -521
  15. biopipen/scripts/scrna/SeuratClustering.R +20 -1
  16. biopipen/scripts/tcr/ImmunarchLoading.R +1 -1
  17. biopipen/scripts/tcr/TESSA.R +198 -0
  18. biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv +21 -0
  19. biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py +168 -0
  20. biopipen/scripts/tcr/TESSA_source/MCMC_control.R +71 -0
  21. biopipen/scripts/tcr/TESSA_source/TrainedEncoder.h5 +0 -0
  22. biopipen/scripts/tcr/TESSA_source/fixed_b.csv +31 -0
  23. biopipen/scripts/tcr/TESSA_source/initialization.R +120 -0
  24. biopipen/scripts/tcr/TESSA_source/post_analysis.R +124 -0
  25. biopipen/scripts/tcr/TESSA_source/real_data.R +67 -0
  26. biopipen/scripts/tcr/TESSA_source/update.R +195 -0
  27. biopipen/scripts/tcr/TESSA_source/utility.R +18 -0
  28. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/METADATA +8 -8
  29. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/RECORD +31 -16
  30. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/WHEEL +0 -0
  31. {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,43 @@
1
+ {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
+ <script>
3
+ import { Image, DataTable } from "$libs";
4
+ import { Tile } from "$ccs";
5
+ </script>
6
+
7
+ <Tile>
8
+ <p><a href="https://github.com/jcao89757/TESSA" target="_blank">Tessa</a> is a Bayesian model to integrate T cell receptor (TCR) sequence profiling with transcriptomes of T cells. Enabled by the recently developed single cell sequencing techniques, which provide both TCR sequences and RNA sequences of each T cell concurrently, Tessa maps the functional landscape of the TCR repertoire, and generates insights into understanding human immune response to diseases. As the first part of tessa, BriseisEncoder is employed prior to the Bayesian algorithm to capture the TCR sequence features and create numerical embeddings. We showed that the reconstructed Atchley Factor matrices and CDR3 sequences, generated through the numerical embeddings, are highly similar to their original counterparts. The CDR3 peptide sequences are constructed via a RandomForest model applied on the reconstructed Atchley Factor matrices.</p>
9
+
10
+ <p>For more information, please refer to the following papers:</p>
11
+ <ul>
12
+ <li>- <a href="https://www.nature.com/articles/s41592-020-01020-3" target="_blank">Mapping the Functional Landscape of TCR Repertoire</a>, Zhang, Z., Xiong, D., Wang, X. et al. 2021.</li>
13
+ <li>- <a href="https://www.nature.com/articles/s42256-021-00383-2" target="_blank">Deep learning-based prediction of the T cell receptor–antigen binding specificity</a>, Lu, T., Zhang, Z., Zhu, J. et al. 2021.</li>
14
+ </ul>
15
+ </Tile>
16
+ <p>&nbsp;</p>
17
+
18
+ {%- macro report_job(job, h=1) -%}
19
+ {{ table_of_images(
20
+ [
21
+ joinpaths(job.outdir, "result", "Cluster_size_dist.png"),
22
+ joinpaths(job.outdir, "result", "clone_size.png"),
23
+ joinpaths(job.outdir, "result", "exp_TCR_pair_plot.png"),
24
+ joinpaths(job.outdir, "result", "TCR_dist_density.png"),
25
+ joinpaths(job.outdir, "result", "TCR_explore.png"),
26
+ joinpaths(job.outdir, "result", "TCR_explore_clusters.png"),
27
+ ],
28
+ [
29
+ "TESSA cluster size distribution",
30
+ "Cluster center size vs. non-center cluster size",
31
+ "Expression-TCR distance plot",
32
+ "Density of TCR distances",
33
+ "Exploratory plot at the TCR level",
34
+ "TESSA clusters",
35
+ ],
36
+ ) }}
37
+ {%- endmacro -%}
38
+
39
+ {%- macro head_job(job) -%}
40
+ <h1>{{job.in.immdata | stem | escape}}</h1>
41
+ {%- endmacro -%}
42
+
43
+ {{ report_jobs(jobs, head_job, report_job) }}
@@ -5,6 +5,7 @@ library(dplyr)
5
5
  library(ggplot2)
6
6
  library(ggprism)
7
7
  library(ggsci)
8
+ library(ggrepel)
8
9
 
9
10
  infile <- {{in.infile | r}}
10
11
  outfile <- {{out.outfile | r}}
@@ -39,8 +40,9 @@ for (name in names(stats)) {
39
40
  plotfile <- file.path(outdir, paste0(name, ".png"))
40
41
 
41
42
  is_continuous <- FALSE
42
- if (startsWith(stat$on, "distinct:")) {
43
- stat$on <- substring(stat$on, 10)
43
+ if (startsWith(stat$on, "distinct:") || startsWith(stat$on, "unique:")) {
44
+ stat$on <- gsub("distinct:", "", stat$on)
45
+ stat$on <- gsub("unique:", "", stat$on)
44
46
  data <- mutdata %>% distinct(!!sym(stat$on), .keep_all = TRUE)
45
47
  } else {
46
48
  data <- mutdata
@@ -89,14 +91,14 @@ for (name in names(stats)) {
89
91
  if (stat$plot == "boxplot" || stat$plot == "box") {
90
92
  p <- ggplot(data, aes(x=!!group, y=!!sym(stat$on), fill=!!group)) +
91
93
  geom_boxplot(position = "dodge") +
92
- scale_fill_ucscgb() +
94
+ scale_fill_ucscgb(alpha = .8) +
93
95
  xlab("")
94
96
  } else if (stat$plot == "violin" ||
95
97
  stat$plot == "violinplot" ||
96
98
  stat$plot == "vlnplot") {
97
99
  p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill=!!group)) +
98
100
  geom_violin(position = "dodge") +
99
- scale_fill_ucscgb() +
101
+ scale_fill_ucscgb(alpha = .8) +
100
102
  xlab("")
101
103
  } else if (
102
104
  (grepl("violin", stat$plot) || grepl("vln", stat$plot)) &&
@@ -105,12 +107,12 @@ for (name in names(stats)) {
105
107
  p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill = !!group)) +
106
108
  geom_violin(position = "dodge") +
107
109
  geom_boxplot(width = 0.1, position = position_dodge(0.9), fill="white") +
108
- scale_fill_ucscgb() +
110
+ scale_fill_ucscgb(alpha = .8) +
109
111
  xlab("")
110
112
  } else if (stat$plot == "histogram" || stat$plot == "hist") {
111
113
  p <- ggplot(data, aes(x = !!sym(stat$on), fill = !!group)) +
112
114
  geom_histogram(bins = 10, position = "dodge", alpha = 0.8, color = "white") +
113
- scale_fill_ucscgb()
115
+ scale_fill_ucscgb(alpha = .8)
114
116
  } else if (stat$plot == "pie" || stat$plot == "piechart") {
115
117
  if (is.null(stat$each)) {
116
118
  data <- data %>% distinct(!!group, .keep_all = TRUE)
@@ -120,19 +122,20 @@ for (name in names(stats)) {
120
122
  group_by(!!sym(stat$each))
121
123
  }
122
124
  p <- ggplot(
123
- data %>%
124
- mutate(
125
- ..prop = !!sym(count_on) / sum(!!sym(count_on)),
126
- ..ypos = (cumsum(..prop) - 0.5 * ..prop) * sum(!!sym(count_on))
127
- ),
128
- aes(x = "", y = !!sym(count_on), fill = !!group)
125
+ data %>% arrange(!!group),
126
+ aes(x = "", y = !!sym(count_on), fill = rev(!!group), label = !!sym(count_on))
129
127
  ) +
130
128
  geom_bar(stat="identity", width=1, color="white") +
131
129
  coord_polar("y", start = 0) +
132
130
  theme_void() +
133
131
  theme(plot.title = element_text(hjust = 0.5)) +
134
- geom_text(aes(y = ..ypos, label = !!sym(count_on))) +
135
- scale_fill_ucscgb() +
132
+ geom_label_repel(
133
+ position = position_stack(vjust = 0.5),
134
+ color="#333333",
135
+ fill="#EEEEEE",
136
+ size=4
137
+ ) +
138
+ scale_fill_ucscgb(alpha = .8, name = group) +
136
139
  ggtitle(paste0("# ", stat$on))
137
140
  } else if (stat$plot == "bar" || stat$plot == "barplot") {
138
141
  if (is.null(stat$each)) {
@@ -144,7 +147,7 @@ for (name in names(stats)) {
144
147
  data,
145
148
  aes(x = !!group, y = !!sym(count_on), fill = !!group)) +
146
149
  geom_bar(stat = "identity") +
147
- scale_fill_ucscgb() +
150
+ scale_fill_ucscgb(alpha = .8) +
148
151
  ylab(paste0("# ", stat$on))
149
152
  } else {
150
153
  stop("Unknown plot type: ", stat$plot)
@@ -8,6 +8,8 @@ library(tibble)
8
8
  library(Seurat)
9
9
  library(enrichR)
10
10
  library(ggplot2)
11
+ library(ggprism)
12
+ library(ggrepel)
11
13
  library(future)
12
14
  library(tidyseurat)
13
15
 
@@ -25,9 +27,14 @@ prefix_each <- {{ envs.prefix_each | r }}
25
27
  section <- {{ envs.section | r }}
26
28
  dbs <- {{ envs.dbs | r }}
27
29
  sigmarkers <- {{ envs.sigmarkers | r }}
30
+ volcano_genes <- {{ envs.volcano_genes | r }}
28
31
  rest <- {{ envs.rest | r: todot="-" }}
29
32
  cases <- {{ envs.cases | r: todot="-" }}
30
33
 
34
+ if (is.character(volcano_genes) && length(volcano_genes) == 1) {
35
+ volcano_genes <- trimws(strsplit(volcano_genes, ",")[[1]])
36
+ }
37
+
31
38
  set.seed(8525)
32
39
  if (ncores > 1) {
33
40
  options(future.globals.maxSize = 80000 * 1024^2)
@@ -55,6 +62,7 @@ if (is.null(cases) || length(cases) == 0) {
55
62
  section = section,
56
63
  dbs = dbs,
57
64
  sigmarkers = sigmarkers,
65
+ volcano_genes = volcano_genes,
58
66
  rest = rest
59
67
  )
60
68
  )
@@ -70,6 +78,7 @@ if (is.null(cases) || length(cases) == 0) {
70
78
  section = section,
71
79
  dbs = dbs,
72
80
  sigmarkers = sigmarkers,
81
+ volcano_genes = volcano_genes,
73
82
  rest = rest
74
83
  )
75
84
  case$rest <- list_setdefault(case$rest, rest)
@@ -153,13 +162,59 @@ for (name in names(cases)) {
153
162
  }
154
163
  cases <- newcases
155
164
 
165
+ plot_volcano = function(markers, volfile, sig, volgenes) {
166
+ # markers
167
+ # gene p_val avg_log2FC pct.1 pct.2 p_val_adj
168
+ # 1 CCL5 1.883596e-11 -4.8282535 0.359 0.927 4.332270e-09
169
+ # 2 HLA-DQB1 3.667713e-09 6.1543174 0.718 0.098 8.435740e-07
170
+ # 3 HLA-DRB5 1.242993e-07 3.9032231 0.744 0.195 2.858885e-05
171
+ # 4 CD79B 2.036731e-07 4.2748835 0.692 0.146 4.684482e-05
172
+ markers = markers %>%
173
+ mutate(
174
+ Significant = if_else(
175
+ !!parse_expr(sig),
176
+ if_else(avg_log2FC > 0, "Up", "Down"),
177
+ "No"
178
+ ),
179
+ Label = if_else(
180
+ Significant != "No" & (isTRUE(volgenes) | (gene %in% volgenes)),
181
+ gene,
182
+ ""
183
+ )
184
+ )
185
+
186
+ p_vol = ggplot(markers, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
187
+ geom_point(aes(color = Significant), alpha = 0.75) +
188
+ scale_color_manual(
189
+ values = c(Up = "#FF3333", Down = "#3333FF", No = "#AAAAAA"),
190
+ labels = c(Up = "Up", Down = "Down", No = "Non-Significant")
191
+ ) +
192
+ geom_text_repel(
193
+ aes(label = Label),
194
+ size = 3,
195
+ color = "#000000",
196
+ box.padding = unit(0.35, "lines"),
197
+ point.padding = unit(0.5, "lines"),
198
+ segment.color = "#000000"
199
+ ) +
200
+ theme_prism() +
201
+ theme(legend.title=element_blank()) +
202
+ labs(
203
+ x = "log2 Fold Change",
204
+ y = "-log10 Adjusted P-value"
205
+ )
206
+
207
+ png(volfile, res = 100, height = 800, width = 900)
208
+ print(p_vol)
209
+ dev.off()
210
+ }
156
211
 
157
212
  # Do enrichment analysis for a case using Enrichr
158
213
  # Args:
159
214
  # case: case name
160
215
  # markers: markers dataframe
161
216
  # sig: The expression to filter significant markers
162
- do_enrich <- function(case, markers, sig) {
217
+ do_enrich <- function(case, markers, sig, volgenes) {
163
218
  print(paste(" Running enrichment for case:", case))
164
219
  parts <- strsplit(case, ":")[[1]]
165
220
  sec <- parts[1]
@@ -171,6 +226,7 @@ do_enrich <- function(case, markers, sig) {
171
226
  cat("No markers found.", file = file.path(casedir, "error.txt"))
172
227
  return()
173
228
  }
229
+ plot_volcano(markers, file.path(casedir, "volcano.png"), sig, volgenes)
174
230
  markers_sig <- markers %>% filter(!!parse_expr(sig))
175
231
  if (nrow(markers_sig) == 0) {
176
232
  print(paste(" No significant markers found for case:", case))
@@ -255,7 +311,7 @@ do_case <- function(casename) {
255
311
  args$object <- srtobj
256
312
  }
257
313
  markers <- do_call(FindMarkers, args) %>% rownames_to_column("gene")
258
- do_enrich(casename, markers, case$sigmarkers)
314
+ do_enrich(casename, markers, case$sigmarkers, case$volcano_genes)
259
315
  }
260
316
 
261
317
  sapply(sort(names(cases)), do_case)
@@ -0,0 +1,40 @@
1
+ # Loaded variables: srtfile, outdir, srtobj
2
+
3
+ dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
4
+ dimplots = {{envs.dimplots | r: todot="-", skip=1}}
5
+
6
+ odir = file.path(outdir, "dimplots")
7
+ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
+ report_toc_file = file.path(odir, "report_toc.json")
9
+ # Realname => file
10
+ report_toc = list()
11
+
12
+ do_one_dimplot = function(name) {
13
+ print(paste0("Doing dimplots for: ", name))
14
+
15
+ case = list_update(dimplots_defaults, dimplots[[name]])
16
+ case$devpars = list_update(dimplots_defaults$devpars, dimplots[[name]]$devpars)
17
+ case$object = srtobj
18
+ if (is.null(case$cols)) {
19
+ case$cols = pal_ucscgb()(26)
20
+ }
21
+
22
+ excluded_args = c("devpars", "ident")
23
+ for (arg in excluded_args) {
24
+ assign(arg, case[[arg]])
25
+ case[[arg]] = NULL
26
+ }
27
+
28
+ if (case$reduction %in% c("dim", "auto")) {
29
+ case$reduction = NULL
30
+ }
31
+ report_toc[[name]] <<- paste0(slugify(name), ".dim.png")
32
+ figfile = file.path(odir, report_toc[[name]])
33
+ png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
34
+ p = do_call(DimPlot, case)
35
+ print(p)
36
+ dev.off()
37
+ }
38
+
39
+ sapply(names(dimplots), do_one_dimplot)
40
+ .save_toc()
@@ -0,0 +1,236 @@
1
+ # Loaded variables: srtfile, outdir, srtobj
2
+
3
+ features_defaults = {{envs.features_defaults | r: todot="-"}}
4
+ features = {{envs.features | r: todot="-", skip=1}}
5
+
6
+ odir = file.path(outdir, "features")
7
+ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
+ report_toc_file = file.path(odir, "report_toc.json")
9
+ # Section => list(
10
+ # list(name?, kind, file),
11
+ # ...
12
+ # )
13
+ report_toc = list()
14
+
15
+ .add_toc = function(section, toc) {
16
+ if (section %in% names(report_toc)) {
17
+ report_toc[[section]][length(report_toc[[section]])] <<- toc
18
+ } else {
19
+ report_toc[[section]] <<- list(toc)
20
+ }
21
+ }
22
+
23
+ .get_features = function(features) {
24
+ if (is.null(features)) { features = 20 }
25
+ if (is.numeric(features)) {
26
+ return (VariableFeatures(srtobj)[1:features])
27
+ }
28
+ if (is.character(features) && length(features) > 1) {
29
+ return (features)
30
+ }
31
+ if (is.character(features) && startsWith(features, "file://")) {
32
+ return (read.table(
33
+ substring(features, 8),
34
+ header = FALSE,
35
+ row.names = NULL,
36
+ check.names = FALSE
37
+ )$V1)
38
+ }
39
+
40
+ if (is.null(features)) {
41
+ if (is.null(default_features)) {
42
+ return (default[1:20])
43
+ } else {
44
+ return (default_features)
45
+ }
46
+ }
47
+
48
+ return (trimws(unlist(strsplit(features, ","))))
49
+ }
50
+
51
+ do_one_features = function(name) {
52
+ print(paste0("Doing features for: ", name))
53
+
54
+ case = list_update(features_defaults, features[[name]])
55
+ toc = list()
56
+ if (!is.null(case$section)) { toc$name = name }
57
+ case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
58
+ excluded_args = c(
59
+ "section",
60
+ "devpars",
61
+ "subset",
62
+ "plus",
63
+ "ident",
64
+ "kind"
65
+ )
66
+
67
+ fn = NULL
68
+ default_devpars = NULL
69
+ if ("ridge" %in% case$kind) {
70
+ case$kind = "ridge"
71
+ if (is.null(case$cols)) {
72
+ case$cols = pal_ucscgb(alpha = .8)(26)
73
+ }
74
+ excluded_args = c(excluded_args, "split.by")
75
+ fn = RidgePlot
76
+ default_devpars = function(features, ncol, uidents) {
77
+ if (is.null(ncol)) { ncol = 1 }
78
+ list(
79
+ width = 400 * ncol,
80
+ height = ceiling(length(features) / ncol) * ifelse(length(uidents) < 10, 300, 400),
81
+ res = 100
82
+ )
83
+ }
84
+ } else if ("vln" %in% case$kind || "violin" %in% case$kind) {
85
+ case$kind = "violin"
86
+ if (is.null(case$cols)) {
87
+ case$cols = pal_ucscgb(alpha = .8)(26)
88
+ }
89
+ fn = VlnPlot
90
+ default_devpars = function(features, ncol, uidents) {
91
+ if (is.null(ncol)) { ncol = 1 }
92
+ list(
93
+ width = 400 * ncol,
94
+ height = ceiling(length(features) / ncol) * 400,
95
+ res = 100
96
+ )
97
+ }
98
+ } else if ("feature" %in% case$kind) {
99
+ case$kind = "feature"
100
+ if (is.null(case$cols)) {
101
+ case$cols = c("lightgrey", pal_ucscgb()(1))
102
+ }
103
+ excluded_args = c(excluded_args, "group.by", "assay")
104
+ case$shape.by = case$group.by
105
+ fn = FeaturePlot
106
+ default_devpars = function(features, ncol, uidents) {
107
+ if (is.null(ncol)) { ncol = 1 }
108
+ list(
109
+ width = 400 * ncol,
110
+ height = ceiling(length(features) / ncol) * 300,
111
+ res = 100
112
+ )
113
+ }
114
+ } else if ("dot" %in% case$kind) {
115
+ case$kind = "dot"
116
+ if (is.null(case$cols)) {
117
+ case$cols = c("lightgrey", pal_ucscgb()(1))
118
+ }
119
+ if (is.null(case$plus)) {
120
+ case$plus = 'theme_prism(axis_text_angle=90)'
121
+ }
122
+ excluded_args = c(excluded_args, "slot", "ncol")
123
+ fn = DotPlot
124
+ default_devpars = function(features, ncol, uidents) {
125
+ list(
126
+ height = max(length(uidents) * 80 + 150, 420),
127
+ width = length(features) * 50 + 150,
128
+ res = 100
129
+ )
130
+ }
131
+ } else if ("heatmap" %in% case$kind) {
132
+ case$kind = "heatmap"
133
+ case = list_update(
134
+ list(
135
+ group.colors = pal_ucscgb(alpha = .8)(26),
136
+ size = 3.5,
137
+ group.bar.height = 0.01
138
+ ),
139
+ case
140
+ )
141
+ if (is.null(case$plus)) {
142
+ case$plus = 'scale_fill_gradientn(colors = c("lightgrey", pal_ucscgb()(1)), na.value = "white")'
143
+ }
144
+ excluded_args = c(excluded_args, "group.by", "split.by", "downsample", "ncol")
145
+ fn = DoHeatmap
146
+ default_devpars = function(features, ncol, uidents) {
147
+ list(
148
+ width = length(uidents) * 60 + 150,
149
+ height = length(features) * 40 + 150,
150
+ res = 100
151
+ )
152
+ }
153
+ } else if ("table" %in% case$kind) {
154
+ case$kind = "table"
155
+ excluded_args = c(excluded_args, "group.by", "split.by", "assay")
156
+ case$assays = case$assay
157
+ fn = AverageExpression
158
+ if (is.null(case$slot)) {
159
+ case$slot = "data"
160
+ }
161
+ } else {
162
+ stop("Unknown kind of plot")
163
+ }
164
+
165
+ for (arg in excluded_args) {
166
+ assign(arg, case[[arg]])
167
+ case[[arg]] = NULL
168
+ }
169
+
170
+ if (is.character(subset)) {
171
+ case$object = srtobj %>% tidyseurat::filter(!!rlang::parse_expr(subset))
172
+ } else {
173
+ case$object = srtobj
174
+ }
175
+ if (!is.null(ident)) {
176
+ Idents(case$object) = ident
177
+ }
178
+ case$features = .get_features(case$features)
179
+ if (!is.null(case$ncol)) {
180
+ case$ncol = min(case$ncol, length(case$features))
181
+ }
182
+
183
+ toc$kind = kind
184
+ if (kind == "table") {
185
+ expr = do_call(fn, case)$RNA %>%
186
+ as.data.frame() %>%
187
+ rownames_to_column("Feature") %>%
188
+ select(Feature, everything())
189
+
190
+ toc$file = paste0(slugify(name), ".txt")
191
+ write.table(expr, file.path(odir, toc$file), sep="\t", quote=FALSE, row.names=FALSE)
192
+ } else {
193
+ devpars = list_update(
194
+ default_devpars(case$features, case$ncol, unique(Idents(case$object))),
195
+ devpars
196
+ )
197
+ if (kind == "heatmap") {
198
+ if (!exists("downsample") || is.null(downsample)) {
199
+ downsample = "average"
200
+ }
201
+ if (downsample %in% c("average", "mean")) {
202
+ case$object = AverageExpression(case$object, return.seurat = TRUE)
203
+ } else if (is.integer(downsample)) {
204
+ case$object = subset(case$object, downsample = downsample)
205
+ } else {
206
+ stop("Unknown downsample method.")
207
+ }
208
+ }
209
+ p = do_call(fn, case)
210
+ if (!is.null(plus)) {
211
+ for (pls in plus) {
212
+ p = p + eval(parse(text = pls))
213
+ }
214
+ }
215
+ figfile = file.path(odir, paste0(slugify(name), ".", kind, ".png"))
216
+ toc$file = basename(figfile)
217
+ png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
218
+ tryCatch({
219
+ print(p)
220
+ }, error = function(e) {
221
+ stop(
222
+ paste(
223
+ paste(names(devpars), collapse=" "),
224
+ paste(devpars, collapse=" "),
225
+ e,
226
+ sep = "\n"
227
+ )
228
+ )
229
+ })
230
+ dev.off()
231
+ }
232
+ .add_toc(if (is.null(section)) name else section, toc)
233
+ }
234
+
235
+ sapply(names(features), do_one_features)
236
+ .save_toc()
@@ -0,0 +1,105 @@
1
+ # Loaded variables: srtfile, outdir, srtobj
2
+
3
+ stats_defaults = {{envs.stats_defaults | r: todot="-"}}
4
+ stats = {{envs.stats | r: todot="-", skip=1}}
5
+
6
+ odir = file.path(outdir, "stats")
7
+ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
+ report_toc_file = file.path(odir, "report_toc.json")
9
+ # Realname => {bar: ..., pie: ..., table: ...}
10
+ report_toc = list()
11
+
12
+ .add_toc = function(name, toc) {
13
+ report_toc[[name]] <<- toc
14
+ }
15
+
16
+ .save_toc = function() {
17
+ writeLines(toJSON(report_toc, pretty = TRUE, auto_unbox = TRUE), report_toc_file)
18
+ }
19
+
20
+ do_one_stats = function(name) {
21
+ print(paste0("Doing stats for: ", name))
22
+
23
+ toc = list()
24
+
25
+ case = list_update(stats_defaults, stats[[name]])
26
+ case$devpars = list_update(stats_defaults$devpars, case$devpars)
27
+ if (isTRUE(case$pie) && !is.null(case$group.by)) {
28
+ stop("pie charts are not supported for group-by")
29
+ }
30
+
31
+ figfile = file.path(odir, paste0(slugify(name), ".bar.png"))
32
+ piefile = file.path(odir, paste0(slugify(name), ".pie.png"))
33
+ tablefile = file.path(odir, paste0(slugify(name), ".txt"))
34
+
35
+ df_cells = srtobj@meta.data
36
+ if (!is.null(case$subset)) {
37
+ df_cells = df_cells %>% filter(!!rlang::parse_expr(case$subset))
38
+ }
39
+
40
+ select_cols = c(case$ident, case$group.by, case$split.by)
41
+ df_cells = df_cells %>%
42
+ select(all_of(select_cols)) %>%
43
+ group_by(!!!syms(select_cols)) %>%
44
+ summarise(.n = n(), .groups = "drop") %>%
45
+ mutate(.frac = .n / sum(.n))
46
+
47
+ if (isTRUE(case$table)) {
48
+ toc$table = basename(tablefile)
49
+ write.table(df_cells, tablefile, sep="\t", quote=FALSE, row.names=FALSE)
50
+ }
51
+ if (isTRUE(case$pie)) {
52
+ p_pie = df_cells %>%
53
+ arrange(!!sym(case$ident)) %>%
54
+ ggplot(aes(x="", y=.n, fill=rev(!!sym(case$ident)))) +
55
+ geom_bar(stat="identity", width=1, alpha=.8) +
56
+ coord_polar("y", start=0) +
57
+ guides(fill = guide_legend(reverse = TRUE, title = NULL)) +
58
+ theme_void() +
59
+ geom_label_repel(
60
+ if (isTRUE(case$frac)) aes(label=sprintf("%.1f%%", .frac * 100)) else aes(label=.n),
61
+ position = position_stack(vjust = 0.5),
62
+ color="#333333",
63
+ fill="#EEEEEE",
64
+ size=5
65
+ ) +
66
+ scale_fill_ucscgb(alpha=.8)
67
+
68
+ if (!is.null(case$split.by)) {
69
+ p_pie = p_pie + facet_wrap(case$split.by)
70
+ }
71
+
72
+ toc$pie = basename(piefile)
73
+ png(piefile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
74
+ print(p_pie)
75
+ dev.off()
76
+ }
77
+
78
+ ngroups = ifelse(is.null(case$group.by), 1, length(unique(df_cells[[case$group.by]])))
79
+ nidents = length(unique(df_cells[[case$ident]]))
80
+ bar_position = ifelse(ngroups > 5, "stack", "dodge")
81
+ p = df_cells %>%
82
+ ggplot(aes(
83
+ x=!!sym(case$ident),
84
+ y=if (isTRUE(case$frac)) .frac else .n,
85
+ fill=!!sym(ifelse(is.null(case$group.by), case$ident, case$group.by))
86
+ )) +
87
+ geom_bar(stat="identity", position=bar_position, alpha=.8) +
88
+ theme_prism(axis_text_angle = 90) +
89
+ scale_fill_manual(values=rep(pal_ucscgb(alpha=.8)(26), 10)[1:max(ngroups, nidents)]) +
90
+ ylab(ifelse(isTRUE(case$frac), "Fraction of cells", "Number of cells"))
91
+
92
+ if (!is.null(case$split.by)) {
93
+ p = p + facet_wrap(case$split.by)
94
+ }
95
+
96
+ toc$bar = basename(figfile)
97
+ png(figfile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
98
+ print(p)
99
+ dev.off()
100
+
101
+ .add_toc(name, toc)
102
+ }
103
+
104
+ sapply(names(stats), do_one_stats)
105
+ .save_toc()