biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (62)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +30 -10
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -168
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +127 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
  35. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  36. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  40. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  41. biopipen/scripts/tcr/Attach2Seurat.R +2 -1
  42. biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
  43. biopipen/scripts/tcr/CloneResidency.R +114 -34
  44. biopipen/scripts/tcr/Immunarch-basic.R +18 -4
  45. biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
  46. biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
  47. biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
  48. biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
  49. biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
  50. biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
  51. biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
  52. biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
  53. biopipen/scripts/tcr/Immunarch.R +7 -0
  54. biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
  55. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  56. biopipen/scripts/tcr/TCRClustering.R +8 -9
  57. biopipen/scripts/tcr/TESSA.R +66 -41
  58. biopipen/utils/misc.R +96 -1
  59. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
  60. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
  61. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
  62. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
@@ -5,6 +5,7 @@ library(future)
5
5
  library(bracer)
6
6
  library(ggplot2)
7
7
  library(tidyseurat)
8
+ library(slugify)
8
9
 
9
10
  metafile = {{in.metafile | quote}}
10
11
  rdsfile = {{out.rdsfile | quote}}
@@ -15,6 +16,18 @@ set.seed(8525)
15
16
  options(future.globals.maxSize = 80000 * 1024^2)
16
17
  plan(strategy = "multicore", workers = envs$ncores)
17
18
 
19
+ add_report(
20
+ list(
21
+ kind = "descr",
22
+ name = "Filters applied",
23
+ content = paste0(
24
+ "<p>Cell filters: ", html_escape(envs$cell_qc), "</p>",
25
+ "<p>Gene filters: ", html_escape(envs$gene_qc), "</p>"
26
+ )
27
+ ),
28
+ h1 = "Filters and QC"
29
+ )
30
+
18
31
  metadata = read.table(
19
32
  metafile,
20
33
  header = TRUE,
@@ -57,7 +70,7 @@ rename_files = function(e, sample, path) {
57
70
  }
58
71
 
59
72
  load_sample = function(sample) {
60
- print(paste(" Loading sample:", sample, "..."))
73
+ log_info("- Loading sample: {sample} ...")
61
74
  mdata = as.data.frame(metadata)[metadata$Sample == sample, , drop=TRUE]
62
75
  path = as.character(mdata$RNAData)
63
76
  if (is.na(path) || !is.character(path) || nchar(path) == 0) {
@@ -105,10 +118,10 @@ load_sample = function(sample) {
105
118
  # Load data
106
119
  samples = as.character(metadata$Sample)
107
120
 
108
- print("- Reading samples individually ...")
121
+ log_info("Reading samples individually ...")
109
122
  obj_list = lapply(samples, load_sample)
110
123
 
111
- print("- Merging samples ...")
124
+ log_info("Merging samples ...")
112
125
  if (length(obj_list) >= 2) {
113
126
  y = c()
114
127
  for (i in 2:length(obj_list)) y = c(y, obj_list[[i]])
@@ -117,7 +130,7 @@ if (length(obj_list) >= 2) {
117
130
  sobj = obj_list[[1]]
118
131
  }
119
132
 
120
- print("- Adding metadata for QC ...")
133
+ log_info("Adding metadata for QC ...")
121
134
  sobj$percent.mt = PercentageFeatureSet(sobj, pattern = "^MT-")
122
135
  sobj$percent.ribo = PercentageFeatureSet(sobj, pattern = "^RP[SL]")
123
136
  sobj$percent.hb = PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
@@ -126,7 +139,7 @@ sobj$percent.plat = PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
126
139
  dim_df = data.frame(When = "Before_QC", nCells = ncol(sobj), nGenes = nrow(sobj))
127
140
 
128
141
  if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
129
- warning("No cell QC criteria is provided. All cells will be kept.", immediate. = TRUE)
142
+ log_warn("No cell QC criteria is provided. All cells will be kept.")
130
143
  envs$cell_qc = "TRUE"
131
144
  }
132
145
 
@@ -136,9 +149,21 @@ plotsdir = file.path(joboutdir, "plots")
136
149
  dir.create(plotsdir, showWarnings = FALSE)
137
150
 
138
151
  # Violin plots
139
- print("- Plotting violin plots ...")
152
+ log_info("Plotting violin plots ...")
153
+ add_report(
154
+ list(
155
+ kind = "descr",
156
+ content = paste(
157
+ "The violin plots for each feature. The cells are grouped by sample.",
158
+ "The cells that fail the QC criteria are colored in red, and",
159
+ "the cells that pass the QC criteria are colored in black.",
160
+ "The cells that fail the QC criteria are filtered out in the returned Seurat object."
161
+ )
162
+ ),
163
+ h1 = "Violin Plots"
164
+ )
140
165
  for (feat in feats) {
141
- print(paste0(" ", feat, "..."))
166
+ log_info("- For feature: {feat}")
142
167
  vln_p = VlnPlot(
143
168
  sobj,
144
169
  cols = rep("white", length(samples)),
@@ -150,20 +175,43 @@ for (feat in feats) {
150
175
  aes(color = .QC),
151
176
  data = vln_p$data,
152
177
  position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9)
153
- ) + scale_color_manual(values = c("black", "red"), breaks = c(TRUE, FALSE))
178
+ ) + scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE))
154
179
 
180
+ vlnplot = file.path(plotsdir, paste0(slugify(feat, tolower = FALSE), ".vln.png"))
155
181
  png(
156
- file.path(plotsdir, paste0(feat, ".vln.png")),
182
+ vlnplot,
157
183
  width = 800 + length(samples) * 15, height = 600, res = 100
158
184
  )
159
185
  print(vln_p)
160
186
  dev.off()
187
+
188
+ add_report(
189
+ list(
190
+ src = vlnplot,
191
+ name = feat,
192
+ descr = paste0("Distribution of ", feat, " for each sample.")
193
+ ),
194
+ h1 = "Violin Plots",
195
+ ui = "table_of_images"
196
+ )
161
197
  }
162
198
 
163
199
  # Scatter plots against nCount_RNA
164
- print("- Plotting scatter plots ...")
200
+ log_info("Plotting scatter plots ...")
201
+ add_report(
202
+ list(
203
+ kind = "descr",
204
+ content = paste(
205
+ "The scatter plots for each feature against nCount_RNA. ",
206
+ "The cells that fail the QC criteria are colored in red, and",
207
+ "the cells that pass the QC criteria are colored in black.",
208
+ "The cells that fail the QC criteria are filtered out in the returned Seurat object."
209
+ )
210
+ ),
211
+ h1 = "Scatter Plots"
212
+ )
165
213
  for (feat in setdiff(feats, "nCount_RNA")) {
166
- print(paste0(" ", feat, "..."))
214
+ log_info("- For feature: {feat}, against nCount_RNA")
167
215
  scat_p = FeatureScatter(
168
216
  sobj,
169
217
  feature1 = "nCount_RNA",
@@ -171,22 +219,30 @@ for (feat in setdiff(feats, "nCount_RNA")) {
171
219
  group.by = ".QC"
172
220
  ) +
173
221
  NoLegend() +
174
- scale_color_manual(values = c("black", "red"), breaks = c(TRUE, FALSE))
222
+ scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE))
175
223
 
176
- png(
177
- file.path(plotsdir, paste0(feat, "-nCount_RNA.scatter.png")),
178
- width = 800, height = 600, res = 100
179
- )
224
+ scatfile = file.path(plotsdir, paste0(slugify(feat, tolower = FALSE), "-nCount_RNA.scatter.png"))
225
+ png(scatfile, width = 800, height = 600, res = 100)
180
226
  print(scat_p)
181
227
  dev.off()
228
+
229
+ add_report(
230
+ list(
231
+ src = scatfile,
232
+ name = paste0(feat, " vs nCount_RNA"),
233
+ descr = paste0("Scatter plot for ", feat, " against nCount_RNA")
234
+ ),
235
+ h1 = "Scatter Plots",
236
+ ui = "table_of_images"
237
+ )
182
238
  }
183
239
 
184
240
  # Do the filtering
185
- print("- Filtering cells ...")
241
+ log_info("Filtering cells using QC criteria ...")
186
242
  sobj = sobj %>% filter(.QC)
187
243
  sobj$.QC = NULL
188
244
 
189
- print("- Filtering genes ...")
245
+ log_info("Filtering genes ...")
190
246
  if (is.list(envs$gene_qc)) {
191
247
  if ("min_cells" %in% names(envs$gene_qc)) {
192
248
  genes = rownames(sobj)[Matrix::rowSums(sobj) >= envs$gene_qc$min_cells]
@@ -202,8 +258,26 @@ dim_df = rbind(
202
258
  )
203
259
  )
204
260
 
261
+ log_info("Saving dimension table ...")
205
262
  write.table(dim_df, file = file.path(plotsdir, "dim.txt"),
206
263
  row.names = FALSE, quote = FALSE, sep = "\t")
207
264
 
208
- print("- Saving results ...")
265
+ add_report(
266
+ list(
267
+ kind = "descr",
268
+ content = paste(
269
+ "The dimension table for the Seurat object. The table contains the number of cells and genes before and after QC."
270
+ )
271
+ ),
272
+ list(
273
+ kind = "table",
274
+ data = list(path = file.path(plotsdir, "dim.txt"))
275
+ ),
276
+ h1 = "Filters and QC"
277
+ )
278
+
279
+
280
+ log_info("Saving filtered seurat object ...")
209
281
  saveRDS(sobj, rdsfile)
282
+
283
+ save_report(joboutdir)
@@ -5,11 +5,14 @@ library(tibble)
5
5
  library(enrichR)
6
6
  library(rlang)
7
7
  library(dplyr)
8
+ library(slugify)
9
+ library(ggprism)
8
10
 
9
11
  setEnrichrSite("Enrichr")
10
12
 
11
13
  srtfile <- {{in.srtobj | r}}
12
14
  outdir <- {{out.outdir | r}}
15
+ joboutdir <- {{job.outdir | r}}
13
16
  mutaters <- {{ envs.mutaters | r }}
14
17
  ident <- {{ envs.ident | r }}
15
18
  group.by <- {{ envs["group-by"] | r }} # nolint
@@ -22,16 +25,16 @@ cases <- {{ envs.cases | r: todot = "-" }} # nolint
22
25
 
23
26
  set.seed(8525)
24
27
 
25
- print("- Loading Seurat object ...")
28
+ log_info("Loading Seurat object ...")
26
29
  srtobj <- readRDS(srtfile)
27
30
 
28
- print("- Mutate meta data if needed ...")
31
+ log_info("Mutate meta data if needed ...")
29
32
  if (!is.null(mutaters) && length(mutaters)) {
30
33
  srtobj@meta.data <- srtobj@meta.data %>%
31
34
  mutate(!!!lapply(mutaters, parse_expr))
32
35
  }
33
36
 
34
- print("- Expanding cases ...")
37
+ log_info("Expanding cases ...")
35
38
  if (is.null(cases) || length(cases) == 0) {
36
39
  cases <- list(
37
40
  DEFAULT = list(
@@ -61,11 +64,14 @@ if (is.null(cases) || length(cases) == 0) {
61
64
 
62
65
  # Expand each and ident
63
66
  newcases <- list()
67
+ sections <- c()
64
68
  for (name in names(cases)) { # nolint
65
69
  case <- cases[[name]]
66
70
  if (is.null(case$each) && !is.null(case$ident)) {
71
+ sections <- c(sections, case$section)
67
72
  newcases[[paste0(case$section, ":", name)]] <- case
68
73
  } else if (is.null(case$each)) {
74
+ sections <- c(sections, name)
69
75
  idents <- srtobj@meta.data %>%
70
76
  pull(case$group.by) %>%
71
77
  unique() %>%
@@ -93,15 +99,21 @@ for (name in names(cases)) { # nolint
93
99
  na.omit()
94
100
  for (ident in idents) {
95
101
  kname <- if (name == "DEFAULT") "" else paste0("-", name)
102
+ sections <- c(sections, paste0(each, kname))
96
103
  key <- paste0(each, kname, ":", ident)
97
104
  if (case$prefix_each) {
98
- key <- paste0(case$each, "-", key)
105
+ key <- paste0(
106
+ ifelse(case$each == "seurat_clusters", "Cluster", case$each),
107
+ " - ",
108
+ key
109
+ )
99
110
  }
100
111
  newcases[[key]] <- case
101
112
  newcases[[key]]$ident <- ident
102
113
  newcases[[key]]$group.by <- by # nolint
103
114
  }
104
115
  } else {
116
+ sections <- c(sections, case$each)
105
117
  key <- paste0(case$each, ":", each)
106
118
  if (name != "DEFAULT") {
107
119
  key <- paste0(key, " - ", name)
@@ -112,11 +124,33 @@ for (name in names(cases)) { # nolint
112
124
  }
113
125
  }
114
126
  cases <- newcases
127
+ single_section <- length(unique(sections)) == 1
128
+
129
+ casename_info <- function(casename, create = FALSE) {
130
+ sec_case_names <- strsplit(casename, ":")[[1]]
131
+ cname <- paste(sec_case_names[-1], collapse = ":")
132
+
133
+ out <- list(
134
+ casename = casename,
135
+ section = sec_case_names[1],
136
+ case = cname,
137
+ section_slug = slugify(sec_case_names[1], tolower = FALSE),
138
+ case_slug = slugify(cname, tolower = FALSE)
139
+ )
140
+ out$casedir <- file.path(outdir, out$section_slug, out$case_slug)
141
+ if (create) {
142
+ dir.create(out$casedir, showWarnings = FALSE, recursive = TRUE)
143
+ }
144
+ out
145
+ }
115
146
 
116
147
  do_enrich <- function(expr, odir) {
117
- print(" Saving expressions ...")
148
+ log_info(" Saving expressions ...")
149
+ expr <- expr %>% as.data.frame()
150
+ colnames(expr) <- c("Expression")
151
+ expr <- expr %>% rownames_to_column("Gene") %>% select(Gene, Expression)
118
152
  write.table(
119
- expr %>% as.data.frame() %>% rownames_to_column("Gene"),
153
+ expr,
120
154
  file.path(odir, "expr.txt"),
121
155
  sep = "\t",
122
156
  row.names = TRUE,
@@ -124,7 +158,7 @@ do_enrich <- function(expr, odir) {
124
158
  quote = FALSE
125
159
  )
126
160
  write.table(
127
- expr %>% as.data.frame() %>% rownames_to_column("Gene") %>% head(n),
161
+ expr %>% head(n),
128
162
  file.path(odir, "exprn.txt"),
129
163
  sep = "\t",
130
164
  row.names = TRUE,
@@ -132,8 +166,8 @@ do_enrich <- function(expr, odir) {
132
166
  quote = FALSE
133
167
  )
134
168
 
135
- print(" Running enrichment ...")
136
- enriched <- enrichr(rownames(head(expr, n)), dbs) # nolint
169
+ log_info(" Running enrichment ...")
170
+ enriched <- enrichr(head(expr$Gene, n), dbs) # nolint
137
171
  for (db in dbs) {
138
172
  write.table(
139
173
  enriched[[db]],
@@ -147,29 +181,77 @@ do_enrich <- function(expr, odir) {
147
181
  file.path(odir, paste0("Enrichr-", db, ".png")),
148
182
  res = 100, height = 1000, width = 1000
149
183
  )
150
- print(plotEnrich(enriched[[db]], showTerms = 20, title = db)) # nolint
184
+ print(
185
+ plotEnrich(enriched[[db]], showTerms = 20, title = db) +
186
+ theme_prism()
187
+ )
151
188
  dev.off()
152
189
  }
153
190
  }
154
191
 
155
192
  do_case <- function(casename) {
156
- print(paste("- Running for case:", casename))
193
+ log_info("- Running for case: {casename} ...")
157
194
  case <- cases[[casename]]
158
- parts <- unlist(strsplit(casename, ":"))
159
- section <- parts[1]
160
- casename <- paste(parts[-1], collapse = ":")
195
+ info <- casename_info(casename, create = TRUE)
161
196
 
162
- print(" Calculating average expression ...")
197
+ log_info(" Calculating average expression ...")
163
198
  avgexpr <- AverageExpression(
164
199
  srtobj,
165
200
  group.by = case$group.by
166
201
  )$RNA[, case$ident, drop = FALSE]
167
202
  avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
168
203
 
169
- odir <- file.path(outdir, section, casename)
170
- dir.create(odir, recursive = TRUE, showWarnings = FALSE)
204
+ do_enrich(avgexpr, info$casedir)
205
+
206
+ add_case_report(info)
207
+ }
171
208
 
172
- do_enrich(avgexpr, odir)
209
+ add_case_report <- function(info) {
210
+ log_info(" Adding case report ...")
211
+ h1 = ifelse(
212
+ info$section == "DEFAULT",
213
+ info$case,
214
+ ifelse(
215
+ single_section,
216
+ paste0(
217
+ ifelse(info$section == "seurat_clusters", "Cluster", info$section),
218
+ " - ",
219
+ info$case
220
+ ),
221
+ info$section
222
+ )
223
+ )
224
+ h2 = ifelse(
225
+ info$section == "DEFAULT",
226
+ "#",
227
+ ifelse(single_section, "#", info$case)
228
+ )
229
+
230
+ add_report(
231
+ list(
232
+ kind = "descr",
233
+ content = paste0("Top ", n, " expressing genes")
234
+ ),
235
+ list(
236
+ kind = "table",
237
+ src = file.path(info$casedir, "exprn.txt")
238
+ ),
239
+ h1 = h1,
240
+ h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
241
+ h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
242
+ )
243
+
244
+ add_report(
245
+ list(
246
+ kind = "descr",
247
+ content = paste0("Enrichment analysis for the top ", n, " expressing genes")
248
+ ),
249
+ list(kind = "enrichr", dir = info$casedir),
250
+ h1 = h1,
251
+ h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
252
+ h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
253
+ )
173
254
  }
174
255
 
175
256
  sapply(sort(names(cases)), do_case)
257
+ save_report(joboutdir)
@@ -1,10 +1,13 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
1
2
  source("{{biopipen_dir}}/utils/gsea.R")
2
3
 
3
4
  library(parallel)
4
5
  library(Seurat)
6
+ library(slugify)
5
7
 
6
8
  sobjfile <- {{ in.sobjfile | r }}
7
9
  outdir <- {{ out.outdir | r }}
10
+ joboutdir <- {{ job.outdir | r }}
8
11
  gmtfile <- {{ envs.gmtfile | r }}
9
12
  ncores <- {{ envs.ncores | r }}
10
13
  fgsea <- {{ envs.fgsea | r }}
@@ -37,10 +40,10 @@ pathways <- gmt_pathways(gmtfile)
37
40
  metabolics <- unique(as.vector(unname(unlist(pathways))))
38
41
  sobj <- readRDS(sobjfile)
39
42
 
40
- do_one_group <- function(obj, group, outputdir) {
41
- print(paste("- Processing group", grouping, ":", group))
43
+ do_one_group <- function(obj, group, outputdir, h1) {
44
+ log_info(paste("- Processing group", grouping, ":", group))
42
45
  groupname = paste0(grouping_prefix, group)
43
- odir = file.path(outputdir, groupname)
46
+ odir = file.path(outputdir, slugify(groupname, tolower = FALSE))
44
47
  dir.create(odir, showWarnings = FALSE)
45
48
 
46
49
  classes = as.character(obj@meta.data[[grouping]])
@@ -65,19 +68,24 @@ do_one_group <- function(obj, group, outputdir) {
65
68
  }
66
69
  }, error=function(e) {
67
70
  unlink(odir, recursive = T, force = T)
68
- warning(paste("Unable to run for", group))
69
- warning(e)
71
+ log_warn(paste("Unable to run for", group))
72
+ log_warn(e)
70
73
  })
71
74
 
75
+ add_report(
76
+ list(kind = "fgsea", dir = odir),
77
+ h1 = ifelse(is.null(h1), groupname, h1),
78
+ h2 = ifelse(is.null(h1), "#", groupname)
79
+ )
72
80
  }
73
81
 
74
82
  do_one_subset <- function(s, subset_col, subset_prefix) {
75
- print(paste("Processing subset", subset_col, ":", s))
83
+ log_info(paste("Processing subset", subset_col, ":", s))
76
84
  if (is.null(s)) {
77
85
  outputdir <- file.path(outdir, "ALL")
78
86
  subset_obj <- sobj
79
87
  } else {
80
- outputdir <- file.path(outdir, paste0(subset_prefix, s))
88
+ outputdir <- file.path(outdir, slugify(paste0(subset_prefix, s), tolower = FALSE))
81
89
  subset_code <- paste0("subset(sobj, subset = ", subset_col, "=='", s, "')")
82
90
  subset_obj <- eval(parse(text = subset_code))
83
91
  }
@@ -85,9 +93,13 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
85
93
 
86
94
  subset_obj <- subset(subset_obj, features = intersect(rownames(subset_obj), metabolics))
87
95
 
96
+ h1 <- NULL
97
+ if (!is.null(s)) {
98
+ h1 <- paste0(subset_prefix, s)
99
+ }
88
100
  groups = subset_obj@meta.data[[grouping]]
89
101
  x = mclapply(as.character(unique(groups)), function(group) {
90
- do_one_group(subset_obj, group, outputdir)
102
+ do_one_group(subset_obj, group, outputdir, h1)
91
103
  }, mc.cores = ncores)
92
104
  if (any(unlist(lapply(x, class)) == "try-error")) {
93
105
  stop("mclapply error")
@@ -110,3 +122,4 @@ if (is.null(subsetting_cols)) {
110
122
  }
111
123
  }
112
124
 
125
+ save_report(joboutdir)
@@ -4,9 +4,11 @@ source("{{biopipen_dir}}/utils/gsea.R")
4
4
  library(parallel)
5
5
  library(scater)
6
6
  library(Seurat)
7
+ library(slugify)
7
8
 
8
9
  sobjfile <- {{ in.sobjfile | r }}
9
10
  outdir <- {{ out.outdir | r }}
11
+ joboutdir <- {{ job.outdir | r }}
10
12
  gmtfile <- {{ envs.gmtfile | r }}
11
13
  ncores <- {{ envs.ncores | r }}
12
14
  fgsea <- {{ envs.fgsea | r }}
@@ -47,7 +49,8 @@ do_one_comparison <- function(
47
49
  control,
48
50
  groupdir,
49
51
  subset_col,
50
- subset_prefix
52
+ subset_prefix,
53
+ groupname
51
54
  ) {
52
55
  print(paste(" Design:", compname, "(", case, ",", control, ")"))
53
56
  case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
@@ -68,6 +71,11 @@ do_one_comparison <- function(
68
71
  })
69
72
  if (is.null(control_obj)) {
70
73
  print(" Skip (not enough cells in control)")
74
+ add_report(
75
+ list(kind = "error", content = "Not enough cells in control"),
76
+ h1 = groupname,
77
+ h2 = compname
78
+ )
71
79
  return (NULL)
72
80
  }
73
81
  exprs_case = GetAssayData(case_obj)
@@ -77,6 +85,11 @@ do_one_comparison <- function(
77
85
  dir.create(odir, showWarnings = FALSE)
78
86
  if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
79
87
  print(" Skip (not enough cells)")
88
+ add_report(
89
+ list(kind = "error", content = "Not enough cells"),
90
+ h1 = groupname,
91
+ h2 = compname
92
+ )
80
93
  return (NULL)
81
94
  }
82
95
  if (fgsea) {
@@ -95,6 +108,12 @@ do_one_comparison <- function(
95
108
  outdir = odir,
96
109
  envs = list(nproc = 1)
97
110
  )
111
+
112
+ add_report(
113
+ list(kind = "fgsea", dir = odir),
114
+ h1 = groupname,
115
+ h2 = compname
116
+ )
98
117
  } else {
99
118
  runGSEA(
100
119
  cbind(exprs_case, exprs_control),
@@ -114,7 +133,7 @@ do_one_group <- function(group) {
114
133
  )
115
134
  obj = eval(parse(text = group_code))
116
135
  groupname = paste0(grouping_prefix, group)
117
- groupdir = file.path(outdir, groupname)
136
+ groupdir = file.path(outdir, slugify(groupname, tolower = FALSE))
118
137
  dir.create(groupdir, showWarnings = FALSE)
119
138
 
120
139
  for (i in seq_along(subsetting_comparison)) {
@@ -132,7 +151,8 @@ do_one_group <- function(group) {
132
151
  sci[[compname]][2],
133
152
  groupdir,
134
153
  subsetting_cols[i],
135
- subsetting_prefix[i]
154
+ subsetting_prefix[i],
155
+ groupname
136
156
  )
137
157
  }
138
158
  )
@@ -148,3 +168,5 @@ if (ncores == 1) {
148
168
  stop("mclapply error")
149
169
  }
150
170
  }
171
+
172
+ save_report(joboutdir)
@@ -296,6 +296,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
296
296
  size = 1,
297
297
  color = "black"
298
298
  )',
299
+ "scale_fill_biopipen()",
299
300
  "theme_prism(axis_text_angle = 90)"
300
301
  ),
301
302
  devpars = vio_devpars,
@@ -1,3 +1,4 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
1
2
  source("{{biopipen_dir}}/utils/gsea.R")
2
3
  source("{{biopipen_dir}}/utils/plot.R")
3
4
 
@@ -7,9 +8,11 @@ library(ggprism)
7
8
  library(Matrix)
8
9
  library(sparseMatrixStats)
9
10
  library(Seurat)
11
+ library(slugify)
10
12
 
11
13
  sobjfile <- {{ in.sobjfile | r }}
12
14
  outdir <- {{ out.outdir | r }}
15
+ joboutdir <- {{ job.outdir | r }}
13
16
  gmtfile <- {{ envs.gmtfile | r }}
14
17
  select_pcs <- {{ envs.select_pcs | r }}
15
18
  ncores <- {{ envs.ncores | r }}
@@ -43,12 +46,12 @@ metabolics <- unique(as.vector(unname(unlist(pathways))))
43
46
  sobj <- readRDS(sobjfile)
44
47
 
45
48
  do_one_subset <- function(s, subset_col, subset_prefix) {
46
- print(paste0(" Handling subset value: ", s, " ..."))
49
+ log_info(paste0(" Handling subset value: ", s, " ..."))
47
50
  if (is.null(s)) {
48
51
  subset_dir = file.path(outdir, "ALL")
49
52
  subset_obj = sobj
50
53
  } else {
51
- subset_dir = file.path(outdir, paste0(subset_prefix, s))
54
+ subset_dir = file.path(outdir, slugify(paste0(subset_prefix, s), tolower = FALSE))
52
55
  subset_code = paste0("subset(sobj, subset = ", subset_col, " == '", s, "')")
53
56
  subset_obj = eval(parse(text = subset_code))
54
57
  }
@@ -214,10 +217,16 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
214
217
  )
215
218
 
216
219
  ggsave(file.path(subset_dir, "PC_variance_plot.pdf"), p, device = "pdf", useDingbats = FALSE)
220
+
221
+ add_report(
222
+ list(kind = "descr", content = "Metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities"),
223
+ list(kind = "image", src = bubblefile),
224
+ h1 = ifelse(is.null(s), "Metabolic pathway heterogeneity", paste0(subset_prefix, s))
225
+ )
217
226
  }
218
227
 
219
228
  do_one_subset_col <- function(subset_col, subset_prefix) {
220
- print(paste0("- Handling subset column: ", subset_col, " ..."))
229
+ log_info(paste0("- Handling subset column: ", subset_col, " ..."))
221
230
  if (is.null(subset_col)) {
222
231
  do_one_subset(NULL, subset_col = NULL, subset_prefix = NULL)
223
232
  }
@@ -240,3 +249,5 @@ if (is.null(subsetting_cols)) {
240
249
  do_one_subset_col(subsetting_cols[i], subsetting_prefix[i])
241
250
  }
242
251
  }
252
+
253
+ save_report(joboutdir)
@@ -11,6 +11,7 @@ immfile = {{in.immfile | r}}
11
11
  sobjfile = {{in.sobjfile | r}}
12
12
  outfile = {{out.outfile | r}}
13
13
  metacols = {{envs.metacols | r}}
14
+ prefix = {{envs.prefix | r}}
14
15
 
15
16
  immdata = readRDS(immfile)
16
17
  sobj = readRDS(sobjfile)
@@ -31,7 +32,7 @@ metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
31
32
 
32
33
  cldata %>%
33
34
  separate_rows(Barcode, sep=";") %>%
34
- mutate(Barcode = glue("{{envs.prefix}}{Barcode}"))
35
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
35
36
 
36
37
  }))
37
38