biopipen-0.21.2-py3-none-any.whl → biopipen-0.22.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (62)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +30 -10
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -168
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +127 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
  35. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  36. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  40. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  41. biopipen/scripts/tcr/Attach2Seurat.R +2 -1
  42. biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
  43. biopipen/scripts/tcr/CloneResidency.R +114 -34
  44. biopipen/scripts/tcr/Immunarch-basic.R +18 -4
  45. biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
  46. biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
  47. biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
  48. biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
  49. biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
  50. biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
  51. biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
  52. biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
  53. biopipen/scripts/tcr/Immunarch.R +7 -0
  54. biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
  55. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  56. biopipen/scripts/tcr/TCRClustering.R +8 -9
  57. biopipen/scripts/tcr/TESSA.R +66 -41
  58. biopipen/utils/misc.R +96 -1
  59. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
  60. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
  61. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
  62. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
biopipen/scripts/scrna/MetaMarkers.R
@@ -12,16 +12,19 @@ library(ggplot2)
12
12
  library(ggprism)
13
13
  library(parallel)
14
14
  library(tidyseurat)
15
+ library(slugify)
15
16
 
16
17
  setEnrichrSite("Enrichr")
17
18
 
18
19
  srtfile <- {{ in.srtobj | quote }}
19
20
  outdir <- {{ out.outdir | quote }}
21
+ joboutdir <- {{ job.outdir | quote }}
20
22
  ncores <- {{ envs.ncores | int }}
21
23
  mutaters <- {{ envs.mutaters | r }}
22
24
  idents <- {{ envs.idents | r }}
23
25
  group_by <- {{ envs["group-by"] | r }}
24
26
  each <- {{ envs.each | r }}
27
+ subset <- {{ envs.subset | r }}
25
28
  prefix_each <- {{ envs.prefix_each | r }}
26
29
  p_adjust <- {{ envs.p_adjust | r }}
27
30
  section <- {{ envs.section | r }}
@@ -32,15 +35,15 @@ cases <- {{ envs.cases | r: todot = "-" }}
32
35
 
33
36
  set.seed(8525)
34
37
 
35
- print("- Reading Seurat object ...")
38
+ log_info("- Reading Seurat object ...")
36
39
  srtobj <- readRDS(srtfile)
37
40
 
38
- print("- Mutate meta data if needed ...")
41
+ log_info("- Mutate meta data if needed ...")
39
42
  if (!is.null(mutaters) && length(mutaters)) {
40
43
  srtobj@meta.data <- srtobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
41
44
  }
42
45
 
43
- print("- Expanding cases ...")
46
+ log_info("- Expanding cases ...")
44
47
  if (is.null(cases) || length(cases) == 0) {
45
48
  cases <- list(
46
49
  DEFAULT = list(
@@ -49,6 +52,7 @@ if (is.null(cases) || length(cases) == 0) {
49
52
  each = each,
50
53
  prefix_each = prefix_each,
51
54
  p_adjust = p_adjust,
55
+ subset = subset,
52
56
  section = section,
53
57
  dbs = dbs,
54
58
  sigmarkers = sigmarkers,
@@ -65,6 +69,7 @@ if (is.null(cases) || length(cases) == 0) {
65
69
  prefix_each = prefix_each,
66
70
  p_adjust = p_adjust,
67
71
  section = section,
72
+ subset = subset,
68
73
  dbs = dbs,
69
74
  sigmarkers = sigmarkers,
70
75
  method = method
@@ -74,12 +79,19 @@ if (is.null(cases) || length(cases) == 0) {
74
79
  }
75
80
 
76
81
  newcases <- list()
82
+ sections <- c()
77
83
  for (name in names(cases)) {
78
84
  case <- cases[[name]]
79
85
  if (is.null(case$each)) {
86
+ sections <- c(sections, case$section)
80
87
  newcases[[paste0(case$section, ":", name)]] <- case
81
88
  } else {
82
- eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
89
+ if (is.null(case$subset)) {
90
+ eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
91
+ } else {
92
+ eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>% pull(case$each) %>% unique() %>% na.omit()
93
+ }
94
+ sections <- c(sections, case$each)
83
95
  for (each in eachs) {
84
96
  by = make.names(paste0(".", name, "_", case$each, "_", each))
85
97
  idents <- case$idents
@@ -111,98 +123,98 @@ for (name in names(cases)) {
111
123
  }
112
124
  }
113
125
  cases <- newcases
126
+ single_section <- length(unique(sections)) == 1
127
+
128
+ casename_info <- function(casename, create = FALSE) {
129
+ sec_case_names <- strsplit(casename, ":")[[1]]
130
+ cname <- paste(sec_case_names[-1], collapse = ":")
114
131
 
132
+ out <- list(
133
+ casename = casename,
134
+ section = sec_case_names[1],
135
+ case = cname,
136
+ section_slug = slugify(sec_case_names[1], tolower = FALSE),
137
+ case_slug = slugify(cname, tolower = FALSE)
138
+ )
139
+ out$casedir <- file.path(outdir, out$section_slug, out$case_slug)
140
+ if (create) {
141
+ dir.create(out$casedir, showWarnings = FALSE, recursive = TRUE)
142
+ }
143
+ out
144
+ }
115
145
 
116
146
  # Do enrichment analysis for a case using Enrichr
117
147
  # Args:
118
148
  # case: case name
119
149
  # markers: markers dataframe
120
150
  # sig: The expression to filter significant markers
121
- do_enrich <- function(case, markers, sig) {
122
- print(paste(" Running enrichment for case:", case))
123
- parts <- strsplit(case, ":")[[1]]
124
- sec <- parts[1]
125
- case <- paste0(parts[-1], collapse = ":")
126
- casedir <- file.path(outdir, sec, case)
127
- dir.create(casedir, showWarnings = FALSE, recursive = TRUE)
151
+ do_enrich <- function(info, markers, sig) {
152
+ log_info(" Running enrichment for case: {info$casename}")
128
153
  if (nrow(markers) == 0) {
129
- print(paste(" No markers found for case:", case))
130
- cat("No markers found.", file = file.path(casedir, "error.txt"))
131
- return()
154
+ msg <- paste0("No markers found for case: ", info$casename)
155
+ log_warn(" {msg}")
156
+ return(msg)
132
157
  }
133
158
  markers_sig <- markers %>% filter(!!parse_expr(sig))
134
159
  if (nrow(markers_sig) == 0) {
135
- print(paste(" No significant markers found for case:", case))
136
- cat("No significant markers.", file = file.path(casedir, "error.txt"))
137
- return()
160
+ msg <- paste0("No significant markers found for case: ", info$casename)
161
+ log_warn(" {msg}")
162
+ return(msg)
138
163
  }
139
164
  write.table(
140
165
  markers_sig,
141
- file.path(casedir, "markers.txt"),
166
+ file.path(info$casedir, "markers.txt"),
142
167
  sep = "\t",
143
168
  row.names = FALSE,
144
169
  col.names = TRUE,
145
170
  quote = FALSE
146
171
  )
172
+
147
173
  if (nrow(markers_sig) < 5) {
148
- for (db in dbs) {
149
- write.table(
150
- data.frame(Warning = "Not enough significant markers."),
151
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
152
- sep = "\t",
153
- row.names = FALSE,
154
- col.names = TRUE,
155
- quote = FALSE
156
- )
157
- png(
158
- file.path(casedir, paste0("Enrichr-", db, ".png")),
159
- res = 100, height = 200, width = 1000
160
- )
161
- print(
162
- ggplot() +
163
- annotate(
164
- "text",
165
- x = 1,
166
- y = 1,
167
- label = "Not enough significant markers."
168
- ) +
169
- theme_classic()
170
- )
171
- dev.off()
172
- }
173
- } else {
174
- enriched <- enrichr(markers_sig$gene, dbs)
175
- for (db in dbs) {
176
- write.table(
177
- enriched[[db]],
178
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
179
- sep = "\t",
180
- row.names = FALSE,
181
- col.names = TRUE,
182
- quote = FALSE
183
- )
184
- png(
185
- file.path(casedir, paste0("Enrichr-", db, ".png")),
186
- res = 100, height = 1000, width = 1000
187
- )
188
- print(plotEnrich(enriched[[db]], showTerms = 20, title = db))
189
- dev.off()
190
- }
174
+ msg <- paste0("Too few significant markers found for case: ", info$casename)
175
+ log_warn(msg)
176
+ return(msg)
177
+ }
178
+
179
+ enriched <- enrichr(markers_sig$gene, dbs)
180
+ for (db in dbs) {
181
+ write.table(
182
+ enriched[[db]],
183
+ file.path(info$casedir, paste0("Enrichr-", db, ".txt")),
184
+ sep = "\t",
185
+ row.names = FALSE,
186
+ col.names = TRUE,
187
+ quote = FALSE
188
+ )
189
+ png(
190
+ file.path(info$casedir, paste0("Enrichr-", db, ".png")),
191
+ res = 100, height = 600, width = 800
192
+ )
193
+ print(
194
+ plotEnrich(enriched[[db]], showTerms = 20, title = db) +
195
+ theme_prism()
196
+ )
197
+ dev.off()
191
198
  }
192
199
  }
193
200
 
194
201
 
195
202
  do_case <- function(casename) {
196
- cat(paste("- Dealing with case:", casename, "...\n"))
203
+ log_info("- Dealing with case: {casename} ...")
204
+ info <- casename_info(casename, create = TRUE)
197
205
  case <- cases[[casename]]
206
+
198
207
  sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)))
208
+ if (!is.null(case$subset)) {
209
+ sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)), !!parse_expr(case$subset))
210
+ }
199
211
  df <- GetAssayData(sobj, slot = "data", assay = "RNA")
200
212
  genes <- rownames(df)
201
213
  # rows: cells, cols: genes
202
214
  df <- cbind(as.data.frame(scale(Matrix::t(df))), sobj@meta.data[, case$group_by])
203
215
  colnames(df)[ncol(df)] <- "GROUP"
204
216
 
205
- cat(paste(" Running tests for case...\n"))
217
+ log_info(" Running tests for case...")
206
218
  test_result <- mclapply(genes, function(gene) {
207
219
  fm <- as.formula(paste(bQuote(gene), "~ GROUP"))
208
220
  res <- tryCatch({
@@ -230,28 +242,97 @@ do_case <- function(casename) {
230
242
  markers <- do_call(rbind, test_result)
231
243
  markers$p_adjust <- p.adjust(markers$p.value, method = case$p_adjust)
232
244
  markers <- markers %>% arrange(p_adjust)
233
- do_enrich(casename, markers, case$sigmarkers)
234
245
 
235
- print(paste(" Plotting top 10 genes ...\n"))
236
- markers <- markers %>% head(10)
237
- parts <- strsplit(casename, ":")[[1]]
238
- sec <- parts[1]
239
- casename <- paste0(parts[-1], collapse = ":")
240
- plotdir <- file.path(outdir, sec, casename, "plots")
241
- dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
246
+ msg <- do_enrich(info, markers, case$sigmarkers)
247
+ if (is.null(msg)) {
248
+ log_info(" Plotting top 10 genes ...")
249
+ markers <- markers %>% head(10)
250
+ plotdir <- file.path(info$casedir, "expr_plots")
251
+ dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
242
252
 
243
- # Plot the top 10 genes in each group with violin plots
244
- for (gene in markers$gene) {
245
- outfile = file.path(plotdir, paste0(gene, ".png"))
246
- p = ggplot(df, aes_string(x="GROUP", y=bQuote(gene), fill="GROUP")) +
247
- geom_violin(alpha = .8) +
248
- geom_boxplot(width=0.1, fill="white") +
249
- theme_prism() +
250
- ylab(paste0("Expression of ", gene))
251
- png(outfile, res = 100, height = 800, width = 1000)
252
- print(p)
253
- dev.off()
253
+ # Plot the top 10 genes in each group with violin plots
254
+ geneplots = list()
255
+ for (gene in markers$gene) {
256
+ outfile = file.path(plotdir, paste0(slugify(gene, tolower = FALSE), ".png"))
257
+ p = ggplot(df, aes_string(x="GROUP", y=bQuote(gene), fill="GROUP")) +
258
+ geom_violin(alpha = .8) +
259
+ geom_boxplot(width=0.1, fill="white") +
260
+ theme_prism() +
261
+ ylab(paste0("Expression of ", gene))
262
+ png(outfile, res = 100, height = 600, width = 800)
263
+ print(p)
264
+ dev.off()
265
+
266
+ geneplots[[length(geneplots) + 1]] <- list(
267
+ kind = "table_image",
268
+ src = outfile,
269
+ name = gene
270
+ )
271
+ }
272
+
273
+ add_report(
274
+ list(
275
+ kind = "descr",
276
+ content = paste0(
277
+ "Top 100 genes selected by ",
278
+ "<code>", case$method, "</code> across ",
279
+ "<code>", case$group_by, "</code> and filtered by ",
280
+ "<code>", html_escape(case$sigmarkers), "</code>"
281
+ )
282
+ ),
283
+ h1 = ifelse(
284
+ info$section == "DEFAULT",
285
+ info$case,
286
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
287
+ ),
288
+ h2 = ifelse(single_section, "Meta-Markers", info$case),
289
+ h3 = ifelse(single_section, "#", "Meta-Markers")
290
+ )
291
+ add_report(
292
+ list(
293
+ name = "Meta-Markers",
294
+ contents = list(list(
295
+ kind = "table",
296
+ src = file.path(info$casedir, "markers.txt"),
297
+ data = list(nrows = 100)
298
+ ))
299
+ ),
300
+ list(
301
+ name = "Volin Plots (Top 10)",
302
+ ui = "table_of_images:4",
303
+ contents = geneplots
304
+ ),
305
+ h1 = ifelse(
306
+ info$section == "DEFAULT",
307
+ info$case,
308
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
309
+ ),
310
+ h2 = ifelse(single_section, "Meta-Markers", info$case),
311
+ h3 = ifelse(single_section, "#", "Meta-Markers"),
312
+ ui = "tabs"
313
+ )
314
+ add_report(
315
+ list(kind = "enrichr", dir = info$casedir),
316
+ h1 = ifelse(
317
+ info$section == "DEFAULT",
318
+ info$case,
319
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
320
+ ),
321
+ h2 = ifelse(single_section, "Enrichment Analysis", info$case),
322
+ h3 = ifelse(single_section, "#", "Enrichment Analysis")
323
+ )
324
+ } else {
325
+ add_report(
326
+ list(kind = "error", content = msg),
327
+ h1 = ifelse(
328
+ info$section == "DEFAULT",
329
+ info$case,
330
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
331
+ ),
332
+ h2 = ifelse(single_section, "#", info$case)
333
+ )
254
334
  }
255
335
  }
256
336
 
257
337
  sapply(sort(names(cases)), do_case)
338
+ save_report(joboutdir)