biopipen-0.21.2-py3-none-any.whl → biopipen-0.22.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. (The original page links to more details.)

Files changed (62)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +30 -10
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -168
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +127 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
  35. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  36. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  40. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  41. biopipen/scripts/tcr/Attach2Seurat.R +2 -1
  42. biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
  43. biopipen/scripts/tcr/CloneResidency.R +114 -34
  44. biopipen/scripts/tcr/Immunarch-basic.R +18 -4
  45. biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
  46. biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
  47. biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
  48. biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
  49. biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
  50. biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
  51. biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
  52. biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
  53. biopipen/scripts/tcr/Immunarch.R +7 -0
  54. biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
  55. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  56. biopipen/scripts/tcr/TCRClustering.R +8 -9
  57. biopipen/scripts/tcr/TESSA.R +66 -41
  58. biopipen/utils/misc.R +96 -1
  59. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
  60. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
  61. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
  62. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
@@ -14,12 +14,14 @@ library(future)
14
14
  library(tidyseurat)
15
15
  library(ggVennDiagram)
16
16
  library(UpSetR)
17
+ library(slugify)
17
18
 
18
19
  log_info("Setting up EnrichR ...")
19
20
  setEnrichrSite("Enrichr")
20
21
 
21
22
  srtfile <- {{ in.srtobj | quote }}
22
23
  outdir <- {{ out.outdir | quote }}
24
+ joboutdir <- {{ job.outdir | quote }}
23
25
  ncores <- {{ envs.ncores | int }}
24
26
  mutaters <- {{ envs.mutaters | r }}
25
27
  ident.1 <- {{ envs["ident-1"] | r }}
@@ -130,13 +132,17 @@ if (is.null(cases) || length(cases) == 0) {
130
132
  # )
131
133
  # Otherwise if section is specified, the case name will be changed to `section:case`
132
134
 
135
+ sections <- c()
136
+
133
137
  newcases <- list()
134
138
  for (name in names(cases)) {
135
139
  case <- cases[[name]]
136
140
  if (is.null(case$each) && !is.null(case$ident.1)) {
141
+ sections <- c(sections, case$section)
137
142
  newcases[[paste0(case$section, ":", name)]] <- case
138
143
  } else if (is.null(case$each)) {
139
144
  # is.null(case$ident.1)
145
+ sections <- c(sections, name)
140
146
  idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
141
147
  for (ident in idents) {
142
148
  newcases[[paste0(name, ":", ident)]] <- case
@@ -156,16 +162,18 @@ for (name in names(cases)) {
156
162
  if (is.null(case$ident.1)) {
157
163
  idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
158
164
  for (ident in idents) {
159
- kname <- if (name == "DEFAULT") "" else paste0("-", name)
165
+ kname <- if (name == "DEFAULT") "" else paste0(" - ", name)
166
+ sections <- c(sections, paste0(each, kname))
160
167
  key <- paste0(each, kname, ":", ident)
161
168
  if (case$prefix_each) {
162
- key <- paste0(case$each, "-", key)
169
+ key <- paste0(case$each, " - ", key)
163
170
  }
164
171
  newcases[[key]] <- case
165
172
  newcases[[key]]$ident.1 <- ident
166
173
  newcases[[key]]$group.by <- by
167
174
  }
168
175
  } else {
176
+ sections <- c(sections, case$each)
169
177
  key <- paste0(case$each, ":", each)
170
178
  if (name != "DEFAULT") {
171
179
  key <- paste0(key, " - ", name)
@@ -176,7 +184,27 @@ for (name in names(cases)) {
176
184
  }
177
185
  }
178
186
  }
187
+
179
188
  cases <- newcases
189
+ single_section <- length(unique(sections)) == 1
190
+
191
+ casename_info <- function(casename, create = FALSE) {
192
+ sec_case_names <- strsplit(casename, ":")[[1]]
193
+ cname <- paste(sec_case_names[-1], collapse = ":")
194
+
195
+ out <- list(
196
+ casename = casename,
197
+ section = sec_case_names[1],
198
+ case = cname,
199
+ section_slug = slugify(sec_case_names[1], tolower = FALSE),
200
+ case_slug = slugify(cname, tolower = FALSE)
201
+ )
202
+ out$casedir <- file.path(outdir, out$section_slug, out$case_slug)
203
+ if (create) {
204
+ dir.create(out$casedir, showWarnings = FALSE, recursive = TRUE)
205
+ }
206
+ out
207
+ }
180
208
 
181
209
  plot_volcano = function(markers, volfile, sig, volgenes) {
182
210
  # markers
@@ -221,7 +249,7 @@ plot_volcano = function(markers, volfile, sig, volgenes) {
221
249
  y = "-log10 Adjusted P-value"
222
250
  )
223
251
 
224
- png(volfile, res = 100, height = 800, width = 900)
252
+ png(volfile, res = 100, height = 1200, width = 900)
225
253
  print(p_vol)
226
254
  dev.off()
227
255
  }
@@ -231,85 +259,207 @@ plot_volcano = function(markers, volfile, sig, volgenes) {
231
259
  # case: case name
232
260
  # markers: markers dataframe
233
261
  # sig: The expression to filter significant markers
234
- do_enrich <- function(case, markers, sig, volgenes) {
235
- log_info("- Running enrichment for case: {case}")
236
- parts <- strsplit(case, ":")[[1]]
237
- sec <- parts[1]
238
- case <- paste0(parts[-1], collapse = ":")
239
- casedir <- file.path(outdir, sec, case)
240
- dir.create(casedir, showWarnings = FALSE, recursive = TRUE)
262
+ do_enrich <- function(info, markers, sig, volgenes) {
263
+ log_info("- Running enrichment for case: {info$casename}")
264
+
241
265
  if (nrow(markers) == 0) {
242
- log_warn(" No markers found for case: {case}")
243
- cat("No markers found.", file = file.path(casedir, "error.txt"))
244
- return()
266
+ log_warn(" No markers found for case: {info$casename}")
267
+ return(NULL)
245
268
  }
246
- plot_volcano(markers, file.path(casedir, "volcano.png"), sig, volgenes)
269
+
270
+ plot_volcano(markers, file.path(info$casedir, "volcano.png"), sig, volgenes)
271
+
247
272
  markers_sig <- markers %>% filter(!!parse_expr(sig))
248
273
  if (nrow(markers_sig) == 0) {
249
- log_warn(" No significant markers found for case: {case}")
250
- cat("No significant markers.", file = file.path(casedir, "error.txt"))
251
- return()
274
+ log_warn(" No significant markers found for case: {info$casename}")
275
+ return(NULL)
252
276
  }
277
+
253
278
  write.table(
254
279
  markers_sig,
255
- file.path(casedir, "markers.txt"),
280
+ file.path(info$casedir, "markers.txt"),
256
281
  sep = "\t",
257
282
  row.names = FALSE,
258
283
  col.names = TRUE,
259
284
  quote = FALSE
260
285
  )
261
286
  if (nrow(markers_sig) < 5) {
287
+ log_warn(" Too few significant markers found for case: {info$casename}")
288
+ return(NULL)
289
+ } else {
290
+ enriched <- enrichr(unique(markers_sig$gene), dbs)
262
291
  for (db in dbs) {
263
292
  write.table(
264
- data.frame(Warning = "Not enough significant markers."),
265
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
293
+ enriched[[db]],
294
+ file.path(info$casedir, paste0("Enrichr-", db, ".txt")),
266
295
  sep = "\t",
267
296
  row.names = FALSE,
268
297
  col.names = TRUE,
269
298
  quote = FALSE
270
299
  )
271
300
  png(
272
- file.path(casedir, paste0("Enrichr-", db, ".png")),
273
- res = 100, height = 200, width = 1000
301
+ file.path(info$casedir, paste0("Enrichr-", db, ".png")),
302
+ res = 100, height = 1000, width = 1000
274
303
  )
275
304
  print(
276
- ggplot() +
277
- annotate(
278
- "text",
279
- x = 1,
280
- y = 1,
281
- label = "Not enough significant markers."
282
- ) +
283
- theme_classic()
305
+ plotEnrich(enriched[[db]], showTerms = 20, title = db) +
306
+ theme_prism()
284
307
  )
285
308
  dev.off()
286
309
  }
310
+ }
311
+ unique(markers_sig$gene)
312
+ }
313
+
314
+
315
+ do_dotplot <- function(info, siggenes, case, args) {
316
+ dotplot_devpars <- case$dotplot$devpars
317
+ if (is.null(args$ident.2)) {
318
+ case$dotplot$object <- args$object
319
+ case$dotplot$object@meta.data <- case$dotplot$object@meta.data %>%
320
+ mutate(
321
+ !!sym(args$group.by) := if_else(
322
+ !!sym(args$group.by) == args$ident.1,
323
+ args$ident.1,
324
+ ".Other"
325
+ ),
326
+ !!sym(args$group.by) := factor(
327
+ !!sym(args$group.by),
328
+ levels = c(args$ident.1, ".Other")
329
+ )
330
+ )
287
331
  } else {
288
- enriched <- enrichr(markers_sig$gene, dbs)
289
- for (db in dbs) {
290
- write.table(
291
- enriched[[db]],
292
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
293
- sep = "\t",
294
- row.names = FALSE,
295
- col.names = TRUE,
296
- quote = FALSE
332
+ case$dotplot$object <- args$object %>%
333
+ filter(!!sym(args$group.by) %in% c(args$ident.1, args$ident.2)) %>%
334
+ mutate(!!sym(args$group.by) := factor(
335
+ !!sym(args$group.by),
336
+ levels = c(args$ident.1, args$ident.2)
337
+ ))
338
+ }
339
+ case$dotplot$devpars <- NULL
340
+ case$dotplot$features <- siggenes
341
+ case$dotplot$group.by <- args$group.by
342
+ case$dotplot$assay <- case$assay
343
+ dotplot_width = ifelse(
344
+ is.null(dotplot_devpars$width),
345
+ if (length(siggenes) <= 20) length(siggenes) * 60 else length(siggenes) * 30,
346
+ dotplot_devpars$width
347
+ )
348
+ dotplot_height = ifelse(is.null(dotplot_devpars$height), 600, dotplot_devpars$height)
349
+ dotplot_res = ifelse(is.null(dotplot_devpars$res), 100, dotplot_devpars$res)
350
+ dotplot_file <- file.path(info$casedir, "dotplot.png")
351
+ png(dotplot_file, res = dotplot_res, width = dotplot_height, height = dotplot_width)
352
+ # rotate x axis labels
353
+ print(
354
+ do_call(DotPlot, case$dotplot) +
355
+ theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
356
+ coord_flip()
357
+ )
358
+ dev.off()
359
+ }
360
+
361
+
362
+ add_case_report <- function(info, sigmarkers, siggenes) {
363
+ h1 = ifelse(
364
+ info$section == "DEFAULT",
365
+ info$case,
366
+ ifelse(
367
+ single_section,
368
+ paste0(
369
+ ifelse(info$section == "seurat_clusters", "Cluster", info$section),
370
+ " - ",
371
+ info$case
372
+ ),
373
+ info$section
374
+ )
375
+ )
376
+ h2 = ifelse(
377
+ info$section == "DEFAULT",
378
+ "#",
379
+ ifelse(single_section, "#", info$case)
380
+ )
381
+ add_report(
382
+ list(
383
+ title = "Significant Markers",
384
+ ui = "flat",
385
+ contents = list(
386
+ list(
387
+ kind = "descr",
388
+ content = paste0(
389
+ "The markers are found using Seurat's FindMarkers function, ",
390
+ "and filtered by: ",
391
+ html_escape(sigmarkers)
392
+ )
393
+ ),
394
+ list(
395
+ kind = "table",
396
+ data = list(nrows = 100),
397
+ src = file.path(info$casedir, "markers.txt")
398
+ )
297
399
  )
298
- png(
299
- file.path(casedir, paste0("Enrichr-", db, ".png")),
300
- res = 100, height = 1000, width = 1000
400
+ ),
401
+ list(
402
+ title = "Volcano Plot",
403
+ ui = "flat",
404
+ contents = list(
405
+ list(
406
+ kind = "img",
407
+ src = file.path(info$casedir, "volcano.png")
408
+ )
301
409
  )
302
- print(plotEnrich(enriched[[db]], showTerms = 20, title = db))
303
- dev.off()
304
- }
410
+ ),
411
+ list(
412
+ title = "Dot Plot",
413
+ ui = "flat",
414
+ contents = list(
415
+ list(
416
+ kind = "img",
417
+ src = file.path(info$casedir, "dotplot.png")
418
+ )
419
+ )
420
+ ),
421
+ h1 = h1,
422
+ h2 = ifelse(h2 == "#", "Markers", h2),
423
+ h3 = ifelse(h2 == "#", "#", "Markers"),
424
+ ui = "tabs"
425
+ )
426
+ if (is.null(siggenes)) {
427
+ add_report(
428
+ list(
429
+ kind = "error",
430
+ content = "No enough significant markers found for enrichment analysis"
431
+ ),
432
+ h1 = h1,
433
+ h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
434
+ h3 = ifelse(h2 == "#", "#", "Enrichment Analysis"),
435
+ ui = "flat"
436
+ )
437
+ } else {
438
+ add_report(
439
+ list(
440
+ kind = "descr",
441
+ content = paste0(
442
+ "The enrichment analysis is done using Enrichr. ",
443
+ "The significant markers are used as input. "
444
+ )
445
+ ),
446
+ list(
447
+ kind = "enrichr",
448
+ dir = info$casedir
449
+ ),
450
+ h1 = h1,
451
+ h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
452
+ h3 = ifelse(h2 == "#", "#", "Enrichment Analysis"),
453
+ ui = "flat"
454
+ )
305
455
  }
306
456
  }
307
457
 
308
458
 
309
459
  do_case <- function(casename) {
310
460
  log_info("Dealing with case: {casename}...")
311
- sec_case_names <- strsplit(casename, ":")[[1]]
312
- cname <- paste(sec_case_names[-1], collapse = ":")
461
+
462
+ info <- casename_info(casename, create = TRUE)
313
463
  case <- cases[[casename]]
314
464
  # ident1
315
465
  # ident2
@@ -343,7 +493,7 @@ do_case <- function(casename) {
343
493
  markers <- tryCatch({
344
494
  do_call(FindMarkers, args) %>% rownames_to_column("gene")
345
495
  }, error = function(e) {
346
- warning(e$message, immediate. = TRUE)
496
+ log_warn(e$message)
347
497
  data.frame(
348
498
  gene = character(),
349
499
  p_val = numeric(),
@@ -353,65 +503,21 @@ do_case <- function(casename) {
353
503
  p_val_adj=numeric()
354
504
  )
355
505
  })
356
- do_enrich(casename, markers, case$sigmarkers, case$volcano_genes)
357
506
 
358
- siggenes <- markers %>%
359
- filter(!!parse_expr(case$sigmarkers)) %>%
360
- pull(gene) %>%
361
- unique()
507
+ siggenes <- do_enrich(info, markers, case$sigmarkers, case$volcano_genes)
362
508
 
363
509
  if (length(siggenes) > 0) {
364
- dotplot_devpars <- case$dotplot$devpars
365
- if (is.null(args$ident.2)) {
366
- case$dotplot$object <- args$object
367
- case$dotplot$object@meta.data <- case$dotplot$object@meta.data %>%
368
- mutate(
369
- !!sym(args$group.by) := if_else(
370
- !!sym(args$group.by) == args$ident.1,
371
- args$ident.1,
372
- ".Other"
373
- ),
374
- !!sym(args$group.by) := factor(
375
- !!sym(args$group.by),
376
- levels = c(args$ident.1, ".Other")
377
- )
378
- )
379
- } else {
380
- case$dotplot$object <- args$object %>%
381
- filter(!!sym(args$group.by) %in% c(args$ident.1, args$ident.2)) %>%
382
- mutate(!!sym(args$group.by) := factor(
383
- !!sym(args$group.by),
384
- levels = c(args$ident.1, args$ident.2)
385
- ))
386
- }
387
- case$dotplot$devpars <- NULL
388
- case$dotplot$features <- siggenes
389
- case$dotplot$group.by <- args$group.by
390
- case$dotplot$assay <- case$assay
391
- dotplot_width = ifelse(
392
- is.null(dotplot_devpars$width),
393
- if (length(siggenes) <= 20) length(siggenes) * 60 else length(siggenes) * 30,
394
- dotplot_devpars$width
395
- )
396
- dotplot_height = ifelse(is.null(dotplot_devpars$height), 600, dotplot_devpars$height)
397
- dotplot_res = ifelse(is.null(dotplot_devpars$res), 100, dotplot_devpars$res)
398
- dotplot_file <- file.path(outdir, sec_case_names[1], cname, "dotplot.png")
399
- png(dotplot_file, res = dotplot_res, width = dotplot_height, height = dotplot_width)
400
- # rotate x axis labels
401
- print(
402
- do_call(DotPlot, case$dotplot) +
403
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
404
- coord_flip()
405
- )
406
- dev.off()
510
+ do_dotplot(info, siggenes, case, args)
407
511
  }
408
512
 
409
- if (sec_case_names[1] %in% overlap) {
410
- if (is.null(overlaps[[sec_case_names[1]]])) {
411
- overlaps[[sec_case_names[1]]] <<- list()
513
+ if (info$section %in% overlap) {
514
+ if (is.null(overlaps[[info$section]])) {
515
+ overlaps[[info$section]] <<- list()
412
516
  }
413
- overlaps[[sec_case_names[1]]][[cname]] <<- siggenes
517
+ overlaps[[info$section]][[info$case]] <<- siggenes
414
518
  }
519
+
520
+ add_case_report(info, case$sigmarkers, siggenes)
415
521
  }
416
522
 
417
523
  do_overlap <- function(section) {
@@ -461,7 +567,46 @@ do_overlap <- function(section) {
461
567
  png(upset_plot, res = 100, width = 800, height = 600)
462
568
  print(upset_p)
463
569
  dev.off()
570
+
571
+ add_report(
572
+ list(
573
+ title = "Venn Diagram",
574
+ ui = "flat",
575
+ contents = list(
576
+ list(
577
+ kind = "img",
578
+ src = file.path(ov_dir, "venn.png")
579
+ )
580
+ )
581
+ ),
582
+ list(
583
+ title = "UpSet Plot",
584
+ ui = "flat",
585
+ contents = list(
586
+ list(
587
+ kind = "img",
588
+ src = file.path(ov_dir, "upset.png")
589
+ )
590
+ )
591
+ ),
592
+ list(
593
+ title = "Marker Table",
594
+ ui = "flat",
595
+ contents = list(
596
+ list(
597
+ kind = "table",
598
+ data = list(nrows = 100),
599
+ src = file.path(ov_dir, "markers.txt")
600
+ )
601
+ )
602
+ ),
603
+ h1 = "Overlapping Markers",
604
+ h2 = section,
605
+ ui = "tabs"
606
+ )
464
607
  }
465
608
 
466
609
  sapply(sort(names(cases)), do_case)
467
610
  sapply(sort(names(overlaps)), do_overlap)
611
+
612
+ save_report(joboutdir)