biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (62) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +30 -10
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -168
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +127 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
  35. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  36. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  40. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  41. biopipen/scripts/tcr/Attach2Seurat.R +2 -1
  42. biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
  43. biopipen/scripts/tcr/CloneResidency.R +114 -34
  44. biopipen/scripts/tcr/Immunarch-basic.R +18 -4
  45. biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
  46. biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
  47. biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
  48. biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
  49. biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
  50. biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
  51. biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
  52. biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
  53. biopipen/scripts/tcr/Immunarch.R +7 -0
  54. biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
  55. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  56. biopipen/scripts/tcr/TCRClustering.R +8 -9
  57. biopipen/scripts/tcr/TESSA.R +66 -41
  58. biopipen/utils/misc.R +96 -1
  59. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
  60. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
  61. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
  62. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
@@ -5,12 +5,13 @@ library(rlang)
5
5
  library(tidyr)
6
6
  library(dplyr)
7
7
  library(ggplot2)
8
- library(ggsci)
9
8
  library(ggVennDiagram)
10
9
  library(UpSetR)
10
+ library(slugify)
11
11
 
12
12
  srtfile <- {{in.srtobj | r}} # nolint
13
13
  outdir <- {{out.outdir | r}} # nolint
14
+ joboutdir <- {{job.outdir | r}} # nolint
14
15
  mutaters <- {{envs.mutaters | r}} # nolint
15
16
  group_by <- {{envs.group_by | r}} # nolint
16
17
  group_order <- {{envs.group_order | r}} # nolint
@@ -19,6 +20,7 @@ cells_order <- {{envs.cells_order | r}} # nolint
19
20
  cells_orderby <- {{envs.cells_orderby | r}} # nolint
20
21
  cells_n <- {{envs.cells_n | r}} # nolint
21
22
  subset <- {{envs.subset | r}} # nolint
23
+ descr <- {{envs.descr | r}} # nolint
22
24
  devpars <- {{envs.devpars | r}} # nolint
23
25
  each <- {{envs.each | r}} # nolint
24
26
  section <- {{envs.section | r}} # nolint
@@ -27,11 +29,11 @@ cases <- {{envs.cases | r}} # nolint
27
29
 
28
30
  if (is.null(overlap)) { overlap = c() }
29
31
  overlaps <- list()
30
- print("- Loading seurat object ...")
32
+ log_info("- Loading seurat object ...")
31
33
  srtobj <- readRDS(srtfile)
32
34
 
33
35
  if (!is.null(mutaters) && length(mutaters) > 0) {
34
- print("- Mutating seurat object ...")
36
+ log_info("- Mutating seurat object ...")
35
37
  srtobj@meta.data <- srtobj@meta.data %>%
36
38
  mutate(!!!lapply(mutaters, parse_expr))
37
39
  }
@@ -41,6 +43,7 @@ if (!is.factor(all_clusters)) {
41
43
  all_clusters = factor(all_clusters, levels = sort(unique(all_clusters)))
42
44
  }
43
45
 
46
+ single_section <- TRUE
44
47
  expand_cases <- function() {
45
48
  # fill up cases with missing parameters
46
49
  if (is.null(cases) || length(cases) == 0) {
@@ -55,7 +58,8 @@ expand_cases <- function() {
55
58
  devpars = devpars,
56
59
  each = each,
57
60
  section = section,
58
- subset = subset
61
+ subset = subset,
62
+ descr = descr
59
63
  )
60
64
  )
61
65
  } else {
@@ -72,7 +76,8 @@ expand_cases <- function() {
72
76
  devpars = devpars,
73
77
  each = each,
74
78
  section = section,
75
- subset = subset
79
+ subset = subset,
80
+ descr = descr
76
81
  )
77
82
  case$devpars <- list_setdefault(case$devpars, devpars)
78
83
  filled_cases[[name]] <- case
@@ -80,12 +85,15 @@ expand_cases <- function() {
80
85
  }
81
86
 
82
87
  outcases <- list()
88
+ sections <- c()
83
89
  # expand each
84
90
  for (name in names(filled_cases)) {
85
91
  case <- filled_cases[[name]]
86
92
  if (is.null(case$each) || nchar(case$each) == 0) {
93
+ sections <- c(sections, case$section)
87
94
  outcases[[paste0(case$section, ":", name)]] <- case
88
95
  } else {
96
+ sections <- c(sections, case$each)
89
97
  eachs <- srtobj@meta.data %>% pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
90
98
  for (ea in eachs) {
91
99
  by <- make.names(paste0(".", name, "_", case$each,"_", ea))
@@ -101,25 +109,46 @@ expand_cases <- function() {
101
109
  }
102
110
  }
103
111
  }
112
+ single_section <<- length(unique(sections)) == 1
104
113
  outcases
105
114
  }
106
115
 
116
+ casename_info <- function(casename, create = FALSE) {
117
+ sec_case_names <- strsplit(casename, ":")[[1]]
118
+ cname <- paste(sec_case_names[-1], collapse = ":")
119
+
120
+ out <- list(
121
+ casename = casename,
122
+ section = sec_case_names[1],
123
+ case = cname,
124
+ section_slug = slugify(sec_case_names[1], tolower = FALSE),
125
+ case_slug = slugify(cname, tolower = FALSE)
126
+ )
127
+ out$sec_dir <- file.path(outdir, out$section_slug)
128
+ if (create) {
129
+ dir.create(out$sec_dir, showWarnings = FALSE, recursive = TRUE)
130
+ }
131
+ out
132
+ }
133
+
107
134
  do_case <- function(name, case) {
108
- print(paste("- Running for case:", name))
135
+ log_info(paste("- Running for case:", name))
109
136
  if (is.null(case$group_by) || nchar(case$group_by) == 0) {
110
137
  stop(paste0("`group_by` must be specified for case", name))
111
138
  }
112
139
  if (is.null(case$cells_by) || nchar(case$cells_by) == 0) {
113
140
  stop(paste0("`cells_by` must be specified for case", name))
114
141
  }
142
+ info <- casename_info(name, create = TRUE)
115
143
  cells_by <- trimws(strsplit(case$cells_by, ",")[[1]])
116
144
 
117
145
  sec_case_names <- strsplit(name, ":")[[1]]
118
146
  sec_dir <- file.path(outdir, sec_case_names[1])
119
147
  casename <- paste(sec_case_names[-1], collapse = ":")
120
148
  dir.create(sec_dir, showWarnings = FALSE, recursive = TRUE)
121
- outfile <- file.path(sec_dir, paste0("case-", casename, ".png"))
122
- txtfile <- file.path(sec_dir, paste0("case-", casename, ".txt"))
149
+
150
+ outfile <- file.path(info$sec_dir, paste0("case-", info$case_slug, ".png"))
151
+ txtfile <- file.path(info$sec_dir, paste0("case-", info$case_slug, ".txt"))
123
152
 
124
153
  # subset the seurat object
125
154
  meta <- srtobj@meta.data
@@ -148,11 +177,11 @@ do_case <- function(name, case) {
148
177
  meta <- meta1
149
178
  }
150
179
 
151
- if (sec_case_names[1] %in% overlap) {
152
- if (is.null(overlaps[[sec_case_names[1]]])) {
153
- overlaps[[sec_case_names[1]]] <<- list()
180
+ if (info$section %in% overlap) {
181
+ if (is.null(overlaps[[info$section]])) {
182
+ overlaps[[info$section]] <<- list()
154
183
  }
155
- overlaps[[sec_case_names[1]]][[casename]] <<- meta %>% pull(case$cells_by) %>% unique()
184
+ overlaps[[info$section]][[info$case]] <<- meta %>% pull(case$cells_by) %>% unique()
156
185
  }
157
186
 
158
187
  # add sizes
@@ -197,7 +226,20 @@ do_case <- function(name, case) {
197
226
  }
198
227
 
199
228
  write.table(
200
- meta,
229
+ meta %>% select(
230
+ !!sym(cells_by),
231
+ !!sym(case$group_by),
232
+ seurat_clusters,
233
+ CloneSize,
234
+ CloneGroupSize,
235
+ CloneClusterSize,
236
+ CloneGroupClusterSize,
237
+ ) %>% distinct(
238
+ !!sym(cells_by),
239
+ !!sym(case$group_by),
240
+ seurat_clusters,
241
+ .keep_all = TRUE
242
+ ),
201
243
  txtfile,
202
244
  sep = "\t",
203
245
  row.names = TRUE,
@@ -226,7 +268,7 @@ do_case <- function(name, case) {
226
268
  geom_col(width=.01, position="fill", color = "#888888") +
227
269
  geom_bar(stat = "identity", position = position_fill(reverse = TRUE)) +
228
270
  coord_polar("y", start = 0) +
229
- scale_fill_ucscgb(name = "Cluster", alpha = 1, limits = levels(all_clusters)) +
271
+ scale_fill_biopipen(name = "Cluster", limits = levels(all_clusters)) +
230
272
  theme_void() +
231
273
  theme(
232
274
  plot.margin = unit(c(1,1,1,1), "cm"),
@@ -238,16 +280,63 @@ do_case <- function(name, case) {
238
280
  png(outfile, res = devpars$res, width = devpars$width, height = devpars$height)
239
281
  print(p)
240
282
  dev.off()
283
+
284
+ add_report(
285
+ list(
286
+ kind = "descr",
287
+ content = ifelse(
288
+ is.null(case$descr) || nchar(case$descr) == 0,
289
+ paste0(
290
+ "Distribution for cells in ",
291
+ "<code>", html_escape(cells_by), "</code>",
292
+ " for ",
293
+ "<code>", html_escape(case$group_by), "</code>"
294
+ ),
295
+ case$descr
296
+ )
297
+ ),
298
+ h1 = ifelse(
299
+ info$section == "DEFAULT",
300
+ info$case,
301
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
302
+ ),
303
+ h2 = ifelse(single_section, "#", info$case)
304
+ )
305
+
306
+ add_report(
307
+ list(
308
+ name = "Distribution Plot",
309
+ contents = list(list(
310
+ kind = "image",
311
+ src = outfile
312
+ ))
313
+ ),
314
+ list(
315
+ name = "Distribution Table",
316
+ contents = list(list(
317
+ kind = "table",
318
+ data = list(nrows = 100),
319
+ src = txtfile
320
+ ))
321
+ ),
322
+ h1 = ifelse(
323
+ info$section == "DEFAULT",
324
+ info$case,
325
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
326
+ ),
327
+ h2 = ifelse(single_section, "#", info$case),
328
+ ui = "tabs"
329
+ )
241
330
  }
242
331
 
243
332
  do_overlap <- function(section) {
244
- print(paste("- Running overlaps for section:", section))
333
+ log_info(paste("- Running overlaps for section:", section))
245
334
  overlap_cases <- overlaps[[section]]
246
335
  if (length(overlap_cases) < 2) {
247
336
  stop(paste0("Not enough cases for overlap for section: ", section))
248
337
  }
249
338
 
250
- sec_dir <- file.path(outdir, section)
339
+ sec_dir <- file.path(outdir, slugify(section, tolower = FALSE))
251
340
  venn_plot <- file.path(sec_dir, "venn.png")
252
341
  venn_p <- ggVennDiagram(overlap_cases, label_percent_digit = 1) +
253
342
  scale_fill_distiller(palette = "Reds", direction = 1) +
@@ -261,8 +350,30 @@ do_overlap <- function(section) {
261
350
  png(upset_plot, res = 100, width = 800, height = 600)
262
351
  print(upset_p)
263
352
  dev.off()
353
+
354
+ add_report(
355
+ list(
356
+ name = "Venn Plot",
357
+ contents = list(list(
358
+ kind = "image",
359
+ src = venn_plot
360
+ ))
361
+ ),
362
+ list(
363
+ name = "UpSet Plot",
364
+ contents = list(list(
365
+ kind = "image",
366
+ src = upset_plot
367
+ ))
368
+ ),
369
+ h1 = "Overlapping Groups",
370
+ h2 = section,
371
+ ui = "tabs"
372
+ )
264
373
  }
265
374
 
266
375
  cases <- expand_cases()
267
376
  sapply(sort(names(cases)), function(name) do_case(name, cases[[name]]))
268
377
  sapply(sort(names(overlaps)), do_overlap)
378
+
379
+ save_report(joboutdir)