biopipen 0.22.0__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -143,11 +143,13 @@ for (name in names(cases)) {
143
143
  } else if (is.null(case$each)) {
144
144
  # is.null(case$ident.1)
145
145
  sections <- c(sections, name)
146
- idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
147
- for (ident in idents) {
148
- newcases[[paste0(name, ":", ident)]] <- case
149
- newcases[[paste0(name, ":", ident)]]$ident.1 <- ident
150
- }
146
+ newcases[[name]] <- case
147
+ newcases[[name]]$findall <- TRUE
148
+ # idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
149
+ # for (ident in idents) {
150
+ # newcases[[paste0(name, ":", ident)]] <- case
151
+ # newcases[[paste0(name, ":", ident)]]$ident.1 <- ident
152
+ # }
151
153
  } else {
152
154
  eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
153
155
  for (each in eachs) {
@@ -160,18 +162,22 @@ for (name in names(cases)) {
160
162
  )
161
163
  )
162
164
  if (is.null(case$ident.1)) {
163
- idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
164
- for (ident in idents) {
165
- kname <- if (name == "DEFAULT") "" else paste0(" - ", name)
166
- sections <- c(sections, paste0(each, kname))
167
- key <- paste0(each, kname, ":", ident)
168
- if (case$prefix_each) {
169
- key <- paste0(case$each, " - ", key)
170
- }
171
- newcases[[key]] <- case
172
- newcases[[key]]$ident.1 <- ident
173
- newcases[[key]]$group.by <- by
174
- }
165
+ kname <- if (name == "DEFAULT") "" else paste0(" - ", name)
166
+ sections <- c(sections, paste0(each, kname))
167
+ key <- paste0(each, kname)
168
+ newcases[[key]] <- case
169
+ newcases[[key]]$group.by <- by
170
+ newcases[[key]]$findall <- TRUE
171
+ # idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
172
+ # for (ident in idents) {
173
+ # key <- paste0(each, kname, ":", ident)
174
+ # if (case$prefix_each) {
175
+ # key <- paste0(case$each, " - ", key)
176
+ # }
177
+ # newcases[[key]] <- case
178
+ # newcases[[key]]$ident.1 <- ident
179
+ # newcases[[key]]$group.by <- by
180
+ # }
175
181
  } else {
176
182
  sections <- c(sections, case$each)
177
183
  key <- paste0(case$each, ":", each)
@@ -312,11 +318,11 @@ do_enrich <- function(info, markers, sig, volgenes) {
312
318
  }
313
319
 
314
320
 
315
- do_dotplot <- function(info, siggenes, case, args) {
316
- dotplot_devpars <- case$dotplot$devpars
321
+ do_dotplot <- function(info, siggenes, dotplot, args) {
322
+ dotplot_devpars <- dotplot$devpars
317
323
  if (is.null(args$ident.2)) {
318
- case$dotplot$object <- args$object
319
- case$dotplot$object@meta.data <- case$dotplot$object@meta.data %>%
324
+ dotplot$object <- args$object
325
+ dotplot$object@meta.data <- dotplot$object@meta.data %>%
320
326
  mutate(
321
327
  !!sym(args$group.by) := if_else(
322
328
  !!sym(args$group.by) == args$ident.1,
@@ -329,17 +335,16 @@ do_dotplot <- function(info, siggenes, case, args) {
329
335
  )
330
336
  )
331
337
  } else {
332
- case$dotplot$object <- args$object %>%
338
+ dotplot$object <- args$object %>%
333
339
  filter(!!sym(args$group.by) %in% c(args$ident.1, args$ident.2)) %>%
334
340
  mutate(!!sym(args$group.by) := factor(
335
341
  !!sym(args$group.by),
336
342
  levels = c(args$ident.1, args$ident.2)
337
343
  ))
338
344
  }
339
- case$dotplot$devpars <- NULL
340
- case$dotplot$features <- siggenes
341
- case$dotplot$group.by <- args$group.by
342
- case$dotplot$assay <- case$assay
345
+ dotplot$devpars <- NULL
346
+ dotplot$features <- siggenes
347
+ dotplot$group.by <- args$group.by
343
348
  dotplot_width = ifelse(
344
349
  is.null(dotplot_devpars$width),
345
350
  if (length(siggenes) <= 20) length(siggenes) * 60 else length(siggenes) * 30,
@@ -351,7 +356,7 @@ do_dotplot <- function(info, siggenes, case, args) {
351
356
  png(dotplot_file, res = dotplot_res, width = dotplot_height, height = dotplot_width)
352
357
  # rotate x axis labels
353
358
  print(
354
- do_call(DotPlot, case$dotplot) +
359
+ do_call(DotPlot, dotplot) +
355
360
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
356
361
  coord_flip()
357
362
  )
@@ -456,9 +461,79 @@ add_case_report <- function(info, sigmarkers, siggenes) {
456
461
  }
457
462
 
458
463
 
464
# Run one "find all" case: call Seurat's FindAllMarkers once for every
# identity in `case$group.by`, then do enrichment, dot plot, report and
# overlap bookkeeping for each identity that comes back in the marker table.
#
# casename: name of the case to run; used to look the case up in `cases`.
#
# Reads `cases`, `srtobj`, `assay` and `overlap` from the enclosing script
# scope and updates the script-level `overlaps` list via `<<-`.
# Called for its side effects only.
do_case_findall <- function(casename) {
    log_info("- Using FindAllMarkers for case: {casename}...")

    case <- cases[[casename]]
    args <- case$rest
    args$group.by <- case$group.by

    # Permissive defaults, applied only when the case does not set them.
    # NOTE: the first key was previously misspelled `locfc.threshold`, so the
    # default of 0 never took effect and a bogus argument was forwarded.
    permissive_defaults <- list(
        logfc.threshold = 0,
        min.cells.group = 1,
        min.cells.feature = 1,
        min.pct = 0
    )
    for (argname in names(permissive_defaults)) {
        if (is.null(args[[argname]])) {
            args[[argname]] <- permissive_defaults[[argname]]
        }
    }

    # Cells with NA in the grouping column are always dropped; the optional
    # `subset` expression is combined with that condition in one predicate.
    # (A nested `filter()` call inside the predicate used to raise an error
    # whenever `case$subset` was given.)
    if (!is.null(case$subset)) {
        args$object <- srtobj %>%
            filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
    } else {
        args$object <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
    }
    Idents(args$object) <- case$group.by

    markers <- tryCatch({
        do_call(FindAllMarkers, args)
        # gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
    }, error = function(e) {
        # Log the failure and fall back to an empty table with the same
        # columns, so the per-ident loop below simply has nothing to do.
        log_warn(conditionMessage(e))
        data.frame(
            gene = character(),
            p_val = numeric(),
            avg_log2FC = numeric(),
            pct.1 = numeric(),
            pct.2 = numeric(),
            p_val_adj = numeric(),
            cluster = character()
        )
    })

    # Dot plots use the script-level assay unless the case overrides it.
    if (is.null(case$dotplot$assay)) {
        case$dotplot$assay <- assay
    }

    idents <- unique(markers$cluster)
    for (ident in idents) {
        log_info("- Dealing with ident: {ident}...")
        info <- casename_info(paste0(casename, ":", ident), create = TRUE)
        siggenes <- do_enrich(
            info,
            markers %>% filter(cluster == ident),
            case$sigmarkers,
            case$volcano_genes
        )

        if (length(siggenes) > 0) {
            # do_dotplot reads args$ident.1 to pick the group to highlight.
            args$ident.1 <- as.character(ident)
            do_dotplot(info, siggenes, case$dotplot, args)
        }
        add_case_report(info, case$sigmarkers, siggenes)

        # Record the significant genes for sections that request an overlap
        # analysis.
        if (info$section %in% overlap) {
            if (is.null(overlaps[[info$section]])) {
                overlaps[[info$section]] <<- list()
            }
            overlaps[[info$section]][[info$case]] <<- siggenes
        }
    }
}
527
+
528
+
459
529
  do_case <- function(casename) {
460
530
  log_info("Dealing with case: {casename}...")
461
531
 
532
+ if (isTRUE(cases[[casename]]$findall)) {
533
+ do_case_findall(casename)
534
+ return()
535
+ }
536
+
462
537
  info <- casename_info(casename, create = TRUE)
463
538
  case <- cases[[casename]]
464
539
  # ident1
@@ -507,7 +582,10 @@ do_case <- function(casename) {
507
582
  siggenes <- do_enrich(info, markers, case$sigmarkers, case$volcano_genes)
508
583
 
509
584
  if (length(siggenes) > 0) {
510
- do_dotplot(info, siggenes, case, args)
585
+ if (is.null(case$dotplot$assay)) {
586
+ case$dotplot$assay <- assay
587
+ }
588
+ do_dotplot(info, siggenes, case$dotplot, args)
511
589
  }
512
590
 
513
591
  if (info$section %in% overlap) {
@@ -173,8 +173,8 @@ do_one_features = function(name) {
173
173
  rownames_to_column("Feature") %>%
174
174
  select(Feature, everything())
175
175
 
176
- exprfile = paste0(slugify(name), ".txt")
177
- write.table(expr, file.path(odir, exprfile), sep="\t", quote=FALSE, row.names=FALSE)
176
+ exprfile = file.path(odir, paste0(slugify(name), ".txt"))
177
+ write.table(expr, exprfile, sep="\t", quote=FALSE, row.names=FALSE)
178
178
 
179
179
  add_report(
180
180
  list(
@@ -1,4 +1,6 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
1
2
  source("{{biopipen_dir}}/utils/mutate_helpers.R")
3
+
2
4
  library(rlang)
3
5
  library(tibble)
4
6
  library(dplyr)
@@ -14,7 +16,17 @@ metadata = srt@meta.data
14
16
 
15
17
  if (!is.null(metafile)) {
16
18
  mdata = read.table(metafile, header=TRUE, row.names=1, sep="\t", check.names=FALSE)
17
- metadata = cbind(metadata, mdata[rownames(metadata),,drop=FALSE])
19
+ ov_cols = intersect(colnames(metadata), colnames(mdata))
20
+ if (length(ov_cols) > 0) {
21
+ log_warn(paste0(
22
+ "The following columns are already present in Seurat object and will be ignored: ",
23
+ paste(ov_cols, collapse=', ')
24
+ ))
25
+ }
26
+ metadata = cbind(
27
+ metadata,
28
+ mdata[rownames(metadata), setdiff(colnames(mdata), ov_cols), drop=FALSE]
29
+ )
18
30
  }
19
31
 
20
32
  expr = list()
@@ -11,6 +11,7 @@ immfile = {{in.immfile | r}}
11
11
  sobjfile = {{in.sobjfile | r}}
12
12
  outfile = {{out.outfile | r}}
13
13
  metacols = {{envs.metacols | r}}
14
+ prefix = {{envs.prefix | r}}
14
15
 
15
16
  immdata = readRDS(immfile)
16
17
  sobj = readRDS(sobjfile)
@@ -31,7 +32,7 @@ metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
31
32
 
32
33
  cldata %>%
33
34
  separate_rows(Barcode, sep=";") %>%
34
- mutate(Barcode = glue("{{envs.prefix}}{Barcode}"))
35
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
35
36
 
36
37
  }))
37
38
 
@@ -193,7 +193,7 @@ merge_data = function(sam) {
193
193
  if (!is.null(prefix) && nchar(prefix) > 0) {
194
194
  # Replace the placeholder like {Sample} with the data in other columns
195
195
  # in the same row
196
- sdata = sdata %>% mutate(.prefix_len = nchar(glue("{{envs.prefix}}")))
196
+ sdata = sdata %>% mutate(.prefix_len = nchar(glue(prefix)))
197
197
  # Remove the prefix in the rownames of sdata
198
198
  rownames(sdata) = substring(rownames(sdata), sdata$.prefix_len + 1)
199
199
  sdata = sdata %>% select(-.prefix_len)
@@ -27,6 +27,9 @@ prefix = {{ envs.prefix | r }}
27
27
  log_info("Loading immdata ...")
28
28
  immdata = readRDS(immfile)
29
29
 
30
+ if (is.null(prefix)) { prefix = immdata$prefix }
31
+ if (is.null(prefix)) { prefix = "" }
32
+
30
33
  log_info("Expanding immdata ...")
31
34
  exdata = expand_immdata(immdata)
32
35
 
@@ -1,4 +1,5 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/single_cell.R")
2
3
 
3
4
  # Loading 10x data into immunarch
4
5
  library(immunarch)
@@ -13,7 +14,8 @@ rdsfile = {{ out.rdsfile | quote }}
13
14
  metatxt = {{ out.metatxt | quote }}
14
15
  tmpdir = {{ envs.tmpdir | quote }}
15
16
  mode = {{ envs.mode | quote }}
16
- metacols = {{ envs.metacols | r}}
17
+ extracols = {{ envs.extracols | r}}
18
+ prefix = {{ envs.prefix | r }}
17
19
 
18
20
  metadata = read.table(
19
21
  metafile,
@@ -164,27 +166,24 @@ immdata$meta = left_join(
164
166
  by = "Sample"
165
167
  )
166
168
 
167
- saveRDS(immdata, file=rdsfile)
168
-
169
- metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
170
- # Clones Proportion CDR3.aa Barcode
171
- # 5 4 0.008583691 CAVRDTGNTPLVF;CASSEYSNQPQHF GTTCGGGCACTTACGA-1;TCTCTAAGTACCAGTT-1
172
- # 6 4 0.008583691 CALTQAAGNKLTF;CASRPEDLRGQPQHF GCTTGAAGTCGGCACT-1;TACTCGCTCCTAAGTG-1
173
- cldata = immdata$data[[i]][, unique(c(metacols, "Barcode"))]
174
- # # A tibble: 4 × 5
175
- # Sample Patient Timepoint Tissue
176
- # <chr> <chr> <chr> <chr>
177
- # 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
178
- mdata = as.list(immdata$meta[i, , drop=FALSE])
179
- for (mname in names(mdata)) {
180
- assign(mname, mdata[[mname]])
181
- }
169
+ immdata$prefix = prefix
182
170
 
183
- cldata %>%
184
- separate_rows(Barcode, sep=";") %>%
185
- distinct(Barcode, .keep_all = TRUE) %>%
186
- mutate(Barcode = glue("{{envs.prefix}}{Barcode}")) %>%
187
- column_to_rownames("Barcode")
171
+ saveRDS(immdata, file=rdsfile)
188
172
 
189
- }))
190
- write.table(metadf, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
173
+ exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
174
+ distinct(Sample, Barcode, .keep_all = TRUE) %>%
175
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
176
+ select(any_of(c(
177
+ colnames(immdata$meta),
178
+ "Barcode",
179
+ "CDR3.aa",
180
+ "Clones",
181
+ "Proportion",
182
+ "V.name",
183
+ "D.name",
184
+ "J.name",
185
+ extracols
186
+ ))) %>%
187
+ column_to_rownames("Barcode")
188
+
189
+ write.table(exdata, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
@@ -3,11 +3,13 @@
3
3
  # python = Sys.which({{envs.python | r}})
4
4
  # Sys.setenv(RETICULATE_PYTHON = python)
5
5
  # library(reticulate)
6
+ source("{{biopipen_dir}}/utils/single_cell.R")
6
7
 
7
8
  library(immunarch)
8
9
  library(dplyr)
9
10
  library(tidyr)
10
11
  library(tibble)
12
+ library(glue)
11
13
 
12
14
  immfile = {{in.immfile | r}}
13
15
  outdir = normalizePath({{job.outdir | r}})
@@ -17,6 +19,7 @@ tool = {{envs.tool | r}}
17
19
  python = {{envs.python | r}}
18
20
  on_multi = {{envs.on_multi | r}}
19
21
  args = {{envs.args | r}}
22
+ prefix = {{envs.prefix | r}}
20
23
 
21
24
  setwd(outdir)
22
25
 
@@ -26,17 +29,13 @@ if (on_multi) {
26
29
  } else {
27
30
  seqdata = immdata$data
28
31
  }
32
+ if (is.null(prefix)) { prefix = immdata$prefix }
33
+ if (is.null(prefix)) { prefix = "" }
29
34
 
30
35
  get_cdr3aa_df = function() {
31
- out = NULL
32
- for (sample in names(immdata$data)) {
33
- tmpdf = immdata$data[[sample]] %>%
34
- select(Barcode, CDR3.aa) %>%
35
- separate_rows(Barcode, sep = ";") %>%
36
- mutate(Barcode = paste0(sample, "_", Barcode))
37
- out = bind_rows(out, tmpdf)
38
- }
39
- out
36
+ expand_immdata(immdata, cell_id = "Barcode") %>%
37
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
38
+ select(Barcode, CDR3.aa)
40
39
  }
41
40
  cdr3aa_df = get_cdr3aa_df()
42
41
 
@@ -1,8 +1,10 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/single_cell.R")
2
3
 
3
4
  library(glue)
4
5
  library(dplyr)
5
6
  library(tidyr)
7
+ library(tibble)
6
8
  library(immunarch)
7
9
  library(Seurat)
8
10
  library(ggplot2)
@@ -13,6 +15,7 @@ exprfile <- {{in.srtobj | r}}
13
15
  outfile <- {{out.outfile | r}}
14
16
  joboutdir <- {{job.outdir | r}}
15
17
  python <- {{envs.python | r}}
18
+ prefix <- {{envs.prefix | r}}
16
19
  within_sample <- {{envs.within_sample | r}}
17
20
  assay <- {{envs.assay | r}}
18
21
  predefined_b <- {{envs.predefined_b | r}}
@@ -29,34 +32,21 @@ if (!dir.exists(tessa_dir)) dir.create(tessa_dir)
29
32
  ### Start preparing input files for TESSA
30
33
  # Prepare input files
31
34
  log_info("Preparing TCR input file ...")
32
- immdata <- readRDS(immfile)
33
-
34
- has_VJ <- "V.name" %in% colnames(immdata$data[[1]]) && "J.name" %in% colnames(immdata$data[[1]])
35
- # Merge all samples
36
- tcrdata <- do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
37
- # Clones Proportion CDR3.aa Barcode
38
- # 5 4 0.008583691 CAVRDTGNTPLVF;CASSEYSNQPQHF GTTCGGGCACTTACGA-1;TCTCTAAGTACCAGTT-1
39
- # 6 4 0.008583691 CALTQAAGNKLTF;CASRPEDLRGQPQHF GCTTGAAGTCGGCACT-1;TACTCGCTCCTAAGTG-1
40
- if (has_VJ) {
41
- cldata = immdata$data[[i]][, c("Barcode", "CDR3.aa", "V.name", "J.name")]
42
- } else {
43
- cldata = immdata$data[[i]][, c("Barcode", "CDR3.aa")]
44
- }
45
- # # A tibble: 4 × 5
46
- # Sample Patient Timepoint Tissue
47
- # <chr> <chr> <chr> <chr>
48
- # 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
49
- mdata = as.list(immdata$meta[i, , drop=FALSE])
50
- for (mname in names(mdata)) {
51
- assign(mname, mdata[[mname]])
52
- }
35
+ # If immfile endswith .rds, then it is an immunarch object
36
+ if (endsWith(tolower(immfile), ".rds")) {
37
+ immdata <- readRDS(immfile)
38
+ if (is.null(prefix)) { prefix = immdata$prefix }
39
+ if (is.null(prefix)) { prefix = "" }
40
+ tcrdata <- expand_immdata(immdata) %>%
41
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
42
+ rm(immdata)
43
+ } else {
44
+ tcrdata <- read.table(immfile, sep="\t", header=TRUE, row.names=1) %>%
45
+ rownames_to_column("Barcode")
46
+ }
47
+
48
+ has_VJ <- "V.name" %in% colnames(tcrdata) && "J.name" %in% colnames(tcrdata)
53
49
 
54
- cldata %>%
55
- separate_rows(Barcode, sep=";") %>%
56
- # Just in case there are duplicated barcodes
57
- distinct(Barcode, .keep_all = TRUE) %>%
58
- mutate(Barcode = glue("{{envs.prefix}}{Barcode}"), sample = Sample)
59
- }))
60
50
  if (has_VJ) {
61
51
  tcrdata <- tcrdata %>% dplyr::mutate(
62
52
  v_gene = sub("-\\d+$", "", V.name),
@@ -66,13 +56,13 @@ if (has_VJ) {
66
56
  cdr3 = CDR3.aa,
67
57
  v_gene,
68
58
  j_gene,
69
- sample
59
+ sample = Sample
70
60
  )
71
61
  } else {
72
62
  tcrdata <- tcrdata %>% dplyr::select(
73
63
  contig_id = Barcode,
74
64
  cdr3 = CDR3.aa,
75
- sample
65
+ sample = Sample
76
66
  )
77
67
  }
78
68
 
@@ -101,7 +91,10 @@ if (length(unused_expr_cells) > 0) {
101
91
  log_warn(glue("{length(unused_expr_cells)}/{ncol(expr)} expression cells are not used."))
102
92
  }
103
93
  if (length(cell_ids) == 0) {
104
- stop("No common cells between TCR and expression data. Are you using the correct prefix?")
94
+ stop(paste0(
95
+ "No common cells between TCR and expression data. ",
96
+ "Are you using the correct `envs.prefix` here or in `ImmunarchLoading`?"
97
+ ))
105
98
  }
106
99
  tcrdata <- tcrdata[tcrdata$contig_id %in% cell_ids, , drop=FALSE]
107
100
  expr <- as.matrix(expr)[, tcrdata$contig_id, drop=FALSE]
@@ -46,11 +46,14 @@ Those functions take following arguments:
46
46
  * `group-by`: The column name in metadata to group the cells.
47
47
  * `idents`: The first group or both groups of cells to compare (values in the `group-by` column). If only the first group is given, the rest of the cells (with non-NA values in the `group-by` column) will be used as the second group.
48
48
  * `subset`: An expression to subset the cells, will be passed to `dplyr::filter()`. Default is `TRUE` (no filtering).
49
+ * `each`: A column name (without quotes) in metadata to split the cells.
50
+ Each comparison will be done for each value in this column.
49
51
  * `id`: The column name in metadata for the group ids (i.e. `CDR3.aa`).
50
52
  * `compare`: Either a (numeric) column name (i.e. `Clones`) in metadata to compare between groups, or `.n` to compare the number of cells in each group.
51
53
  If numeric column is given, the values should be the same for all cells in the same group.
52
54
  This will not be checked (only the first value is used).
53
55
  * `uniq`: Whether to return unique ids or not. Default is `TRUE`. If `FALSE`, you can mutate the meta data frame with the returned ids. For example, `df |> mutate(expanded = expanded(...))`.
56
+ * `debug`: Return the data frame with intermediate columns instead of the ids. Default is `FALSE`.
54
57
  * `order`: The order of the returned ids. It could be `sum` or `diff`, which is the sum or diff of the `compare` between idents.
55
58
  Two kinds of modifiers can be added, including `desc` and `abs`.
56
59
  For example, `sum,desc` means the sum of `compare` between idents in descending order.