biopipen-0.34.1-py3-none-any.whl → biopipen-0.34.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (32)
  1. biopipen/__init__.py +1 -1
  2. biopipen/ns/scrna.py +259 -34
  3. biopipen/ns/scrna_metabolic_landscape.py +1 -1
  4. biopipen/ns/tcr.py +9 -4
  5. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +12 -3
  6. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +12 -3
  7. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +12 -3
  8. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +3 -10
  9. biopipen/scripts/scrna/MarkersFinder.R +34 -28
  10. biopipen/scripts/scrna/PseudoBulkDEG.R +592 -0
  11. biopipen/scripts/scrna/ScFGSEA.R +35 -35
  12. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +16 -0
  13. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +1 -1
  14. biopipen/scripts/scrna/SeuratClusterStats-features.R +29 -6
  15. biopipen/scripts/scrna/SeuratClusterStats-stats.R +29 -1
  16. biopipen/scripts/scrna/SeuratClusterStats.R +1 -0
  17. biopipen/scripts/scrna/TopExpressingGenes.R +6 -6
  18. biopipen/scripts/scrna/celltypist-wrapper.py +2 -0
  19. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +9 -3
  20. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +2 -2
  21. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +1 -0
  22. biopipen/scripts/tcr/GIANA/GIANA4.py +2 -4
  23. biopipen/scripts/tcr/ScRepCombiningExpression.R +3 -2
  24. biopipen/scripts/tcr/ScRepLoading.R +7 -2
  25. biopipen/scripts/tcr/TCRClustering.R +9 -23
  26. biopipen/scripts/tcr/TESSA.R +4 -2
  27. {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/METADATA +1 -1
  28. {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/RECORD +30 -31
  29. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -17
  30. biopipen/scripts/scrna/SCP-plot.R +0 -15202
  31. {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/WHEEL +0 -0
  32. {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/entry_points.txt +0 -0
@@ -7,9 +7,9 @@ srtfile <- {{in.srtobj | r}} # nolint
7
7
  outdir <- {{out.outdir | r}} # nolint
8
8
  joboutdir <- {{job.outdir | r}} # nolint
9
9
  mutaters <- {{envs.mutaters | r}} # nolint
10
- group.by <- {{envs["group-by"] | r}} # nolint
11
- ident.1 <- {{envs["ident-1"] | r}} # nolint
12
- ident.2 <- {{envs["ident-2"] | r}} # nolint
10
+ group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}} # nolint
11
+ ident_1 <- {{envs.ident_1 | default: envs["ident-1"] | default: None | r}} # nolint
12
+ ident_2 <- {{envs.ident_2 | default: envs["ident-2"] | default: None | r}} # nolint
13
13
  each <- {{envs.each | r}} # nolint
14
14
  subset <- {{envs.subset | r}} # nolint
15
15
  gmtfile <- {{envs.gmtfile | r}} # nolint
@@ -18,8 +18,8 @@ top <- {{envs.top | r}} # nolint
18
18
  minsize <- {{envs.minSize | default: envs.minsize | r}} # nolint
19
19
  maxsize <- {{envs.maxSize | default: envs.maxsize | r}} # nolint
20
20
  eps <- {{envs.eps | r}} # nolint
21
- allpathway_plots_defaults <- {{envs.allpathway_plots_defaults | r}} # nolint
22
- allpathway_plots <- {{envs.allpathway_plots | r}} #
21
+ alleach_plots_defaults <- {{envs.alleach_plots_defaults | r}} # nolint
22
+ alleach_plots <- {{envs.alleach_plots | r}} #
23
23
  ncores <- {{envs.ncores | r}} # nolint
24
24
  rest <- {{envs.rest | r: todot="-"}} # nolint
25
25
  cases <- {{envs.cases | r: todot="-"}} # nolint
@@ -27,8 +27,8 @@ cases <- {{envs.cases | r: todot="-"}} # nolint
27
27
  log <- get_logger()
28
28
  reporter <- get_reporter()
29
29
 
30
- allpathway_plots <- lapply(allpathway_plots, function(x) {
31
- list_update(allpathway_plots_defaults, x)
30
+ alleach_plots <- lapply(alleach_plots, function(x) {
31
+ list_update(alleach_plots_defaults, x)
32
32
  })
33
33
 
34
34
  log$info("Reading Seurat object ...")
@@ -43,9 +43,9 @@ if (!is.null(mutaters) && length(mutaters) > 0) {
43
43
  }
44
44
 
45
45
  defaults <- list(
46
- group.by = group.by,
47
- ident.1 = ident.1,
48
- ident.2 = ident.2,
46
+ group_by = group_by,
47
+ ident_1 = ident_1,
48
+ ident_2 = ident_2,
49
49
  each = each,
50
50
  subset = subset,
51
51
  gmtfile = gmtfile,
@@ -54,8 +54,8 @@ defaults <- list(
54
54
  minsize = minsize,
55
55
  maxsize = maxsize,
56
56
  eps = eps,
57
- allpathway_plots_defaults = allpathway_plots_defaults,
58
- allpathway_plots = allpathway_plots,
57
+ alleach_plots_defaults = alleach_plots_defaults,
58
+ alleach_plots = alleach_plots,
59
59
  ncores = ncores,
60
60
  rest = rest
61
61
  )
@@ -63,11 +63,11 @@ defaults <- list(
63
63
  expand_each <- function(name, case) {
64
64
  outcases <- list()
65
65
 
66
- case$group.by <- case$group.by %||% "Identity"
66
+ case$group_by <- case$group_by %||% "Identity"
67
67
 
68
68
  if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
69
- if (length(case$allpathway_plots) > 0) {
70
- stop("Cannot perform `allpathway_plots` without `each` defined.")
69
+ if (length(case$alleach_plots) > 0) {
70
+ stop("Cannot perform `alleach_plots` without `each` defined.")
71
71
  }
72
72
 
73
73
  outcases[[name]] <- case
@@ -93,8 +93,8 @@ expand_each <- function(name, case) {
93
93
  newcase$each_name <- case$each
94
94
  newcase$each <- each
95
95
 
96
- newcase$allpathway_plots_defaults <- NULL
97
- newcase$allpathway_plots <- NULL
96
+ newcase$alleach_plots_defaults <- NULL
97
+ newcase$alleach_plots <- NULL
98
98
 
99
99
  if (!is.null(case$subset)) {
100
100
  newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
@@ -105,13 +105,13 @@ expand_each <- function(name, case) {
105
105
  outcases[[newname]] <- newcase
106
106
  }
107
107
 
108
- if (length(case$allpathway_plots) > 0) {
108
+ if (length(case$alleach_plots) > 0) {
109
109
  newcase <- case
110
110
 
111
111
  newcase$gseas <- list()
112
- newcase$allpathway_plots <- lapply(
113
- newcase$allpathway_plots,
114
- function(x) { list_update(newcase$allpathway_plots_defaults, x) }
112
+ newcase$alleach_plots <- lapply(
113
+ newcase$alleach_plots,
114
+ function(x) { list_update(newcase$alleach_plots_defaults, x) }
115
115
  )
116
116
 
117
117
  outcases[[paste0(name, " (all ", case$each,")")]] <- newcase
@@ -154,8 +154,8 @@ do_case <- function(name) {
154
154
  }))
155
155
  gseas[[case$each]] <- factor(gseas[[case$each]], levels = each_levels)
156
156
 
157
- for (plotname in names(case$allpathway_plots)) {
158
- plotargs <- case$allpathway_plots[[plotname]]
157
+ for (plotname in names(case$alleach_plots)) {
158
+ plotargs <- case$alleach_plots[[plotname]]
159
159
  plotargs <- extract_vars(plotargs, "devpars")
160
160
  plotargs$gsea_results <- gseas
161
161
  plotargs$group_by <- case$each
@@ -182,12 +182,12 @@ do_case <- function(name) {
182
182
  allow_empty = !is.null(case$each)
183
183
  # prepare expression matrix
184
184
  log$info(" Preparing expression matrix...")
185
- sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group.by))) }, allow_empty)
185
+ sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
186
186
  if (is.null(sobj)) {
187
187
  reporter$add2(
188
188
  list(
189
189
  kind = "error",
190
- content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
190
+ content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
191
191
  ),
192
192
  hs = c(info$section, info$name)
193
193
  )
@@ -200,20 +200,20 @@ do_case <- function(name) {
200
200
  reporter$add2(
201
201
  list(
202
202
  kind = "error",
203
- content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
203
+ content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
204
204
  ),
205
205
  hs = c(info$section, info$name)
206
206
  )
207
207
  return(NULL)
208
208
  }
209
209
  }
210
- if (!is.null(case$ident.2)) {
211
- sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2)) }, allow_empty)
210
+ if (!is.null(case$ident_2)) {
211
+ sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group_by) %in% c(case$ident_1, case$ident_2)) }, allow_empty)
212
212
  if (is.null(sobj)) {
213
213
  reporter$add2(
214
214
  list(
215
215
  kind = "error",
216
- content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
216
+ content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
217
217
  ),
218
218
  hs = c(info$section, info$name)
219
219
  )
@@ -221,16 +221,16 @@ do_case <- function(name) {
221
221
  }
222
222
  }
223
223
 
224
- allclasses <- sobj@meta.data[, case$group.by, drop = TRUE]
225
- if (is.null(case$ident.2)) {
226
- case$ident.2 <- "Other"
227
- allclasses[allclasses != case$ident.1] <- "Other"
224
+ allclasses <- sobj@meta.data[, case$group_by, drop = TRUE]
225
+ if (is.null(case$ident_2)) {
226
+ case$ident_2 <- "Other"
227
+ allclasses[allclasses != case$ident_1] <- "Other"
228
228
  }
229
229
  exprs <- GetAssayData(sobj, layer = "data")
230
230
 
231
231
  # get preranks
232
232
  log$info(" Getting preranks...")
233
- ranks <- RunGSEAPreRank(exprs, allclasses, case$ident.1, case$ident.2, case$method)
233
+ ranks <- RunGSEAPreRank(exprs, allclasses, case$ident_1, case$ident_2, case$method)
234
234
  write.table(
235
235
  as.data.frame(ranks),
236
236
  file.path(info$prefix, "fgsea.rank.txt"),
@@ -310,7 +310,7 @@ do_case <- function(name) {
310
310
 
311
311
  reporter$add2(
312
312
  list(
313
- name = paste0("Table (", case$ident.1, " vs ", case$ident.2, ")"),
313
+ name = paste0("Table (", case$ident_1, " vs ", case$ident_2, ")"),
314
314
  contents = list(
315
315
  list(kind = "descr", content = paste0(
316
316
  "Showing top 50 pathways by padj in descending order. ",
@@ -26,6 +26,22 @@ if (
26
26
  if (length(clustrees) == 0) {
27
27
  log$warn("- no case found, skipping ...")
28
28
  } else {
29
+ reporter$add(
30
+ list(
31
+ kind = "descr",
32
+ content = 'The clustree plots displays clustering results from the Seurat object across different
33
+ resolutions of the clustering algorithm
34
+ (<a target="_blank" href="https://satijalab.org/seurat/reference/findclusters">Seurat::FindClusters</a>).
35
+ Each node represents a cluster, with the resolution levels labeled along the vertical (y) axis.
36
+ The size of each node reflects the number of cells in that cluster. Edges connect clusters between
37
+ adjacent resolutions and indicate how cells transition between clusters as resolution increases.
38
+ The thickness of the edges corresponds to the proportion of shared cells (in_prop) between clusters,
39
+ where darker lines signify a higher overlap (up to 100%). The color of the edges indicates the actual
40
+ number of cells that transitioned between clusters.'
41
+ ),
42
+ h1 = "Clustree plots"
43
+ )
44
+
29
45
  reports <- list()
30
46
  for (name in names(clustrees)) {
31
47
  if (is.null(clustrees[[name]]$prefix)) {
@@ -40,7 +40,7 @@ do_one_dimplot = function(name) {
40
40
  reporter$add(
41
41
  list(
42
42
  kind = "descr",
43
- content = paste0("Dimensionality reduction plot for ", case$group.by)
43
+ content = paste0("Dimensionality reduction plot for ", case$group_by)
44
44
  ),
45
45
  reporter$image(prefix, "pdf", FALSE),
46
46
  h1 = name
@@ -64,11 +64,11 @@ do_one_features <- function(name) {
64
64
  log$info("- Case: {name}")
65
65
 
66
66
  case <- list_update(features_defaults, features[[name]])
67
- case$descr <- case$descr %||% ""
68
67
  case <- extract_vars(
69
68
  case,
70
69
  "devpars", "more_formats", "save_code", "save_data", "order_by",
71
- "subset", "features", "descr")
70
+ "subset", "features", "descr",
71
+ allow_nonexisting = TRUE)
72
72
 
73
73
  if (!is.null(subset)) {
74
74
  case$object <- srtobj %>% filter(!!parse_expr(subset))
@@ -77,6 +77,7 @@ do_one_features <- function(name) {
77
77
  }
78
78
 
79
79
  if (exists("order_by") && !is.null(order_by)) {
80
+ case$ident <- case$ident %||% GetIdentityColumn(case$object)
80
81
  if (length(order_by) < 2) {
81
82
  clusters <- case$object@meta.data %>%
82
83
  group_by(!!sym(case$ident)) %>%
@@ -126,12 +127,34 @@ do_one_features <- function(name) {
126
127
  caching$save(info$prefix)
127
128
  }
128
129
  # add reports
129
- if (!is.null(descr) && nchar(descr) > 0) {
130
- reporter$add2(
131
- list(kind = "descr", content = descr),
132
- hs = c(info$section, info$name)
130
+ default_descr <- glue(
131
+ "The plot shows the distribution or pattern of the specified features ({paste(case$features %||% features, collapse = ', ')}) ",
132
+ "across cells",
133
+ "{if (!is.null(case$ident)) glue(', identified by \"{case$ident}\"') else ''}",
134
+ "{if (!is.null(case$group_by)) glue(', grouped by \"{case$group_by}\"') else ''}",
135
+ "{if (!is.null(case$split_by)) glue(', and split by \"{case$split_by}\"') else ''}. ",
136
+ "The plot type is '{case$plot_type}', ",
137
+ "{if (case$plot_type == 'dim') 'displaying the features on a dimensional reduction embedding' ",
138
+ " else if (case$plot_type == 'heatmap') 'arranged as a heatmap by rows_name and other grouping variables' ",
139
+ " else if (case$plot_type %in% c('violin', 'box', 'ridge')) 'showing the distribution of feature values by the grouping variables' ",
140
+ " else if (case$plot_type == 'cor') 'showing the correlation between features' ",
141
+ " else 'showing aggregated feature values by the grouping variables'}. ",
142
+ "{if (!is.null(case$facet_by)) glue('Plots are further faceted by \"{case$facet_by}\". ') else ''}",
143
+ "{if (case$plot_type == 'dim') glue('The reduction used is \"{if (!is.null(case$reduction)) case$reduction else DefaultDimReduc(case$object)}\"') else ''}",
144
+ "{if (case$plot_type == 'dim' && !is.null(case$graph)) glue(', with graph \"{case$graph}\" drawn to show cell neighbor edges') else ''}",
145
+ "{if (case$plot_type == 'dim' && !is.null(case$bg_cutoff) && case$bg_cutoff > 0) glue(', and a background cutoff of {case$bg_cutoff}') else ''}",
146
+ "{if (case$plot_type == 'dim') glue(', using dimensions {paste(case$dims %||% 1:2, collapse = \",\")}') else ''}"
147
+ )
148
+ if (!is.null(case$comparisons)) {
149
+ default_descr <- paste0(
150
+ default_descr,
151
+ "Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
133
152
  )
134
153
  }
154
+ reporter$add2(
155
+ list(kind = "descr", content = descr %||% default_descr),
156
+ hs = c(info$section, info$name)
157
+ )
135
158
 
136
159
  if (save_data) {
137
160
  reporter$add2(
@@ -5,17 +5,26 @@ log$info("stats:")
5
5
  odir <- file.path(outdir, "stats")
6
6
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
7
7
 
8
+
9
+
8
10
  do_one_stats <- function(name) {
9
11
  log$info("- Case: {name}")
10
12
 
11
13
  case <- list_update(stats_defaults, stats[[name]])
12
- extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset")
14
+ case <- extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset", "descr")
13
15
 
14
16
  if (!is.null(subset)) {
15
17
  case$object <- srtobj %>% filter(!!parse_expr(subset))
16
18
  } else {
17
19
  case$object <- srtobj
18
20
  }
21
+ ident <- case$ident %||% GetIdentityColumn(case$object)
22
+ groupings <- unique(c(case$group_by, case$rows_by, case$columns_by, case$pie_group_by, ident))
23
+ if (length(groupings) > 0) {
24
+ for (g in groupings) {
25
+ case$object <- filter(case$object, !is.na(!!sym(g)))
26
+ }
27
+ }
19
28
 
20
29
  info <- case_info(name, odir, is_dir = FALSE, create = TRUE)
21
30
  p <- do_call(gglogger::register(CellStatPlot), case)
@@ -27,6 +36,20 @@ do_one_stats <- function(name) {
27
36
  auto_data_setup = FALSE)
28
37
  }
29
38
 
39
+ frac <- case$frac %||% "none"
40
+ default_descr <- glue(
41
+ "The {case$plot_type} plot shows the distribution of cells across categories defined by '{ident}'",
42
+ "{if (!is.null(case$group_by)) glue(', grouped by {case$group_by}') else ''}",
43
+ "{if (!is.null(case$split_by)) glue(', and split by {case$split_by}') else ''}. ",
44
+ "The values represent ",
45
+ "{if (frac == 'none') 'the number of cells' else glue('the fraction of cells calculated by \"{frac}\"')}. "
46
+ )
47
+ if (!is.null(case$comparisons)) {
48
+ default_descr <- paste0(
49
+ default_descr,
50
+ "Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
51
+ )
52
+ }
30
53
  if (save_data) {
31
54
  pdata <- attr(p, "data") %||% p$data
32
55
  if (!inherits(pdata, "data.frame") && !inherits(pdata, "matrix")) {
@@ -37,6 +60,10 @@ do_one_stats <- function(name) {
37
60
  list(
38
61
  name = "Plot",
39
62
  contents = list(
63
+ list(
64
+ kind = "descr",
65
+ content = case$descr %||% default_descr
66
+ ),
40
67
  reporter$image(
41
68
  info$prefix, more_formats, save_code, kind = "image")
42
69
  )
@@ -60,6 +87,7 @@ do_one_stats <- function(name) {
60
87
  )
61
88
  } else {
62
89
  reporter$add2(
90
+ list(kind = "descr", content = case$descr %||% default_descr),
63
91
  reporter$image(info$prefix, more_formats, save_code, kind = "image"),
64
92
  hs = c(info$section, info$name)
65
93
  )
@@ -3,6 +3,7 @@ library(rlang)
3
3
  library(dplyr)
4
4
  library(tidyr)
5
5
  library(tibble)
6
+ library(glue)
6
7
  library(forcats)
7
8
  library(tidyseurat)
8
9
  library(gglogger)
@@ -9,7 +9,7 @@ outdir <- {{out.outdir | r}}
9
9
  joboutdir <- {{job.outdir | r}}
10
10
  mutaters <- {{ envs.mutaters | r }}
11
11
  ident <- {{ envs.ident | r }}
12
- group.by <- {{ envs["group-by"] | r }} # nolint
12
+ group_by <- {{ envs.group_by | default: envs["group-by"] | default: None | r }} # nolint
13
13
  each <- {{ envs.each | r }}
14
14
  dbs <- {{ envs.dbs | r }}
15
15
  n <- {{ envs.n | r }}
@@ -41,7 +41,7 @@ enrich_plots <- lapply(enrich_plots, function(x) {
41
41
  })
42
42
  defaults <- list(
43
43
  ident = ident,
44
- group.by = group.by,
44
+ group_by = group_by,
45
45
  each = each,
46
46
  dbs = dbs,
47
47
  n = n,
@@ -171,17 +171,17 @@ run_case <- function(name) {
171
171
  } else {
172
172
  subobj <- srtobj
173
173
  }
174
- case$group.by <- case$group.by %||% "Identity"
174
+ case$group_by <- case$group_by %||% "Identity"
175
175
  if (is.null(case$ident)) {
176
- case$ident <- as.character(unique(subobj@meta.data[[case$group.by]]))
176
+ case$ident <- as.character(unique(subobj@meta.data[[case$group_by]]))
177
177
  }
178
178
  avgexpr <- AverageExpression(
179
179
  subobj,
180
- group.by = case$group.by,
180
+ group_by = case$group_by,
181
181
  assays = assay
182
182
  )[[assay]]
183
183
  # https://github.com/satijalab/seurat/issues/7893
184
- colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group.by]]))
184
+ colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group_by]]))
185
185
  avgexpr <- avgexpr[, case$ident, drop = FALSE]
186
186
 
187
187
  for (idt in case$ident) {
@@ -29,6 +29,8 @@ if __name__ == "__main__":
29
29
  raise ValueError(
30
30
  f"Over clustering column '{over_clustering}' not found in AnnData object."
31
31
  )
32
+ if 'neighbors' in adata.uns and 'params' in adata.uns['neighbors']:
33
+ adata.uns['neighbors']['params'].setdefault('n_neighbors', 15)
32
34
 
33
35
  annotated = celltypist.annotate(
34
36
  adata,
@@ -98,7 +98,13 @@ do_comparison <- function(object, caseinfo, subset_by, subset_val, group_by, gro
98
98
  }
99
99
 
100
100
  classes <- as.character(object@meta.data[[group_by]])
101
- classes[classes != group1] <- "_REST"
101
+ if (!group1 %in% classes) {
102
+ stop("Group '", group1, "' not found in '", group_by, "' column of the Seurat object.")
103
+ }
104
+ if (!is.null(group2) && !group2 %in% classes) {
105
+ stop("Group '", group2, "' not found in '", group_by, "' column of the Seurat object.")
106
+ }
107
+ classes[classes != group1] <- "Other"
102
108
  if (any(table(classes) < 5)) {
103
109
  msg <- paste0(
104
110
  " ! skipped. Group has less than 5 cells: ",
@@ -266,8 +272,8 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, compari
266
272
  rbind, lapply(
267
273
  as.character(comparisons),
268
274
  function(comparison) {
269
- if (grepl(",", comparison)) {
270
- group1 <- trimws(unlist(strsplit(comparison, ",")))
275
+ if (grepl(":", comparison)) {
276
+ group1 <- trimws(unlist(strsplit(comparison, ":")))
271
277
  group2 <- group1[2]
272
278
  group1 <- group1[1]
273
279
  } else {
@@ -315,8 +315,8 @@ do_subset <- function(
315
315
  plotargs$keep_empty <- TRUE
316
316
 
317
317
  p <- do_call(plotfn, plotargs)
318
- devpars$width <- devpars$width %||% (attr(p, "width") * devpars$res) %||% 1000
319
- devpars$height <- devpars$height %||% (attr(p, "height") * devpars$res) %||% 1000
318
+ devpars$width <- devpars$width %||% (attr(p, "width") * 2 * devpars$res) %||% 1000
319
+ devpars$height <- devpars$height %||% (attr(p, "height") * 2 * devpars$res) %||% 1000
320
320
  } else { # heatmap
321
321
  minval <- min(dat)
322
322
  maxval <- max(dat)
@@ -195,6 +195,7 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, plots,
195
195
  plotprefix <- file.path(odir, slugify(plot))
196
196
  plotargs$devpars$width <- plotargs$devpars$width %||% (attr(p, "width") * plotargs$devpars$res) %||% 800
197
197
  plotargs$devpars$height <- plotargs$devpars$height %||% (attr(p, "height") * plotargs$devpars$res) %||% 600
198
+ plotargs$devpars$height <- max(plotargs$devpars$height, plotargs$devpars$width / 1.5)
198
199
  png(
199
200
  filename = paste0(plotprefix, ".png"),
200
201
  width = plotargs$devpars$width,
@@ -36,9 +36,6 @@ from sklearn.manifold import MDS
36
36
  import faiss
37
37
  from query import *
38
38
  try:
39
- from Bio.SubsMat.MatrixInfo import blosum62
40
- print(blosum62)
41
- except ModuleNotFoundError:
42
39
  from Bio.Align import substitution_matrices
43
40
  blosum62 = substitution_matrices.load("BLOSUM62")
44
41
  _tmp = {}
@@ -46,7 +43,8 @@ except ModuleNotFoundError:
46
43
  for ab2 in blosum62.alphabet:
47
44
  _tmp[(ab1, ab2)] = int(blosum62[(ab1, ab2)])
48
45
  blosum62 = _tmp
49
- print(blosum62)
46
+ except ModuleNotFoundError:
47
+ from Bio.SubsMat.MatrixInfo import blosum62
50
48
 
51
49
  AAstring = "ACDEFGHIKLMNPQRSTVWY"
52
50
  AAstringList = list(AAstring)
@@ -7,7 +7,7 @@ srtobjfile <- {{in.srtobj | r}}
7
7
  outfile <- {{out.outfile | r}}
8
8
  cloneCall <- {{envs.cloneCall | r}}
9
9
  chain <- {{envs.chain | r}}
10
- group.by <- {{envs["group-by"] | r}}
10
+ group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}}
11
11
  proportion <- {{envs.proportion | r}}
12
12
  filterNA <- {{envs.filterNA | r}}
13
13
  cloneSize <- {{envs.cloneSize | r}}
@@ -28,12 +28,13 @@ obj <- combineExpression(
28
28
  sc.data = srtobj,
29
29
  cloneCall = cloneCall,
30
30
  chain = chain,
31
- group.by = group.by,
31
+ group.by = group_by,
32
32
  proportion = proportion,
33
33
  filterNA = filterNA,
34
34
  cloneSize = unlist(cloneSize),
35
35
  addLabel = addLabel
36
36
  )
37
+ obj$TCR_Presence <- !is.na(obj$CTaa)
37
38
 
38
39
  log$info("Saving combined object ...")
39
40
  save_obj(obj, outfile)
@@ -118,8 +118,13 @@ load_contig <- function(input, sample, fmt) {
118
118
  fmt <- dirfmt[[2]]
119
119
  if (is.null(dir)) { return(NULL) }
120
120
  x <- loadContigs(dir, format = fmt %||% "10X")
121
- x[[1]]$sample <- NULL
122
- x[[1]]
121
+ x <- x[[1]]
122
+ x$sample <- NULL
123
+ if (identical(fmt %||% "10X", "10X") && colnames(x)[1] == "X") {
124
+ x$X <- NULL
125
+ }
126
+
127
+ x
123
128
  }
124
129
 
125
130
 
@@ -130,11 +130,10 @@ output.clusters_df.to_csv(clustcr_dir + "/clusters.txt", sep="\t", index=False)
130
130
  clustcr_file
131
131
  }
132
132
 
133
- clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
133
+ clean_clustcr_output = function(clustcr_outfile) {
134
134
  clustcr_out = read.delim2(clustcr_outfile, header=TRUE, row.names = NULL)
135
135
  colnames(clustcr_out) = c("CDR3.aa", "TCR_Cluster")
136
- in_cdr3 = read.delim2(clustcr_input, header=TRUE, row.names = NULL)
137
- out = left_join(in_cdr3, distinct(clustcr_out), by=c("CDR3.aa")) %>%
136
+ out = left_join(cdr3aa_df, distinct(clustcr_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
138
137
  mutate(
139
138
  TCR_Cluster = if_else(
140
139
  is.na(TCR_Cluster),
@@ -170,7 +169,7 @@ run_clustcr = function() {
170
169
  quit(status=rc)
171
170
  }
172
171
  clustcr_outfile = file.path(clustcr_dir, "clusters.txt")
173
- clean_clustcr_output(clustcr_outfile, clustcr_input)
172
+ clean_clustcr_output(clustcr_outfile)
174
173
  }
175
174
 
176
175
  prepare_giana = function() {
@@ -193,21 +192,8 @@ prepare_giana = function() {
193
192
  }
194
193
 
195
194
  prepare_input = function() {
196
- # prepare input file for GIANA
197
- cdr3 = c()
198
- # cdr3col = if (!on_multi) "cdr3" else "CDR3.aa"
199
- cdr3col = "CDR3.aa"
200
- for (sample in names(seqdata)) {
201
- sdata = seqdata[[sample]]
202
- if (on_multi) {
203
- sdata[[cdr3col]] = sub(";", "", sdata[[cdr3col]])
204
- } else if ("chain" %in% colnames(sdata)) {
205
- sdata = sdata %>% separate_rows(chain, cdr3col, sep = ";") %>%
206
- filter(chain == "TRB")
207
- }
208
- cdr3 = union(cdr3, unique(sdata[[cdr3col]]))
209
- }
210
- cdr3 = unique(cdr3)
195
+ cdr3aa_df$cdr3seq4clustering <<- gsub("[^A-Z]", "", cdr3aa_df$CDR3.aa) # Remove non-amino acid characters
196
+ cdr3 <- unique(cdr3aa_df$cdr3seq4clustering)
211
197
 
212
198
  # cdr3 = distinct(cdr3, aminoAcid, vMaxResolved)
213
199
 
@@ -220,15 +206,14 @@ prepare_input = function() {
220
206
  cdr3file
221
207
  }
222
208
 
223
- clean_giana_output = function(giana_outfile, giana_infile) {
209
+ clean_giana_output = function(giana_outfile) {
224
210
  # generate an output file with columns:
225
211
  # CDR3.aa, TCR_Cluster, V.name, Sample
226
212
  # If sequence doesn't exist in the input file,
227
213
  # Then a unique cluster id is assigned to it.
228
214
  giana_out = read.delim2(giana_outfile, header=FALSE, comment.char = "#", row.names = NULL)[, 1:2, drop=FALSE]
229
215
  colnames(giana_out) = c("CDR3.aa", "TCR_Cluster")
230
- in_cdr3 = read.delim2(giana_infile, header=TRUE, row.names = NULL)
231
- out = left_join(in_cdr3, distinct(giana_out), by=c("CDR3.aa")) %>%
216
+ out = left_join(cdr3aa_df, distinct(giana_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
232
217
  mutate(
233
218
  TCR_Cluster = if_else(
234
219
  is.na(TCR_Cluster),
@@ -283,10 +268,11 @@ run_giana = function() {
283
268
  quit(status=rc)
284
269
  }
285
270
  giana_outfile = file.path(giana_outdir, "cdr3--RotationEncodingBL62.txt")
286
- clean_giana_output(giana_outfile, giana_input)
271
+ clean_giana_output(giana_outfile)
287
272
  }
288
273
 
289
274
  attach_to_obj = function(obj, out) {
275
+ out <- as.data.frame(out)
290
276
  rownames(out) <- out$Barcode
291
277
  if (is_seurat) {
292
278
  # Attach results to Seurat object
@@ -39,9 +39,11 @@ log$info("Preparing TCR input file ...")
39
39
  # If immfile endswith .rds, then it is an immunarch object
40
40
  tcrdata <- sobj@meta.data %>%
41
41
  rownames_to_column("contig_id") %>%
42
+ select(contig_id, CTaa, CTgene, sample = Sample) %>%
42
43
  filter(!is.na(CTaa) & !is.na(CTgene)) %>%
43
- separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove = FALSE) %>%
44
- separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = FALSE) %>%
44
+ separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove = TRUE) %>%
45
+ filter(!is.na(cdr3) & cdr3 != "NA" & cdr3 != "nan") %>%
46
+ separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = TRUE) %>%
45
47
  separate(vjgene, into = c("v_gene", NA, "j_gene", NA), sep = "\\.", remove = TRUE) %>%
46
48
  mutate(v_gene = sub("-\\d+$", "", v_gene), j_gene = sub("-\\d+$", "", j_gene))
47
49
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: biopipen
3
- Version: 0.34.1
3
+ Version: 0.34.3
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang