biopipen-0.21.2-py3-none-any.whl → biopipen-0.22.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (62)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +30 -10
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -168
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +127 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
  35. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  36. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  40. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  41. biopipen/scripts/tcr/Attach2Seurat.R +2 -1
  42. biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
  43. biopipen/scripts/tcr/CloneResidency.R +114 -34
  44. biopipen/scripts/tcr/Immunarch-basic.R +18 -4
  45. biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
  46. biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
  47. biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
  48. biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
  49. biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
  50. biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
  51. biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
  52. biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
  53. biopipen/scripts/tcr/Immunarch.R +7 -0
  54. biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
  55. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  56. biopipen/scripts/tcr/TCRClustering.R +8 -9
  57. biopipen/scripts/tcr/TESSA.R +66 -41
  58. biopipen/utils/misc.R +96 -1
  59. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
  60. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
  61. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
  62. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
@@ -5,20 +5,6 @@ features = {{envs.features | r: todot="-", skip=1}}
5
5
 
6
6
  odir = file.path(outdir, "features")
7
7
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
- report_toc_file = file.path(odir, "report_toc.json")
9
- # Section => list(
10
- # list(name?, kind, file),
11
- # ...
12
- # )
13
- report_toc = list()
14
-
15
- .add_toc = function(section, toc) {
16
- if (section %in% names(report_toc)) {
17
- report_toc[[section]][[length(report_toc[[section]]) + 1]] <<- toc
18
- } else {
19
- report_toc[[section]] <<- list(toc)
20
- }
21
- }
22
8
 
23
9
  .get_features = function(features) {
24
10
  if (is.null(features)) { features = 20 }
@@ -50,11 +36,9 @@ report_toc = list()
50
36
  }
51
37
 
52
38
  do_one_features = function(name) {
53
- print(paste0("Doing features for: ", name))
39
+ log_info("Doing features for: {name}")
54
40
 
55
41
  case = list_update(features_defaults, features[[name]])
56
- toc = list()
57
- if (!is.null(case$section)) { toc$name = name }
58
42
  case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
59
43
  excluded_args = c(
60
44
  "section",
@@ -65,30 +49,40 @@ do_one_features = function(name) {
65
49
  "kind"
66
50
  )
67
51
 
52
+ if (is.character(case$subset)) {
53
+ case$object = srtobj %>% filter(!!rlang::parse_expr(case$subset))
54
+ } else {
55
+ case$object = srtobj
56
+ }
57
+ if (!is.null(case$ident)) {
58
+ Idents(case$object) = case$ident
59
+ }
60
+ n_uidents = length(unique(Idents(case$object)))
61
+
68
62
  fn = NULL
69
63
  default_devpars = NULL
70
64
  if ("ridge" %in% case$kind) {
71
65
  case$kind = "ridge"
72
66
  if (is.null(case$cols)) {
73
- case$cols = pal_ucscgb(alpha = .8)(26)
67
+ case$cols = pal_biopipen()(32)
74
68
  }
75
69
  excluded_args = c(excluded_args, "split.by")
76
70
  fn = RidgePlot
77
- default_devpars = function(features, ncol, uidents) {
71
+ default_devpars = function(features, ncol) {
78
72
  if (is.null(ncol)) { ncol = 1 }
79
73
  list(
80
74
  width = 400 * ncol,
81
- height = ceiling(length(features) / ncol) * ifelse(length(uidents) < 10, 300, 400),
75
+ height = ceiling(length(features) / ncol) * ifelse(n_uidents < 10, 300, 400),
82
76
  res = 100
83
77
  )
84
78
  }
85
79
  } else if ("vln" %in% case$kind || "violin" %in% case$kind) {
86
80
  case$kind = "violin"
87
81
  if (is.null(case$cols)) {
88
- case$cols = pal_ucscgb(alpha = .8)(26)
82
+ case$cols = pal_biopipen()(n_uidents)
89
83
  }
90
84
  fn = VlnPlot
91
- default_devpars = function(features, ncol, uidents) {
85
+ default_devpars = function(features, ncol) {
92
86
  if (is.null(ncol)) { ncol = 1 }
93
87
  list(
94
88
  width = 400 * ncol,
@@ -99,12 +93,12 @@ do_one_features = function(name) {
99
93
  } else if ("feature" %in% case$kind) {
100
94
  case$kind = "feature"
101
95
  if (is.null(case$cols)) {
102
- case$cols = c("lightgrey", pal_ucscgb()(1))
96
+ case$cols = c("lightgrey", pal_biopipen()(1))
103
97
  }
104
98
  excluded_args = c(excluded_args, "group.by", "assay")
105
99
  case$shape.by = case$group.by
106
100
  fn = FeaturePlot
107
- default_devpars = function(features, ncol, uidents) {
101
+ default_devpars = function(features, ncol) {
108
102
  if (is.null(ncol)) { ncol = 1 }
109
103
  list(
110
104
  width = 400 * ncol,
@@ -115,16 +109,16 @@ do_one_features = function(name) {
115
109
  } else if ("dot" %in% case$kind) {
116
110
  case$kind = "dot"
117
111
  if (is.null(case$cols)) {
118
- case$cols = c("lightgrey", pal_ucscgb()(1))
112
+ case$cols = c("lightgrey", pal_biopipen()(1))
119
113
  }
120
114
  if (is.null(case$plus)) {
121
115
  case$plus = 'theme_prism(axis_text_angle=90)'
122
116
  }
123
117
  excluded_args = c(excluded_args, "slot", "ncol")
124
118
  fn = DotPlot
125
- default_devpars = function(features, ncol, uidents) {
119
+ default_devpars = function(features, ncol) {
126
120
  list(
127
- height = max(length(uidents) * 80 + 150, 420),
121
+ height = max(n_uidents * 80 + 150, 420),
128
122
  width = length(features) * 50 + 150,
129
123
  res = 100
130
124
  )
@@ -133,20 +127,20 @@ do_one_features = function(name) {
133
127
  case$kind = "heatmap"
134
128
  case = list_update(
135
129
  list(
136
- group.colors = pal_ucscgb(alpha = .8)(26),
130
+ group.colors = pal_biopipen()(n_uidents),
137
131
  size = 3.5,
138
132
  group.bar.height = 0.01
139
133
  ),
140
134
  case
141
135
  )
142
136
  if (is.null(case$plus)) {
143
- case$plus = 'scale_fill_gradientn(colors = c("lightgrey", pal_ucscgb()(1)), na.value = "white")'
137
+ case$plus = 'scale_fill_gradientn(colors = c("lightgrey", pal_biopipen()(1)), na.value = "white")'
144
138
  }
145
139
  excluded_args = c(excluded_args, "group.by", "split.by", "downsample", "ncol")
146
140
  fn = DoHeatmap
147
- default_devpars = function(features, ncol, uidents) {
141
+ default_devpars = function(features, ncol) {
148
142
  list(
149
- width = length(uidents) * 60 + 150,
143
+ width = n_uidents * 60 + 150,
150
144
  height = length(features) * 40 + 150,
151
145
  res = 100
152
146
  )
@@ -160,7 +154,7 @@ do_one_features = function(name) {
160
154
  case$slot = "data"
161
155
  }
162
156
  } else {
163
- stop("Unknown kind of plot")
157
+ stop(paste0("Unknown kind of plot: ", case$kind))
164
158
  }
165
159
 
166
160
  for (arg in excluded_args) {
@@ -168,33 +162,34 @@ do_one_features = function(name) {
168
162
  case[[arg]] = NULL
169
163
  }
170
164
 
171
- if (is.character(subset)) {
172
- case$object = srtobj %>% filter(!!rlang::parse_expr(subset))
173
- } else {
174
- case$object = srtobj
175
- }
176
- if (!is.null(ident)) {
177
- Idents(case$object) = ident
178
- }
179
165
  case$features = .get_features(case$features)
180
166
  if (!is.null(case$ncol)) {
181
167
  case$ncol = min(case$ncol, length(case$features))
182
168
  }
183
169
 
184
- toc$kind = kind
185
170
  if (kind == "table") {
186
171
  expr = do_call(fn, case)$RNA %>%
187
172
  as.data.frame() %>%
188
173
  rownames_to_column("Feature") %>%
189
174
  select(Feature, everything())
190
175
 
191
- toc$file = paste0(slugify(name), ".txt")
192
- write.table(expr, file.path(odir, toc$file), sep="\t", quote=FALSE, row.names=FALSE)
193
- } else {
194
- devpars = list_update(
195
- default_devpars(case$features, case$ncol, unique(Idents(case$object))),
196
- devpars
176
+ exprfile = paste0(slugify(name), ".txt")
177
+ write.table(expr, file.path(odir, exprfile), sep="\t", quote=FALSE, row.names=FALSE)
178
+
179
+ add_report(
180
+ list(
181
+ kind = "descr",
182
+ content = paste0("Table of expression value for selected features, by ", ident)
183
+ ),
184
+ list(
185
+ kind = "table",
186
+ src = exprfile
187
+ ),
188
+ h1 = ifelse(is.null(case$section), name, case$section),
189
+ h2 = ifelse(is.null(case$section), "#", name)
197
190
  )
191
+ } else {
192
+ devpars = list_update(default_devpars(case$features, case$ncol), devpars)
198
193
  if (kind == "heatmap") {
199
194
  if (!exists("downsample") || is.null(downsample)) {
200
195
  downsample = "average"
@@ -202,9 +197,9 @@ do_one_features = function(name) {
202
197
  if (downsample %in% c("average", "mean")) {
203
198
  case$object = AverageExpression(case$object, return.seurat = TRUE)
204
199
  } else if (is.integer(downsample)) {
205
- case$object = subset(case$object, downsample = downsample)
200
+ case$object = base::subset(case$object, downsample = downsample)
206
201
  } else {
207
- stop("Unknown downsample method.")
202
+ stop(paste0("Unknown downsample method: ", downsample))
208
203
  }
209
204
  }
210
205
  p = do_call(fn, case)
@@ -213,8 +208,7 @@ do_one_features = function(name) {
213
208
  p = p + eval(parse(text = pls))
214
209
  }
215
210
  }
216
- figfile = file.path(odir, paste0(slugify(name), ".", kind, ".png"))
217
- toc$file = basename(figfile)
211
+ figfile = file.path(odir, paste0(slugify(name), ".", slugify(case$kind), ".png"))
218
212
  png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
219
213
  tryCatch({
220
214
  print(p)
@@ -229,9 +223,20 @@ do_one_features = function(name) {
229
223
  )
230
224
  })
231
225
  dev.off()
226
+
227
+ add_report(
228
+ list(
229
+ kind = "descr",
230
+ content = paste0(kind, "plots for selected features, by ", ident)
231
+ ),
232
+ list(
233
+ kind = "image",
234
+ src = figfile
235
+ ),
236
+ h1 = ifelse(is.null(section), name, section),
237
+ h2 = ifelse(is.null(section), "#", name)
238
+ )
232
239
  }
233
- .add_toc(if (is.null(section)) name else section, toc)
234
240
  }
235
241
 
236
242
  sapply(names(features), do_one_features)
237
- .save_toc()
@@ -5,22 +5,9 @@ stats = {{envs.stats | r: todot="-", skip=1}}
5
5
 
6
6
  odir = file.path(outdir, "stats")
7
7
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
- report_toc_file = file.path(odir, "report_toc.json")
9
- # Realname => {bar: ..., pie: ..., table: ...}
10
- report_toc = list()
11
-
12
- .add_toc = function(name, toc) {
13
- report_toc[[name]] <<- toc
14
- }
15
-
16
- .save_toc = function() {
17
- writeLines(toJSON(report_toc, pretty = TRUE, auto_unbox = TRUE), report_toc_file)
18
- }
19
8
 
20
9
  do_one_stats = function(name) {
21
- print(paste0("Doing stats for: ", name))
22
-
23
- toc = list()
10
+ log_info("Doing stats for: {name}")
24
11
 
25
12
  case = list_update(stats_defaults, stats[[name]])
26
13
  case$devpars = list_update(stats_defaults$devpars, case$devpars)
@@ -45,7 +32,6 @@ do_one_stats = function(name) {
45
32
  mutate(.frac = .n / sum(.n))
46
33
 
47
34
  if (isTRUE(case$table)) {
48
- toc$table = basename(tablefile)
49
35
  write.table(df_cells, tablefile, sep="\t", quote=FALSE, row.names=FALSE)
50
36
  }
51
37
  if (isTRUE(case$pie)) {
@@ -54,7 +40,7 @@ do_one_stats = function(name) {
54
40
  ggplot(aes(x="", y=.n, fill=!!sym(case$ident))) +
55
41
  geom_bar(stat="identity", width=1, alpha=.8, position = position_stack(reverse = TRUE)) +
56
42
  coord_polar("y", start=0) +
57
- scale_fill_ucscgb(alpha=.8) +
43
+ scale_fill_biopipen() +
58
44
  guides(fill = guide_legend(title = case$ident)) +
59
45
  theme_void() +
60
46
  geom_label(
@@ -72,7 +58,6 @@ do_one_stats = function(name) {
72
58
  p_pie = p_pie + facet_wrap(case$split.by)
73
59
  }
74
60
 
75
- toc$pie = basename(piefile)
76
61
  png(piefile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
77
62
  print(p_pie)
78
63
  dev.off()
@@ -89,20 +74,53 @@ do_one_stats = function(name) {
89
74
  )) +
90
75
  geom_bar(stat="identity", position=bar_position, alpha=.8) +
91
76
  theme_prism(axis_text_angle = 90) +
92
- scale_fill_manual(values=rep(pal_ucscgb(alpha=.8)(26), 10)[1:max(ngroups, nidents)]) +
77
+ scale_fill_biopipen() +
93
78
  ylab(ifelse(isTRUE(case$frac), "Fraction of cells", "Number of cells"))
94
79
 
95
80
  if (!is.null(case$split.by)) {
96
81
  p = p + facet_wrap(case$split.by)
97
82
  }
98
83
 
99
- toc$bar = basename(figfile)
100
84
  png(figfile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
101
85
  print(p)
102
86
  dev.off()
103
87
 
104
- .add_toc(name, toc)
88
+ add_report(
89
+ list(
90
+ kind = "descr",
91
+ content = paste0(
92
+ "Plots showing the ",
93
+ ifelse(isTRUE(case$frac), "number/faction", "number"),
94
+ " of cells per cluster",
95
+ ifelse(
96
+ is.null(case$group.by),
97
+ "",
98
+ paste0(", by ", paste0(case$group.by, collapse = ", "))
99
+ )
100
+ )
101
+ ),
102
+ h1 = name
103
+ )
104
+
105
+ add_report(
106
+ list(
107
+ name = "Bar Plot",
108
+ contents = list(list(kind = "image", src = figfile))
109
+ ),
110
+ h1 = name,
111
+ ui = "tabs"
112
+ )
113
+
114
+ if (isTRUE(case$pie)) {
115
+ add_report(
116
+ list(
117
+ name = "Pie Chart",
118
+ contents = list(list(kind = "image", src = piefile))
119
+ ),
120
+ h1 = name,
121
+ ui = "tabs"
122
+ )
123
+ }
105
124
  }
106
125
 
107
126
  sapply(names(stats), do_one_stats)
108
- .save_toc()
@@ -1,21 +1,23 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
2
  source("{{biopipen_dir}}/utils/plot.R")
3
- library(jsonlite)
4
3
  library(slugify)
5
4
  library(Seurat)
6
5
  library(rlang)
7
6
  library(dplyr)
8
7
  library(tibble)
9
8
  library(ggprism)
10
- library(ggsci)
11
9
  library(ggrepel)
12
10
  library(tidyseurat)
13
11
 
14
12
  srtfile = {{in.srtobj | r}}
15
13
  outdir = {{out.outdir | r}}
14
+ joboutdir = {{job.outdir | r}}
16
15
 
16
+ log_info("Loading Seurat object ...")
17
17
  srtobj = readRDS(srtfile)
18
18
 
19
19
  {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-stats.R" %}
20
20
  {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-features.R" %}
21
21
  {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-dimplots.R" %}
22
+
23
+ save_report(joboutdir)
@@ -4,11 +4,13 @@ library(Seurat)
4
4
  library(future)
5
5
  library(tidyr)
6
6
  library(dplyr)
7
+ library(digest)
7
8
 
8
9
  set.seed(8525)
9
10
 
10
11
  srtfile = {{in.srtobj | quote}}
11
12
  rdsfile = {{out.rdsfile | quote}}
13
+ joboutdir = {{job.outdir | quote}}
12
14
  envs = {{envs | r: todot="-"}}
13
15
 
14
16
  options(future.globals.maxSize = 80000 * 1024^2)
@@ -26,7 +28,46 @@ envs$IntegrateData = .expand_dims(envs$IntegrateData)
26
28
  envs$RunUMAP = .expand_dims(envs$RunUMAP)
27
29
  envs$FindNeighbors = .expand_dims(envs$FindNeighbors)
28
30
 
31
+ log_info("Reading Seurat object ...")
29
32
  sobj = readRDS(srtfile)
33
+
34
+ if (isTRUE(envs$cache)) {
35
+ envs$cache = joboutdir
36
+ }
37
+
38
+ if (is.character(envs$cache) && nchar(envs$cache) > 0) {
39
+ log_info("Obtainning the signature ...")
40
+ envs2 = envs
41
+ envs2$ncores <- NULL
42
+ sig = c(
43
+ capture.output(str(sobj)),
44
+ "\n\n-------------------\n\n",
45
+ capture.output(str(envs2)),
46
+ "\n"
47
+ )
48
+ digested_sig = digest::digest(sig, algo = "md5")
49
+ cached_file = file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
50
+ if (file.exists(cached_file)) {
51
+ log_info("Using cached results {cached_file}")
52
+ # copy cached file to rdsfile
53
+ file.copy(cached_file, rdsfile, copy.date = TRUE)
54
+ quit()
55
+ } else {
56
+ log_info("Cached results not found, logging the current and cached signatures.")
57
+ log_info("- Current signature:")
58
+ print(sig)
59
+ sigfiles = Sys.glob(file.path(envs$cache, "*.signature.txt"))
60
+ for (sigfile in sigfiles) {
61
+ log_info("- Found cached signature file: {sigfile}")
62
+ cached_sig = readLines(sigfile)
63
+ log_info("- Cached signature:")
64
+ print(cached_sig)
65
+ }
66
+ writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
67
+ }
68
+ }
69
+
70
+
30
71
  obj_list = SplitObject(sobj, split.by = "Sample")
31
72
  rm(sobj)
32
73
 
@@ -51,27 +92,28 @@ if (!is.null(envs$FindIntegrationAnchors$reference)) {
51
92
  # ############################
52
93
  # Using SCT
53
94
  # https://satijalab.org/seurat/articles/integration_rpca.html#performing-integration-on-datasets-normalized-with-sctransform-1
54
- print("- Performing SCTransform on each sample ...")
95
+ log_info("########## Using SCT route ##########")
96
+ log_info("Performing SCTransform on each sample ...")
55
97
  obj_list <- lapply(X = obj_list, FUN = function(x) {
56
- print(paste(" Performing SCTransform on sample:", x@meta.data$Sample[1], "..."))
98
+ log_info("- On sample: {x@meta.data$Sample[1]} ...")
57
99
  # # Needed?
58
100
  # DefaultAssay(x) <- "RNA"
59
101
  args = list_update(envs$SCTransform, list(object = x))
60
102
  do_call(SCTransform, args)
61
103
  })
62
104
 
63
- print("- Running SelectIntegrationFeatures ...")
105
+ log_info("Running SelectIntegrationFeatures ...")
64
106
  envs$SelectIntegrationFeatures$object.list = obj_list
65
107
  features = do_call(SelectIntegrationFeatures, envs$SelectIntegrationFeatures)
66
108
 
67
- print("- Running PrepSCTIntegration ...")
109
+ log_info("Running PrepSCTIntegration ...")
68
110
  envs$PrepSCTIntegration$object.list = obj_list
69
111
  envs$PrepSCTIntegration$anchor.features = features
70
112
  obj_list = do_call(PrepSCTIntegration, envs$PrepSCTIntegration)
71
113
 
72
- print("- Running PCA on each sample ...")
114
+ log_info("Running PCA on each sample ...")
73
115
  obj_list = lapply(X = obj_list, FUN = function(x) {
74
- print(paste(" On sample:", x@meta.data$Sample[1], "..."))
116
+ log_info("- On sample: {x@meta.data$Sample[1]} ...")
75
117
  npcs = if (is.null(envs$RunPCA1$npcs)) 50 else envs$RunPCA1$npcs
76
118
  args = list_setdefault(
77
119
  envs$RunPCA1,
@@ -83,11 +125,11 @@ obj_list = lapply(X = obj_list, FUN = function(x) {
83
125
  do_call(RunPCA, args)
84
126
  })
85
127
 
86
- print("- Running FindIntegrationAnchors ...")
128
+ log_info("Running FindIntegrationAnchors ...")
87
129
  if (!is.null(envs$FindIntegrationAnchors$reference)) {
88
- print(
130
+ log_info(
89
131
  paste(
90
- " Using samples as reference:",
132
+ "- Using samples as reference:",
91
133
  paste(envs$FindIntegrationAnchors$reference, collapse = ", ")
92
134
  )
93
135
  )
@@ -106,7 +148,7 @@ fia_args$dims = 1:min(min_dim, max(fia_args$dims))
106
148
  fia_args$k.score = min(30, min_dim - 1)
107
149
  anchors = do_call(FindIntegrationAnchors, fia_args)
108
150
 
109
- print("- Running IntegrateData ...")
151
+ log_info("Running IntegrateData ...")
110
152
  envs$IntegrateData$anchorset = anchors
111
153
  id_args = list_setdefault(
112
154
  envs$IntegrateData,
@@ -139,9 +181,10 @@ tryCatch({
139
181
  # ############################
140
182
  # Using rpca
141
183
  # https://satijalab.org/seurat/articles/integration_rpca.html
142
- print("- Performing NormalizeData + FindVariableFeatures on each sample ...")
184
+ log_info("########## Using rpca route ##########")
185
+ log_info("Performing NormalizeData + FindVariableFeatures on each sample ...")
143
186
  obj_list <- lapply(X = obj_list, FUN = function(x) {
144
- print(paste(" On sample:", x@meta.data$Sample[1], "..."))
187
+ log_info("- On sample: {x@meta.data$Sample[1]} ...")
145
188
  DefaultAssay(x) <- "RNA"
146
189
  args = list_update(envs$NormalizeData, list(object = x))
147
190
  x <- do_call(NormalizeData, args)
@@ -150,14 +193,13 @@ obj_list <- lapply(X = obj_list, FUN = function(x) {
150
193
  do_call(FindVariableFeatures, args)
151
194
  })
152
195
 
153
-
154
- print("- Running SelectIntegrationFeatures ...")
196
+ log_info("Running SelectIntegrationFeatures ...")
155
197
  envs$SelectIntegrationFeatures$object.list = obj_list
156
198
  features = do_call(SelectIntegrationFeatures, envs$SelectIntegrationFeatures)
157
199
 
158
- print("- Running ScaleData + RunPCA on each sample ...")
200
+ log_info("Running ScaleData + RunPCA on each sample ...")
159
201
  obj_list <- lapply(X = obj_list, FUN = function(x) {
160
- print(paste(" On sample:", x@meta.data$Sample[1], "..."))
202
+ log_info("- On sample: {x@meta.data$Sample[1]} ...")
161
203
  args = list_setdefault(envs$ScaleData1, object = x, features = features)
162
204
  x <- do_call(ScaleData, args)
163
205
 
@@ -172,11 +214,11 @@ obj_list <- lapply(X = obj_list, FUN = function(x) {
172
214
  do_call(RunPCA, args)
173
215
  })
174
216
 
175
- print("- Running FindIntegrationAnchors ...")
217
+ log_info("Running FindIntegrationAnchors ...")
176
218
  if (!is.null(envs$FindIntegrationAnchors$reference)) {
177
- print(
219
+ log_info(
178
220
  paste(
179
- " Using samples as reference:",
221
+ "- Using samples as reference:",
180
222
  paste(envs$FindIntegrationAnchors$reference, collapse = ", ")
181
223
  )
182
224
  )
@@ -194,7 +236,7 @@ fia_args$dims = 1:min(min_dim, max(fia_args$dims))
194
236
  fia_args$k.score = min(30, min_dim - 1)
195
237
  anchors = do_call(FindIntegrationAnchors, fia_args)
196
238
 
197
- print("- Running IntegrateData ...")
239
+ log_info("Running IntegrateData ...")
198
240
  envs$IntegrateData$anchorset = anchors
199
241
  id_args = list_setdefault(envs$IntegrateData, dims = 1:30)
200
242
  id_args$dims = 1:min(min_dim, max(id_args$dims))
@@ -207,7 +249,7 @@ obj_list = do_call(ScaleData, envs$ScaleData)
207
249
 
208
250
  {%- endif %}
209
251
 
210
- print("- Running RunPCA ...")
252
+ log_info("Running RunPCA ...")
211
253
  pca_args = list_setdefault(
212
254
  envs$RunPCA,
213
255
  object = obj_list,
@@ -216,7 +258,7 @@ pca_args = list_setdefault(
216
258
  pca_args$npcs = min(pca_args$npcs, ncol(obj_list) - 1)
217
259
  obj_list = do_call(RunPCA, pca_args)
218
260
 
219
- print("- Running RunUMAP ...")
261
+ log_info("Running RunUMAP ...")
220
262
  umap_args = list_setdefault(
221
263
  envs$RunUMAP,
222
264
  object = obj_list,
@@ -225,16 +267,21 @@ umap_args = list_setdefault(
225
267
  umap_args$dims = 1:min(max(umap_args$dims), ncol(obj_list) - 1)
226
268
  obj_list = do_call(RunUMAP, umap_args)
227
269
 
228
- print("- Running FindNeighbors ...")
270
+ log_info("Running FindNeighbors ...")
229
271
  envs$FindNeighbors$object = obj_list
230
272
  obj_list = do_call(FindNeighbors, envs$FindNeighbors)
231
273
 
232
- print("- Running FindClusters ...")
274
+ log_info("Running FindClusters ...")
233
275
  envs$FindClusters$object = obj_list
234
276
  obj_list = do_call(FindClusters, envs$FindClusters)
235
277
 
236
278
  nclusters = length(unique(Idents(obj_list)))
237
- print(paste0("- Identified ", nclusters, " clusters."))
279
+ log_info("Identified {nclusters} clusters.")
238
280
 
239
- print("- Saving results ...")
281
+ log_info("Saving results ...")
240
282
  saveRDS(obj_list, file = rdsfile)
283
+
284
+ if (is.character(envs$cache) && nchar(envs$cache) > 0) {
285
+ log_info("Caching results ...")
286
+ file.copy(rdsfile, cached_file, overwrite = TRUE)
287
+ }
@@ -1,4 +1,6 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
1
2
  source("{{biopipen_dir}}/utils/mutate_helpers.R")
3
+
2
4
  library(rlang)
3
5
  library(tibble)
4
6
  library(dplyr)
@@ -14,7 +16,17 @@ metadata = srt@meta.data
14
16
 
15
17
  if (!is.null(metafile)) {
16
18
  mdata = read.table(metafile, header=TRUE, row.names=1, sep="\t", check.names=FALSE)
17
- metadata = cbind(metadata, mdata[rownames(metadata),,drop=FALSE])
19
+ ov_cols = intersect(colnames(metadata), colnames(mdata))
20
+ if (length(ov_cols) > 0) {
21
+ log_warn(paste0(
22
+ "The following columns are already present in Seurat object and will be ignored: ",
23
+ paste(ov_cols, collapse=', ')
24
+ ))
25
+ }
26
+ metadata = cbind(
27
+ metadata,
28
+ mdata[rownames(metadata), setdiff(colnames(mdata), ov_cols), drop=FALSE]
29
+ )
18
30
  }
19
31
 
20
32
  expr = list()