biopipen-0.23.7-py3-none-any.whl → biopipen-0.24.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.23.7"
1
+ __version__ = "0.24.0"
biopipen/core/proc.py CHANGED
@@ -25,3 +25,10 @@ class Proc(PipenProc):
25
25
  "filters": {**FILTERS, **filtermanager.filters},
26
26
  "search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
27
27
  }
28
+
29
+ plugin_opts = {
30
+ "poplog_pattern": (
31
+ r"^(?P<level>INFO|WARN|WARNING|CRITICAL|ERROR|DEBUG?)\s*"
32
+ r"\[\d+-\d+-\d+ \d+:\d+:\d+\] (?P<message>.*)$"
33
+ )
34
+ }
biopipen/ns/cellranger.py CHANGED
@@ -35,7 +35,7 @@ class CellRangerCount(Proc):
35
35
  {%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
36
36
  {%- endif -%}
37
37
  {%- set sample = commonprefix(*fastqs) |
38
- regex_replace: "_L\\d+_$", "" |
38
+ regex_replace: "_L\\d+_?$", "" |
39
39
  regex_replace: "_S\\d+$", "" -%}
40
40
  {{- sample -}}
41
41
  """
@@ -84,7 +84,7 @@ class CellRangerVdj(Proc):
84
84
  {%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
85
85
  {%- endif -%}
86
86
  {%- set sample = commonprefix(*fastqs) |
87
- regex_replace: "_L\\d+_$", "" |
87
+ regex_replace: "_L\\d+_?$", "" |
88
88
  regex_replace: "_S\\d+$", "" -%}
89
89
  {{- sample -}}
90
90
  """
biopipen/ns/scrna.py CHANGED
@@ -278,18 +278,14 @@ class SeuratClustering(Proc):
278
278
  The results will be saved in `seurat_clusters_<resolution>`.
279
279
  The final resolution will be used to define the clusters at `seurat_clusters`.
280
280
  - <more>: See <https://satijalab.org/seurat/reference/findclusters>
281
- cache (type=auto): Whether to cache the seurat object with cluster information.
281
+ cache (type=auto): Whether to cache the information at different steps.
282
282
  If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
283
- The cached seurat object will be saved as `<signature>.cached.RDS` file, where `<signature>` is the signature determined by
283
+ The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
284
284
  the input and envs of the process.
285
- See -
286
- * <https://github.com/satijalab/seurat/issues/7849>
287
- * <https://github.com/satijalab/seurat/issues/5358> and
288
- * <https://github.com/satijalab/seurat/issues/6748> for more details.
285
+ See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
286
+ <https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
289
287
  To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
290
- `<signature>.cached.RDS` in the cache directory.
291
- If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
292
- You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
288
+ `<signature>.RDS` in the cache directory.
293
289
 
294
290
  Requires:
295
291
  r-seurat:
@@ -309,7 +305,7 @@ class SeuratClustering(Proc):
309
305
  "RunUMAP": {"dims": 30},
310
306
  "FindNeighbors": {},
311
307
  "FindClusters": {"resolution": 0.8},
312
- "cache": False,
308
+ "cache": config.path.tmpdir,
313
309
  }
314
310
  script = "file://../scripts/scrna/SeuratClustering.R"
315
311
 
@@ -361,18 +357,14 @@ class SeuratSubClustering(Proc):
361
357
  The results will be saved in `<casename>_<resolution>`.
362
358
  The final resolution will be used to define the clusters at `<casename>`.
363
359
  - <more>: See <https://satijalab.org/seurat/reference/findclusters>
364
- cache (type=auto): Whether to cache the seurat object with cluster information.
360
+ cache (type=auto): Whether to cache the information at different steps.
365
361
  If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
366
- The cached seurat object will be saved as `<signature>.cached.RDS` file, where `<signature>` is the signature determined by
362
+ The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
367
363
  the input and envs of the process.
368
- See -
369
- * <https://github.com/satijalab/seurat/issues/7849>
370
- * <https://github.com/satijalab/seurat/issues/5358> and
371
- * <https://github.com/satijalab/seurat/issues/6748> for more details.
364
+ See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
365
+ <https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
372
366
  To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
373
- `<signature>.cached.RDS` in the cache directory.
374
- If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
375
- You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
367
+ `<signature>.RDS` in the cache directory.
376
368
  cases (type=json): The cases to perform subclustering.
377
369
  Keys are the names of the cases and values are the dicts inherited from `envs` except `mutaters` and `cache`.
378
370
  If empty, a case with name `subcluster` will be created with default parameters.
@@ -387,7 +379,7 @@ class SeuratSubClustering(Proc):
387
379
  "RunUMAP": {"dims": 30},
388
380
  "FindNeighbors": {},
389
381
  "FindClusters": {"resolution": 0.8},
390
- "cache": False,
382
+ "cache": config.path.tmpdir,
391
383
  "cases": {"subcluster": {}},
392
384
  }
393
385
  script = "file://../scripts/scrna/SeuratSubClustering.R"
@@ -1463,6 +1455,7 @@ class ScFGSEA(Proc):
1463
1455
  ident-1: The first group of cells to compare
1464
1456
  ident-2: The second group of cells to compare, if not provided, the rest of the cells that are not `NA`s in `group-by` column are used for `ident-2`.
1465
1457
  each: The column name in metadata to separate the cells into different subsets to do the analysis.
1458
+ subset: An expression to subset the cells.
1466
1459
  section: The section name for the report. Worked only when `each` is not specified. Otherwise, the section name will be constructed from `each` and its value.
1467
1460
  This allows different cases to be put into the same section in the report.
1468
1461
  gmtfile: The pathways in GMT format, with the gene names/ids in the same format as the seurat object.
@@ -1513,6 +1506,7 @@ class ScFGSEA(Proc):
1513
1506
  "ident-1": None,
1514
1507
  "ident-2": None,
1515
1508
  "each": None,
1509
+ "subset": None,
1516
1510
  "section": "DEFAULT",
1517
1511
  "gmtfile": "",
1518
1512
  "method": "s2n",
@@ -2000,4 +1994,5 @@ class MetaMarkers(Proc):
2000
1994
  plugin_opts = {
2001
1995
  "report": "file://../reports/scrna/MetaMarkers.svelte",
2002
1996
  "report_paging": 8,
1997
+ "poplog_max": 15,
2003
1998
  }
biopipen/ns/tcr.py CHANGED
@@ -563,12 +563,13 @@ class Immunarch(Proc):
563
563
  A Gini coefficient of one (or 100 percents) expresses maximal inequality among values (for example where only one person has all the income).
564
564
  - d50: The D50 index.
565
565
  It is the number of types that are needed to cover 50%% of the total abundance.
566
- - dxx: The Dxx index.
567
- It is the number of types that are needed to cover xx%% of the total abundance.
568
- The percentage should be specified in the `args` argument using `perc` key.
569
566
  - raref: Species richness from the results of sampling through extrapolation.
570
567
  - by: The variables (column names) to group samples.
571
568
  Multiple columns should be separated by `,`.
569
+ - plot_type (choice): The type of the plot, works when `by` is specified.
570
+ Not working for `raref`.
571
+ - box: Boxplot
572
+ - bar: Barplot with error bars
572
573
  - subset: Subset the data before calculating the clonotype volumes.
573
574
  The whole data will be expanded to cell level, and then subsetted.
574
575
  Clone sizes will be re-calculated based on the subsetted data.
@@ -789,9 +790,9 @@ class Immunarch(Proc):
789
790
  },
790
791
  # Diversity
791
792
  "divs": {
792
- "filter": None,
793
793
  "method": "gini",
794
794
  "by": None,
795
+ "plot_type": "bar",
795
796
  "args": {},
796
797
  "order": [],
797
798
  "test": {
@@ -805,8 +806,8 @@ class Immunarch(Proc):
805
806
  "align_y": False,
806
807
  "log": False,
807
808
  "devpars": {
808
- "width": 1000,
809
- "height": 1000,
809
+ "width": 800,
810
+ "height": 800,
810
811
  "res": 100,
811
812
  },
812
813
  "subset": None,
@@ -851,6 +852,7 @@ class Immunarch(Proc):
851
852
  plugin_opts = {
852
853
  "report": "file://../reports/tcr/Immunarch.svelte",
853
854
  "report_paging": 3,
855
+ "poplog_max": 999,
854
856
  }
855
857
 
856
858
 
@@ -14,6 +14,7 @@ group.by <- {{envs["group-by"] | r}} # nolint
14
14
  ident.1 <- {{envs["ident-1"] | r}} # nolint
15
15
  ident.2 <- {{envs["ident-2"] | r}} # nolint
16
16
  each <- {{envs.each | r}} # nolint
17
+ subset <- {{envs.subset | r}} # nolint
17
18
  section <- {{envs.section | r}} # nolint
18
19
  gmtfile <- {{envs.gmtfile | r}} # nolint
19
20
  method <- {{envs.method | r}} # nolint
@@ -43,6 +44,7 @@ expand_cases <- function() {
43
44
  ident.1 = ident.1,
44
45
  ident.2 = ident.2,
45
46
  each = each,
47
+ subset = subset,
46
48
  section = section,
47
49
  gmtfile = gmtfile,
48
50
  method = method,
@@ -63,6 +65,7 @@ expand_cases <- function() {
63
65
  ident.1 = ident.1,
64
66
  ident.2 = ident.2,
65
67
  each = each,
68
+ subset = subset,
66
69
  section = section,
67
70
  gmtfile = gmtfile,
68
71
  method = method,
@@ -136,6 +139,9 @@ do_case <- function(name, case) {
136
139
  # prepare expression matrix
137
140
  log_info(" Preparing expression matrix...")
138
141
  sobj <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
142
+ if (!is.null(case$subset)) {
143
+ sobj <- sobj %>% filter(!!!parse_exprs(case$subset))
144
+ }
139
145
  if (!is.null(case$ident.2)) {
140
146
  sobj <- sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2))
141
147
  }
@@ -1,4 +1,5 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/caching.R")
2
3
 
3
4
  library(Seurat)
4
5
  library(future)
@@ -35,80 +36,100 @@ envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
35
36
  log_info("Reading Seurat object ...")
36
37
  sobj <- readRDS(srtfile)
37
38
 
38
- if (isTRUE(envs$cache)) {
39
- envs$cache <- joboutdir
39
+ if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
40
+ if (length(envs$cache) > 1) {
41
+ log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
42
+ envs$cache <- envs$cache[1]
40
43
  }
41
-
42
- if (is.character(envs$cache) && nchar(envs$cache) > 0) {
43
- log_info("Obtainning the signature ...")
44
- envs2 <- envs
45
- envs2$ncores <- NULL
46
- sig <- c(
47
- capture.output(str(sobj)),
48
- "\n\n-------------------\n\n",
49
- capture.output(str(envs2)),
50
- "\n"
51
- )
52
- digested_sig <- digest::digest(sig, algo = "md5")
53
- cached_file <- file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
54
- if (file.exists(cached_file)) {
55
- log_info("Using cached results {cached_file}")
56
- # copy cached file to rdsfile
57
- file.copy(cached_file, rdsfile, copy.date = TRUE)
58
- quit()
59
- } else {
60
- log_info("Cached results not found, logging the current and cached signatures.")
61
- log_info("- Current signature: {digested_sig}")
62
- # print(sig)
63
- # sigfiles <- Sys.glob(file.path(envs$cache, "*.signature.txt"))
64
- # for (sigfile in sigfiles) {
65
- # log_info("- Found cached signature file: {sigfile}")
66
- # cached_sig <- readLines(sigfile)
67
- # log_info("- Cached signature:")
68
- # print(cached_sig)
69
- # }
70
- writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
71
- }
44
+ sobj_sig <- capture.output(str(sobj))
45
+ dig_sig <- digest::digest(sobj_sig, algo = "md5")
46
+ dig_sig <- substr(dig_sig, 1, 8)
47
+ cache_dir <- NULL
48
+ if (is.character(envs$cache)) {
49
+ cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
50
+ dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
51
+ writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
72
52
  }
73
53
 
74
54
  if (length(envs$ScaleData) > 0) {
75
55
  if (DefaultAssay(sobj) == "SCT") {
76
56
  stop("SCT assay detected, but ScaleData is specified. Use SCTransform instead.")
77
57
  }
78
- log_info("Running ScaleData ...")
79
- envs$ScaleData$object <- sobj
80
- sobj <- do_call(ScaleData, envs$ScaleData)
58
+ cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
59
+ if (is.null(cached$data)) {
60
+ log_info("Running ScaleData ...")
61
+ envs$ScaleData$object <- sobj
62
+ sobj <- do_call(ScaleData, envs$ScaleData)
63
+ cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
64
+ save_to_cache(cached, "ScaleData", cache_dir)
65
+ } else {
66
+ log_info("Loading cached ScaleData ...")
67
+ sobj@assays$RNA <- cached$data$assay
68
+ sobj@commands <- cached$data$commands
69
+ DefaultAssay(sobj) <- "RNA"
70
+ }
81
71
  } else if (length(envs$SCTransform) > 0) {
82
72
  if (DefaultAssay(sobj) != "SCT") {
83
73
  stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
84
74
  }
85
- log_info("Running SCTransform ...")
86
- envs$SCTransform$object <- sobj
87
- sobj <- do_call(SCTransform, envs$SCTransform)
75
+ cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
76
+ asssay <- envs$SCTransform$new.assay.name %||% "SCT"
77
+ if (is.null(cached$data)) {
78
+ log_info("Running SCTransform ...")
79
+ envs$SCTransform$object <- sobj
80
+ sobj <- do_call(SCTransform, envs$SCTransform)
81
+ cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
82
+ save_to_cache(cached, "SCTransform", cache_dir)
83
+ } else {
84
+ log_info("Loading cached SCTransform ...")
85
+ sobj@assays[[assay]] <- cached$data$assay
86
+ sobj@commands <- cached$data$commands
87
+ DefaultAssay(sobj) <- assay
88
+ }
88
89
  }
89
90
 
90
- log_info("Running RunUMAP ...")
91
- umap_args <- list_setdefault(
92
- envs$RunUMAP,
93
- object = sobj,
94
- dims = 1:30,
95
- reduction = sobj@misc$integrated_new_reduction %||% "pca"
96
- )
97
- umap_args$dims <- 1:min(max(umap_args$dims), ncol(sobj) - 1)
98
- sobj <- do_call(RunUMAP, umap_args)
99
-
100
- log_info("Running FindNeighbors ...")
101
- envs$FindNeighbors$object <- sobj
102
- if (is.null(envs$FindNeighbors$reduction)) {
103
- envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
91
+ cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
92
+ reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
93
+ if (is.null(cached$data)) {
94
+ log_info("Running RunUMAP ...")
95
+ umap_args <- list_setdefault(
96
+ envs$RunUMAP,
97
+ object = sobj,
98
+ dims = 1:30,
99
+ reduction = sobj@misc$integrated_new_reduction %||% "pca"
100
+ )
101
+ ncells <- ncol(sobj)
102
+ umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
103
+ umap_method <- envs$RunUMAP$umap.method %||% "uwot"
104
+ if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
105
+ # https://github.com/satijalab/seurat/issues/4312
106
+ umap_args$n.neighbors <- min(ncells - 1, 30)
107
+ }
108
+ sobj <- do_call(RunUMAP, umap_args)
109
+ cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
110
+ save_to_cache(cached, "RunUMAP", cache_dir)
111
+ } else {
112
+ log_info("Loading cached RunUMAP ...")
113
+ sobj@reductions[[reduc_name]] <- cached$data$reduc
114
+ sobj@commands <- cached$data$commands
104
115
  }
105
- sobj <- do_call(FindNeighbors, envs$FindNeighbors)
106
116
 
107
- log_info("Running FindClusters ...")
108
- if (is.null(envs$FindClusters$random.seed)) {
109
- envs$FindClusters$random.seed <- 8525
117
+ cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
118
+ if (is.null(cached$data)) {
119
+ log_info("Running FindNeighbors ...")
120
+ envs$FindNeighbors$object <- sobj
121
+ envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
122
+ sobj <- do_call(FindNeighbors, envs$FindNeighbors)
123
+ cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
124
+ save_to_cache(cached, "FindNeighbors", cache_dir)
125
+ } else {
126
+ log_info("Loading cached FindNeighbors ...")
127
+ sobj@graphs <- cached$data$graphs
128
+ sobj@commands <- cached$data$commands
110
129
  }
111
- resolution <- envs$FindClusters$resolution
130
+
131
+ envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
132
+ resolution <- envs$FindClusters$resolution %||% 0.8
112
133
  if (is.character(resolution)) {
113
134
  if (grepl(",", resolution)) {
114
135
  resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
@@ -116,42 +137,38 @@ if (is.character(resolution)) {
116
137
  resolution <- as.numeric(resolution)
117
138
  }
118
139
  }
119
- if (is.null(resolution) || length(resolution) == 1) {
120
- envs$FindClusters$resolution <- resolution
121
- envs$FindClusters$object <- sobj
122
- sobj <- do_call(FindClusters, envs$FindClusters)
123
- levels(sobj$seurat_clusters) <- paste0("c", as.numeric(levels(sobj$seurat_clusters)) + 1)
124
- Idents(sobj) <- "seurat_clusters"
125
- ident_table <- table(sobj$seurat_clusters)
126
- log_info("- Found {length(ident_table)} clusters:")
127
- print(ident_table)
128
- } else {
129
- log_info("- Multiple resolutions detected ...")
130
- res_key <- NULL
131
- for (res in resolution) {
132
- findclusters_args <- envs$FindClusters
133
- findclusters_args$resolution <- res
134
- findclusters_args$object <- sobj
135
- sobj <- do_call(FindClusters, findclusters_args)
140
+
141
+ for (res in resolution) {
142
+ envs$FindClusters$resolution <- res
143
+ cached <- get_cached(envs$FindClusters, paste0("FindClusters_", res), cache_dir)
144
+ res_key <- paste0("seurat_clusters_", res)
145
+ if (is.null(cached$data)) {
146
+ log_info("Running FindClusters at resolution: {res} ...")
147
+ envs$FindClusters$object <- sobj
148
+ sobj <- do_call(FindClusters, envs$FindClusters)
136
149
  levels(sobj$seurat_clusters) <- paste0("c", as.numeric(levels(sobj$seurat_clusters)) + 1)
137
- res_key <- paste0("seurat_clusters_", res)
138
150
  sobj[[res_key]] <- sobj$seurat_clusters
139
- ident_table <- table(sobj[[res_key]])
140
- log_info("- Found {length(ident_table)} at resolution: {res}:")
141
- print(ident_table)
151
+ Idents(sobj) <- "seurat_clusters"
152
+ cached$data <- list(clusters = sobj$seurat_clusters, commands = sobj@commands)
153
+ save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
154
+ } else {
155
+ log_info("Loading cached FindClusters at resolution: {res} ...")
156
+ sobj@commands <- cached$data$commands
157
+ sobj[[res_key]] <- cached$data$clusters
158
+ sobj$seurat_clusters <- cached$data$clusters
159
+ Idents(sobj) <- "seurat_clusters"
142
160
  }
161
+ ident_table <- table(Idents(sobj))
162
+ log_info("- Found {length(ident_table)} clusters")
163
+ print(ident_table)
164
+ cat("\n")
143
165
  }
144
166
 
145
167
  if (DefaultAssay(sobj) == "SCT") {
146
- # https://github.com/satijalab/seurat/issues/6968
168
+ # https://github.com/satijalab/seurat/issues/6968
147
169
  log_info("Running PrepSCTFindMarkers ...")
148
170
  sobj <- PrepSCTFindMarkers(sobj)
149
171
  }
150
172
 
151
173
  log_info("Saving results ...")
152
174
  saveRDS(sobj, file = rdsfile)
153
-
154
- if (is.character(envs$cache) && nchar(envs$cache) > 0) {
155
- log_info("Caching results ...")
156
- file.copy(rdsfile, cached_file, overwrite = TRUE)
157
- }
@@ -99,8 +99,8 @@ load_sample = function(sample) {
99
99
  }
100
100
  obj <- CreateSeuratObject(exprs, project=sample)
101
101
  # filter the cells that don't have any gene expressions
102
- cell_exprs = colSums(obj@assays$RNA)
103
- obj = subset(obj, cells = names(cell_exprs[cell_exprs > 0]))
102
+ # cell_exprs = colSums(obj@assays$RNA)
103
+ # obj = subset(obj, cells = names(cell_exprs[cell_exprs > 0]))
104
104
  obj = RenameCells(obj, add.cell.id = sample)
105
105
  # Attach meta data
106
106
  for (mname in names(mdata)) {
@@ -128,13 +128,7 @@ log_info("Reading samples individually ...")
128
128
  obj_list = lapply(samples, load_sample)
129
129
 
130
130
  log_info("Merging samples ...")
131
- if (length(obj_list) >= 2) {
132
- y = c()
133
- for (i in 2:length(obj_list)) y = c(y, obj_list[[i]])
134
- sobj = merge(obj_list[[1]], y)
135
- } else {
136
- sobj = obj_list[[1]]
137
- }
131
+ sobj = Reduce(merge, obj_list)
138
132
 
139
133
  log_info("Adding metadata for QC ...")
140
134
  sobj$percent.mt = PercentageFeatureSet(sobj, pattern = "^MT-")
@@ -297,28 +291,41 @@ add_report(
297
291
  h1 = "Filters and QC"
298
292
  )
299
293
 
294
+ .formatArgs <- function(args) {
295
+ paste(capture.output(str(args)), collapse = ", ")
296
+ }
297
+
300
298
  log_info("Performing transformation/scaling ...")
301
299
  # Not joined yet
302
300
  # sobj[["RNA"]] <- split(sobj[["RNA"]], f = sobj$Sample)
303
301
  if (envs$use_sct) {
304
302
  log_info("- Running SCTransform ...")
305
303
  SCTransformArgs <- envs$SCTransform
304
+ # log to stdout but don't populate it to running log
305
+ print(" SCTransform: {.formatArgs(SCTransformArgs)}")
306
+ log_debug(" SCTransform: {.formatArgs(SCTransformArgs)}")
306
307
  SCTransformArgs$object <- sobj
307
308
  sobj <- do_call(SCTransform, SCTransformArgs)
308
309
  # Default is to use the SCT assay
309
310
  } else {
310
311
  log_info("- Running NormalizeData ...")
311
312
  NormalizeDataArgs <- envs$NormalizeData
313
+ print(" NormalizeData: {.formatArgs(NormalizeDataArgs)}")
314
+ log_debug(" NormalizeData: {.formatArgs(NormalizeDataArgs)}")
312
315
  NormalizeDataArgs$object <- sobj
313
316
  sobj <- do_call(NormalizeData, NormalizeDataArgs)
314
317
 
315
318
  log_info("- Running FindVariableFeatures ...")
316
319
  FindVariableFeaturesArgs <- envs$FindVariableFeatures
320
+ print(" FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
321
+ log_debug(" FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
317
322
  FindVariableFeaturesArgs$object <- sobj
318
323
  sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
319
324
 
320
325
  log_info("- Running ScaleData ...")
321
326
  ScaleDataArgs <- envs$ScaleData
327
+ print(" ScaleData: {.formatArgs(ScaleDataArgs)}")
328
+ log_debug(" ScaleData: {.formatArgs(ScaleDataArgs)}")
322
329
  ScaleDataArgs$object <- sobj
323
330
  sobj <- do_call(ScaleData, ScaleDataArgs)
324
331
  }
@@ -326,13 +333,14 @@ if (envs$use_sct) {
326
333
  log_info("- Running RunPCA ...")
327
334
  RunPCAArgs <- envs$RunPCA
328
335
  RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
336
+ print(" RunPCA: {.formatArgs(RunPCAArgs)}")
337
+ log_debug(" RunPCA: {.formatArgs(RunPCAArgs)}")
329
338
  RunPCAArgs$object <- sobj
330
339
  sobj <- do_call(RunPCA, RunPCAArgs)
331
340
 
332
341
  if (!envs$no_integration) {
333
342
  log_info("- Running IntegrateLayers ...")
334
343
  IntegrateLayersArgs <- envs$IntegrateLayers
335
- IntegrateLayersArgs$object <- sobj
336
344
  method <- IntegrateLayersArgs$method
337
345
  if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
338
346
  log_info(" Using reference samples: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
@@ -359,6 +367,9 @@ if (!envs$no_integration) {
359
367
  if (is.null(IntegrateLayersArgs$new.reduction)) {
360
368
  IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
361
369
  }
370
+ print(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
371
+ log_debug(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
372
+ IntegrateLayersArgs$object <- sobj
362
373
  sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
363
374
  # Save it for dimension reduction plots
364
375
  sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction