biopipen 0.23.8__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/proc.py +7 -0
- biopipen/ns/cellranger.py +2 -2
- biopipen/ns/scrna.py +13 -20
- biopipen/ns/tcr.py +8 -6
- biopipen/scripts/scrna/SeuratClustering.R +102 -85
- biopipen/scripts/scrna/SeuratPreparing.R +13 -6
- biopipen/scripts/scrna/SeuratSubClustering.R +81 -97
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +5 -5
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/tcr/Immunarch-basic.R +2 -3
- biopipen/scripts/tcr/Immunarch-clonality.R +2 -3
- biopipen/scripts/tcr/Immunarch-diversity.R +70 -49
- biopipen/scripts/tcr/Immunarch-geneusage.R +2 -3
- biopipen/scripts/tcr/Immunarch-kmer.R +2 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +2 -3
- biopipen/scripts/tcr/Immunarch-spectratyping.R +2 -3
- biopipen/scripts/tcr/Immunarch-tracking.R +2 -3
- biopipen/scripts/tcr/Immunarch-vjjunc.R +2 -3
- biopipen/scripts/tcr/Immunarch.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -0
- biopipen/scripts/tcr/TCRClustering.R +6 -2
- biopipen/scripts/tcr/TESSA.R +3 -1
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/utils/caching.R +44 -0
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/METADATA +8 -7
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/RECORD +29 -27
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/WHEEL +0 -0
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.23.8"
|
|
1
|
+
__version__ = "0.24.0"
|
biopipen/core/proc.py
CHANGED
|
@@ -25,3 +25,10 @@ class Proc(PipenProc):
|
|
|
25
25
|
"filters": {**FILTERS, **filtermanager.filters},
|
|
26
26
|
"search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
|
|
27
27
|
}
|
|
28
|
+
|
|
29
|
+
plugin_opts = {
|
|
30
|
+
"poplog_pattern": (
|
|
31
|
+
r"^(?P<level>INFO|WARN|WARNING|CRITICAL|ERROR|DEBUG?)\s*"
|
|
32
|
+
r"\[\d+-\d+-\d+ \d+:\d+:\d+\] (?P<message>.*)$"
|
|
33
|
+
)
|
|
34
|
+
}
|
biopipen/ns/cellranger.py
CHANGED
|
@@ -35,7 +35,7 @@ class CellRangerCount(Proc):
|
|
|
35
35
|
{%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
|
|
36
36
|
{%- endif -%}
|
|
37
37
|
{%- set sample = commonprefix(*fastqs) |
|
|
38
|
-
regex_replace: "_L\\d+_
|
|
38
|
+
regex_replace: "_L\\d+_?$", "" |
|
|
39
39
|
regex_replace: "_S\\d+$", "" -%}
|
|
40
40
|
{{- sample -}}
|
|
41
41
|
"""
|
|
@@ -84,7 +84,7 @@ class CellRangerVdj(Proc):
|
|
|
84
84
|
{%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
|
|
85
85
|
{%- endif -%}
|
|
86
86
|
{%- set sample = commonprefix(*fastqs) |
|
|
87
|
-
regex_replace: "_L\\d+_
|
|
87
|
+
regex_replace: "_L\\d+_?$", "" |
|
|
88
88
|
regex_replace: "_S\\d+$", "" -%}
|
|
89
89
|
{{- sample -}}
|
|
90
90
|
"""
|
biopipen/ns/scrna.py
CHANGED
|
@@ -278,18 +278,14 @@ class SeuratClustering(Proc):
|
|
|
278
278
|
The results will be saved in `seurat_clusters_<resolution>`.
|
|
279
279
|
The final resolution will be used to define the clusters at `seurat_clusters`.
|
|
280
280
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
281
|
-
cache (type=auto): Whether to cache the
|
|
281
|
+
cache (type=auto): Whether to cache the information at different steps.
|
|
282
282
|
If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
|
|
283
|
-
The cached seurat object will be saved as `<signature>.
|
|
283
|
+
The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
|
|
284
284
|
the input and envs of the process.
|
|
285
|
-
See
|
|
286
|
-
|
|
287
|
-
* <https://github.com/satijalab/seurat/issues/5358> and
|
|
288
|
-
* <https://github.com/satijalab/seurat/issues/6748> for more details.
|
|
285
|
+
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
286
|
+
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
289
287
|
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
290
|
-
`<signature>.
|
|
291
|
-
If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
|
|
292
|
-
You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
|
|
288
|
+
`<signature>.RDS` in the cache directory.
|
|
293
289
|
|
|
294
290
|
Requires:
|
|
295
291
|
r-seurat:
|
|
@@ -309,7 +305,7 @@ class SeuratClustering(Proc):
|
|
|
309
305
|
"RunUMAP": {"dims": 30},
|
|
310
306
|
"FindNeighbors": {},
|
|
311
307
|
"FindClusters": {"resolution": 0.8},
|
|
312
|
-
"cache":
|
|
308
|
+
"cache": config.path.tmpdir,
|
|
313
309
|
}
|
|
314
310
|
script = "file://../scripts/scrna/SeuratClustering.R"
|
|
315
311
|
|
|
@@ -361,18 +357,14 @@ class SeuratSubClustering(Proc):
|
|
|
361
357
|
The results will be saved in `<casename>_<resolution>`.
|
|
362
358
|
The final resolution will be used to define the clusters at `<casename>`.
|
|
363
359
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
364
|
-
cache (type=auto): Whether to cache the
|
|
360
|
+
cache (type=auto): Whether to cache the information at different steps.
|
|
365
361
|
If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
|
|
366
|
-
The cached seurat object will be saved as `<signature>.
|
|
362
|
+
The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
|
|
367
363
|
the input and envs of the process.
|
|
368
|
-
See
|
|
369
|
-
|
|
370
|
-
* <https://github.com/satijalab/seurat/issues/5358> and
|
|
371
|
-
* <https://github.com/satijalab/seurat/issues/6748> for more details.
|
|
364
|
+
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
365
|
+
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
372
366
|
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
373
|
-
`<signature>.
|
|
374
|
-
If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
|
|
375
|
-
You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
|
|
367
|
+
`<signature>.RDS` in the cache directory.
|
|
376
368
|
cases (type=json): The cases to perform subclustering.
|
|
377
369
|
Keys are the names of the cases and values are the dicts inherited from `envs` except `mutaters` and `cache`.
|
|
378
370
|
If empty, a case with name `subcluster` will be created with default parameters.
|
|
@@ -387,7 +379,7 @@ class SeuratSubClustering(Proc):
|
|
|
387
379
|
"RunUMAP": {"dims": 30},
|
|
388
380
|
"FindNeighbors": {},
|
|
389
381
|
"FindClusters": {"resolution": 0.8},
|
|
390
|
-
"cache":
|
|
382
|
+
"cache": config.path.tmpdir,
|
|
391
383
|
"cases": {"subcluster": {}},
|
|
392
384
|
}
|
|
393
385
|
script = "file://../scripts/scrna/SeuratSubClustering.R"
|
|
@@ -2002,4 +1994,5 @@ class MetaMarkers(Proc):
|
|
|
2002
1994
|
plugin_opts = {
|
|
2003
1995
|
"report": "file://../reports/scrna/MetaMarkers.svelte",
|
|
2004
1996
|
"report_paging": 8,
|
|
1997
|
+
"poplog_max": 15,
|
|
2005
1998
|
}
|
biopipen/ns/tcr.py
CHANGED
|
@@ -563,12 +563,13 @@ class Immunarch(Proc):
|
|
|
563
563
|
A Gini coefficient of one (or 100 percents) expresses maximal inequality among values (for example where only one person has all the income).
|
|
564
564
|
- d50: The D50 index.
|
|
565
565
|
It is the number of types that are needed to cover 50%% of the total abundance.
|
|
566
|
-
- dxx: The Dxx index.
|
|
567
|
-
It is the number of types that are needed to cover xx%% of the total abundance.
|
|
568
|
-
The percentage should be specified in the `args` argument using `perc` key.
|
|
569
566
|
- raref: Species richness from the results of sampling through extrapolation.
|
|
570
567
|
- by: The variables (column names) to group samples.
|
|
571
568
|
Multiple columns should be separated by `,`.
|
|
569
|
+
- plot_type (choice): The type of the plot, works when `by` is specified.
|
|
570
|
+
Not working for `raref`.
|
|
571
|
+
- box: Boxplot
|
|
572
|
+
- bar: Barplot with error bars
|
|
572
573
|
- subset: Subset the data before calculating the clonotype volumes.
|
|
573
574
|
The whole data will be expanded to cell level, and then subsetted.
|
|
574
575
|
Clone sizes will be re-calculated based on the subsetted data.
|
|
@@ -789,9 +790,9 @@ class Immunarch(Proc):
|
|
|
789
790
|
},
|
|
790
791
|
# Diversity
|
|
791
792
|
"divs": {
|
|
792
|
-
"filter": None,
|
|
793
793
|
"method": "gini",
|
|
794
794
|
"by": None,
|
|
795
|
+
"plot_type": "bar",
|
|
795
796
|
"args": {},
|
|
796
797
|
"order": [],
|
|
797
798
|
"test": {
|
|
@@ -805,8 +806,8 @@ class Immunarch(Proc):
|
|
|
805
806
|
"align_y": False,
|
|
806
807
|
"log": False,
|
|
807
808
|
"devpars": {
|
|
808
|
-
"width":
|
|
809
|
-
"height":
|
|
809
|
+
"width": 800,
|
|
810
|
+
"height": 800,
|
|
810
811
|
"res": 100,
|
|
811
812
|
},
|
|
812
813
|
"subset": None,
|
|
@@ -851,6 +852,7 @@ class Immunarch(Proc):
|
|
|
851
852
|
plugin_opts = {
|
|
852
853
|
"report": "file://../reports/tcr/Immunarch.svelte",
|
|
853
854
|
"report_paging": 3,
|
|
855
|
+
"poplog_max": 999,
|
|
854
856
|
}
|
|
855
857
|
|
|
856
858
|
|
|
biopipen/scripts/scrna/SeuratClustering.R
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/caching.R")
|
|
2
3
|
|
|
3
4
|
library(Seurat)
|
|
4
5
|
library(future)
|
|
@@ -35,80 +36,100 @@ envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
|
|
|
35
36
|
log_info("Reading Seurat object ...")
|
|
36
37
|
sobj <- readRDS(srtfile)
|
|
37
38
|
|
|
38
|
-
if (isTRUE(envs$cache)) {
|
|
39
|
-
|
|
39
|
+
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
40
|
+
if (length(envs$cache) > 1) {
|
|
41
|
+
log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
|
|
42
|
+
envs$cache <- envs$cache[1]
|
|
40
43
|
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
capture.output(str(envs2)),
|
|
50
|
-
"\n"
|
|
51
|
-
)
|
|
52
|
-
digested_sig <- digest::digest(sig, algo = "md5")
|
|
53
|
-
cached_file <- file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
|
|
54
|
-
if (file.exists(cached_file)) {
|
|
55
|
-
log_info("Using cached results {cached_file}")
|
|
56
|
-
# copy cached file to rdsfile
|
|
57
|
-
file.copy(cached_file, rdsfile, copy.date = TRUE)
|
|
58
|
-
quit()
|
|
59
|
-
} else {
|
|
60
|
-
log_info("Cached results not found, logging the current and cached signatures.")
|
|
61
|
-
log_info("- Current signature: {digested_sig}")
|
|
62
|
-
# print(sig)
|
|
63
|
-
# sigfiles <- Sys.glob(file.path(envs$cache, "*.signature.txt"))
|
|
64
|
-
# for (sigfile in sigfiles) {
|
|
65
|
-
# log_info("- Found cached signature file: {sigfile}")
|
|
66
|
-
# cached_sig <- readLines(sigfile)
|
|
67
|
-
# log_info("- Cached signature:")
|
|
68
|
-
# print(cached_sig)
|
|
69
|
-
# }
|
|
70
|
-
writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
|
|
71
|
-
}
|
|
44
|
+
sobj_sig <- capture.output(str(sobj))
|
|
45
|
+
dig_sig <- digest::digest(sobj_sig, algo = "md5")
|
|
46
|
+
dig_sig <- substr(dig_sig, 1, 8)
|
|
47
|
+
cache_dir <- NULL
|
|
48
|
+
if (is.character(envs$cache)) {
|
|
49
|
+
cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
|
|
50
|
+
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
|
|
51
|
+
writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
|
|
72
52
|
}
|
|
73
53
|
|
|
74
54
|
if (length(envs$ScaleData) > 0) {
|
|
75
55
|
if (DefaultAssay(sobj) == "SCT") {
|
|
76
56
|
stop("SCT assay detected, but ScaleData is specified. Use SCTransform instead.")
|
|
77
57
|
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
58
|
+
cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
|
|
59
|
+
if (is.null(cached$data)) {
|
|
60
|
+
log_info("Running ScaleData ...")
|
|
61
|
+
envs$ScaleData$object <- sobj
|
|
62
|
+
sobj <- do_call(ScaleData, envs$ScaleData)
|
|
63
|
+
cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
|
|
64
|
+
save_to_cache(cached, "ScaleData", cache_dir)
|
|
65
|
+
} else {
|
|
66
|
+
log_info("Loading cached ScaleData ...")
|
|
67
|
+
sobj@assays$RNA <- cached$data$assay
|
|
68
|
+
sobj@commands <- cached$data$commands
|
|
69
|
+
DefaultAssay(sobj) <- "RNA"
|
|
70
|
+
}
|
|
81
71
|
} else if (length(envs$SCTransform) > 0) {
|
|
82
72
|
if (DefaultAssay(sobj) != "SCT") {
|
|
83
73
|
stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
|
|
84
74
|
}
|
|
85
|
-
|
|
86
|
-
envs$SCTransform$
|
|
87
|
-
|
|
75
|
+
cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
|
|
76
|
+
asssay <- envs$SCTransform$new.assay.name %||% "SCT"
|
|
77
|
+
if (is.null(cached$data)) {
|
|
78
|
+
log_info("Running SCTransform ...")
|
|
79
|
+
envs$SCTransform$object <- sobj
|
|
80
|
+
sobj <- do_call(SCTransform, envs$SCTransform)
|
|
81
|
+
cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
|
|
82
|
+
save_to_cache(cached, "SCTransform", cache_dir)
|
|
83
|
+
} else {
|
|
84
|
+
log_info("Loading cached SCTransform ...")
|
|
85
|
+
sobj@assays[[assay]] <- cached$data$assay
|
|
86
|
+
sobj@commands <- cached$data$commands
|
|
87
|
+
DefaultAssay(sobj) <- assay
|
|
88
|
+
}
|
|
88
89
|
}
|
|
89
90
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
sobj
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
91
|
+
cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
|
|
92
|
+
reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
|
|
93
|
+
if (is.null(cached$data)) {
|
|
94
|
+
log_info("Running RunUMAP ...")
|
|
95
|
+
umap_args <- list_setdefault(
|
|
96
|
+
envs$RunUMAP,
|
|
97
|
+
object = sobj,
|
|
98
|
+
dims = 1:30,
|
|
99
|
+
reduction = sobj@misc$integrated_new_reduction %||% "pca"
|
|
100
|
+
)
|
|
101
|
+
ncells <- ncol(sobj)
|
|
102
|
+
umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
|
|
103
|
+
umap_method <- envs$RunUMAP$umap.method %||% "uwot"
|
|
104
|
+
if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
|
|
105
|
+
# https://github.com/satijalab/seurat/issues/4312
|
|
106
|
+
umap_args$n.neighbors <- min(ncells - 1, 30)
|
|
107
|
+
}
|
|
108
|
+
sobj <- do_call(RunUMAP, umap_args)
|
|
109
|
+
cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
|
|
110
|
+
save_to_cache(cached, "RunUMAP", cache_dir)
|
|
111
|
+
} else {
|
|
112
|
+
log_info("Loading cached RunUMAP ...")
|
|
113
|
+
sobj@reductions[[reduc_name]] <- cached$data$reduc
|
|
114
|
+
sobj@commands <- cached$data$commands
|
|
104
115
|
}
|
|
105
|
-
sobj <- do_call(FindNeighbors, envs$FindNeighbors)
|
|
106
116
|
|
|
107
|
-
|
|
108
|
-
if (is.null(
|
|
109
|
-
|
|
117
|
+
cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
|
|
118
|
+
if (is.null(cached$data)) {
|
|
119
|
+
log_info("Running FindNeighbors ...")
|
|
120
|
+
envs$FindNeighbors$object <- sobj
|
|
121
|
+
envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
122
|
+
sobj <- do_call(FindNeighbors, envs$FindNeighbors)
|
|
123
|
+
cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
|
|
124
|
+
save_to_cache(cached, "FindNeighbors", cache_dir)
|
|
125
|
+
} else {
|
|
126
|
+
log_info("Loading cached FindNeighbors ...")
|
|
127
|
+
sobj@graphs <- cached$data$graphs
|
|
128
|
+
sobj@commands <- cached$data$commands
|
|
110
129
|
}
|
|
111
|
-
|
|
130
|
+
|
|
131
|
+
envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
|
|
132
|
+
resolution <- envs$FindClusters$resolution %||% 0.8
|
|
112
133
|
if (is.character(resolution)) {
|
|
113
134
|
if (grepl(",", resolution)) {
|
|
114
135
|
resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
|
|
@@ -116,42 +137,38 @@ if (is.character(resolution)) {
|
|
|
116
137
|
resolution <- as.numeric(resolution)
|
|
117
138
|
}
|
|
118
139
|
}
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
envs$FindClusters$
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
} else {
|
|
129
|
-
log_info("- Multiple resolutions detected ...")
|
|
130
|
-
res_key <- NULL
|
|
131
|
-
for (res in resolution) {
|
|
132
|
-
findclusters_args <- envs$FindClusters
|
|
133
|
-
findclusters_args$resolution <- res
|
|
134
|
-
findclusters_args$object <- sobj
|
|
135
|
-
sobj <- do_call(FindClusters, findclusters_args)
|
|
140
|
+
|
|
141
|
+
for (res in resolution) {
|
|
142
|
+
envs$FindClusters$resolution <- res
|
|
143
|
+
cached <- get_cached(envs$FindClusters, paste0("FindClusters_", res), cache_dir)
|
|
144
|
+
res_key <- paste0("seurat_clusters_", res)
|
|
145
|
+
if (is.null(cached$data)) {
|
|
146
|
+
log_info("Running FindClusters at resolution: {res} ...")
|
|
147
|
+
envs$FindClusters$object <- sobj
|
|
148
|
+
sobj <- do_call(FindClusters, envs$FindClusters)
|
|
136
149
|
levels(sobj$seurat_clusters) <- paste0("c", as.numeric(levels(sobj$seurat_clusters)) + 1)
|
|
137
|
-
res_key <- paste0("seurat_clusters_", res)
|
|
138
150
|
sobj[[res_key]] <- sobj$seurat_clusters
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
151
|
+
Idents(sobj) <- "seurat_clusters"
|
|
152
|
+
cached$data <- list(clusters = sobj$seurat_clusters, commands = sobj@commands)
|
|
153
|
+
save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
|
|
154
|
+
} else {
|
|
155
|
+
log_info("Loading cached FindClusters at resolution: {res} ...")
|
|
156
|
+
sobj@commands <- cached$data$commands
|
|
157
|
+
sobj[[res_key]] <- cached$data$clusters
|
|
158
|
+
sobj$seurat_clusters <- cached$data$clusters
|
|
159
|
+
Idents(sobj) <- "seurat_clusters"
|
|
142
160
|
}
|
|
161
|
+
ident_table <- table(Idents(sobj))
|
|
162
|
+
log_info("- Found {length(ident_table)} clusters")
|
|
163
|
+
print(ident_table)
|
|
164
|
+
cat("\n")
|
|
143
165
|
}
|
|
144
166
|
|
|
145
167
|
if (DefaultAssay(sobj) == "SCT") {
|
|
146
|
-
|
|
168
|
+
# https://github.com/satijalab/seurat/issues/6968
|
|
147
169
|
log_info("Running PrepSCTFindMarkers ...")
|
|
148
170
|
sobj <- PrepSCTFindMarkers(sobj)
|
|
149
171
|
}
|
|
150
172
|
|
|
151
173
|
log_info("Saving results ...")
|
|
152
174
|
saveRDS(sobj, file = rdsfile)
|
|
153
|
-
|
|
154
|
-
if (is.character(envs$cache) && nchar(envs$cache) > 0) {
|
|
155
|
-
log_info("Caching results ...")
|
|
156
|
-
file.copy(rdsfile, cached_file, overwrite = TRUE)
|
|
157
|
-
}
|
|
biopipen/scripts/scrna/SeuratPreparing.R
CHANGED
|
@@ -301,26 +301,31 @@ log_info("Performing transformation/scaling ...")
|
|
|
301
301
|
if (envs$use_sct) {
|
|
302
302
|
log_info("- Running SCTransform ...")
|
|
303
303
|
SCTransformArgs <- envs$SCTransform
|
|
304
|
-
|
|
304
|
+
# log to stdout but don't populate it to running log
|
|
305
|
+
print(" SCTransform: {.formatArgs(SCTransformArgs)}")
|
|
306
|
+
log_debug(" SCTransform: {.formatArgs(SCTransformArgs)}")
|
|
305
307
|
SCTransformArgs$object <- sobj
|
|
306
308
|
sobj <- do_call(SCTransform, SCTransformArgs)
|
|
307
309
|
# Default is to use the SCT assay
|
|
308
310
|
} else {
|
|
309
311
|
log_info("- Running NormalizeData ...")
|
|
310
312
|
NormalizeDataArgs <- envs$NormalizeData
|
|
311
|
-
|
|
313
|
+
print(" NormalizeData: {.formatArgs(NormalizeDataArgs)}")
|
|
314
|
+
log_debug(" NormalizeData: {.formatArgs(NormalizeDataArgs)}")
|
|
312
315
|
NormalizeDataArgs$object <- sobj
|
|
313
316
|
sobj <- do_call(NormalizeData, NormalizeDataArgs)
|
|
314
317
|
|
|
315
318
|
log_info("- Running FindVariableFeatures ...")
|
|
316
319
|
FindVariableFeaturesArgs <- envs$FindVariableFeatures
|
|
317
|
-
|
|
320
|
+
print(" FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
|
|
321
|
+
log_debug(" FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
|
|
318
322
|
FindVariableFeaturesArgs$object <- sobj
|
|
319
323
|
sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
|
|
320
324
|
|
|
321
325
|
log_info("- Running ScaleData ...")
|
|
322
326
|
ScaleDataArgs <- envs$ScaleData
|
|
323
|
-
|
|
327
|
+
print(" ScaleData: {.formatArgs(ScaleDataArgs)}")
|
|
328
|
+
log_debug(" ScaleData: {.formatArgs(ScaleDataArgs)}")
|
|
324
329
|
ScaleDataArgs$object <- sobj
|
|
325
330
|
sobj <- do_call(ScaleData, ScaleDataArgs)
|
|
326
331
|
}
|
|
@@ -328,7 +333,8 @@ if (envs$use_sct) {
|
|
|
328
333
|
log_info("- Running RunPCA ...")
|
|
329
334
|
RunPCAArgs <- envs$RunPCA
|
|
330
335
|
RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
|
|
331
|
-
|
|
336
|
+
print(" RunPCA: {.formatArgs(RunPCAArgs)}")
|
|
337
|
+
log_debug(" RunPCA: {.formatArgs(RunPCAArgs)}")
|
|
332
338
|
RunPCAArgs$object <- sobj
|
|
333
339
|
sobj <- do_call(RunPCA, RunPCAArgs)
|
|
334
340
|
|
|
@@ -361,7 +367,8 @@ if (!envs$no_integration) {
|
|
|
361
367
|
if (is.null(IntegrateLayersArgs$new.reduction)) {
|
|
362
368
|
IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
|
|
363
369
|
}
|
|
364
|
-
|
|
370
|
+
print(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
|
|
371
|
+
log_debug(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
|
|
365
372
|
IntegrateLayersArgs$object <- sobj
|
|
366
373
|
sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
|
|
367
374
|
# Save it for dimension reduction plots
|
|
biopipen/scripts/scrna/SeuratSubClustering.R
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/caching.R")
|
|
2
3
|
|
|
3
4
|
library(Seurat)
|
|
4
5
|
library(future)
|
|
@@ -33,40 +34,10 @@ envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
|
|
|
33
34
|
log_info("Reading Seurat object ...")
|
|
34
35
|
srtobj <- readRDS(srtfile)
|
|
35
36
|
|
|
36
|
-
if (isTRUE(envs$cache)) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if (is.character(envs$cache) && nchar(envs$cache) > 0) {
|
|
41
|
-
log_info("Obtainning the signature ...")
|
|
42
|
-
envs2 <- envs
|
|
43
|
-
envs2$ncores <- NULL
|
|
44
|
-
sig <- c(
|
|
45
|
-
capture.output(str(srtobj)),
|
|
46
|
-
"\n\n-------------------\n\n",
|
|
47
|
-
capture.output(str(envs2)),
|
|
48
|
-
"\n"
|
|
49
|
-
)
|
|
50
|
-
digested_sig <- digest::digest(sig, algo = "md5")
|
|
51
|
-
cached_file <- file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
|
|
52
|
-
if (file.exists(cached_file)) {
|
|
53
|
-
log_info("Using cached results {cached_file}")
|
|
54
|
-
# copy cached file to rdsfile
|
|
55
|
-
file.copy(cached_file, rdsfile, copy.date = TRUE)
|
|
56
|
-
quit()
|
|
57
|
-
} else {
|
|
58
|
-
log_info("Cached results not found.")
|
|
59
|
-
log_info("- Current signature: {digested_sig}")
|
|
60
|
-
# print(sig)
|
|
61
|
-
# sigfiles <- Sys.glob(file.path(envs$cache, "*.signature.txt"))
|
|
62
|
-
# for (sigfile in sigfiles) {
|
|
63
|
-
# log_info("- Found cached signature file: {sigfile}")
|
|
64
|
-
# cached_sig <- readLines(sigfile)
|
|
65
|
-
# log_info("- Cached signature:")
|
|
66
|
-
# print(cached_sig)
|
|
67
|
-
# }
|
|
68
|
-
writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
|
|
69
|
-
}
|
|
37
|
+
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
38
|
+
if (length(envs$cache) > 1) {
|
|
39
|
+
log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
|
|
40
|
+
envs$cache <- envs$cache[1]
|
|
70
41
|
}
|
|
71
42
|
|
|
72
43
|
if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
|
|
@@ -102,30 +73,66 @@ for (key in names(envs$cases)) {
|
|
|
102
73
|
}
|
|
103
74
|
|
|
104
75
|
log_info("- Subsetting ...")
|
|
105
|
-
sobj <-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
case$FindNeighbors$object <- sobj
|
|
119
|
-
if (is.null(case$FindNeighbors$reduction)) {
|
|
120
|
-
case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
76
|
+
sobj <- tryCatch({
|
|
77
|
+
srtobj %>% filter(!!parse_expr(case$subset))
|
|
78
|
+
}, error = function(e) {
|
|
79
|
+
stop(paste0(" Error in subset: ", e$message))
|
|
80
|
+
})
|
|
81
|
+
sobj_sig <- capture.output(str(sobj))
|
|
82
|
+
dig_sig <- digest::digest(sobj_sig, algo = "md5")
|
|
83
|
+
dig_sig <- substr(dig_sig, 1, 8)
|
|
84
|
+
cache_dir <- NULL
|
|
85
|
+
if (is.character(envs$cache)) {
|
|
86
|
+
cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
|
|
87
|
+
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
|
|
88
|
+
writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
|
|
121
89
|
}
|
|
122
|
-
sobj <- do_call(FindNeighbors, case$FindNeighbors)
|
|
123
90
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
91
|
+
cached <- get_cached(case$RunUMAP, "RunUMAP", cache_dir)
|
|
92
|
+
reduc_name <- case$RunUMAP$reduction.name %||% "umap"
|
|
93
|
+
if (is.null(cached$data)) {
|
|
94
|
+
log_info("- Running RunUMAP ...")
|
|
95
|
+
umap_args <- list_setdefault(
|
|
96
|
+
case$RunUMAP,
|
|
97
|
+
object = sobj,
|
|
98
|
+
dims = 1:30,
|
|
99
|
+
reduction = sobj@misc$integrated_new_reduction %||% "pca"
|
|
100
|
+
)
|
|
101
|
+
ncells <- ncol(sobj)
|
|
102
|
+
umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
|
|
103
|
+
umap_method <- case$RunUMAP$umap.method %||% "uwot"
|
|
104
|
+
if (umap_method == "uwot" && is.null(case$RunUMAP$n.neighbors)) {
|
|
105
|
+
# https://github.com/satijalab/seurat/issues/4312
|
|
106
|
+
umap_args$n.neighbors <- min(ncells - 1, 30)
|
|
107
|
+
}
|
|
108
|
+
sobj <- do_call(RunUMAP, umap_args)
|
|
109
|
+
cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
|
|
110
|
+
save_to_cache(cached, "RunUMAP", cache_dir)
|
|
111
|
+
} else {
|
|
112
|
+
log_info("- Loading cached RunUMAP ...")
|
|
113
|
+
sobj@reductions[[reduc_name]] <- cached$data$reduc
|
|
114
|
+
sobj@commands <- cached$data$commands
|
|
115
|
+
}
|
|
116
|
+
reduc <- cached$data$reduc
|
|
117
|
+
|
|
118
|
+
cached <- get_cached(case$FindNeighbors, "FindNeighbors", cache_dir)
|
|
119
|
+
if (is.null(cached$data)) {
|
|
120
|
+
log_info("- Running FindNeighbors ...")
|
|
121
|
+
case$FindNeighbors$object <- sobj
|
|
122
|
+
if (is.null(case$FindNeighbors$reduction)) {
|
|
123
|
+
case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
124
|
+
}
|
|
125
|
+
sobj <- do_call(FindNeighbors, case$FindNeighbors)
|
|
126
|
+
cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
|
|
127
|
+
save_to_cache(cached, "FindNeighbors", cache_dir)
|
|
128
|
+
} else {
|
|
129
|
+
log_info("- Loading cached FindNeighbors ...")
|
|
130
|
+
sobj@graphs <- cached$data$graphs
|
|
131
|
+
sobj@commands <- cached$data$commands
|
|
127
132
|
}
|
|
128
|
-
|
|
133
|
+
|
|
134
|
+
case$FindClusters$random.seed <- case$FindClusters$random.seed %||% 8525
|
|
135
|
+
resolution <- case$FindClusters$resolution %||% 0.8
|
|
129
136
|
if (is.character(resolution)) {
|
|
130
137
|
if (grepl(",", resolution)) {
|
|
131
138
|
resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
|
|
@@ -133,53 +140,30 @@ for (key in names(envs$cases)) {
|
|
|
133
140
|
resolution <- as.numeric(resolution)
|
|
134
141
|
}
|
|
135
142
|
}
|
|
136
|
-
|
|
137
|
-
case$FindClusters$resolution <-
|
|
138
|
-
case$FindClusters
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
log_info("- Found {length(ident_table)} clusters:")
|
|
145
|
-
print(ident_table)
|
|
146
|
-
cat("\n")
|
|
147
|
-
|
|
148
|
-
log_info("- Updating meta.data with subclusters...")
|
|
149
|
-
srtobj <- AddMetaData(srtobj, metadata = sobj@meta.data[, key, drop = FALSE])
|
|
150
|
-
srtobj[[paste0("sub_umap_", key)]] <- sobj@reductions$umap
|
|
151
|
-
} else {
|
|
152
|
-
log_info("- Multiple resolutions detected ...")
|
|
153
|
-
log_info("")
|
|
154
|
-
metadata <- NULL
|
|
155
|
-
for (res in resolution) {
|
|
156
|
-
findclusters_args <- case$FindClusters
|
|
157
|
-
findclusters_args$resolution <- res
|
|
158
|
-
findclusters_args$object <- sobj
|
|
159
|
-
sobj1 <- do_call(FindClusters, findclusters_args)
|
|
160
|
-
res_key <- paste0(key, "_", res)
|
|
143
|
+
for (res in resolution) {
|
|
144
|
+
case$FindClusters$resolution <- res
|
|
145
|
+
cached <- get_cached(case$FindClusters, paste0("FindClusters_", res), cache_dir)
|
|
146
|
+
res_key <- paste0("seurat_clusters_", res)
|
|
147
|
+
if (is.null(cached$data)) {
|
|
148
|
+
log_info("- Running FindClusters at resolution: {res} ...")
|
|
149
|
+
case$FindClusters$object <- sobj
|
|
150
|
+
sobj1 <- do_call(FindClusters, case$FindClusters)
|
|
161
151
|
levels(sobj1$seurat_clusters) <- paste0("s", as.numeric(levels(sobj1$seurat_clusters)) + 1)
|
|
162
|
-
Idents(sobj1) <- "seurat_clusters"
|
|
163
152
|
sobj1[[res_key]] <- sobj1$seurat_clusters
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
log_info("- Updating meta.data with subclusters...")
|
|
170
|
-
metadata <- sobj1@meta.data[, res_key, drop = FALSE]
|
|
171
|
-
srtobj <- AddMetaData(srtobj, metadata = metadata)
|
|
172
|
-
srtobj[[paste0("sub_umap_", res_key)]] <- sobj1@reductions$umap
|
|
153
|
+
cached$data <- sobj1@meta.data[, res_key, drop = FALSE]
|
|
154
|
+
save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
|
|
155
|
+
} else {
|
|
156
|
+
log_info("- Using cached FindClusters at resolution: {res} ...")
|
|
173
157
|
}
|
|
174
|
-
|
|
175
|
-
|
|
158
|
+
ident_table <- table(cached$data[[res_key]])
|
|
159
|
+
log_info(" Found {length(ident_table)} clusters")
|
|
160
|
+
print(ident_table)
|
|
161
|
+
cat("\n")
|
|
176
162
|
}
|
|
163
|
+
log_info("- Updating meta.data with subclusters...")
|
|
164
|
+
srtobj <- AddMetaData(srtobj, metadata = cached$data, col.name = key)
|
|
165
|
+
srtobj[[paste0("sub_umap_", key)]] <- reduc
|
|
177
166
|
}
|
|
178
167
|
|
|
179
168
|
log_info("Saving results ...")
|
|
180
169
|
saveRDS(srtobj, file = rdsfile)
|
|
181
|
-
|
|
182
|
-
if (is.character(envs$cache) && nchar(envs$cache) > 0) {
|
|
183
|
-
log_info("Caching results to {cached_file} ...")
|
|
184
|
-
invisible(file.copy(rdsfile, cached_file, overwrite = TRUE))
|
|
185
|
-
}
|