biopipen 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/scrna.py +61 -8
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ScSimulation.R +64 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +16 -22
- biopipen/scripts/scrna/SeuratMap2Ref.R +63 -36
- {biopipen-0.30.0.dist-info → biopipen-0.31.1.dist-info}/METADATA +7 -7
- {biopipen-0.30.0.dist-info → biopipen-0.31.1.dist-info}/RECORD +10 -9
- {biopipen-0.30.0.dist-info → biopipen-0.31.1.dist-info}/WHEEL +0 -0
- {biopipen-0.30.0.dist-info → biopipen-0.31.1.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.31.1"
|
biopipen/ns/scrna.py
CHANGED
|
@@ -541,14 +541,19 @@ class SeuratClusterStats(Proc):
|
|
|
541
541
|
This is to do some basic statistics on the clusters. For more comprehensive analysis,
|
|
542
542
|
see `RadarPlots` and `CellsDistribution`.
|
|
543
543
|
The parameters from the cases can overwrite the default parameters.
|
|
544
|
-
- frac (
|
|
544
|
+
- frac (choice): How to calculate the fraction of cells.
|
|
545
|
+
- group: calculate the fraction in each group.
|
|
546
|
+
The total fraction of the cells of idents in each group will be 1.
|
|
547
|
+
When `group-by` is not specified, it will be the same as `all`.
|
|
548
|
+
- ident: calculate the fraction in each ident.
|
|
549
|
+
The total fraction of the cells of groups in each ident will be 1.
|
|
550
|
+
Only works when `group-by` is specified.
|
|
551
|
+
- cluster: alias of `ident`.
|
|
552
|
+
- all: calculate the fraction against all cells.
|
|
553
|
+
- none: do not calculate the fraction, use the number of cells instead.
|
|
545
554
|
- pie (flag): Also output a pie chart?
|
|
546
555
|
- circos (flag): Also output a circos plot?
|
|
547
556
|
- table (flag): Whether to output a table (in tab-delimited format) and in the report.
|
|
548
|
-
- frac_ofall (flag): Whether to output the fraction against all cells,
|
|
549
|
-
instead of the fraction in each group.
|
|
550
|
-
Does not work for circos plot.
|
|
551
|
-
Only works when `frac` is `True` and `group-by` is specified.
|
|
552
557
|
- transpose (flag): Whether to transpose the cluster and group, that is,
|
|
553
558
|
using group as the x-axis and cluster to fill the plot.
|
|
554
559
|
For circos plot, when transposed, the arrows will be drawn from the idents (by `ident`) to the
|
|
@@ -708,12 +713,11 @@ class SeuratClusterStats(Proc):
|
|
|
708
713
|
},
|
|
709
714
|
"hists": {},
|
|
710
715
|
"stats_defaults": {
|
|
711
|
-
"frac":
|
|
716
|
+
"frac": "none",
|
|
712
717
|
"pie": False,
|
|
713
718
|
"circos": False,
|
|
714
719
|
"table": False,
|
|
715
720
|
"position": "auto",
|
|
716
|
-
"frac_ofall": False,
|
|
717
721
|
"transpose": False,
|
|
718
722
|
"ident": "seurat_clusters",
|
|
719
723
|
"group-by": None,
|
|
@@ -731,7 +735,7 @@ class SeuratClusterStats(Proc):
|
|
|
731
735
|
"Number of cells in each cluster by Sample": {
|
|
732
736
|
"group-by": "Sample",
|
|
733
737
|
"table": True,
|
|
734
|
-
"frac":
|
|
738
|
+
"frac": "group",
|
|
735
739
|
},
|
|
736
740
|
},
|
|
737
741
|
"ngenes_defaults": {
|
|
@@ -2261,3 +2265,52 @@ class AnnData2Seurat(Proc):
|
|
|
2261
2265
|
lang = config.lang.rscript
|
|
2262
2266
|
envs = {"outtype": "rds", "assay": "RNA", "dotplot_check": True}
|
|
2263
2267
|
script = "file://../scripts/scrna/AnnData2Seurat.R"
|
|
2268
|
+
|
|
2269
|
+
|
|
2270
|
+
class ScSimulation(Proc):
|
|
2271
|
+
"""Simulate single-cell data using splatter.
|
|
2272
|
+
|
|
2273
|
+
See <https://www.bioconductor.org/packages/devel/bioc/vignettes/splatter/inst/doc/splatter.html#2_Quickstart>
|
|
2274
|
+
|
|
2275
|
+
Input:
|
|
2276
|
+
seed: The seed for the simulation
|
|
2277
|
+
You could also use a string as the seed, and the seed will be
|
|
2278
|
+
generated by `digest::digest2int()`.
|
|
2279
|
+
So this could also work as a unique identifier for the simulation (i.e., a sample ID).
|
|
2280
|
+
|
|
2281
|
+
Output:
|
|
2282
|
+
outfile: The output Seurat object/SingleCellExperiment in RDS format
|
|
2283
|
+
|
|
2284
|
+
Envs:
|
|
2285
|
+
ngenes (type=int): The number of genes to simulate
|
|
2286
|
+
ncells (type=int): The number of cells to simulate
|
|
2287
|
+
nspikes (type=int): The number of spike-ins to simulate
|
|
2288
|
+
When `ngenes`, `ncells`, and `nspikes` are not specified, the default
|
|
2289
|
+
params from `mockSCE()` will be used. By default, `ngenes = 2000`,
|
|
2290
|
+
`ncells = 200`, and `nspikes = 100`.
|
|
2291
|
+
outtype (choice): The output file type.
|
|
2292
|
+
- seurat: Seurat object
|
|
2293
|
+
- singlecellexperiment: SingleCellExperiment object
|
|
2294
|
+
- sce: alias for `singlecellexperiment`
|
|
2295
|
+
method (choice): which simulation method to use. Options are:
|
|
2296
|
+
- single: produces a single population
|
|
2297
|
+
- groups: produces distinct groups (eg. cell types), or
|
|
2298
|
+
- paths: selects cells from continuous trajectories (eg. differentiation processes)
|
|
2299
|
+
params (ns): Other parameters for simulation.
|
|
2300
|
+
The parameters are initialized with `splatEstimate(mockSCE())` and then
|
|
2301
|
+
updated with the given parameters.
|
|
2302
|
+
See <https://rdrr.io/bioc/splatter/man/SplatParams.html>.
|
|
2303
|
+
Hyphens (`-`) will be transformed into dots (`.`) for the keys.
|
|
2304
|
+
""" # noqa: E501
|
|
2305
|
+
input = "seed:var"
|
|
2306
|
+
output = "outfile:file:simulatied_{{in.seed}}.RDS"
|
|
2307
|
+
lang = config.lang.rscript
|
|
2308
|
+
envs = {
|
|
2309
|
+
"ngenes": None,
|
|
2310
|
+
"ncells": None,
|
|
2311
|
+
"nspikes": None,
|
|
2312
|
+
"outtype": "seurat",
|
|
2313
|
+
"method": "single",
|
|
2314
|
+
"params": {},
|
|
2315
|
+
}
|
|
2316
|
+
script = "file://../scripts/scrna/ScSimulation.R"
|
|
@@ -325,7 +325,7 @@ do_case <- function(name, case) {
|
|
|
325
325
|
geom_col(width=.01, position="fill", color = "#888888") +
|
|
326
326
|
geom_bar(stat = "identity", position = position_fill(reverse = TRUE)) +
|
|
327
327
|
coord_polar("y", start = 0) +
|
|
328
|
-
|
|
328
|
+
scale_fill_manual(name = "Cluster", values = pal_biopipen()(length(levels(all_clusters)))) +
|
|
329
329
|
theme_void() +
|
|
330
330
|
theme(
|
|
331
331
|
plot.margin = plot.margin,
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
|
|
3
|
+
library(rlang)
|
|
4
|
+
library(splatter)
|
|
5
|
+
library(scater)
|
|
6
|
+
|
|
7
|
+
# Load template variables
|
|
8
|
+
seed <- {{ in.seed | r }}
|
|
9
|
+
outfile <- {{ out.outfile | r }}
|
|
10
|
+
ngenes <- {{ envs.ngenes | r }}
|
|
11
|
+
ncells <- {{ envs.ncells | r }}
|
|
12
|
+
nspikes <- {{ envs.nspikes | r }}
|
|
13
|
+
outtype <- {{ envs.outtype | r }}
|
|
14
|
+
method <- {{ envs.method | r }}
|
|
15
|
+
user_params <- {{ envs.params | r: todot="-" }}
|
|
16
|
+
|
|
17
|
+
log_info("Generating simulation parameters ...")
|
|
18
|
+
|
|
19
|
+
seed <- seed %||% 1
|
|
20
|
+
if (length(seed) > 1) {
|
|
21
|
+
log_warn("- multiple seeds provided, using the first one")
|
|
22
|
+
seed <- seed[1]
|
|
23
|
+
}
|
|
24
|
+
if (is.character(seed)) {
|
|
25
|
+
library(digest)
|
|
26
|
+
proj <- seed
|
|
27
|
+
seed <- digest2int(seed)
|
|
28
|
+
} else {
|
|
29
|
+
proj <- paste0("S", seed)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
set.seed(seed)
|
|
33
|
+
mock_sce_params <- list()
|
|
34
|
+
if (!is.null(ngenes)) mock_sce_params$ngenes <- ngenes
|
|
35
|
+
if (!is.null(ncells)) mock_sce_params$ncells <- ncells
|
|
36
|
+
if (!is.null(nspikes)) mock_sce_params$nspikes <- nspikes
|
|
37
|
+
sce <- do.call(mockSCE, mock_sce_params)
|
|
38
|
+
params <- splatEstimate(sce)
|
|
39
|
+
user_params$seed <- seed
|
|
40
|
+
user_params$object = params
|
|
41
|
+
do_call(setParams, user_params)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
log_info("Saving simulation parameters to file ...")
|
|
45
|
+
|
|
46
|
+
sim <- splatSimulate(params, method = method, verbose = TRUE)
|
|
47
|
+
|
|
48
|
+
outtype <- tolower(outtype)
|
|
49
|
+
if (outtype == "sce") outtype <- "singlecellexperiment"
|
|
50
|
+
|
|
51
|
+
if (outtype == "singlecellexperiment") {
|
|
52
|
+
log_info("Saving simulation to file ...")
|
|
53
|
+
saveRDS(sim, file = outfile)
|
|
54
|
+
} else {
|
|
55
|
+
log_info("Converting simulation to Seurat object ...")
|
|
56
|
+
cnts <- SingleCellExperiment::counts(sim)
|
|
57
|
+
sobj <- Seurat::CreateSeuratObject(counts = cnts, project = proj)
|
|
58
|
+
rm(sim)
|
|
59
|
+
rm(cnts)
|
|
60
|
+
gc()
|
|
61
|
+
|
|
62
|
+
log_info("Saving simulation to file ...")
|
|
63
|
+
saveRDS(sobj, file = outfile)
|
|
64
|
+
}
|
|
@@ -18,12 +18,6 @@ do_one_stats = function(name) {
|
|
|
18
18
|
if (isTRUE(case$pie) && !is.null(case$group.by)) {
|
|
19
19
|
stop(paste0(name, ": pie charts are not supported for group-by"))
|
|
20
20
|
}
|
|
21
|
-
if (!isTRUE(case$frac) && isTRUE(case$frac_ofall)) {
|
|
22
|
-
stop(paste0(name, ": frac_ofall is only supported when frac is true"))
|
|
23
|
-
}
|
|
24
|
-
if (isTRUE(case$frac_ofall) && is.null(case$group.by)) {
|
|
25
|
-
stop(paste0(name, ": frac_ofall is only supported for group-by"))
|
|
26
|
-
}
|
|
27
21
|
if (isTRUE(case$transpose) && is.null(case$group.by)) {
|
|
28
22
|
stop(paste0(name, ": transpose is only supported for group-by"))
|
|
29
23
|
}
|
|
@@ -46,28 +40,28 @@ do_one_stats = function(name) {
|
|
|
46
40
|
!!!syms(case$split.by)
|
|
47
41
|
), function(df) {
|
|
48
42
|
out <- df %>% group_by(!!!syms(select_cols)) %>% summarise(.n = n(), .groups = "drop")
|
|
49
|
-
if (!is.null(case$group.by) &&
|
|
50
|
-
if (
|
|
43
|
+
if (!is.null(case$group.by) && case$frac != "none") {
|
|
44
|
+
if (case$frac == "all") {
|
|
51
45
|
out <- out %>% mutate(.frac = .n / sum(.n))
|
|
52
|
-
} else if (
|
|
53
|
-
out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
54
|
-
} else {
|
|
46
|
+
} else if (case$frac == "group") {
|
|
55
47
|
out <- out %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
|
|
48
|
+
} else { # case$frac == "ident" or "cluster"
|
|
49
|
+
out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
56
50
|
}
|
|
57
51
|
}
|
|
58
52
|
out
|
|
59
53
|
}))
|
|
60
|
-
} else if (!is.null(case$group.by) &&
|
|
54
|
+
} else if (!is.null(case$group.by) && case$frac != "none") { # no split.by
|
|
61
55
|
plot_df <- df_cells %>%
|
|
62
56
|
select(all_of(select_cols)) %>%
|
|
63
57
|
group_by(!!!syms(select_cols)) %>%
|
|
64
58
|
summarise(.n = n(), .groups = "drop")
|
|
65
|
-
if (
|
|
59
|
+
if (case$frac == "all") {
|
|
66
60
|
plot_df = plot_df %>% mutate(.frac = .n / sum(.n))
|
|
67
|
-
} else {
|
|
68
|
-
plot_df = plot_df %>%
|
|
69
|
-
|
|
70
|
-
|
|
61
|
+
} else if (case$frac == "group") {
|
|
62
|
+
plot_df = plot_df %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
|
|
63
|
+
} else { # case$frac == "ident" or "cluster"
|
|
64
|
+
plot_df = plot_df %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
71
65
|
}
|
|
72
66
|
} else {
|
|
73
67
|
plot_df <- df_cells %>%
|
|
@@ -75,7 +69,7 @@ do_one_stats = function(name) {
|
|
|
75
69
|
group_by(!!!syms(select_cols)) %>%
|
|
76
70
|
summarise(.n = n(), .groups = "drop")
|
|
77
71
|
|
|
78
|
-
if (
|
|
72
|
+
if (case$frac != "none") {
|
|
79
73
|
plot_df <- plot_df %>% mutate(.frac = .n / sum(.n))
|
|
80
74
|
}
|
|
81
75
|
}
|
|
@@ -88,13 +82,13 @@ do_one_stats = function(name) {
|
|
|
88
82
|
p = plot_df %>%
|
|
89
83
|
ggplot(aes(
|
|
90
84
|
x=!!sym(ifelse(case$transpose, case$group.by, case$ident)),
|
|
91
|
-
y=if (
|
|
85
|
+
y=if (case$frac != "none") .frac else .n,
|
|
92
86
|
fill=!!sym(ifelse(is.null(case$group.by) || isTRUE(case$transpose), case$ident, case$group.by))
|
|
93
87
|
)) +
|
|
94
88
|
geom_bar(stat="identity", position=bar_position, alpha=.8) +
|
|
95
89
|
theme_prism(axis_text_angle = 90) +
|
|
96
90
|
scale_fill_biopipen() +
|
|
97
|
-
ylab(ifelse(
|
|
91
|
+
ylab(ifelse(case$frac != "none", "Fraction of cells", "Number of cells"))
|
|
98
92
|
|
|
99
93
|
if (!is.null(case$split.by)) {
|
|
100
94
|
p = p + facet_wrap(case$split.by)
|
|
@@ -109,7 +103,7 @@ do_one_stats = function(name) {
|
|
|
109
103
|
kind = "descr",
|
|
110
104
|
content = paste0(
|
|
111
105
|
"Plots showing the ",
|
|
112
|
-
ifelse(
|
|
106
|
+
ifelse(case$frac != "none", "number/faction", "number"),
|
|
113
107
|
" of cells per cluster",
|
|
114
108
|
ifelse(
|
|
115
109
|
is.null(case$group.by),
|
|
@@ -150,7 +144,7 @@ do_one_stats = function(name) {
|
|
|
150
144
|
guides(fill = guide_legend(title = case$ident)) +
|
|
151
145
|
theme_void() +
|
|
152
146
|
geom_label(
|
|
153
|
-
if (
|
|
147
|
+
if (case$frac != "none")
|
|
154
148
|
aes(label=sprintf("%.1f%%", .frac * 100))
|
|
155
149
|
else
|
|
156
150
|
aes(label=.n),
|
|
@@ -45,6 +45,10 @@ if (is.null(split_by)) {
|
|
|
45
45
|
future::plan(strategy = "multicore", workers = ncores)
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
.is_sct <- function(x) {
|
|
49
|
+
return(Seurat:::IsSCT(assay = x@assays[[DefaultAssay(x)]]))
|
|
50
|
+
}
|
|
51
|
+
|
|
48
52
|
.expand_dims = function(args, name = "dims") {
|
|
49
53
|
# Expand dims from 30 to 1:30
|
|
50
54
|
if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
|
|
@@ -63,6 +67,8 @@ if (endsWith(ref, ".rds") || endsWith(ref, ".RDS")) {
|
|
|
63
67
|
} else {
|
|
64
68
|
reference = LoadH5Seurat(ref)
|
|
65
69
|
}
|
|
70
|
+
reference = UpdateSeuratObject(reference)
|
|
71
|
+
reference = UpdateSCTAssays(reference)
|
|
66
72
|
|
|
67
73
|
# check if refdata exists in the reference
|
|
68
74
|
for (rname in names(mapquery_args$refdata)) {
|
|
@@ -84,9 +90,20 @@ for (rname in names(mapquery_args$refdata)) {
|
|
|
84
90
|
}
|
|
85
91
|
}
|
|
86
92
|
|
|
87
|
-
if (refnorm == "auto" &&
|
|
93
|
+
if (refnorm == "auto" && .is_sct(reference)) {
|
|
88
94
|
refnorm = "SCTransform"
|
|
89
95
|
}
|
|
96
|
+
if (refnorm == "SCTransform") {
|
|
97
|
+
# Check if the reference is SCTransform'ed
|
|
98
|
+
if (!.is_sct(reference)) {
|
|
99
|
+
stop("Reference is not SCTransform'ed")
|
|
100
|
+
}
|
|
101
|
+
n_models = length(x = slot(object = reference[[DefaultAssay(reference)]], name = "SCTModel.list"))
|
|
102
|
+
if (n_models == 0) {
|
|
103
|
+
stop("Reference doesn't contain SCTModel.")
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
90
107
|
log_info(" Normalization method used: {refnorm}")
|
|
91
108
|
if (refnorm == "SCTransform") {
|
|
92
109
|
findtransferanchors_args$normalization.method = "SCT"
|
|
@@ -130,18 +147,17 @@ log_info("- Normalizing data")
|
|
|
130
147
|
if (refnorm == "SCTransform") {
|
|
131
148
|
if (defassay == "SCT" && skip_if_normalized) {
|
|
132
149
|
log_warn(" Skipping normalization as the object is already SCTransform'ed")
|
|
133
|
-
query = sobj
|
|
134
150
|
} else {
|
|
135
151
|
log_info(" Using SCTransform normalization")
|
|
136
152
|
sctransform_args$residual.features = rownames(x = reference)
|
|
137
153
|
if (is.null(split_by)) {
|
|
138
154
|
sctransform_args$object = sobj
|
|
139
|
-
|
|
155
|
+
sobj = do_call(SCTransform, sctransform_args)
|
|
140
156
|
sctransform_args$object <- NULL
|
|
141
157
|
rm(sctransform_args)
|
|
142
158
|
gc()
|
|
143
159
|
} else {
|
|
144
|
-
|
|
160
|
+
sobj = mclapply(
|
|
145
161
|
X = sobj,
|
|
146
162
|
FUN = function(x) {
|
|
147
163
|
sctransform_args$object = x
|
|
@@ -149,22 +165,21 @@ if (refnorm == "SCTransform") {
|
|
|
149
165
|
},
|
|
150
166
|
mc.cores = ncores
|
|
151
167
|
)
|
|
152
|
-
if (any(unlist(lapply(
|
|
153
|
-
stop(paste0("\nmclapply (SCTransform) error:",
|
|
168
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
169
|
+
stop(paste0("\nmclapply (SCTransform) error:", sobj))
|
|
154
170
|
}
|
|
155
171
|
}
|
|
156
172
|
}
|
|
157
173
|
} else {
|
|
158
174
|
if (defassay == "RNA" && skip_if_normalized) {
|
|
159
175
|
log_warn(" Skipping normalization as the object is already LogNormalize'd")
|
|
160
|
-
query = sobj
|
|
161
176
|
} else {
|
|
162
177
|
log_info(" Using NormalizeData normalization")
|
|
163
178
|
if (is.null(split_by)) {
|
|
164
179
|
normalizedata_args$object = sobj
|
|
165
|
-
|
|
180
|
+
sobj = do_call(NormalizeData, normalizedata_args)
|
|
166
181
|
} else {
|
|
167
|
-
|
|
182
|
+
sobj = mclapply(
|
|
168
183
|
X = sobj,
|
|
169
184
|
FUN = function(x) {
|
|
170
185
|
normalizedata_args$object = x
|
|
@@ -172,8 +187,8 @@ if (refnorm == "SCTransform") {
|
|
|
172
187
|
},
|
|
173
188
|
mc.cores = ncores
|
|
174
189
|
)
|
|
175
|
-
if (any(unlist(lapply(
|
|
176
|
-
stop(paste0("\nmclapply (NormalizeData) error:",
|
|
190
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
191
|
+
stop(paste0("\nmclapply (NormalizeData) error:", sobj))
|
|
177
192
|
}
|
|
178
193
|
}
|
|
179
194
|
normalizedata_args$object <- NULL
|
|
@@ -181,14 +196,12 @@ if (refnorm == "SCTransform") {
|
|
|
181
196
|
gc()
|
|
182
197
|
}
|
|
183
198
|
}
|
|
184
|
-
rm(sobj)
|
|
185
|
-
gc()
|
|
186
199
|
|
|
187
200
|
# Find anchors between query and reference
|
|
188
201
|
log_info("- Finding anchors")
|
|
189
202
|
findtransferanchors_args$reference = reference
|
|
190
203
|
if (is.null(split_by)) {
|
|
191
|
-
findtransferanchors_args$query =
|
|
204
|
+
findtransferanchors_args$query = sobj
|
|
192
205
|
anchors = do_call(FindTransferAnchors, findtransferanchors_args)
|
|
193
206
|
findtransferanchors_args$reference = NULL
|
|
194
207
|
findtransferanchors_args$query = NULL
|
|
@@ -196,7 +209,7 @@ if (is.null(split_by)) {
|
|
|
196
209
|
gc()
|
|
197
210
|
} else {
|
|
198
211
|
anchors = mclapply(
|
|
199
|
-
X =
|
|
212
|
+
X = sobj,
|
|
200
213
|
FUN = function(x) {
|
|
201
214
|
findtransferanchors_args$query = x
|
|
202
215
|
do_call(FindTransferAnchors, findtransferanchors_args)
|
|
@@ -212,25 +225,25 @@ if (is.null(split_by)) {
|
|
|
212
225
|
log_info("- Mapping query to reference")
|
|
213
226
|
mapquery_args$reference = reference
|
|
214
227
|
if (is.null(split_by)) {
|
|
215
|
-
mapquery_args$query =
|
|
228
|
+
mapquery_args$query = sobj
|
|
216
229
|
mapquery_args$anchorset = anchors
|
|
217
|
-
|
|
230
|
+
sobj = do_call(MapQuery, mapquery_args)
|
|
218
231
|
mapquery_args$reference = NULL
|
|
219
232
|
mapquery_args$query = NULL
|
|
220
233
|
mapquery_args$anchorset = NULL
|
|
221
234
|
gc()
|
|
222
235
|
} else {
|
|
223
|
-
|
|
224
|
-
X = seq_along(
|
|
236
|
+
sobj = mclapply(
|
|
237
|
+
X = seq_along(sobj),
|
|
225
238
|
FUN = function(i) {
|
|
226
|
-
mapquery_args$query =
|
|
239
|
+
mapquery_args$query = sobj[[i]]
|
|
227
240
|
mapquery_args$anchorset = anchors[[i]]
|
|
228
241
|
do_call(MapQuery, mapquery_args)
|
|
229
242
|
},
|
|
230
243
|
mc.cores = ncores
|
|
231
244
|
)
|
|
232
|
-
if (any(unlist(lapply(
|
|
233
|
-
stop(paste0("\nmclapply (MapQuery) error:",
|
|
245
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
246
|
+
stop(paste0("\nmclapply (MapQuery) error:", sobj))
|
|
234
247
|
}
|
|
235
248
|
}
|
|
236
249
|
|
|
@@ -254,7 +267,7 @@ if (is.null(split_by)) {
|
|
|
254
267
|
gc()
|
|
255
268
|
} else {
|
|
256
269
|
mappingscore = mclapply(
|
|
257
|
-
X = seq_along(
|
|
270
|
+
X = seq_along(sobj),
|
|
258
271
|
FUN = function(i) {
|
|
259
272
|
mappingscore_args$anchors = anchors[[i]]
|
|
260
273
|
tryCatch({
|
|
@@ -274,25 +287,25 @@ if (is.null(split_by)) {
|
|
|
274
287
|
# Calculate mapping score and add to metadata
|
|
275
288
|
log_info("- Adding mapping score to metadata")
|
|
276
289
|
if (is.null(split_by)) {
|
|
277
|
-
|
|
278
|
-
object =
|
|
290
|
+
sobj = AddMetaData(
|
|
291
|
+
object = sobj,
|
|
279
292
|
metadata = mappingscore,
|
|
280
293
|
col.name = "mapping.score"
|
|
281
294
|
)
|
|
282
295
|
} else {
|
|
283
|
-
|
|
284
|
-
X = seq_along(
|
|
296
|
+
sobj = mclapply(
|
|
297
|
+
X = seq_along(sobj),
|
|
285
298
|
FUN = function(i) {
|
|
286
299
|
AddMetaData(
|
|
287
|
-
object =
|
|
300
|
+
object = sobj[[i]],
|
|
288
301
|
metadata = mappingscore[[i]],
|
|
289
302
|
col.name = "mapping.score"
|
|
290
303
|
)
|
|
291
304
|
},
|
|
292
305
|
mc.cores = ncores
|
|
293
306
|
)
|
|
294
|
-
if (any(unlist(lapply(
|
|
295
|
-
stop(paste0("\nmclapply (AddMetaData) error:",
|
|
307
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
308
|
+
stop(paste0("\nmclapply (AddMetaData) error:", sobj))
|
|
296
309
|
}
|
|
297
310
|
|
|
298
311
|
# Combine the results
|
|
@@ -300,19 +313,33 @@ if (is.null(split_by)) {
|
|
|
300
313
|
gc()
|
|
301
314
|
# Memory efficient way to merge the results
|
|
302
315
|
# query = Reduce(function(x, y) merge(x, y, merge.dr = "ref.umap"), query)
|
|
303
|
-
|
|
316
|
+
sobj = merge(sobj[[1]], sobj[2:length(sobj)], merge.dr = "ref.umap")
|
|
304
317
|
}
|
|
305
318
|
|
|
306
319
|
# Add the alias to the metadata for the clusters
|
|
307
320
|
log_info("- Adding ident to metadata and set as ident")
|
|
308
|
-
|
|
321
|
+
sobj@meta.data = sobj@meta.data %>% mutate(
|
|
309
322
|
!!sym(ident) := as.factor(!!parse_expr(paste0("predicted.", use)))
|
|
310
323
|
)
|
|
311
|
-
Idents(
|
|
324
|
+
Idents(sobj) = ident
|
|
325
|
+
|
|
326
|
+
# Check if PrepSCTFindMarkers is done
|
|
327
|
+
if (.is_sct(sobj) && is.null(sobj@commands$PrepSCTFindMarkers)) {
|
|
328
|
+
log_info("- Running PrepSCTFindMarkers ...")
|
|
329
|
+
sobj <- PrepSCTFindMarkers(sobj)
|
|
330
|
+
# compose a new SeuratCommand to record it to sobj@commands
|
|
331
|
+
commands <- names(pbmc_small@commands)
|
|
332
|
+
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
333
|
+
scommand@time.stamp <- Sys.time()
|
|
334
|
+
scommand@assay.used <- DefaultAssay(sobj)
|
|
335
|
+
scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
|
|
336
|
+
scommand@params <- list()
|
|
337
|
+
sobj@commands$PrepSCTFindMarkers <- scommand
|
|
338
|
+
}
|
|
312
339
|
|
|
313
340
|
# Save
|
|
314
341
|
log_info("- Saving result ...")
|
|
315
|
-
saveRDS(
|
|
342
|
+
saveRDS(sobj, file = outfile)
|
|
316
343
|
|
|
317
344
|
|
|
318
345
|
# ############################
|
|
@@ -325,7 +352,7 @@ ref.reduction = mapquery_args$reduction.model %||% "wnn.umap"
|
|
|
325
352
|
for (qname in names(mapquery_args$refdata)) {
|
|
326
353
|
rname <- mapquery_args$refdata[[qname]]
|
|
327
354
|
|
|
328
|
-
if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(
|
|
355
|
+
if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(sobj[[qname]]))) {
|
|
329
356
|
log_warn(" Skipping transferred array: {qname} -> {rname}")
|
|
330
357
|
next
|
|
331
358
|
}
|
|
@@ -342,7 +369,7 @@ for (qname in names(mapquery_args$refdata)) {
|
|
|
342
369
|
) + NoLegend()
|
|
343
370
|
|
|
344
371
|
query_p <- DimPlot(
|
|
345
|
-
object =
|
|
372
|
+
object = sobj,
|
|
346
373
|
reduction = "ref.umap",
|
|
347
374
|
group.by = paste0("predicted.", qname),
|
|
348
375
|
label = TRUE,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.31.1
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -14,9 +14,9 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Provides-Extra: runinfo
|
|
16
16
|
Requires-Dist: datar[pandas] (>=0.15.6,<0.16.0)
|
|
17
|
-
Requires-Dist: pipen-board[report] (>=0.
|
|
18
|
-
Requires-Dist: pipen-cli-run (>=0.
|
|
19
|
-
Requires-Dist: pipen-filters (>=0.
|
|
20
|
-
Requires-Dist: pipen-poplog (>=0.
|
|
21
|
-
Requires-Dist: pipen-runinfo (>=0.
|
|
22
|
-
Requires-Dist: pipen-verbose (>=0.
|
|
17
|
+
Requires-Dist: pipen-board[report] (>=0.16,<0.17)
|
|
18
|
+
Requires-Dist: pipen-cli-run (>=0.14,<0.15)
|
|
19
|
+
Requires-Dist: pipen-filters (>=0.14,<0.15)
|
|
20
|
+
Requires-Dist: pipen-poplog (>=0.2.0,<0.3.0)
|
|
21
|
+
Requires-Dist: pipen-runinfo (>=0.7,<0.8) ; extra == "runinfo"
|
|
22
|
+
Requires-Dist: pipen-verbose (>=0.12,<0.13)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
biopipen/__init__.py,sha256=
|
|
1
|
+
biopipen/__init__.py,sha256=PB3hjnlSwoWLBLl2ge7lsrSRubKXRdIanr_Hg2t3ViA,23
|
|
2
2
|
biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
|
|
4
4
|
biopipen/core/config.toml,sha256=7IXvviRicZ2D1h6x3BVgbLJ96nsh-ikvZ0sVlQepqFE,1944
|
|
@@ -21,7 +21,7 @@ biopipen/ns/misc.py,sha256=qXcm0RdR6W-xpYGgQn3v7JBeYRWwVm5gtgSj2tdVxx4,2935
|
|
|
21
21
|
biopipen/ns/plot.py,sha256=_dGLKpyHiJqLIIQu5tqCk8H5BkgGBh_qRUZHkjnOgtE,18080
|
|
22
22
|
biopipen/ns/regulatory.py,sha256=qvc9QrwgwCI_lg0DQ2QOZbAhC8BAD1qnQXSGtAGlVcY,11750
|
|
23
23
|
biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
|
|
24
|
-
biopipen/ns/scrna.py,sha256=
|
|
24
|
+
biopipen/ns/scrna.py,sha256=fXP_h7gchcuk_Jwos0IgY_P8ON6Q995OgKHgdrxfvAY,112868
|
|
25
25
|
biopipen/ns/scrna_metabolic_landscape.py,sha256=6AhaynGG3lNRi96N2tReVT46BJMuEwooSSd2irBoN80,28347
|
|
26
26
|
biopipen/ns/snp.py,sha256=-Jx5Hsv_7KV7TqLU0nHCaPkMEN0CFdi4tNVlyq0rUZ4,27259
|
|
27
27
|
biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
|
|
@@ -139,7 +139,7 @@ biopipen/scripts/scrna/CellTypeAnnotation-hitype.R,sha256=zol-IF0jd3DTzw9I1UVUMY
|
|
|
139
139
|
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R,sha256=LTvFzGzP0nmQLkJIxn6H7L8xkP2Z3q52DknXtBkkmcA,1822
|
|
140
140
|
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R,sha256=RhFHy1E4b3Y7pw6feKEhPx5GTbNdaQMDXHzvRlPk34Q,4544
|
|
141
141
|
biopipen/scripts/scrna/CellTypeAnnotation.R,sha256=X_LR0LZ6UQjLA-SCCcQ8CBFZGG_J8CUNx--qwBS8Oh8,773
|
|
142
|
-
biopipen/scripts/scrna/CellsDistribution.R,sha256=
|
|
142
|
+
biopipen/scripts/scrna/CellsDistribution.R,sha256=QOkQhpendXMmgen99wn0Pl3XKnMJQzvLVb0hPdR49NQ,18980
|
|
143
143
|
biopipen/scripts/scrna/DimPlots.R,sha256=oKhygoWQOCck8OlpnOrNJg0CS2q-r8Com1dfjTvQzvU,1575
|
|
144
144
|
biopipen/scripts/scrna/ExprImputation-alra.R,sha256=xNdQiXgY13-iF16o2KgCJWfPIy4P-lOiO1bl9PfXI4U,864
|
|
145
145
|
biopipen/scripts/scrna/ExprImputation-rmagic.R,sha256=jYIfqZpnvjKJkvItLnemPVtUApHBYQi1_L8rHVbEe1M,735
|
|
@@ -151,19 +151,20 @@ biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=QJ_E6qjRC8GazoopcxZfT5cKxl
|
|
|
151
151
|
biopipen/scripts/scrna/RadarPlots.R,sha256=4zs0hAm7yq1Ls62f_29koPLqAKCeKbYiztNM-HR7j3U,13041
|
|
152
152
|
biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
|
|
153
153
|
biopipen/scripts/scrna/ScFGSEA.R,sha256=MFoJ3i3LFBfsPCxwLPnTh141ZJyrzwnrTuCZIFwvYjU,6318
|
|
154
|
+
biopipen/scripts/scrna/ScSimulation.R,sha256=b2LtL68ucxLoI57tSEDD0hOSbVHUA_x88Y96eK07N-s,1712
|
|
154
155
|
biopipen/scripts/scrna/Seurat2AnnData.R,sha256=G7bcHGffdNlz6Uuy98tQdlahXiPkTDokflp1yTUgcSQ,1578
|
|
155
156
|
biopipen/scripts/scrna/SeuratClusterStats-clustree.R,sha256=FkbniQMHiZGrFAIuS8nUfPVJKFofSL-ZnpxTqIhTa54,3058
|
|
156
157
|
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R,sha256=NEdlJHNXnJZfF7YkefYVWTPO8Z_KAppRAs9rNvB8TXs,2360
|
|
157
158
|
biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=DeGo7AkBRq0V3Y3JDaifId6rrr5dwawTzcSAJ3W1lxE,15614
|
|
158
159
|
biopipen/scripts/scrna/SeuratClusterStats-hists.R,sha256=PXyDKww8HcloCU8r4IqRwRrm6Ly0cpmpvRcP30xxBa4,5038
|
|
159
160
|
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R,sha256=ihZLB27iEhgICKj-ZTnxTvRAYIgg9rzWr9Oyh1YmOYM,2160
|
|
160
|
-
biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=
|
|
161
|
+
biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=jufs99zxNyUaTCDiWzO0yt76ncc73Yn_SpPa5igbJyA,9120
|
|
161
162
|
biopipen/scripts/scrna/SeuratClusterStats.R,sha256=fywRBVjaFIJBxdgsZxLXEheUZI4l5VUoNIcJnhPIdPQ,2193
|
|
162
163
|
biopipen/scripts/scrna/SeuratClustering-common.R,sha256=JX4Cn2FC6GOcBqaVyGDD3MM5zGpm3TpKJlfo2oOQ4Uk,8195
|
|
163
164
|
biopipen/scripts/scrna/SeuratClustering.R,sha256=0OKRBQ5rFuupK7c03_sSt2HMwMdMnCYFqTvkRXFKchs,1706
|
|
164
165
|
biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zykkJtg2lM,509
|
|
165
166
|
biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
|
|
166
|
-
biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=
|
|
167
|
+
biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=KARt5IVBDYpNhLZ7_j0FEi1u5S8PxU_mB4THH26s7AM,12008
|
|
167
168
|
biopipen/scripts/scrna/SeuratMetadataMutater.R,sha256=PMwG0Xvl_EEVKkicfrIi4arEqpY948PkYLkb59kTAXI,1135
|
|
168
169
|
biopipen/scripts/scrna/SeuratPreparing-common.R,sha256=WuD7lGS17eAUQWSiIdAoV0EIeqS3Tnkkx-7PbP6Q3tc,16279
|
|
169
170
|
biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R,sha256=TNN2lfFjpnnO0rguMsG38JYCP1nFUhcPLJ1LqGj-Sc8,6674
|
|
@@ -278,7 +279,7 @@ biopipen/utils/reference.py,sha256=oi5evicLwHxF0KAIPNZohBeHJLJQNWFJH0cr2y5pgcg,5
|
|
|
278
279
|
biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
|
|
279
280
|
biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
|
|
280
281
|
biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
|
|
281
|
-
biopipen-0.
|
|
282
|
-
biopipen-0.
|
|
283
|
-
biopipen-0.
|
|
284
|
-
biopipen-0.
|
|
282
|
+
biopipen-0.31.1.dist-info/METADATA,sha256=uPOVUaGxNgT5ZJwJWBDXqmnxaBUxaMvyErjqFCRsV60,882
|
|
283
|
+
biopipen-0.31.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
284
|
+
biopipen-0.31.1.dist-info/entry_points.txt,sha256=69SbeMaF47Z2DS40yo-qDyoBKmMmumrNnsjEZMOioCE,625
|
|
285
|
+
biopipen-0.31.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|