biopipen 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/scrna.py +61 -8
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ScSimulation.R +64 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +16 -22
- biopipen/scripts/scrna/SeuratMap2Ref.R +45 -35
- {biopipen-0.30.0.dist-info → biopipen-0.31.0.dist-info}/METADATA +7 -7
- {biopipen-0.30.0.dist-info → biopipen-0.31.0.dist-info}/RECORD +10 -9
- {biopipen-0.30.0.dist-info → biopipen-0.31.0.dist-info}/WHEEL +0 -0
- {biopipen-0.30.0.dist-info → biopipen-0.31.0.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.31.0"
|
biopipen/ns/scrna.py
CHANGED
|
@@ -541,14 +541,19 @@ class SeuratClusterStats(Proc):
|
|
|
541
541
|
This is to do some basic statistics on the clusters. For more comprehensive analysis,
|
|
542
542
|
see `RadarPlots` and `CellsDistribution`.
|
|
543
543
|
The parameters from the cases can overwrite the default parameters.
|
|
544
|
-
- frac (
|
|
544
|
+
- frac (choice): How to calculate the fraction of cells.
|
|
545
|
+
- group: calculate the fraction in each group.
|
|
546
|
+
The total fraction of the cells of idents in each group will be 1.
|
|
547
|
+
When `group-by` is not specified, it will be the same as `all`.
|
|
548
|
+
- ident: calculate the fraction in each ident.
|
|
549
|
+
The total fraction of the cells of groups in each ident will be 1.
|
|
550
|
+
Only works when `group-by` is specified.
|
|
551
|
+
- cluster: alias of `ident`.
|
|
552
|
+
- all: calculate the fraction against all cells.
|
|
553
|
+
- none: do not calculate the fraction, use the number of cells instead.
|
|
545
554
|
- pie (flag): Also output a pie chart?
|
|
546
555
|
- circos (flag): Also output a circos plot?
|
|
547
556
|
- table (flag): Whether to output a table (in tab-delimited format) and in the report.
|
|
548
|
-
- frac_ofall (flag): Whether to output the fraction against all cells,
|
|
549
|
-
instead of the fraction in each group.
|
|
550
|
-
Does not work for circos plot.
|
|
551
|
-
Only works when `frac` is `True` and `group-by` is specified.
|
|
552
557
|
- transpose (flag): Whether to transpose the cluster and group, that is,
|
|
553
558
|
using group as the x-axis and cluster to fill the plot.
|
|
554
559
|
For circos plot, when transposed, the arrows will be drawn from the idents (by `ident`) to the
|
|
@@ -708,12 +713,11 @@ class SeuratClusterStats(Proc):
|
|
|
708
713
|
},
|
|
709
714
|
"hists": {},
|
|
710
715
|
"stats_defaults": {
|
|
711
|
-
"frac":
|
|
716
|
+
"frac": "none",
|
|
712
717
|
"pie": False,
|
|
713
718
|
"circos": False,
|
|
714
719
|
"table": False,
|
|
715
720
|
"position": "auto",
|
|
716
|
-
"frac_ofall": False,
|
|
717
721
|
"transpose": False,
|
|
718
722
|
"ident": "seurat_clusters",
|
|
719
723
|
"group-by": None,
|
|
@@ -731,7 +735,7 @@ class SeuratClusterStats(Proc):
|
|
|
731
735
|
"Number of cells in each cluster by Sample": {
|
|
732
736
|
"group-by": "Sample",
|
|
733
737
|
"table": True,
|
|
734
|
-
"frac":
|
|
738
|
+
"frac": "group",
|
|
735
739
|
},
|
|
736
740
|
},
|
|
737
741
|
"ngenes_defaults": {
|
|
@@ -2261,3 +2265,52 @@ class AnnData2Seurat(Proc):
|
|
|
2261
2265
|
lang = config.lang.rscript
|
|
2262
2266
|
envs = {"outtype": "rds", "assay": "RNA", "dotplot_check": True}
|
|
2263
2267
|
script = "file://../scripts/scrna/AnnData2Seurat.R"
|
|
2268
|
+
|
|
2269
|
+
|
|
2270
|
+
class ScSimulation(Proc):
|
|
2271
|
+
"""Simulate single-cell data using splatter.
|
|
2272
|
+
|
|
2273
|
+
See <https://www.bioconductor.org/packages/devel/bioc/vignettes/splatter/inst/doc/splatter.html#2_Quickstart>
|
|
2274
|
+
|
|
2275
|
+
Input:
|
|
2276
|
+
seed: The seed for the simulation
|
|
2277
|
+
You could also use string as the seed, and the seed will be
|
|
2278
|
+
generated by `digest::digest2int()`.
|
|
2279
|
+
So this could also work as a unique identifier for the simulation (i.e. Sample ID).
|
|
2280
|
+
|
|
2281
|
+
Output:
|
|
2282
|
+
outfile: The output Seurat object/SingleCellExperiment in RDS format
|
|
2283
|
+
|
|
2284
|
+
Envs:
|
|
2285
|
+
ngenes (type=int): The number of genes to simulate
|
|
2286
|
+
ncells (type=int): The number of cells to simulate
|
|
2287
|
+
nspikes (type=int): The number of spike-ins to simulate
|
|
2288
|
+
When `ngenes`, `ncells`, and `nspikes` are not specified, the default
|
|
2289
|
+
params from `mockSCE()` will be used. By default, `ngenes = 2000`,
|
|
2290
|
+
`ncells = 200`, and `nspikes = 100`.
|
|
2291
|
+
outtype (choice): The output file type.
|
|
2292
|
+
- seurat: Seurat object
|
|
2293
|
+
- singlecellexperiment: SingleCellExperiment object
|
|
2294
|
+
- sce: alias for `singlecellexperiment`
|
|
2295
|
+
method (choice): which simulation method to use. Options are:
|
|
2296
|
+
- single: produces a single population
|
|
2297
|
+
- groups: produces distinct groups (eg. cell types), or
|
|
2298
|
+
- paths: selects cells from continuous trajectories (eg. differentiation processes)
|
|
2299
|
+
params (ns): Other parameters for simulation.
|
|
2300
|
+
The parameters are initialized with `splatEstimate(mockSCE())` and then
|
|
2301
|
+
updated with the given parameters.
|
|
2302
|
+
See <https://rdrr.io/bioc/splatter/man/SplatParams.html>.
|
|
2303
|
+
Hyphens (`-`) will be transformed into dots (`.`) for the keys.
|
|
2304
|
+
""" # noqa: E501
|
|
2305
|
+
input = "seed:var"
|
|
2306
|
+
output = "outfile:file:simulatied_{{in.seed}}.RDS"
|
|
2307
|
+
lang = config.lang.rscript
|
|
2308
|
+
envs = {
|
|
2309
|
+
"ngenes": None,
|
|
2310
|
+
"ncells": None,
|
|
2311
|
+
"nspikes": None,
|
|
2312
|
+
"outtype": "seurat",
|
|
2313
|
+
"method": "single",
|
|
2314
|
+
"params": {},
|
|
2315
|
+
}
|
|
2316
|
+
script = "file://../scripts/scrna/ScSimulation.R"
|
|
@@ -325,7 +325,7 @@ do_case <- function(name, case) {
|
|
|
325
325
|
geom_col(width=.01, position="fill", color = "#888888") +
|
|
326
326
|
geom_bar(stat = "identity", position = position_fill(reverse = TRUE)) +
|
|
327
327
|
coord_polar("y", start = 0) +
|
|
328
|
-
|
|
328
|
+
scale_fill_manual(name = "Cluster", values = pal_biopipen()(length(levels(all_clusters)))) +
|
|
329
329
|
theme_void() +
|
|
330
330
|
theme(
|
|
331
331
|
plot.margin = plot.margin,
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
|
|
3
|
+
library(rlang)
|
|
4
|
+
library(splatter)
|
|
5
|
+
library(scater)
|
|
6
|
+
|
|
7
|
+
# Load template variables
|
|
8
|
+
seed <- {{ in.seed | r }}
|
|
9
|
+
outfile <- {{ out.outfile | r }}
|
|
10
|
+
ngenes <- {{ envs.ngenes | r }}
|
|
11
|
+
ncells <- {{ envs.ncells | r }}
|
|
12
|
+
nspikes <- {{ envs.nspikes | r }}
|
|
13
|
+
outtype <- {{ envs.outtype | r }}
|
|
14
|
+
method <- {{ envs.method | r }}
|
|
15
|
+
user_params <- {{ envs.params | r: todot="-" }}
|
|
16
|
+
|
|
17
|
+
log_info("Generating simulation parameters ...")
|
|
18
|
+
|
|
19
|
+
seed <- seed %||% 1
|
|
20
|
+
if (length(seed) > 1) {
|
|
21
|
+
log_warn("- multiple seeds provided, using the first one")
|
|
22
|
+
seed <- seed[1]
|
|
23
|
+
}
|
|
24
|
+
if (is.character(seed)) {
|
|
25
|
+
library(digest)
|
|
26
|
+
proj <- seed
|
|
27
|
+
seed <- digest2int(seed)
|
|
28
|
+
} else {
|
|
29
|
+
proj <- paste0("S", seed)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
set.seed(seed)
|
|
33
|
+
mock_sce_params <- list()
|
|
34
|
+
if (!is.null(ngenes)) mock_sce_params$ngenes <- ngenes
|
|
35
|
+
if (!is.null(ncells)) mock_sce_params$ncells <- ncells
|
|
36
|
+
if (!is.null(nspikes)) mock_sce_params$nspikes <- nspikes
|
|
37
|
+
sce <- do.call(mockSCE, mock_sce_params)
|
|
38
|
+
params <- splatEstimate(sce)
|
|
39
|
+
user_params$seed <- seed
|
|
40
|
+
user_params$object = params
|
|
41
|
+
do_call(setParams, user_params)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
log_info("Saving simulation parameters to file ...")
|
|
45
|
+
|
|
46
|
+
sim <- splatSimulate(params, method = method, verbose = TRUE)
|
|
47
|
+
|
|
48
|
+
outtype <- tolower(outtype)
|
|
49
|
+
if (outtype == "sce") outtype <- "singlecellexperiment"
|
|
50
|
+
|
|
51
|
+
if (outtype == "singlecellexperiment") {
|
|
52
|
+
log_info("Saving simulation to file ...")
|
|
53
|
+
saveRDS(sim, file = outfile)
|
|
54
|
+
} else {
|
|
55
|
+
log_info("Converting simulation to Seurat object ...")
|
|
56
|
+
cnts <- SingleCellExperiment::counts(sim)
|
|
57
|
+
sobj <- Seurat::CreateSeuratObject(counts = cnts, project = proj)
|
|
58
|
+
rm(sim)
|
|
59
|
+
rm(cnts)
|
|
60
|
+
gc()
|
|
61
|
+
|
|
62
|
+
log_info("Saving simulation to file ...")
|
|
63
|
+
saveRDS(sobj, file = outfile)
|
|
64
|
+
}
|
|
@@ -18,12 +18,6 @@ do_one_stats = function(name) {
|
|
|
18
18
|
if (isTRUE(case$pie) && !is.null(case$group.by)) {
|
|
19
19
|
stop(paste0(name, ": pie charts are not supported for group-by"))
|
|
20
20
|
}
|
|
21
|
-
if (!isTRUE(case$frac) && isTRUE(case$frac_ofall)) {
|
|
22
|
-
stop(paste0(name, ": frac_ofall is only supported when frac is true"))
|
|
23
|
-
}
|
|
24
|
-
if (isTRUE(case$frac_ofall) && is.null(case$group.by)) {
|
|
25
|
-
stop(paste0(name, ": frac_ofall is only supported for group-by"))
|
|
26
|
-
}
|
|
27
21
|
if (isTRUE(case$transpose) && is.null(case$group.by)) {
|
|
28
22
|
stop(paste0(name, ": transpose is only supported for group-by"))
|
|
29
23
|
}
|
|
@@ -46,28 +40,28 @@ do_one_stats = function(name) {
|
|
|
46
40
|
!!!syms(case$split.by)
|
|
47
41
|
), function(df) {
|
|
48
42
|
out <- df %>% group_by(!!!syms(select_cols)) %>% summarise(.n = n(), .groups = "drop")
|
|
49
|
-
if (!is.null(case$group.by) &&
|
|
50
|
-
if (
|
|
43
|
+
if (!is.null(case$group.by) && case$frac != "none") {
|
|
44
|
+
if (case$frac == "all") {
|
|
51
45
|
out <- out %>% mutate(.frac = .n / sum(.n))
|
|
52
|
-
} else if (
|
|
53
|
-
out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
54
|
-
} else {
|
|
46
|
+
} else if (case$frac == "group") {
|
|
55
47
|
out <- out %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
|
|
48
|
+
} else { # case$frac == "ident" or "cluster"
|
|
49
|
+
out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
56
50
|
}
|
|
57
51
|
}
|
|
58
52
|
out
|
|
59
53
|
}))
|
|
60
|
-
} else if (!is.null(case$group.by) &&
|
|
54
|
+
} else if (!is.null(case$group.by) && case$frac != "none") { # no split.by
|
|
61
55
|
plot_df <- df_cells %>%
|
|
62
56
|
select(all_of(select_cols)) %>%
|
|
63
57
|
group_by(!!!syms(select_cols)) %>%
|
|
64
58
|
summarise(.n = n(), .groups = "drop")
|
|
65
|
-
if (
|
|
59
|
+
if (case$frac == "all") {
|
|
66
60
|
plot_df = plot_df %>% mutate(.frac = .n / sum(.n))
|
|
67
|
-
} else {
|
|
68
|
-
plot_df = plot_df %>%
|
|
69
|
-
|
|
70
|
-
|
|
61
|
+
} else if (case$frac == "group") {
|
|
62
|
+
plot_df = plot_df %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
|
|
63
|
+
} else { # case$frac == "ident" or "cluster"
|
|
64
|
+
plot_df = plot_df %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
71
65
|
}
|
|
72
66
|
} else {
|
|
73
67
|
plot_df <- df_cells %>%
|
|
@@ -75,7 +69,7 @@ do_one_stats = function(name) {
|
|
|
75
69
|
group_by(!!!syms(select_cols)) %>%
|
|
76
70
|
summarise(.n = n(), .groups = "drop")
|
|
77
71
|
|
|
78
|
-
if (
|
|
72
|
+
if (case$frac != "none") {
|
|
79
73
|
plot_df <- plot_df %>% mutate(.frac = .n / sum(.n))
|
|
80
74
|
}
|
|
81
75
|
}
|
|
@@ -88,13 +82,13 @@ do_one_stats = function(name) {
|
|
|
88
82
|
p = plot_df %>%
|
|
89
83
|
ggplot(aes(
|
|
90
84
|
x=!!sym(ifelse(case$transpose, case$group.by, case$ident)),
|
|
91
|
-
y=if (
|
|
85
|
+
y=if (case$frac != "none") .frac else .n,
|
|
92
86
|
fill=!!sym(ifelse(is.null(case$group.by) || isTRUE(case$transpose), case$ident, case$group.by))
|
|
93
87
|
)) +
|
|
94
88
|
geom_bar(stat="identity", position=bar_position, alpha=.8) +
|
|
95
89
|
theme_prism(axis_text_angle = 90) +
|
|
96
90
|
scale_fill_biopipen() +
|
|
97
|
-
ylab(ifelse(
|
|
91
|
+
ylab(ifelse(case$frac != "none", "Fraction of cells", "Number of cells"))
|
|
98
92
|
|
|
99
93
|
if (!is.null(case$split.by)) {
|
|
100
94
|
p = p + facet_wrap(case$split.by)
|
|
@@ -109,7 +103,7 @@ do_one_stats = function(name) {
|
|
|
109
103
|
kind = "descr",
|
|
110
104
|
content = paste0(
|
|
111
105
|
"Plots showing the ",
|
|
112
|
-
ifelse(
|
|
106
|
+
ifelse(case$frac != "none", "number/faction", "number"),
|
|
113
107
|
" of cells per cluster",
|
|
114
108
|
ifelse(
|
|
115
109
|
is.null(case$group.by),
|
|
@@ -150,7 +144,7 @@ do_one_stats = function(name) {
|
|
|
150
144
|
guides(fill = guide_legend(title = case$ident)) +
|
|
151
145
|
theme_void() +
|
|
152
146
|
geom_label(
|
|
153
|
-
if (
|
|
147
|
+
if (case$frac != "none")
|
|
154
148
|
aes(label=sprintf("%.1f%%", .frac * 100))
|
|
155
149
|
else
|
|
156
150
|
aes(label=.n),
|
|
@@ -130,18 +130,17 @@ log_info("- Normalizing data")
|
|
|
130
130
|
if (refnorm == "SCTransform") {
|
|
131
131
|
if (defassay == "SCT" && skip_if_normalized) {
|
|
132
132
|
log_warn(" Skipping normalization as the object is already SCTransform'ed")
|
|
133
|
-
query = sobj
|
|
134
133
|
} else {
|
|
135
134
|
log_info(" Using SCTransform normalization")
|
|
136
135
|
sctransform_args$residual.features = rownames(x = reference)
|
|
137
136
|
if (is.null(split_by)) {
|
|
138
137
|
sctransform_args$object = sobj
|
|
139
|
-
|
|
138
|
+
sobj = do_call(SCTransform, sctransform_args)
|
|
140
139
|
sctransform_args$object <- NULL
|
|
141
140
|
rm(sctransform_args)
|
|
142
141
|
gc()
|
|
143
142
|
} else {
|
|
144
|
-
|
|
143
|
+
sobj = mclapply(
|
|
145
144
|
X = sobj,
|
|
146
145
|
FUN = function(x) {
|
|
147
146
|
sctransform_args$object = x
|
|
@@ -149,22 +148,21 @@ if (refnorm == "SCTransform") {
|
|
|
149
148
|
},
|
|
150
149
|
mc.cores = ncores
|
|
151
150
|
)
|
|
152
|
-
if (any(unlist(lapply(
|
|
153
|
-
stop(paste0("\nmclapply (SCTransform) error:",
|
|
151
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
152
|
+
stop(paste0("\nmclapply (SCTransform) error:", sobj))
|
|
154
153
|
}
|
|
155
154
|
}
|
|
156
155
|
}
|
|
157
156
|
} else {
|
|
158
157
|
if (defassay == "RNA" && skip_if_normalized) {
|
|
159
158
|
log_warn(" Skipping normalization as the object is already LogNormalize'd")
|
|
160
|
-
query = sobj
|
|
161
159
|
} else {
|
|
162
160
|
log_info(" Using NormalizeData normalization")
|
|
163
161
|
if (is.null(split_by)) {
|
|
164
162
|
normalizedata_args$object = sobj
|
|
165
|
-
|
|
163
|
+
sobj = do_call(NormalizeData, normalizedata_args)
|
|
166
164
|
} else {
|
|
167
|
-
|
|
165
|
+
sobj = mclapply(
|
|
168
166
|
X = sobj,
|
|
169
167
|
FUN = function(x) {
|
|
170
168
|
normalizedata_args$object = x
|
|
@@ -172,8 +170,8 @@ if (refnorm == "SCTransform") {
|
|
|
172
170
|
},
|
|
173
171
|
mc.cores = ncores
|
|
174
172
|
)
|
|
175
|
-
if (any(unlist(lapply(
|
|
176
|
-
stop(paste0("\nmclapply (NormalizeData) error:",
|
|
173
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
174
|
+
stop(paste0("\nmclapply (NormalizeData) error:", sobj))
|
|
177
175
|
}
|
|
178
176
|
}
|
|
179
177
|
normalizedata_args$object <- NULL
|
|
@@ -181,14 +179,12 @@ if (refnorm == "SCTransform") {
|
|
|
181
179
|
gc()
|
|
182
180
|
}
|
|
183
181
|
}
|
|
184
|
-
rm(sobj)
|
|
185
|
-
gc()
|
|
186
182
|
|
|
187
183
|
# Find anchors between query and reference
|
|
188
184
|
log_info("- Finding anchors")
|
|
189
185
|
findtransferanchors_args$reference = reference
|
|
190
186
|
if (is.null(split_by)) {
|
|
191
|
-
findtransferanchors_args$query =
|
|
187
|
+
findtransferanchors_args$query = sobj
|
|
192
188
|
anchors = do_call(FindTransferAnchors, findtransferanchors_args)
|
|
193
189
|
findtransferanchors_args$reference = NULL
|
|
194
190
|
findtransferanchors_args$query = NULL
|
|
@@ -196,7 +192,7 @@ if (is.null(split_by)) {
|
|
|
196
192
|
gc()
|
|
197
193
|
} else {
|
|
198
194
|
anchors = mclapply(
|
|
199
|
-
X =
|
|
195
|
+
X = sobj,
|
|
200
196
|
FUN = function(x) {
|
|
201
197
|
findtransferanchors_args$query = x
|
|
202
198
|
do_call(FindTransferAnchors, findtransferanchors_args)
|
|
@@ -212,25 +208,25 @@ if (is.null(split_by)) {
|
|
|
212
208
|
log_info("- Mapping query to reference")
|
|
213
209
|
mapquery_args$reference = reference
|
|
214
210
|
if (is.null(split_by)) {
|
|
215
|
-
mapquery_args$query =
|
|
211
|
+
mapquery_args$query = sobj
|
|
216
212
|
mapquery_args$anchorset = anchors
|
|
217
|
-
|
|
213
|
+
sobj = do_call(MapQuery, mapquery_args)
|
|
218
214
|
mapquery_args$reference = NULL
|
|
219
215
|
mapquery_args$query = NULL
|
|
220
216
|
mapquery_args$anchorset = NULL
|
|
221
217
|
gc()
|
|
222
218
|
} else {
|
|
223
|
-
|
|
224
|
-
X = seq_along(
|
|
219
|
+
sobj = mclapply(
|
|
220
|
+
X = seq_along(sobj),
|
|
225
221
|
FUN = function(i) {
|
|
226
|
-
mapquery_args$query =
|
|
222
|
+
mapquery_args$query = sobj[[i]]
|
|
227
223
|
mapquery_args$anchorset = anchors[[i]]
|
|
228
224
|
do_call(MapQuery, mapquery_args)
|
|
229
225
|
},
|
|
230
226
|
mc.cores = ncores
|
|
231
227
|
)
|
|
232
|
-
if (any(unlist(lapply(
|
|
233
|
-
stop(paste0("\nmclapply (MapQuery) error:",
|
|
228
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
229
|
+
stop(paste0("\nmclapply (MapQuery) error:", sobj))
|
|
234
230
|
}
|
|
235
231
|
}
|
|
236
232
|
|
|
@@ -254,7 +250,7 @@ if (is.null(split_by)) {
|
|
|
254
250
|
gc()
|
|
255
251
|
} else {
|
|
256
252
|
mappingscore = mclapply(
|
|
257
|
-
X = seq_along(
|
|
253
|
+
X = seq_along(sobj),
|
|
258
254
|
FUN = function(i) {
|
|
259
255
|
mappingscore_args$anchors = anchors[[i]]
|
|
260
256
|
tryCatch({
|
|
@@ -274,25 +270,25 @@ if (is.null(split_by)) {
|
|
|
274
270
|
# Calculate mapping score and add to metadata
|
|
275
271
|
log_info("- Adding mapping score to metadata")
|
|
276
272
|
if (is.null(split_by)) {
|
|
277
|
-
|
|
278
|
-
object =
|
|
273
|
+
sobj = AddMetaData(
|
|
274
|
+
object = sobj,
|
|
279
275
|
metadata = mappingscore,
|
|
280
276
|
col.name = "mapping.score"
|
|
281
277
|
)
|
|
282
278
|
} else {
|
|
283
|
-
|
|
284
|
-
X = seq_along(
|
|
279
|
+
sobj = mclapply(
|
|
280
|
+
X = seq_along(sobj),
|
|
285
281
|
FUN = function(i) {
|
|
286
282
|
AddMetaData(
|
|
287
|
-
object =
|
|
283
|
+
object = sobj[[i]],
|
|
288
284
|
metadata = mappingscore[[i]],
|
|
289
285
|
col.name = "mapping.score"
|
|
290
286
|
)
|
|
291
287
|
},
|
|
292
288
|
mc.cores = ncores
|
|
293
289
|
)
|
|
294
|
-
if (any(unlist(lapply(
|
|
295
|
-
stop(paste0("\nmclapply (AddMetaData) error:",
|
|
290
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
291
|
+
stop(paste0("\nmclapply (AddMetaData) error:", sobj))
|
|
296
292
|
}
|
|
297
293
|
|
|
298
294
|
# Combine the results
|
|
@@ -300,19 +296,33 @@ if (is.null(split_by)) {
|
|
|
300
296
|
gc()
|
|
301
297
|
# Memory efficient way to merge the results
|
|
302
298
|
# query = Reduce(function(x, y) merge(x, y, merge.dr = "ref.umap"), query)
|
|
303
|
-
|
|
299
|
+
sobj = merge(sobj[[1]], sobj[2:length(sobj)], merge.dr = "ref.umap")
|
|
304
300
|
}
|
|
305
301
|
|
|
306
302
|
# Add the alias to the metadata for the clusters
|
|
307
303
|
log_info("- Adding ident to metadata and set as ident")
|
|
308
|
-
|
|
304
|
+
sobj@meta.data = sobj@meta.data %>% mutate(
|
|
309
305
|
!!sym(ident) := as.factor(!!parse_expr(paste0("predicted.", use)))
|
|
310
306
|
)
|
|
311
|
-
Idents(
|
|
307
|
+
Idents(sobj) = ident
|
|
308
|
+
|
|
309
|
+
# Run PrepSCTFindMarkers when the default assay is SCT
|
|
310
|
+
if (DefaultAssay(sobj) == "SCT") {
|
|
311
|
+
log_info("- Running PrepSCTFindMarkers ...")
|
|
312
|
+
sobj <- PrepSCTFindMarkers(sobj)
|
|
313
|
+
# compose a new SeuratCommand to record it to sobj@commands
|
|
314
|
+
commands <- names(pbmc_small@commands)
|
|
315
|
+
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
316
|
+
scommand@time.stamp <- Sys.time()
|
|
317
|
+
scommand@assay.used <- "SCT"
|
|
318
|
+
scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
|
|
319
|
+
scommand@params <- list()
|
|
320
|
+
sobj@commands$PrepSCTFindMarkers <- scommand
|
|
321
|
+
}
|
|
312
322
|
|
|
313
323
|
# Save
|
|
314
324
|
log_info("- Saving result ...")
|
|
315
|
-
saveRDS(
|
|
325
|
+
saveRDS(sobj, file = outfile)
|
|
316
326
|
|
|
317
327
|
|
|
318
328
|
# ############################
|
|
@@ -325,7 +335,7 @@ ref.reduction = mapquery_args$reduction.model %||% "wnn.umap"
|
|
|
325
335
|
for (qname in names(mapquery_args$refdata)) {
|
|
326
336
|
rname <- mapquery_args$refdata[[qname]]
|
|
327
337
|
|
|
328
|
-
if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(
|
|
338
|
+
if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(sobj[[qname]]))) {
|
|
329
339
|
log_warn(" Skipping transferred array: {qname} -> {rname}")
|
|
330
340
|
next
|
|
331
341
|
}
|
|
@@ -342,7 +352,7 @@ for (qname in names(mapquery_args$refdata)) {
|
|
|
342
352
|
) + NoLegend()
|
|
343
353
|
|
|
344
354
|
query_p <- DimPlot(
|
|
345
|
-
object =
|
|
355
|
+
object = sobj,
|
|
346
356
|
reduction = "ref.umap",
|
|
347
357
|
group.by = paste0("predicted.", qname),
|
|
348
358
|
label = TRUE,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.31.0
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -14,9 +14,9 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Provides-Extra: runinfo
|
|
16
16
|
Requires-Dist: datar[pandas] (>=0.15.6,<0.16.0)
|
|
17
|
-
Requires-Dist: pipen-board[report] (>=0.
|
|
18
|
-
Requires-Dist: pipen-cli-run (>=0.
|
|
19
|
-
Requires-Dist: pipen-filters (>=0.
|
|
20
|
-
Requires-Dist: pipen-poplog (>=0.
|
|
21
|
-
Requires-Dist: pipen-runinfo (>=0.
|
|
22
|
-
Requires-Dist: pipen-verbose (>=0.
|
|
17
|
+
Requires-Dist: pipen-board[report] (>=0.16,<0.17)
|
|
18
|
+
Requires-Dist: pipen-cli-run (>=0.14,<0.15)
|
|
19
|
+
Requires-Dist: pipen-filters (>=0.14,<0.15)
|
|
20
|
+
Requires-Dist: pipen-poplog (>=0.2.0,<0.3.0)
|
|
21
|
+
Requires-Dist: pipen-runinfo (>=0.7,<0.8) ; extra == "runinfo"
|
|
22
|
+
Requires-Dist: pipen-verbose (>=0.12,<0.13)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
biopipen/__init__.py,sha256=
|
|
1
|
+
biopipen/__init__.py,sha256=TJt1pYzTJuPE6GzDP1gxaeeVQlzcoUyEmSVSUPgYnIA,23
|
|
2
2
|
biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
|
|
4
4
|
biopipen/core/config.toml,sha256=7IXvviRicZ2D1h6x3BVgbLJ96nsh-ikvZ0sVlQepqFE,1944
|
|
@@ -21,7 +21,7 @@ biopipen/ns/misc.py,sha256=qXcm0RdR6W-xpYGgQn3v7JBeYRWwVm5gtgSj2tdVxx4,2935
|
|
|
21
21
|
biopipen/ns/plot.py,sha256=_dGLKpyHiJqLIIQu5tqCk8H5BkgGBh_qRUZHkjnOgtE,18080
|
|
22
22
|
biopipen/ns/regulatory.py,sha256=qvc9QrwgwCI_lg0DQ2QOZbAhC8BAD1qnQXSGtAGlVcY,11750
|
|
23
23
|
biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
|
|
24
|
-
biopipen/ns/scrna.py,sha256=
|
|
24
|
+
biopipen/ns/scrna.py,sha256=fXP_h7gchcuk_Jwos0IgY_P8ON6Q995OgKHgdrxfvAY,112868
|
|
25
25
|
biopipen/ns/scrna_metabolic_landscape.py,sha256=6AhaynGG3lNRi96N2tReVT46BJMuEwooSSd2irBoN80,28347
|
|
26
26
|
biopipen/ns/snp.py,sha256=-Jx5Hsv_7KV7TqLU0nHCaPkMEN0CFdi4tNVlyq0rUZ4,27259
|
|
27
27
|
biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
|
|
@@ -139,7 +139,7 @@ biopipen/scripts/scrna/CellTypeAnnotation-hitype.R,sha256=zol-IF0jd3DTzw9I1UVUMY
|
|
|
139
139
|
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R,sha256=LTvFzGzP0nmQLkJIxn6H7L8xkP2Z3q52DknXtBkkmcA,1822
|
|
140
140
|
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R,sha256=RhFHy1E4b3Y7pw6feKEhPx5GTbNdaQMDXHzvRlPk34Q,4544
|
|
141
141
|
biopipen/scripts/scrna/CellTypeAnnotation.R,sha256=X_LR0LZ6UQjLA-SCCcQ8CBFZGG_J8CUNx--qwBS8Oh8,773
|
|
142
|
-
biopipen/scripts/scrna/CellsDistribution.R,sha256=
|
|
142
|
+
biopipen/scripts/scrna/CellsDistribution.R,sha256=QOkQhpendXMmgen99wn0Pl3XKnMJQzvLVb0hPdR49NQ,18980
|
|
143
143
|
biopipen/scripts/scrna/DimPlots.R,sha256=oKhygoWQOCck8OlpnOrNJg0CS2q-r8Com1dfjTvQzvU,1575
|
|
144
144
|
biopipen/scripts/scrna/ExprImputation-alra.R,sha256=xNdQiXgY13-iF16o2KgCJWfPIy4P-lOiO1bl9PfXI4U,864
|
|
145
145
|
biopipen/scripts/scrna/ExprImputation-rmagic.R,sha256=jYIfqZpnvjKJkvItLnemPVtUApHBYQi1_L8rHVbEe1M,735
|
|
@@ -151,19 +151,20 @@ biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=QJ_E6qjRC8GazoopcxZfT5cKxl
|
|
|
151
151
|
biopipen/scripts/scrna/RadarPlots.R,sha256=4zs0hAm7yq1Ls62f_29koPLqAKCeKbYiztNM-HR7j3U,13041
|
|
152
152
|
biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
|
|
153
153
|
biopipen/scripts/scrna/ScFGSEA.R,sha256=MFoJ3i3LFBfsPCxwLPnTh141ZJyrzwnrTuCZIFwvYjU,6318
|
|
154
|
+
biopipen/scripts/scrna/ScSimulation.R,sha256=b2LtL68ucxLoI57tSEDD0hOSbVHUA_x88Y96eK07N-s,1712
|
|
154
155
|
biopipen/scripts/scrna/Seurat2AnnData.R,sha256=G7bcHGffdNlz6Uuy98tQdlahXiPkTDokflp1yTUgcSQ,1578
|
|
155
156
|
biopipen/scripts/scrna/SeuratClusterStats-clustree.R,sha256=FkbniQMHiZGrFAIuS8nUfPVJKFofSL-ZnpxTqIhTa54,3058
|
|
156
157
|
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R,sha256=NEdlJHNXnJZfF7YkefYVWTPO8Z_KAppRAs9rNvB8TXs,2360
|
|
157
158
|
biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=DeGo7AkBRq0V3Y3JDaifId6rrr5dwawTzcSAJ3W1lxE,15614
|
|
158
159
|
biopipen/scripts/scrna/SeuratClusterStats-hists.R,sha256=PXyDKww8HcloCU8r4IqRwRrm6Ly0cpmpvRcP30xxBa4,5038
|
|
159
160
|
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R,sha256=ihZLB27iEhgICKj-ZTnxTvRAYIgg9rzWr9Oyh1YmOYM,2160
|
|
160
|
-
biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=
|
|
161
|
+
biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=jufs99zxNyUaTCDiWzO0yt76ncc73Yn_SpPa5igbJyA,9120
|
|
161
162
|
biopipen/scripts/scrna/SeuratClusterStats.R,sha256=fywRBVjaFIJBxdgsZxLXEheUZI4l5VUoNIcJnhPIdPQ,2193
|
|
162
163
|
biopipen/scripts/scrna/SeuratClustering-common.R,sha256=JX4Cn2FC6GOcBqaVyGDD3MM5zGpm3TpKJlfo2oOQ4Uk,8195
|
|
163
164
|
biopipen/scripts/scrna/SeuratClustering.R,sha256=0OKRBQ5rFuupK7c03_sSt2HMwMdMnCYFqTvkRXFKchs,1706
|
|
164
165
|
biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zykkJtg2lM,509
|
|
165
166
|
biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
|
|
166
|
-
biopipen/scripts/scrna/SeuratMap2Ref.R,sha256
|
|
167
|
+
biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=-Jyd9O4IkVxZJRlskuaP_tOrI7Q1wwkot-YdmzRbLws,11452
|
|
167
168
|
biopipen/scripts/scrna/SeuratMetadataMutater.R,sha256=PMwG0Xvl_EEVKkicfrIi4arEqpY948PkYLkb59kTAXI,1135
|
|
168
169
|
biopipen/scripts/scrna/SeuratPreparing-common.R,sha256=WuD7lGS17eAUQWSiIdAoV0EIeqS3Tnkkx-7PbP6Q3tc,16279
|
|
169
170
|
biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R,sha256=TNN2lfFjpnnO0rguMsG38JYCP1nFUhcPLJ1LqGj-Sc8,6674
|
|
@@ -278,7 +279,7 @@ biopipen/utils/reference.py,sha256=oi5evicLwHxF0KAIPNZohBeHJLJQNWFJH0cr2y5pgcg,5
|
|
|
278
279
|
biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
|
|
279
280
|
biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
|
|
280
281
|
biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
|
|
281
|
-
biopipen-0.
|
|
282
|
-
biopipen-0.
|
|
283
|
-
biopipen-0.
|
|
284
|
-
biopipen-0.
|
|
282
|
+
biopipen-0.31.0.dist-info/METADATA,sha256=OtaZr2FTKCeaK9H-yBmssBT6848_F1j1wa2e8D3wraE,882
|
|
283
|
+
biopipen-0.31.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
284
|
+
biopipen-0.31.0.dist-info/entry_points.txt,sha256=69SbeMaF47Z2DS40yo-qDyoBKmMmumrNnsjEZMOioCE,625
|
|
285
|
+
biopipen-0.31.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|