biopipen 0.34.8__py3-none-any.whl → 0.34.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/regulatory.py +4 -0
- biopipen/ns/scrna.py +17 -1
- biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
- biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
- biopipen/scripts/regulatory/motifs-common.R +3 -2
- biopipen/scripts/scrna/CellCellCommunication.py +4 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +33 -11
- biopipen/scripts/scrna/MarkersFinder.R +9 -1
- biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
- {biopipen-0.34.8.dist-info → biopipen-0.34.9.dist-info}/METADATA +3 -2
- {biopipen-0.34.8.dist-info → biopipen-0.34.9.dist-info}/RECORD +17 -17
- {biopipen-0.34.8.dist-info → biopipen-0.34.9.dist-info}/WHEEL +1 -1
- {biopipen-0.34.8.dist-info → biopipen-0.34.9.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.34.8"
|
|
1
|
+
__version__ = "0.34.9"
|
biopipen/ns/regulatory.py
CHANGED
|
@@ -132,6 +132,9 @@ class MotifAffinityTest(Proc):
|
|
|
132
132
|
If no `regulator_col` is provided, no regulator information is written in
|
|
133
133
|
the output. Otherwise, the regulator information is written in the output in
|
|
134
134
|
the `Regulator` column.
|
|
135
|
+
var_col: The column names in the `in.motiffile` containing the variant information.
|
|
136
|
+
It has to be matching the names in the `in.varfile`. This is helpful when
|
|
137
|
+
we only need to test the pairs of variants and motifs in the `in.motiffile`.
|
|
135
138
|
notfound (choice): What to do if a motif is not found in the database,
|
|
136
139
|
or a regulator is not found in the regulator-motif mapping (envs.regmotifs)
|
|
137
140
|
file.
|
|
@@ -200,6 +203,7 @@ class MotifAffinityTest(Proc):
|
|
|
200
203
|
"bcftools": config.exe.bcftools,
|
|
201
204
|
"motif_col": None,
|
|
202
205
|
"regulator_col": None,
|
|
206
|
+
"var_col": None,
|
|
203
207
|
"notfound": "error",
|
|
204
208
|
"motifdb": config.ref.tf_motifdb,
|
|
205
209
|
"regmotifs": config.ref.tf_motifs,
|
biopipen/ns/scrna.py
CHANGED
|
@@ -787,6 +787,11 @@ class ModuleScoreCalculator(Proc):
|
|
|
787
787
|
`reduction = "DC"` in `env.dimplots` in `SeuratClusterStats`.
|
|
788
788
|
This requires [`SingleCellExperiment`](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html)
|
|
789
789
|
and [`destiny`](https://bioconductor.org/packages/release/bioc/html/destiny.html) R packages.
|
|
790
|
+
post_mutaters (type=json): The mutaters to mutate the metadata after
|
|
791
|
+
calculating the module scores.
|
|
792
|
+
The mutaters will be applied in the order specified.
|
|
793
|
+
This is useful when you want to create new scores based on the
|
|
794
|
+
calculated module scores.
|
|
790
795
|
""" # noqa: E501
|
|
791
796
|
|
|
792
797
|
input = "srtobj:file"
|
|
@@ -810,6 +815,7 @@ class ModuleScoreCalculator(Proc):
|
|
|
810
815
|
# "Activation": {"features": "IFNG"},
|
|
811
816
|
# "Proliferation": {"features": "STMN1,TUBB"},
|
|
812
817
|
},
|
|
818
|
+
"post_mutaters": {},
|
|
813
819
|
}
|
|
814
820
|
script = "file://../scripts/scrna/ModuleScoreCalculator.R"
|
|
815
821
|
|
|
@@ -1131,7 +1137,7 @@ class MarkersFinder(Proc):
|
|
|
1131
1137
|
- res (type=int): The resolution of the plots.
|
|
1132
1138
|
- height (type=int): The height of the plots.
|
|
1133
1139
|
- width (type=int): The width of the plots.
|
|
1134
|
-
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.
|
|
1140
|
+
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1135
1141
|
enrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1136
1142
|
The keys are the names of the cases and the values are the dicts inherited from `enrich_plots_defaults`.
|
|
1137
1143
|
The cases under `envs.cases` can inherit this options.
|
|
@@ -1781,6 +1787,11 @@ class CellTypeAnnotation(Proc):
|
|
|
1781
1787
|
the original cell types will be kept and nothing will be changed.
|
|
1782
1788
|
///
|
|
1783
1789
|
|
|
1790
|
+
more_cell_types (type=json): The additional cell type annotations to add to the metadata.
|
|
1791
|
+
The keys are the new column names and the values are the cell types lists.
|
|
1792
|
+
The cell type lists work the same as `cell_types` above.
|
|
1793
|
+
This is useful when you want to keep multiple annotations of cell types.
|
|
1794
|
+
|
|
1784
1795
|
sccatch_args (ns): The arguments for `scCATCH::findmarkergene()` if `tool` is `sccatch`.
|
|
1785
1796
|
- species: The specie of cells.
|
|
1786
1797
|
- cancer: If the sample is from cancer tissue, then the cancer type may be defined.
|
|
@@ -1842,6 +1853,7 @@ class CellTypeAnnotation(Proc):
|
|
|
1842
1853
|
"sctype_tissue": None,
|
|
1843
1854
|
"sctype_db": config.ref.sctype_db,
|
|
1844
1855
|
"cell_types": [],
|
|
1856
|
+
"more_cell_types": None,
|
|
1845
1857
|
"sccatch_args": {
|
|
1846
1858
|
"species": None,
|
|
1847
1859
|
"cancer": "Normal",
|
|
@@ -2524,6 +2536,10 @@ class CellCellCommunicationPlots(Proc):
|
|
|
2524
2536
|
cases (type=json): The cases for the plots.
|
|
2525
2537
|
The keys are the names of the cases and the values are the arguments for
|
|
2526
2538
|
the plots. The arguments include the ones inherited from `envs`.
|
|
2539
|
+
You can have a special `plot_type` `"table"` to generate a table for the
|
|
2540
|
+
ccc data to save as a text file and show in the report.
|
|
2541
|
+
If no cases are given, a default case will be used, with the
|
|
2542
|
+
key `Cell-Cell Communication`.
|
|
2527
2543
|
<more>: Other arguments passed to
|
|
2528
2544
|
[scplotter::CCCPlot](https://pwwang.github.io/scplotter/reference/CCCPlot.html)
|
|
2529
2545
|
""" # noqa: E501
|
|
@@ -14,6 +14,7 @@ bcftools <- {{envs.bcftools | r}}
|
|
|
14
14
|
genome <- {{envs.genome | r}}
|
|
15
15
|
motif_col <- {{envs.motif_col | r}}
|
|
16
16
|
regulator_col <- {{envs.regulator_col | r}}
|
|
17
|
+
var_col <- {{envs.var_col | r}}
|
|
17
18
|
notfound <- {{envs.notfound | r}}
|
|
18
19
|
motifdb <- {{envs.motifdb | r}}
|
|
19
20
|
regmotifs <- {{envs.regmotifs | r}}
|
|
@@ -21,6 +22,7 @@ devpars <- {{envs.devpars | r}}
|
|
|
21
22
|
plot_nvars <- {{envs.plot_nvars | r}}
|
|
22
23
|
plots <- {{envs.plots | r}}
|
|
23
24
|
cutoff <- {{envs.cutoff | r}}
|
|
25
|
+
set.seed(8525)
|
|
24
26
|
|
|
25
27
|
if (is.null(motifdb) || !file.exists(motifdb)) {
|
|
26
28
|
stop("Motif database (envs.motifdb) is required and must exist")
|
|
@@ -47,10 +49,21 @@ log <- get_logger()
|
|
|
47
49
|
log$info("Reading input regulator/motif file ...")
|
|
48
50
|
in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
49
51
|
|
|
52
|
+
|
|
50
53
|
log$info("Ensuring motifs and regulators in the input data ...")
|
|
51
|
-
in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
54
|
+
in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, var_col, regmotifs, notfound = notfound)
|
|
52
55
|
genome_pkg <- get_genome_pkg(genome)
|
|
53
56
|
|
|
57
|
+
motif_var_pairs <- NULL
|
|
58
|
+
if (!is.null(var_col)) {
|
|
59
|
+
log$info("Obtaining motif-variant pairs to test ...")
|
|
60
|
+
if (!var_col %in% colnames(in_motifs)) {
|
|
61
|
+
stop("Variant column (envs.var_col) not found in the input motif file")
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
motif_var_pairs <- unique(paste0(in_motifs[[motif_col]], " // ", in_motifs[[var_col]]))
|
|
65
|
+
}
|
|
66
|
+
|
|
54
67
|
log$info("Reading variant file ...")
|
|
55
68
|
if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
|
|
56
69
|
log$info("Converting VCF file to BED file ...")
|
|
@@ -77,10 +90,13 @@ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfou
|
|
|
77
90
|
tool <- tolower(tool)
|
|
78
91
|
tool <- match.arg(tool, c("motifbreakr", "atsnp"))
|
|
79
92
|
|
|
80
|
-
if
|
|
93
|
+
{% if envs.tool == "motifbreakr" %}
|
|
81
94
|
motifbreakr_args <- {{envs.motifbreakr_args | r}}
|
|
82
95
|
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
|
|
83
|
-
|
|
84
|
-
atsnp_args <-
|
|
96
|
+
{% else %}
|
|
97
|
+
atsnp_args <- list_update(
|
|
98
|
+
list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
|
|
99
|
+
{{envs.atsnp_args | r}}
|
|
100
|
+
)
|
|
85
101
|
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
|
|
86
|
-
}
|
|
102
|
+
{% endif %}
|
|
@@ -46,6 +46,13 @@ atsnp_result <- ComputePValues(
|
|
|
46
46
|
testing.mc = TRUE
|
|
47
47
|
)
|
|
48
48
|
|
|
49
|
+
if (!is.null(motif_var_pairs)) {
|
|
50
|
+
log$info("Filtering motif-variant pairs ...")
|
|
51
|
+
atsnp_result$motifs_vars <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
|
|
52
|
+
atsnp_result <- atsnp_result[atsnp_result$motifs_vars %in% motif_var_pairs, , drop = FALSE]
|
|
53
|
+
atsnp_result$motifs_vars <- NULL
|
|
54
|
+
}
|
|
55
|
+
|
|
49
56
|
padj_col <- paste0(atsnp_args$p, "_adj")
|
|
50
57
|
atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
|
|
51
58
|
cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
|
|
@@ -87,7 +94,8 @@ write.table(
|
|
|
87
94
|
|
|
88
95
|
log$info("Plotting variants ...")
|
|
89
96
|
# Convert result to GRanges object
|
|
90
|
-
atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
|
|
97
|
+
atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
|
|
98
|
+
atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
|
|
91
99
|
atsnp_result$effect <- "strong"
|
|
92
100
|
atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
|
|
93
101
|
atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
|
|
@@ -96,7 +104,6 @@ attributes(atsnp_result)$genome.package <- genome_pkg
|
|
|
96
104
|
attributes(atsnp_result)$motifs <- mdb
|
|
97
105
|
|
|
98
106
|
if (is.null(plots) || length(plots) == 0) {
|
|
99
|
-
atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
|
|
100
107
|
atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
|
|
101
108
|
variants <- unique(atsnp_result$SNP_id)
|
|
102
109
|
} else {
|
|
@@ -50,6 +50,7 @@ results <- motifbreakR(
|
|
|
50
50
|
|
|
51
51
|
log$info("Calculating p values ...")
|
|
52
52
|
results <- calculatePvalue(results)
|
|
53
|
+
results$.id <- 1:length(results)
|
|
53
54
|
results_to_save <- as.data.frame(unname(results))
|
|
54
55
|
results_to_save$motifPos <- lapply(results_to_save$motifPos, function(x) paste(x, collapse = ","))
|
|
55
56
|
results_to_save$altPos <- lapply(results_to_save$altPos, function(x) paste(x, collapse = ","))
|
|
@@ -60,20 +61,28 @@ if (!is.null(regulator_col)) {
|
|
|
60
61
|
drop = TRUE
|
|
61
62
|
]
|
|
62
63
|
}
|
|
63
|
-
results_to_save <- apply(results_to_save, 2, as.character)
|
|
64
|
+
results_to_save <- as.data.frame(apply(results_to_save, 2, as.character))
|
|
65
|
+
|
|
66
|
+
if (!is.null(motif_var_pairs)) {
|
|
67
|
+
log$info("Filtering motif-variant pairs ...")
|
|
68
|
+
results_to_save$motifs_vars <- paste0(results_to_save$providerId, " // ", results_to_save$SNP_id)
|
|
69
|
+
results_to_save <- results_to_save[results_to_save$motifs_vars %in% motif_var_pairs, , drop = FALSE]
|
|
70
|
+
results_to_save$motifs_vars <- NULL
|
|
71
|
+
}
|
|
64
72
|
|
|
65
73
|
write.table(
|
|
66
74
|
results_to_save,
|
|
67
75
|
file = file.path(outdir, "motifbreakr.txt"),
|
|
68
76
|
sep = "\t", quote = FALSE, row.names = FALSE
|
|
69
77
|
)
|
|
70
|
-
rm(results_to_save)
|
|
78
|
+
# rm(results_to_save)
|
|
71
79
|
|
|
72
80
|
log$info("Plotting variants ...")
|
|
73
81
|
if (is.null(plots) || length(plots) == 0) {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
82
|
+
results_to_save$alleleDiff <- as.numeric(results_to_save$alleleDiff)
|
|
83
|
+
results_to_save <- results_to_save[order(-abs(results_to_save$alleleDiff)), , drop = FALSE]
|
|
84
|
+
results_to_save <- results_to_save[1:min(plot_nvars, nrow(results_to_save)), , drop = FALSE]
|
|
85
|
+
variants <- unique(results_to_save$SNP_id)
|
|
77
86
|
} else {
|
|
78
87
|
variants <- names(plots)
|
|
79
88
|
}
|
|
@@ -88,7 +97,7 @@ for (variant in variants) {
|
|
|
88
97
|
if (is.null(plots[[variant]]$devpars)) {
|
|
89
98
|
plots[[variant]]$devpars <- devpars
|
|
90
99
|
}
|
|
91
|
-
res <- results[results$SNP_id == variant, , drop = FALSE]
|
|
100
|
+
res <- results[results$SNP_id == variant & results$.id %in% results_to_save$.id, , drop = FALSE]
|
|
92
101
|
res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
|
|
93
102
|
|
|
94
103
|
plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
|
|
@@ -33,7 +33,7 @@ log$info("Reading input data ...")
|
|
|
33
33
|
indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
34
34
|
|
|
35
35
|
log$info("Ensuring regulators in the input data ...")
|
|
36
|
-
indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
36
|
+
indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, "SNP_id", regmotifs, notfound = notfound)
|
|
37
37
|
genome_pkg <- get_genome_pkg(genome)
|
|
38
38
|
|
|
39
39
|
log$info("Reading motif database ...")
|
|
@@ -138,12 +138,13 @@ motifdb_to_motiflib <- function(motifdb) {
|
|
|
138
138
|
#' @param outdir Output directory, used to save un-matched regulators
|
|
139
139
|
#' @param motif_col Column name for the motif
|
|
140
140
|
#' @param regulator_col Column name for the regulator
|
|
141
|
+
#' @param var_col Column name for the variant
|
|
141
142
|
#' @param regmotifs Regulator-motif mapping file
|
|
142
143
|
#' @param log_indent Indentation for log messages
|
|
143
144
|
#' @param notfound Action to take if regulators are not found in the mapping file
|
|
144
145
|
#' @return Data frame with regulators and motifs
|
|
145
146
|
#' @export
|
|
146
|
-
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
|
|
147
|
+
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, var_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
|
|
147
148
|
if (is.null(motif_col)) {
|
|
148
149
|
if (is.null(regmotifs)) {
|
|
149
150
|
stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
|
|
@@ -198,7 +199,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
|
|
|
198
199
|
regulator_col <<- rm_reg_col
|
|
199
200
|
}
|
|
200
201
|
} else {
|
|
201
|
-
indata <- indata[!duplicated(indata[, c(regulator_col, motif_col), drop = FALSE]), , drop = FALSE]
|
|
202
|
+
indata <- indata[!duplicated(indata[, c(regulator_col, motif_col, var_col), drop = FALSE]), , drop = FALSE]
|
|
202
203
|
}
|
|
203
204
|
|
|
204
205
|
return(indata)
|
|
@@ -7,6 +7,10 @@ import scanpy
|
|
|
7
7
|
import liana
|
|
8
8
|
import liana.method.sc._liana_pipe as _liana_pipe
|
|
9
9
|
|
|
10
|
+
# AttributeError: module 'numpy' has no attribute 'product'
|
|
11
|
+
if not hasattr(np, "product"):
|
|
12
|
+
np.product = np.prod
|
|
13
|
+
|
|
10
14
|
# monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
|
|
11
15
|
# https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
|
|
12
16
|
def _trimean(a, axis=0):
|
|
@@ -27,7 +27,7 @@ defaults <- list(
|
|
|
27
27
|
devpars = list(res = 100)
|
|
28
28
|
)
|
|
29
29
|
|
|
30
|
-
cases <- expand_cases(cases, defaults)
|
|
30
|
+
cases <- expand_cases(cases, defaults, default_case = "Cell-Cell Communication")
|
|
31
31
|
log <- get_logger()
|
|
32
32
|
reporter <- get_reporter()
|
|
33
33
|
|
|
@@ -35,12 +35,31 @@ do_case <- function(name) {
|
|
|
35
35
|
log$info("- Case: {name}")
|
|
36
36
|
case <- cases[[name]]
|
|
37
37
|
info <- case_info(name, outdir, is_dir = FALSE)
|
|
38
|
-
case <- extract_vars(case, "subset", "devpars", "more_formats", "descr")
|
|
38
|
+
case <- extract_vars(case, subset_ = "subset", "devpars", "more_formats", "descr")
|
|
39
39
|
|
|
40
40
|
case$data <- ccc
|
|
41
|
-
if (!is.null(subset)) {
|
|
42
|
-
case$data <- ccc %>% dplyr::filter(!!parse_expr(subset))
|
|
41
|
+
if (!is.null(subset_)) {
|
|
42
|
+
case$data <- ccc %>% dplyr::filter(!!parse_expr(subset_))
|
|
43
43
|
}
|
|
44
|
+
|
|
45
|
+
if (identical(case$plot_type, "table")) {
|
|
46
|
+
write.table(
|
|
47
|
+
case$data,
|
|
48
|
+
file = paste0(info$prefix, ".txt"),
|
|
49
|
+
sep = "\t",
|
|
50
|
+
row.names = FALSE,
|
|
51
|
+
col.names = TRUE,
|
|
52
|
+
quote = FALSE
|
|
53
|
+
)
|
|
54
|
+
report <- list(
|
|
55
|
+
kind = "table",
|
|
56
|
+
data = list(nrows = 100),
|
|
57
|
+
src = paste0(info$prefix, ".txt")
|
|
58
|
+
)
|
|
59
|
+
reporter$add2(report, hs = c(info$section, info$name))
|
|
60
|
+
return()
|
|
61
|
+
}
|
|
62
|
+
|
|
44
63
|
if (is.null(case$magnitude)) {
|
|
45
64
|
case$magnitude <- NULL
|
|
46
65
|
}
|
|
@@ -5,11 +5,15 @@ outfile <- {{out.outfile | r}}
|
|
|
5
5
|
celltypes <- {{envs.cell_types | r}}
|
|
6
6
|
newcol <- {{envs.newcol | r}}
|
|
7
7
|
merge_same_labels <- {{envs.merge | r}}
|
|
8
|
+
more_cell_types <- {{envs.more_cell_types | r}}
|
|
8
9
|
|
|
9
10
|
log <- biopipen.utils::get_logger()
|
|
10
11
|
|
|
11
12
|
if (is.null(celltypes) || length(celltypes) == 0) {
|
|
12
13
|
log$warn("No cell types are given!")
|
|
14
|
+
if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
|
|
15
|
+
log$warn("`envs.celltypes` is not given, won't process `envs.more_cell_types`!")
|
|
16
|
+
}
|
|
13
17
|
|
|
14
18
|
if (merge_same_labels) {
|
|
15
19
|
log$warn("Ignoring 'envs.merge' because no cell types are given!")
|
|
@@ -25,26 +29,43 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
25
29
|
} else {
|
|
26
30
|
idents <- as.character(unique(idents))
|
|
27
31
|
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
process_celltypes <- function(ct, key = NULL) {
|
|
33
|
+
if (length(ct) < length(idents)) {
|
|
34
|
+
ct <- c(ct, idents[(length(ct) + 1):length(idents)])
|
|
35
|
+
} else if (length(ct) > length(idents)) {
|
|
36
|
+
ct <- ct[1:length(idents)]
|
|
37
|
+
if (is.null(key)) {
|
|
38
|
+
log$warn("The length of cell types is longer than the number of clusters!")
|
|
39
|
+
} else {
|
|
40
|
+
log$warn(paste0("The length of cell types for '", key, "' is longer than the number of clusters!"))
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
for (i in seq_along(ct)) {
|
|
44
|
+
if (ct[i] == "-" || ct[i] == "") {
|
|
45
|
+
ct[i] <- idents[i]
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
names(ct) <- idents
|
|
49
|
+
return(ct)
|
|
34
50
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
51
|
+
|
|
52
|
+
if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
|
|
53
|
+
for (key in names(more_cell_types)) {
|
|
54
|
+
ct <- more_cell_types[[key]]
|
|
55
|
+
ct <- process_celltypes(ct, key)
|
|
56
|
+
log$info(paste0("Adding additional cell type annotation: '", key, "' ..."))
|
|
57
|
+
sobj@meta.data[[key]] <- ct[as.character(Idents(sobj))]
|
|
38
58
|
}
|
|
39
59
|
}
|
|
40
|
-
|
|
60
|
+
|
|
61
|
+
celltypes <- process_celltypes(celltypes)
|
|
41
62
|
|
|
42
63
|
log$info("Renaming cell types ...")
|
|
43
64
|
if (is.null(newcol)) {
|
|
44
65
|
has_na <- "NA" %in% unlist(celltypes) || anyNA(unlist(celltypes))
|
|
45
66
|
sobj$seurat_clusters_id <- Idents(sobj)
|
|
46
67
|
celltypes$object <- sobj
|
|
47
|
-
sobj <- do_call(RenameIdents, celltypes)
|
|
68
|
+
sobj <- biopipen.utils::do_call(RenameIdents, celltypes)
|
|
48
69
|
sobj$seurat_clusters <- Idents(sobj)
|
|
49
70
|
if (has_na) {
|
|
50
71
|
log$info("Filtering clusters if NA ...")
|
|
@@ -65,5 +86,6 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
65
86
|
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
66
87
|
}
|
|
67
88
|
|
|
89
|
+
log$info("Saving Seurat object ...")
|
|
68
90
|
biopipen.utils::save_obj(sobj, outfile)
|
|
69
91
|
}
|
|
@@ -353,16 +353,24 @@ process_markers <- function(markers, info, case) {
|
|
|
353
353
|
for (db in case$dbs) {
|
|
354
354
|
plots <- list()
|
|
355
355
|
for (plotname in names(case$enrich_plots)) {
|
|
356
|
-
plotargs <- case$enrich_plots[[plotname]]
|
|
356
|
+
plotargs <- extract_vars(case$enrich_plots[[plotname]], "descr", allow_nonexisting = TRUE)
|
|
357
357
|
plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
|
|
358
358
|
|
|
359
359
|
p <- do_call(VizEnrichment, plotargs)
|
|
360
360
|
|
|
361
361
|
if (plotargs$plot_type == "bar") {
|
|
362
362
|
attr(p, "height") <- attr(p, "height") / 1.5
|
|
363
|
+
descr <- descr %||% glue::glue(
|
|
364
|
+
"The bar plot shows the top enriched terms in database '{db}', ",
|
|
365
|
+
"the x-axis shows the -log10 of the adjusted p-values, ",
|
|
366
|
+
"and the y-axis shows the term names. The number next to each bar indicates the overlap gene count."
|
|
367
|
+
)
|
|
363
368
|
}
|
|
364
369
|
outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
|
|
365
370
|
save_plot(p, outprefix, plotargs$devpars, formats = "png")
|
|
371
|
+
if (!is.null(descr)) {
|
|
372
|
+
plots[[length(plots) + 1]] <- list(kind = "descr", content = glue::glue(descr))
|
|
373
|
+
}
|
|
366
374
|
plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
|
|
367
375
|
}
|
|
368
376
|
reporter$add2(
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
library(
|
|
1
|
+
library(rlang)
|
|
2
2
|
library(dplyr)
|
|
3
|
+
library(Seurat)
|
|
3
4
|
library(biopipen.utils)
|
|
4
5
|
|
|
5
6
|
sobjfile <- {{in.srtobj | r}}
|
|
6
7
|
outfile <- {{out.rdsfile | r}}
|
|
7
8
|
defaults <- {{envs.defaults | r}}
|
|
8
9
|
modules <- {{envs.modules | r}}
|
|
10
|
+
post_mutaters <- {{envs.post_mutaters | r}}
|
|
9
11
|
|
|
10
12
|
log <- get_logger()
|
|
11
13
|
|
|
@@ -134,6 +136,12 @@ for (key in names(modules)) {
|
|
|
134
136
|
}
|
|
135
137
|
}
|
|
136
138
|
|
|
139
|
+
if (!is.null(post_mutaters) && length(post_mutaters) > 0) {
|
|
140
|
+
log$info("Applying post mutaters ...")
|
|
141
|
+
sobj@meta.data <- sobj@meta.data %>%
|
|
142
|
+
mutate(!!!lapply(post_mutaters, parse_expr))
|
|
143
|
+
}
|
|
144
|
+
|
|
137
145
|
# save seurat object
|
|
138
146
|
log$info("Saving Seurat object ...")
|
|
139
147
|
save_obj(sobj, outfile)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.34.8
|
|
3
|
+
Version: 0.34.9
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
17
|
Provides-Extra: runinfo
|
|
17
18
|
Requires-Dist: datar[pandas] (>=0.15.8,<0.16.0)
|
|
18
19
|
Requires-Dist: pipen-board[report] (>=0.17,<0.18)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
biopipen/__init__.py,sha256=
|
|
1
|
+
biopipen/__init__.py,sha256=zx5DRdGvH-gZCcW6m_sgJE0BiH5pc_-kR07ANPXxh70,23
|
|
2
2
|
biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
|
|
4
4
|
biopipen/core/config.toml,sha256=lZV_vbYWk6uqm19ZWJcsZCcSNqAdIfN2fOfamzxZpg4,2148
|
|
@@ -20,9 +20,9 @@ biopipen/ns/gsea.py,sha256=eMGj6lljdMds2Pzs3Mcab0lQPU4vtgRTKMhAsKXpxYo,9742
|
|
|
20
20
|
biopipen/ns/misc.py,sha256=0jDPvpRL3EUIf2ipTjKqLTZgnallLWEjSxzTpS-geTQ,4355
|
|
21
21
|
biopipen/ns/plot.py,sha256=N41_izb6zi-XArUly5WhLebapNXbTNSgGlOCCwtrDlY,18282
|
|
22
22
|
biopipen/ns/protein.py,sha256=YJtlKoHI2p5yHdxKeQnNtm5QrbxDGOq1UXOdt_7tlTs,6391
|
|
23
|
-
biopipen/ns/regulatory.py,sha256=
|
|
23
|
+
biopipen/ns/regulatory.py,sha256=WlnX_R8jEFyxCjk8mru5Qu5iCQJLzjMWiWGoc3gygzc,16221
|
|
24
24
|
biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
|
|
25
|
-
biopipen/ns/scrna.py,sha256
|
|
25
|
+
biopipen/ns/scrna.py,sha256=-pW0noyhqocHwoN4mF6ZYegfryc5H_l4AB53EQ2e-UE,146055
|
|
26
26
|
biopipen/ns/scrna_metabolic_landscape.py,sha256=EwLMrsj_pTqvyAgtHLoishjQxCg_j8n5OofuTofUph0,22096
|
|
27
27
|
biopipen/ns/snp.py,sha256=iXWrw7Lmhf4_ct57HGT7JGTClCXUD4sZ2FzOgsC2pTg,28123
|
|
28
28
|
biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
|
|
@@ -128,22 +128,22 @@ biopipen/scripts/protein/PDB2Fasta.py,sha256=HVsoRRpieobuPwemCz30_N0rJ7T4aGFTQKZ
|
|
|
128
128
|
biopipen/scripts/protein/Prodigy.py,sha256=elA62U7WJ89TGEKobvjjd3Refjzr61S69PiVO0qF6DE,4493
|
|
129
129
|
biopipen/scripts/protein/ProdigySummary.R,sha256=qP30GYFpmxCvcfT2IVbJImGMgOdreKi-m1nyUqH6480,3799
|
|
130
130
|
biopipen/scripts/protein/RMSD.py,sha256=zE0g9QKWqqpC8lhGoQIF54VqDw37FaOUkvk0vtYf4-c,6250
|
|
131
|
-
biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=
|
|
132
|
-
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=
|
|
133
|
-
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=
|
|
131
|
+
biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=7kQFV9ExawMkCfLJ-mIsnxbXazL57D1-hVBWcHEPrus,3466
|
|
132
|
+
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=pFO6SVo_h1lRdNhq-GOX5jD8jF9PlXz0_XnRDpy9RXg,4670
|
|
133
|
+
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=Z-OcUuLPScX5kZvozNGhtawdkbF33ckiuBsSrXRAApk,3853
|
|
134
134
|
biopipen/scripts/regulatory/MotifScan.py,sha256=mxhRWp6NBGEMpWJOpwqIvzkKlrgnRvJApyCU91svh8E,5399
|
|
135
|
-
biopipen/scripts/regulatory/VariantMotifPlot.R,sha256
|
|
136
|
-
biopipen/scripts/regulatory/motifs-common.R,sha256=
|
|
135
|
+
biopipen/scripts/regulatory/VariantMotifPlot.R,sha256=cHngquU7zVCUhh8zGi40k1o7oeWLfuF78Ycljo_Ql88,2849
|
|
136
|
+
biopipen/scripts/regulatory/motifs-common.R,sha256=ES2UaFE68yULd4mfw7-T0zUcXQtb_uI6IDS-hQsVSvQ,13369
|
|
137
137
|
biopipen/scripts/rnaseq/Simulation-ESCO.R,sha256=cdADB5dpkI5hvzDPw5PyrhOyRFU4PMLgSsa84YOZALc,6424
|
|
138
138
|
biopipen/scripts/rnaseq/Simulation-RUVcorr.R,sha256=oZJHHEMdH7SBIkhCrgkpNYroBkF0dtr20U3ugY9I9hM,1202
|
|
139
139
|
biopipen/scripts/rnaseq/Simulation.R,sha256=LvIjL_onCA8GJR5TPiREUkN_NlMz_ngcw6PezWKc2x0,809
|
|
140
140
|
biopipen/scripts/rnaseq/UnitConversion.R,sha256=xuoj9AdFiCKNztpCmzwCz9VxmUAE-FslZ_LgjOm7dhM,11360
|
|
141
141
|
biopipen/scripts/scrna/AnnData2Seurat.R,sha256=wc5PDbK9TkuJtoXXxF4W1ODylWhyfKWd3vV_AdOcTjM,1118
|
|
142
142
|
biopipen/scripts/scrna/CCPlotR-patch.R,sha256=KpB8fwacBaWaUNjIidcLUkMShLjS4Gq9UY8LUgIITB0,8369
|
|
143
|
-
biopipen/scripts/scrna/CellCellCommunication.py,sha256=
|
|
144
|
-
biopipen/scripts/scrna/CellCellCommunicationPlots.R,sha256=
|
|
143
|
+
biopipen/scripts/scrna/CellCellCommunication.py,sha256=LnEuV8YHOJSYM7Tb_jwLbTQdMSpJw5ChRIiLktcJzSQ,4471
|
|
144
|
+
biopipen/scripts/scrna/CellCellCommunicationPlots.R,sha256=IcqqhVWasSE54PDWaw85u5_yup_YHVNNwZI7oOy9250,2456
|
|
145
145
|
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R,sha256=CwYR8WWQMf8r7V2CTalG4kxdKnYMtyhpJBe9zP2sQWA,6964
|
|
146
|
-
biopipen/scripts/scrna/CellTypeAnnotation-direct.R,sha256=
|
|
146
|
+
biopipen/scripts/scrna/CellTypeAnnotation-direct.R,sha256=jwjSBql66ku11b4O_7bIs9zuwbqHiGgrAFDk1tSbwg4,3111
|
|
147
147
|
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R,sha256=vvjhxin4aoA9heecey0dpr6ofirybygY3ApjgtQW89Y,2094
|
|
148
148
|
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R,sha256=xxB4K1MzBSNQnDxa44s5ExeU67MbncOBf8lGFr7RvwQ,1870
|
|
149
149
|
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R,sha256=1BZ8tOJsB7lRtrYXtImxly-he4gfDTfGqbwK35yJjYw,4604
|
|
@@ -155,9 +155,9 @@ biopipen/scripts/scrna/ExprImputation-rmagic.R,sha256=ePgbMZ_3bKbeUrjsMdkdtBM_MS
|
|
|
155
155
|
biopipen/scripts/scrna/ExprImputation-scimpute.R,sha256=MI_bYfvCDKJsuGntUxfx_-NdrssBoQgL95-DGwJVE5s,1191
|
|
156
156
|
biopipen/scripts/scrna/ExprImputation.R,sha256=GcdZJpkDpq88hRQjtLZY5-byp8V43stEFm5T-pQbU6A,319
|
|
157
157
|
biopipen/scripts/scrna/LoomTo10X.R,sha256=c6F0p1udsL5UOlb84-53K5BsjSDWkdFyYTt5NQmlIec,1059
|
|
158
|
-
biopipen/scripts/scrna/MarkersFinder.R,sha256
|
|
158
|
+
biopipen/scripts/scrna/MarkersFinder.R,sha256=qBVdxO8cKTJMtGyJLl2QGRrtdiXOJSLXu6rpZUPkDZk,25437
|
|
159
159
|
biopipen/scripts/scrna/MetaMarkers.R,sha256=BgYaWYEj6obwqaZaDWqNPtxb1IEEAnXAeBE0Ji9PvBA,12426
|
|
160
|
-
biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256
|
|
160
|
+
biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=_mvo35a-wk5miUb_kMIVwvKK0b6InRa1NKtN8zznGwk,4457
|
|
161
161
|
biopipen/scripts/scrna/PseudoBulkDEG.R,sha256=IuM4hl-KHZ5aaaTqZeylw4b1ZenMZaY4qobD5qxAlHs,25199
|
|
162
162
|
biopipen/scripts/scrna/RadarPlots.R,sha256=Kn1E-hpczuujpgNjR8MqeIIVN-S3PbpmfcKWGKcNCVY,14546
|
|
163
163
|
biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
|
|
@@ -284,7 +284,7 @@ biopipen/utils/misc.py,sha256=pDZ-INWVNqHuXYvcjmu8KqNAigkh2lsHy0BxX44CPvc,4048
|
|
|
284
284
|
biopipen/utils/reference.py,sha256=Oc6IlA1giLxymAuI7DO-IQLHQ7-DbsWzOQE86oTDfMU,5955
|
|
285
285
|
biopipen/utils/reporter.py,sha256=VwLl6xyVDWnGY7NEXyqBlkW8expKJoNQ5iTyZSELf5c,4922
|
|
286
286
|
biopipen/utils/vcf.py,sha256=MmMbAtLUcKPp02jUdk9TzuET2gWSeoWn7xgoOXFysK0,9393
|
|
287
|
-
biopipen-0.34.
|
|
288
|
-
biopipen-0.34.
|
|
289
|
-
biopipen-0.34.
|
|
290
|
-
biopipen-0.34.
|
|
287
|
+
biopipen-0.34.9.dist-info/METADATA,sha256=aYXqI1hITAFJ1bhVKyz2toplmcj0oNdfKqa3JU-poBw,1026
|
|
288
|
+
biopipen-0.34.9.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
289
|
+
biopipen-0.34.9.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
|
|
290
|
+
biopipen-0.34.9.dist-info/RECORD,,
|
|
File without changes
|