biopipen 0.34.8__py3-none-any.whl → 0.34.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/regulatory.py +4 -0
- biopipen/ns/scrna.py +22 -2
- biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
- biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
- biopipen/scripts/regulatory/motifs-common.R +3 -2
- biopipen/scripts/scrna/CellCellCommunication.py +4 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +33 -11
- biopipen/scripts/scrna/MarkersFinder.R +9 -1
- biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
- {biopipen-0.34.8.dist-info → biopipen-0.34.10.dist-info}/METADATA +3 -2
- {biopipen-0.34.8.dist-info → biopipen-0.34.10.dist-info}/RECORD +17 -17
- {biopipen-0.34.8.dist-info → biopipen-0.34.10.dist-info}/WHEEL +1 -1
- {biopipen-0.34.8.dist-info → biopipen-0.34.10.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.34.8"
|
|
1
|
+
__version__ = "0.34.10"
|
biopipen/ns/regulatory.py
CHANGED
|
@@ -132,6 +132,9 @@ class MotifAffinityTest(Proc):
|
|
|
132
132
|
If no `regulator_col` is provided, no regulator information is written in
|
|
133
133
|
the output. Otherwise, the regulator information is written in the output in
|
|
134
134
|
the `Regulator` column.
|
|
135
|
+
var_col: The column names in the `in.motiffile` containing the variant information.
|
|
136
|
+
It has to be matching the names in the `in.varfile`. This is helpful when
|
|
137
|
+
we only need to test the pairs of variants and motifs in the `in.motiffile`.
|
|
135
138
|
notfound (choice): What to do if a motif is not found in the database,
|
|
136
139
|
or a regulator is not found in the regulator-motif mapping (envs.regmotifs)
|
|
137
140
|
file.
|
|
@@ -200,6 +203,7 @@ class MotifAffinityTest(Proc):
|
|
|
200
203
|
"bcftools": config.exe.bcftools,
|
|
201
204
|
"motif_col": None,
|
|
202
205
|
"regulator_col": None,
|
|
206
|
+
"var_col": None,
|
|
203
207
|
"notfound": "error",
|
|
204
208
|
"motifdb": config.ref.tf_motifdb,
|
|
205
209
|
"regmotifs": config.ref.tf_motifs,
|
biopipen/ns/scrna.py
CHANGED
|
@@ -118,6 +118,10 @@ class SeuratPreparing(Proc):
|
|
|
118
118
|
It doesn't work when data is loaded from loom files.
|
|
119
119
|
cell_qc: Filter expression to filter cells, using
|
|
120
120
|
`tidyrseurat::filter()`.
|
|
121
|
+
It can also be a dictionary of expressions, where the names of the list are
|
|
122
|
+
sample names.
|
|
123
|
+
You can have a default expression in the list with the name "DEFAULT" for
|
|
124
|
+
the samples that are not listed.
|
|
121
125
|
Available QC keys include `nFeature_RNA`, `nCount_RNA`,
|
|
122
126
|
`percent.mt`, `percent.ribo`, `percent.hb`, and `percent.plat`.
|
|
123
127
|
|
|
@@ -782,11 +786,16 @@ class ModuleScoreCalculator(Proc):
|
|
|
782
786
|
will perform diffusion map as a reduction and add the first 2
|
|
783
787
|
components as `DC_1` and `DC_2` to the metadata. `diffmap` is a shortcut
|
|
784
788
|
for `diffusion_map`. Other key-value pairs will pass to
|
|
785
|
-
[`destiny::DiffusionMap()`](https://www.rdocumentation.org/packages/destiny/versions/2.0.4/topics/DiffusionMap
|
|
789
|
+
[`destiny::DiffusionMap()`](https://www.rdocumentation.org/packages/destiny/versions/2.0.4/topics/DiffusionMap class).
|
|
786
790
|
You can later plot the diffusion map by using
|
|
787
791
|
`reduction = "DC"` in `env.dimplots` in `SeuratClusterStats`.
|
|
788
792
|
This requires [`SingleCellExperiment`](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html)
|
|
789
793
|
and [`destiny`](https://bioconductor.org/packages/release/bioc/html/destiny.html) R packages.
|
|
794
|
+
post_mutaters (type=json): The mutaters to mutate the metadata after
|
|
795
|
+
calculating the module scores.
|
|
796
|
+
The mutaters will be applied in the order specified.
|
|
797
|
+
This is useful when you want to create new scores based on the
|
|
798
|
+
calculated module scores.
|
|
790
799
|
""" # noqa: E501
|
|
791
800
|
|
|
792
801
|
input = "srtobj:file"
|
|
@@ -810,6 +819,7 @@ class ModuleScoreCalculator(Proc):
|
|
|
810
819
|
# "Activation": {"features": "IFNG"},
|
|
811
820
|
# "Proliferation": {"features": "STMN1,TUBB"},
|
|
812
821
|
},
|
|
822
|
+
"post_mutaters": {},
|
|
813
823
|
}
|
|
814
824
|
script = "file://../scripts/scrna/ModuleScoreCalculator.R"
|
|
815
825
|
|
|
@@ -1131,7 +1141,7 @@ class MarkersFinder(Proc):
|
|
|
1131
1141
|
- res (type=int): The resolution of the plots.
|
|
1132
1142
|
- height (type=int): The height of the plots.
|
|
1133
1143
|
- width (type=int): The width of the plots.
|
|
1134
|
-
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.
|
|
1144
|
+
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1135
1145
|
enrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1136
1146
|
The keys are the names of the cases and the values are the dicts inherited from `enrich_plots_defaults`.
|
|
1137
1147
|
The cases under `envs.cases` can inherit this options.
|
|
@@ -1781,6 +1791,11 @@ class CellTypeAnnotation(Proc):
|
|
|
1781
1791
|
the original cell types will be kept and nothing will be changed.
|
|
1782
1792
|
///
|
|
1783
1793
|
|
|
1794
|
+
more_cell_types (type=json): The additional cell type annotations to add to the metadata.
|
|
1795
|
+
The keys are the new column names and the values are the cell types lists.
|
|
1796
|
+
The cell type lists work the same as `cell_types` above.
|
|
1797
|
+
This is useful when you want to keep multiple annotations of cell types.
|
|
1798
|
+
|
|
1784
1799
|
sccatch_args (ns): The arguments for `scCATCH::findmarkergene()` if `tool` is `sccatch`.
|
|
1785
1800
|
- species: The specie of cells.
|
|
1786
1801
|
- cancer: If the sample is from cancer tissue, then the cancer type may be defined.
|
|
@@ -1842,6 +1857,7 @@ class CellTypeAnnotation(Proc):
|
|
|
1842
1857
|
"sctype_tissue": None,
|
|
1843
1858
|
"sctype_db": config.ref.sctype_db,
|
|
1844
1859
|
"cell_types": [],
|
|
1860
|
+
"more_cell_types": None,
|
|
1845
1861
|
"sccatch_args": {
|
|
1846
1862
|
"species": None,
|
|
1847
1863
|
"cancer": "Normal",
|
|
@@ -2524,6 +2540,10 @@ class CellCellCommunicationPlots(Proc):
|
|
|
2524
2540
|
cases (type=json): The cases for the plots.
|
|
2525
2541
|
The keys are the names of the cases and the values are the arguments for
|
|
2526
2542
|
the plots. The arguments include the ones inherited from `envs`.
|
|
2543
|
+
You can have a special `plot_type` `"table"` to generate a table for the
|
|
2544
|
+
ccc data to save as a text file and show in the report.
|
|
2545
|
+
If no cases are given, a default case will be used, with the
|
|
2546
|
+
key `Cell-Cell Communication`.
|
|
2527
2547
|
<more>: Other arguments passed to
|
|
2528
2548
|
[scplotter::CCCPlot](https://pwwang.github.io/scplotter/reference/CCCPlot.html)
|
|
2529
2549
|
""" # noqa: E501
|
|
@@ -14,6 +14,7 @@ bcftools <- {{envs.bcftools | r}}
|
|
|
14
14
|
genome <- {{envs.genome | r}}
|
|
15
15
|
motif_col <- {{envs.motif_col | r}}
|
|
16
16
|
regulator_col <- {{envs.regulator_col | r}}
|
|
17
|
+
var_col <- {{envs.var_col | r}}
|
|
17
18
|
notfound <- {{envs.notfound | r}}
|
|
18
19
|
motifdb <- {{envs.motifdb | r}}
|
|
19
20
|
regmotifs <- {{envs.regmotifs | r}}
|
|
@@ -21,6 +22,7 @@ devpars <- {{envs.devpars | r}}
|
|
|
21
22
|
plot_nvars <- {{envs.plot_nvars | r}}
|
|
22
23
|
plots <- {{envs.plots | r}}
|
|
23
24
|
cutoff <- {{envs.cutoff | r}}
|
|
25
|
+
set.seed(8525)
|
|
24
26
|
|
|
25
27
|
if (is.null(motifdb) || !file.exists(motifdb)) {
|
|
26
28
|
stop("Motif database (envs.motifdb) is required and must exist")
|
|
@@ -47,10 +49,21 @@ log <- get_logger()
|
|
|
47
49
|
log$info("Reading input regulator/motif file ...")
|
|
48
50
|
in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
49
51
|
|
|
52
|
+
|
|
50
53
|
log$info("Ensuring motifs and regulators in the input data ...")
|
|
51
|
-
in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
54
|
+
in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, var_col, regmotifs, notfound = notfound)
|
|
52
55
|
genome_pkg <- get_genome_pkg(genome)
|
|
53
56
|
|
|
57
|
+
motif_var_pairs <- NULL
|
|
58
|
+
if (!is.null(var_col)) {
|
|
59
|
+
log$info("Obtaining motif-variant pairs to test ...")
|
|
60
|
+
if (!var_col %in% colnames(in_motifs)) {
|
|
61
|
+
stop("Variant column (envs.var_col) not found in the input motif file")
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
motif_var_pairs <- unique(paste0(in_motifs[[motif_col]], " // ", in_motifs[[var_col]]))
|
|
65
|
+
}
|
|
66
|
+
|
|
54
67
|
log$info("Reading variant file ...")
|
|
55
68
|
if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
|
|
56
69
|
log$info("Converting VCF file to BED file ...")
|
|
@@ -77,10 +90,13 @@ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfou
|
|
|
77
90
|
tool <- tolower(tool)
|
|
78
91
|
tool <- match.arg(tool, c("motifbreakr", "atsnp"))
|
|
79
92
|
|
|
80
|
-
if
|
|
93
|
+
{% if envs.tool == "motifbreakr" %}
|
|
81
94
|
motifbreakr_args <- {{envs.motifbreakr_args | r}}
|
|
82
95
|
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
|
|
83
|
-
|
|
84
|
-
atsnp_args <-
|
|
96
|
+
{% else %}
|
|
97
|
+
atsnp_args <- list_update(
|
|
98
|
+
list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
|
|
99
|
+
{{envs.atsnp_args | r}}
|
|
100
|
+
)
|
|
85
101
|
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
|
|
86
|
-
}
|
|
102
|
+
{% endif %}
|
|
@@ -46,6 +46,13 @@ atsnp_result <- ComputePValues(
|
|
|
46
46
|
testing.mc = TRUE
|
|
47
47
|
)
|
|
48
48
|
|
|
49
|
+
if (!is.null(motif_var_pairs)) {
|
|
50
|
+
log$info("Filtering motif-variant pairs ...")
|
|
51
|
+
atsnp_result$motifs_vars <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
|
|
52
|
+
atsnp_result <- atsnp_result[atsnp_result$motifs_vars %in% motif_var_pairs, , drop = FALSE]
|
|
53
|
+
atsnp_result$motifs_vars <- NULL
|
|
54
|
+
}
|
|
55
|
+
|
|
49
56
|
padj_col <- paste0(atsnp_args$p, "_adj")
|
|
50
57
|
atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
|
|
51
58
|
cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
|
|
@@ -87,7 +94,8 @@ write.table(
|
|
|
87
94
|
|
|
88
95
|
log$info("Plotting variants ...")
|
|
89
96
|
# Convert result to GRanges object
|
|
90
|
-
atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
|
|
97
|
+
atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
|
|
98
|
+
atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
|
|
91
99
|
atsnp_result$effect <- "strong"
|
|
92
100
|
atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
|
|
93
101
|
atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
|
|
@@ -96,7 +104,6 @@ attributes(atsnp_result)$genome.package <- genome_pkg
|
|
|
96
104
|
attributes(atsnp_result)$motifs <- mdb
|
|
97
105
|
|
|
98
106
|
if (is.null(plots) || length(plots) == 0) {
|
|
99
|
-
atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
|
|
100
107
|
atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
|
|
101
108
|
variants <- unique(atsnp_result$SNP_id)
|
|
102
109
|
} else {
|
|
@@ -50,6 +50,7 @@ results <- motifbreakR(
|
|
|
50
50
|
|
|
51
51
|
log$info("Calculating p values ...")
|
|
52
52
|
results <- calculatePvalue(results)
|
|
53
|
+
results$.id <- 1:length(results)
|
|
53
54
|
results_to_save <- as.data.frame(unname(results))
|
|
54
55
|
results_to_save$motifPos <- lapply(results_to_save$motifPos, function(x) paste(x, collapse = ","))
|
|
55
56
|
results_to_save$altPos <- lapply(results_to_save$altPos, function(x) paste(x, collapse = ","))
|
|
@@ -60,20 +61,28 @@ if (!is.null(regulator_col)) {
|
|
|
60
61
|
drop = TRUE
|
|
61
62
|
]
|
|
62
63
|
}
|
|
63
|
-
results_to_save <- apply(results_to_save, 2, as.character)
|
|
64
|
+
results_to_save <- as.data.frame(apply(results_to_save, 2, as.character))
|
|
65
|
+
|
|
66
|
+
if (!is.null(motif_var_pairs)) {
|
|
67
|
+
log$info("Filtering motif-variant pairs ...")
|
|
68
|
+
results_to_save$motifs_vars <- paste0(results_to_save$providerId, " // ", results_to_save$SNP_id)
|
|
69
|
+
results_to_save <- results_to_save[results_to_save$motifs_vars %in% motif_var_pairs, , drop = FALSE]
|
|
70
|
+
results_to_save$motifs_vars <- NULL
|
|
71
|
+
}
|
|
64
72
|
|
|
65
73
|
write.table(
|
|
66
74
|
results_to_save,
|
|
67
75
|
file = file.path(outdir, "motifbreakr.txt"),
|
|
68
76
|
sep = "\t", quote = FALSE, row.names = FALSE
|
|
69
77
|
)
|
|
70
|
-
rm(results_to_save)
|
|
78
|
+
# rm(results_to_save)
|
|
71
79
|
|
|
72
80
|
log$info("Plotting variants ...")
|
|
73
81
|
if (is.null(plots) || length(plots) == 0) {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
82
|
+
results_to_save$alleleDiff <- as.numeric(results_to_save$alleleDiff)
|
|
83
|
+
results_to_save <- results_to_save[order(-abs(results_to_save$alleleDiff)), , drop = FALSE]
|
|
84
|
+
results_to_save <- results_to_save[1:min(plot_nvars, nrow(results_to_save)), , drop = FALSE]
|
|
85
|
+
variants <- unique(results_to_save$SNP_id)
|
|
77
86
|
} else {
|
|
78
87
|
variants <- names(plots)
|
|
79
88
|
}
|
|
@@ -88,7 +97,7 @@ for (variant in variants) {
|
|
|
88
97
|
if (is.null(plots[[variant]]$devpars)) {
|
|
89
98
|
plots[[variant]]$devpars <- devpars
|
|
90
99
|
}
|
|
91
|
-
res <- results[results$SNP_id == variant, , drop = FALSE]
|
|
100
|
+
res <- results[results$SNP_id == variant & results$.id %in% results_to_save$.id, , drop = FALSE]
|
|
92
101
|
res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
|
|
93
102
|
|
|
94
103
|
plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
|
|
@@ -33,7 +33,7 @@ log$info("Reading input data ...")
|
|
|
33
33
|
indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
34
34
|
|
|
35
35
|
log$info("Ensuring regulators in the input data ...")
|
|
36
|
-
indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
36
|
+
indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, "SNP_id", regmotifs, notfound = notfound)
|
|
37
37
|
genome_pkg <- get_genome_pkg(genome)
|
|
38
38
|
|
|
39
39
|
log$info("Reading motif database ...")
|
|
@@ -138,12 +138,13 @@ motifdb_to_motiflib <- function(motifdb) {
|
|
|
138
138
|
#' @param outdir Output directory, used to save un-matched regulators
|
|
139
139
|
#' @param motif_col Column name for the motif
|
|
140
140
|
#' @param regulator_col Column name for the regulator
|
|
141
|
+
#' @param var_col Column name for the variant
|
|
141
142
|
#' @param regmotifs Regulator-motif mapping file
|
|
142
143
|
#' @param log_indent Indentation for log messages
|
|
143
144
|
#' @param notfound Action to take if regulators are not found in the mapping file
|
|
144
145
|
#' @return Data frame with regulators and motifs
|
|
145
146
|
#' @export
|
|
146
|
-
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
|
|
147
|
+
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, var_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
|
|
147
148
|
if (is.null(motif_col)) {
|
|
148
149
|
if (is.null(regmotifs)) {
|
|
149
150
|
stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
|
|
@@ -198,7 +199,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
|
|
|
198
199
|
regulator_col <<- rm_reg_col
|
|
199
200
|
}
|
|
200
201
|
} else {
|
|
201
|
-
indata <- indata[!duplicated(indata[, c(regulator_col, motif_col), drop = FALSE]), , drop = FALSE]
|
|
202
|
+
indata <- indata[!duplicated(indata[, c(regulator_col, motif_col, var_col), drop = FALSE]), , drop = FALSE]
|
|
202
203
|
}
|
|
203
204
|
|
|
204
205
|
return(indata)
|
|
@@ -7,6 +7,10 @@ import scanpy
|
|
|
7
7
|
import liana
|
|
8
8
|
import liana.method.sc._liana_pipe as _liana_pipe
|
|
9
9
|
|
|
10
|
+
# AttributeError: module 'numpy' has no attribute 'product'
|
|
11
|
+
if not hasattr(np, "product"):
|
|
12
|
+
np.product = np.prod
|
|
13
|
+
|
|
10
14
|
# monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
|
|
11
15
|
# https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
|
|
12
16
|
def _trimean(a, axis=0):
|
|
@@ -27,7 +27,7 @@ defaults <- list(
|
|
|
27
27
|
devpars = list(res = 100)
|
|
28
28
|
)
|
|
29
29
|
|
|
30
|
-
cases <- expand_cases(cases, defaults)
|
|
30
|
+
cases <- expand_cases(cases, defaults, default_case = "Cell-Cell Communication")
|
|
31
31
|
log <- get_logger()
|
|
32
32
|
reporter <- get_reporter()
|
|
33
33
|
|
|
@@ -35,12 +35,31 @@ do_case <- function(name) {
|
|
|
35
35
|
log$info("- Case: {name}")
|
|
36
36
|
case <- cases[[name]]
|
|
37
37
|
info <- case_info(name, outdir, is_dir = FALSE)
|
|
38
|
-
case <- extract_vars(case, "subset", "devpars", "more_formats", "descr")
|
|
38
|
+
case <- extract_vars(case, subset_ = "subset", "devpars", "more_formats", "descr")
|
|
39
39
|
|
|
40
40
|
case$data <- ccc
|
|
41
|
-
if (!is.null(
|
|
42
|
-
case$data <- ccc %>% dplyr::filter(!!parse_expr(
|
|
41
|
+
if (!is.null(subset_)) {
|
|
42
|
+
case$data <- ccc %>% dplyr::filter(!!parse_expr(subset_))
|
|
43
43
|
}
|
|
44
|
+
|
|
45
|
+
if (identical(case$plot_type, "table")) {
|
|
46
|
+
write.table(
|
|
47
|
+
case$data,
|
|
48
|
+
file = paste0(info$prefix, ".txt"),
|
|
49
|
+
sep = "\t",
|
|
50
|
+
row.names = FALSE,
|
|
51
|
+
col.names = TRUE,
|
|
52
|
+
quote = FALSE
|
|
53
|
+
)
|
|
54
|
+
report <- list(
|
|
55
|
+
kind = "table",
|
|
56
|
+
data = list(nrows = 100),
|
|
57
|
+
src = paste0(info$prefix, ".txt")
|
|
58
|
+
)
|
|
59
|
+
reporter$add2(report, hs = c(info$section, info$name))
|
|
60
|
+
return()
|
|
61
|
+
}
|
|
62
|
+
|
|
44
63
|
if (is.null(case$magnitude)) {
|
|
45
64
|
case$magnitude <- NULL
|
|
46
65
|
}
|
|
@@ -5,11 +5,15 @@ outfile <- {{out.outfile | r}}
|
|
|
5
5
|
celltypes <- {{envs.cell_types | r}}
|
|
6
6
|
newcol <- {{envs.newcol | r}}
|
|
7
7
|
merge_same_labels <- {{envs.merge | r}}
|
|
8
|
+
more_cell_types <- {{envs.more_cell_types | r}}
|
|
8
9
|
|
|
9
10
|
log <- biopipen.utils::get_logger()
|
|
10
11
|
|
|
11
12
|
if (is.null(celltypes) || length(celltypes) == 0) {
|
|
12
13
|
log$warn("No cell types are given!")
|
|
14
|
+
if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
|
|
15
|
+
log$warn("`envs.celltypes` is not given, won't process `envs.more_cell_types`!")
|
|
16
|
+
}
|
|
13
17
|
|
|
14
18
|
if (merge_same_labels) {
|
|
15
19
|
log$warn("Ignoring 'envs.merge' because no cell types are given!")
|
|
@@ -25,26 +29,43 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
25
29
|
} else {
|
|
26
30
|
idents <- as.character(unique(idents))
|
|
27
31
|
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
process_celltypes <- function(ct, key = NULL) {
|
|
33
|
+
if (length(ct) < length(idents)) {
|
|
34
|
+
ct <- c(ct, idents[(length(ct) + 1):length(idents)])
|
|
35
|
+
} else if (length(ct) > length(idents)) {
|
|
36
|
+
ct <- ct[1:length(idents)]
|
|
37
|
+
if (is.null(key)) {
|
|
38
|
+
log$warn("The length of cell types is longer than the number of clusters!")
|
|
39
|
+
} else {
|
|
40
|
+
log$warn(paste0("The length of cell types for '", key, "' is longer than the number of clusters!"))
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
for (i in seq_along(ct)) {
|
|
44
|
+
if (ct[i] == "-" || ct[i] == "") {
|
|
45
|
+
ct[i] <- idents[i]
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
names(ct) <- idents
|
|
49
|
+
return(ct)
|
|
34
50
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
51
|
+
|
|
52
|
+
if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
|
|
53
|
+
for (key in names(more_cell_types)) {
|
|
54
|
+
ct <- more_cell_types[[key]]
|
|
55
|
+
ct <- process_celltypes(ct, key)
|
|
56
|
+
log$info(paste0("Adding additional cell type annotation: '", key, "' ..."))
|
|
57
|
+
sobj@meta.data[[key]] <- ct[as.character(Idents(sobj))]
|
|
38
58
|
}
|
|
39
59
|
}
|
|
40
|
-
|
|
60
|
+
|
|
61
|
+
celltypes <- process_celltypes(celltypes)
|
|
41
62
|
|
|
42
63
|
log$info("Renaming cell types ...")
|
|
43
64
|
if (is.null(newcol)) {
|
|
44
65
|
has_na <- "NA" %in% unlist(celltypes) || anyNA(unlist(celltypes))
|
|
45
66
|
sobj$seurat_clusters_id <- Idents(sobj)
|
|
46
67
|
celltypes$object <- sobj
|
|
47
|
-
sobj <- do_call(RenameIdents, celltypes)
|
|
68
|
+
sobj <- biopipen.utils::do_call(RenameIdents, celltypes)
|
|
48
69
|
sobj$seurat_clusters <- Idents(sobj)
|
|
49
70
|
if (has_na) {
|
|
50
71
|
log$info("Filtering clusters if NA ...")
|
|
@@ -65,5 +86,6 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
65
86
|
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
66
87
|
}
|
|
67
88
|
|
|
89
|
+
log$info("Saving Seurat object ...")
|
|
68
90
|
biopipen.utils::save_obj(sobj, outfile)
|
|
69
91
|
}
|
|
@@ -353,16 +353,24 @@ process_markers <- function(markers, info, case) {
|
|
|
353
353
|
for (db in case$dbs) {
|
|
354
354
|
plots <- list()
|
|
355
355
|
for (plotname in names(case$enrich_plots)) {
|
|
356
|
-
plotargs <- case$enrich_plots[[plotname]]
|
|
356
|
+
plotargs <- extract_vars(case$enrich_plots[[plotname]], "descr", allow_nonexisting = TRUE)
|
|
357
357
|
plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
|
|
358
358
|
|
|
359
359
|
p <- do_call(VizEnrichment, plotargs)
|
|
360
360
|
|
|
361
361
|
if (plotargs$plot_type == "bar") {
|
|
362
362
|
attr(p, "height") <- attr(p, "height") / 1.5
|
|
363
|
+
descr <- descr %||% glue::glue(
|
|
364
|
+
"The bar plot shows the top enriched terms in database '{db}', ",
|
|
365
|
+
"the x-axis shows the -log10 of the adjusted p-values, ",
|
|
366
|
+
"and the y-axis shows the term names. The number next to each bar indicates the overlap gene count."
|
|
367
|
+
)
|
|
363
368
|
}
|
|
364
369
|
outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
|
|
365
370
|
save_plot(p, outprefix, plotargs$devpars, formats = "png")
|
|
371
|
+
if (!is.null(descr)) {
|
|
372
|
+
plots[[length(plots) + 1]] <- list(kind = "descr", content = glue::glue(descr))
|
|
373
|
+
}
|
|
366
374
|
plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
|
|
367
375
|
}
|
|
368
376
|
reporter$add2(
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
library(
|
|
1
|
+
library(rlang)
|
|
2
2
|
library(dplyr)
|
|
3
|
+
library(Seurat)
|
|
3
4
|
library(biopipen.utils)
|
|
4
5
|
|
|
5
6
|
sobjfile <- {{in.srtobj | r}}
|
|
6
7
|
outfile <- {{out.rdsfile | r}}
|
|
7
8
|
defaults <- {{envs.defaults | r}}
|
|
8
9
|
modules <- {{envs.modules | r}}
|
|
10
|
+
post_mutaters <- {{envs.post_mutaters | r}}
|
|
9
11
|
|
|
10
12
|
log <- get_logger()
|
|
11
13
|
|
|
@@ -134,6 +136,12 @@ for (key in names(modules)) {
|
|
|
134
136
|
}
|
|
135
137
|
}
|
|
136
138
|
|
|
139
|
+
if (!is.null(post_mutaters) && length(post_mutaters) > 0) {
|
|
140
|
+
log$info("Applying post mutaters ...")
|
|
141
|
+
sobj@meta.data <- sobj@meta.data %>%
|
|
142
|
+
mutate(!!!lapply(post_mutaters, parse_expr))
|
|
143
|
+
}
|
|
144
|
+
|
|
137
145
|
# save seurat object
|
|
138
146
|
log$info("Saving Seurat object ...")
|
|
139
147
|
save_obj(sobj, outfile)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.34.8
|
|
3
|
+
Version: 0.34.10
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
17
|
Provides-Extra: runinfo
|
|
17
18
|
Requires-Dist: datar[pandas] (>=0.15.8,<0.16.0)
|
|
18
19
|
Requires-Dist: pipen-board[report] (>=0.17,<0.18)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
biopipen/__init__.py,sha256=
|
|
1
|
+
biopipen/__init__.py,sha256=u7r_X5RPL4hal_HhVAZpDtDz0JbAd6bKAwNi0BAOm5c,24
|
|
2
2
|
biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
|
|
4
4
|
biopipen/core/config.toml,sha256=lZV_vbYWk6uqm19ZWJcsZCcSNqAdIfN2fOfamzxZpg4,2148
|
|
@@ -20,9 +20,9 @@ biopipen/ns/gsea.py,sha256=eMGj6lljdMds2Pzs3Mcab0lQPU4vtgRTKMhAsKXpxYo,9742
|
|
|
20
20
|
biopipen/ns/misc.py,sha256=0jDPvpRL3EUIf2ipTjKqLTZgnallLWEjSxzTpS-geTQ,4355
|
|
21
21
|
biopipen/ns/plot.py,sha256=N41_izb6zi-XArUly5WhLebapNXbTNSgGlOCCwtrDlY,18282
|
|
22
22
|
biopipen/ns/protein.py,sha256=YJtlKoHI2p5yHdxKeQnNtm5QrbxDGOq1UXOdt_7tlTs,6391
|
|
23
|
-
biopipen/ns/regulatory.py,sha256=
|
|
23
|
+
biopipen/ns/regulatory.py,sha256=WlnX_R8jEFyxCjk8mru5Qu5iCQJLzjMWiWGoc3gygzc,16221
|
|
24
24
|
biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
|
|
25
|
-
biopipen/ns/scrna.py,sha256=
|
|
25
|
+
biopipen/ns/scrna.py,sha256=ts8yust_YJBb-95Wl59KbEfStML3Eb4fmZInpfatXGc,146298
|
|
26
26
|
biopipen/ns/scrna_metabolic_landscape.py,sha256=EwLMrsj_pTqvyAgtHLoishjQxCg_j8n5OofuTofUph0,22096
|
|
27
27
|
biopipen/ns/snp.py,sha256=iXWrw7Lmhf4_ct57HGT7JGTClCXUD4sZ2FzOgsC2pTg,28123
|
|
28
28
|
biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
|
|
@@ -128,22 +128,22 @@ biopipen/scripts/protein/PDB2Fasta.py,sha256=HVsoRRpieobuPwemCz30_N0rJ7T4aGFTQKZ
|
|
|
128
128
|
biopipen/scripts/protein/Prodigy.py,sha256=elA62U7WJ89TGEKobvjjd3Refjzr61S69PiVO0qF6DE,4493
|
|
129
129
|
biopipen/scripts/protein/ProdigySummary.R,sha256=qP30GYFpmxCvcfT2IVbJImGMgOdreKi-m1nyUqH6480,3799
|
|
130
130
|
biopipen/scripts/protein/RMSD.py,sha256=zE0g9QKWqqpC8lhGoQIF54VqDw37FaOUkvk0vtYf4-c,6250
|
|
131
|
-
biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=
|
|
132
|
-
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=
|
|
133
|
-
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=
|
|
131
|
+
biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=7kQFV9ExawMkCfLJ-mIsnxbXazL57D1-hVBWcHEPrus,3466
|
|
132
|
+
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=pFO6SVo_h1lRdNhq-GOX5jD8jF9PlXz0_XnRDpy9RXg,4670
|
|
133
|
+
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=Z-OcUuLPScX5kZvozNGhtawdkbF33ckiuBsSrXRAApk,3853
|
|
134
134
|
biopipen/scripts/regulatory/MotifScan.py,sha256=mxhRWp6NBGEMpWJOpwqIvzkKlrgnRvJApyCU91svh8E,5399
|
|
135
|
-
biopipen/scripts/regulatory/VariantMotifPlot.R,sha256
|
|
136
|
-
biopipen/scripts/regulatory/motifs-common.R,sha256=
|
|
135
|
+
biopipen/scripts/regulatory/VariantMotifPlot.R,sha256=cHngquU7zVCUhh8zGi40k1o7oeWLfuF78Ycljo_Ql88,2849
|
|
136
|
+
biopipen/scripts/regulatory/motifs-common.R,sha256=ES2UaFE68yULd4mfw7-T0zUcXQtb_uI6IDS-hQsVSvQ,13369
|
|
137
137
|
biopipen/scripts/rnaseq/Simulation-ESCO.R,sha256=cdADB5dpkI5hvzDPw5PyrhOyRFU4PMLgSsa84YOZALc,6424
|
|
138
138
|
biopipen/scripts/rnaseq/Simulation-RUVcorr.R,sha256=oZJHHEMdH7SBIkhCrgkpNYroBkF0dtr20U3ugY9I9hM,1202
|
|
139
139
|
biopipen/scripts/rnaseq/Simulation.R,sha256=LvIjL_onCA8GJR5TPiREUkN_NlMz_ngcw6PezWKc2x0,809
|
|
140
140
|
biopipen/scripts/rnaseq/UnitConversion.R,sha256=xuoj9AdFiCKNztpCmzwCz9VxmUAE-FslZ_LgjOm7dhM,11360
|
|
141
141
|
biopipen/scripts/scrna/AnnData2Seurat.R,sha256=wc5PDbK9TkuJtoXXxF4W1ODylWhyfKWd3vV_AdOcTjM,1118
|
|
142
142
|
biopipen/scripts/scrna/CCPlotR-patch.R,sha256=KpB8fwacBaWaUNjIidcLUkMShLjS4Gq9UY8LUgIITB0,8369
|
|
143
|
-
biopipen/scripts/scrna/CellCellCommunication.py,sha256=
|
|
144
|
-
biopipen/scripts/scrna/CellCellCommunicationPlots.R,sha256=
|
|
143
|
+
biopipen/scripts/scrna/CellCellCommunication.py,sha256=LnEuV8YHOJSYM7Tb_jwLbTQdMSpJw5ChRIiLktcJzSQ,4471
|
|
144
|
+
biopipen/scripts/scrna/CellCellCommunicationPlots.R,sha256=IcqqhVWasSE54PDWaw85u5_yup_YHVNNwZI7oOy9250,2456
|
|
145
145
|
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R,sha256=CwYR8WWQMf8r7V2CTalG4kxdKnYMtyhpJBe9zP2sQWA,6964
|
|
146
|
-
biopipen/scripts/scrna/CellTypeAnnotation-direct.R,sha256=
|
|
146
|
+
biopipen/scripts/scrna/CellTypeAnnotation-direct.R,sha256=jwjSBql66ku11b4O_7bIs9zuwbqHiGgrAFDk1tSbwg4,3111
|
|
147
147
|
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R,sha256=vvjhxin4aoA9heecey0dpr6ofirybygY3ApjgtQW89Y,2094
|
|
148
148
|
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R,sha256=xxB4K1MzBSNQnDxa44s5ExeU67MbncOBf8lGFr7RvwQ,1870
|
|
149
149
|
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R,sha256=1BZ8tOJsB7lRtrYXtImxly-he4gfDTfGqbwK35yJjYw,4604
|
|
@@ -155,9 +155,9 @@ biopipen/scripts/scrna/ExprImputation-rmagic.R,sha256=ePgbMZ_3bKbeUrjsMdkdtBM_MS
|
|
|
155
155
|
biopipen/scripts/scrna/ExprImputation-scimpute.R,sha256=MI_bYfvCDKJsuGntUxfx_-NdrssBoQgL95-DGwJVE5s,1191
|
|
156
156
|
biopipen/scripts/scrna/ExprImputation.R,sha256=GcdZJpkDpq88hRQjtLZY5-byp8V43stEFm5T-pQbU6A,319
|
|
157
157
|
biopipen/scripts/scrna/LoomTo10X.R,sha256=c6F0p1udsL5UOlb84-53K5BsjSDWkdFyYTt5NQmlIec,1059
|
|
158
|
-
biopipen/scripts/scrna/MarkersFinder.R,sha256
|
|
158
|
+
biopipen/scripts/scrna/MarkersFinder.R,sha256=qBVdxO8cKTJMtGyJLl2QGRrtdiXOJSLXu6rpZUPkDZk,25437
|
|
159
159
|
biopipen/scripts/scrna/MetaMarkers.R,sha256=BgYaWYEj6obwqaZaDWqNPtxb1IEEAnXAeBE0Ji9PvBA,12426
|
|
160
|
-
biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256
|
|
160
|
+
biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=_mvo35a-wk5miUb_kMIVwvKK0b6InRa1NKtN8zznGwk,4457
|
|
161
161
|
biopipen/scripts/scrna/PseudoBulkDEG.R,sha256=IuM4hl-KHZ5aaaTqZeylw4b1ZenMZaY4qobD5qxAlHs,25199
|
|
162
162
|
biopipen/scripts/scrna/RadarPlots.R,sha256=Kn1E-hpczuujpgNjR8MqeIIVN-S3PbpmfcKWGKcNCVY,14546
|
|
163
163
|
biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
|
|
@@ -284,7 +284,7 @@ biopipen/utils/misc.py,sha256=pDZ-INWVNqHuXYvcjmu8KqNAigkh2lsHy0BxX44CPvc,4048
|
|
|
284
284
|
biopipen/utils/reference.py,sha256=Oc6IlA1giLxymAuI7DO-IQLHQ7-DbsWzOQE86oTDfMU,5955
|
|
285
285
|
biopipen/utils/reporter.py,sha256=VwLl6xyVDWnGY7NEXyqBlkW8expKJoNQ5iTyZSELf5c,4922
|
|
286
286
|
biopipen/utils/vcf.py,sha256=MmMbAtLUcKPp02jUdk9TzuET2gWSeoWn7xgoOXFysK0,9393
|
|
287
|
-
biopipen-0.34.
|
|
288
|
-
biopipen-0.34.
|
|
289
|
-
biopipen-0.34.
|
|
290
|
-
biopipen-0.34.
|
|
287
|
+
biopipen-0.34.10.dist-info/METADATA,sha256=IeN3Cn85Wko4l4ANyDPAxcKF1P5eJQ1SRyBxTfPg6b8,1027
|
|
288
|
+
biopipen-0.34.10.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
289
|
+
biopipen-0.34.10.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
|
|
290
|
+
biopipen-0.34.10.dist-info/RECORD,,
|
|
File without changes
|