biopipen 0.29.1__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/core/filters.py +21 -0
- biopipen/ns/plot.py +55 -0
- biopipen/ns/scrna.py +49 -13
- biopipen/ns/web.py +87 -5
- biopipen/scripts/bam/CNAClinic.R +2 -1
- biopipen/scripts/cellranger/CellRangerCount.py +3 -3
- biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
- biopipen/scripts/cnv/AneuploidyScore.R +1 -1
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +3 -2
- biopipen/scripts/gene/GeneNameConversion.R +2 -2
- biopipen/scripts/gsea/Enrichr.R +3 -3
- biopipen/scripts/gsea/FGSEA.R +2 -2
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +2 -1
- biopipen/scripts/plot/QQPlot.R +1 -1
- biopipen/scripts/plot/ROC.R +1 -1
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +3 -3
- biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
- biopipen/scripts/rnaseq/Simulation.R +1 -1
- biopipen/scripts/rnaseq/UnitConversion.R +2 -1
- biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
- biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
- biopipen/scripts/scrna/CellsDistribution.R +3 -2
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +5 -4
- biopipen/scripts/scrna/MetaMarkers.R +22 -4
- biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +4 -3
- biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-features.R +8 -5
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
- biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
- biopipen/scripts/scrna/SeuratClustering.R +10 -170
- biopipen/scripts/scrna/SeuratMap2Ref.R +65 -31
- biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
- biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
- biopipen/scripts/scrna/SeuratPreparing.R +22 -562
- biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +7 -4
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +7 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
- biopipen/scripts/snp/MatrixEQTL.R +1 -1
- biopipen/scripts/snp/PlinkCallRate.R +2 -2
- biopipen/scripts/snp/PlinkFreq.R +2 -2
- biopipen/scripts/snp/PlinkHWE.R +2 -2
- biopipen/scripts/snp/PlinkHet.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +2 -2
- biopipen/scripts/stats/ChowTest.R +1 -1
- biopipen/scripts/stats/DiffCoexpr.R +1 -1
- biopipen/scripts/stats/LiquidAssoc.R +1 -1
- biopipen/scripts/stats/Mediation.R +26 -12
- biopipen/scripts/stats/MetaPvalue.R +4 -1
- biopipen/scripts/stats/MetaPvalue1.R +4 -1
- biopipen/scripts/tcr/Attach2Seurat.R +1 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/CloneResidency.R +2 -2
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch-basic.R +0 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
- biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
- biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
- biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
- biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
- biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
- biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
- biopipen/scripts/tcr/Immunarch.R +43 -11
- biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +2 -2
- biopipen/scripts/tcr/TESSA.R +2 -2
- biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/METADATA +1 -1
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/RECORD +105 -96
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/WHEEL +0 -0
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# Build the settings snapshot used as the cache signature for DoubletFinder.
#
# Starts from a copy of the global `envs` and strips the entries that this
# function excludes from the signature: the top-level core count, the
# detector selector, the scDblFinder options and DoubletFinder's own core
# count.
#
# Returns: the pruned copy of `envs` (the global itself is not modified).
.get_envs_cached_doubletfinder <- function() {
  signature <- envs
  for (ignored in c("ncores", "doublet_detector", "scDblFinder")) {
    signature[[ignored]] <- NULL
  }
  signature$DoubletFinder$ncores <- NULL
  signature
}
|
|
9
|
+
|
|
10
|
+
# Build the settings snapshot used as the cache signature for scDblFinder.
#
# Starts from a copy of the global `envs` and strips the entries that this
# function excludes from the signature: the top-level core count, the
# detector selector, the DoubletFinder options and scDblFinder's own core
# count.
#
# Returns: the pruned copy of `envs` (the global itself is not modified).
.get_envs_cached_scdblfinder <- function() {
  signature <- envs
  for (ignored in c("ncores", "doublet_detector", "DoubletFinder")) {
    signature[[ignored]] <- NULL
  }
  signature$scDblFinder$ncores <- NULL
  signature
}
|
|
18
|
+
|
|
19
|
+
# Run DoubletFinder on the global Seurat object `sobj`.
#
# Settings are read from the global `envs`:
#   envs$DoubletFinder$PCs      upper bound of the PC range 1:PCs
#   envs$DoubletFinder$pN       passed to doubletFinder() as `pN`
#   envs$DoubletFinder$doublets multiplied by the number of cells to get the
#                               expected doublet count (before the homotypic
#                               adjustment)
#   envs$DoubletFinder$ncores   cores for paramSweep(); falls back to
#                               envs$ncores when NULL
#   envs$use_sct                forwarded as `sct`
#
# Returns: a list with
#   doublets  data frame (one row per cell) with columns
#             DoubletFinder_score and DoubletFinder_DropletType (lower-cased)
#   pk_plot   ggplot of BCmetric against pK, with the selected pK highlighted
#
# Raises: an error when the pANN / DF.classifications column for the chosen
# pN and pK cannot be identified unambiguously in the metadata.
.run_doubletfinder <- function() {
  library(DoubletFinder)
  log_info("- Preparing Seurat object ...")

  # Assigning into `envs` here only changes a function-local copy.
  if (is.null(envs$DoubletFinder$ncores)) {
    envs$DoubletFinder$ncores <- envs$ncores
  }
  pcs <- 1:envs$DoubletFinder$PCs

  # More controls from envs?
  sobj <- FindNeighbors(sobj, dims = pcs)
  sobj <- FindClusters(sobj)

  log_info("- pK Identification ...")
  sweep.res.list <- paramSweep(
    sobj,
    PCs = pcs,
    sct = envs$use_sct,
    num.cores = envs$DoubletFinder$ncores
  )
  sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
  bcmvn <- find.pK(sweep.stats)

  # Index of the pK with the highest BCmetric (first one on ties).
  best <- which.max(bcmvn$BCmetric)[1]
  bcmvn$Selected <- bcmvn$pK == bcmvn$pK[best]

  # as.character() first so a factor pK yields its label, not its code.
  pK <- as.numeric(as.character(bcmvn$pK[best]))
  pN <- envs$DoubletFinder$pN

  log_info("- Homotypic Doublet Proportion Estimate ...")
  homotypic.prop <- modelHomotypic(Idents(sobj))
  nExp_poi <- round(nrow(sobj@meta.data) * envs$DoubletFinder$doublets)
  nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

  log_info("- Running DoubletFinder ...")
  sobj <- doubletFinder(
    sobj,
    PCs = pcs,
    pN = pN,
    pK = pK,
    nExp = nExp_poi.adj,
    reuse.pANN = FALSE,
    sct = envs$use_sct
  )

  # Locate the result columns. The search key contains "." (e.g. "0.25"), so
  # it must be matched literally rather than as a regular expression, and
  # anything other than exactly one hit would corrupt the output table.
  meta_cols <- colnames(sobj@meta.data)
  find_result_col <- function(key) {
    hits <- meta_cols[grepl(key, meta_cols, fixed = TRUE)]
    if (length(hits) != 1) {
      stop(
        "Expected exactly one metadata column matching '", key,
        "', found ", length(hits), ".",
        call. = FALSE
      )
    }
    hits
  }
  pANN_col <- find_result_col(paste0("pANN_", pN, "_", pK))
  DF_col <- find_result_col(paste0("DF.classifications_", pN, "_", pK))

  doublets <- sobj@meta.data[, c(pANN_col, DF_col), drop = FALSE]
  colnames(doublets) <- c("DoubletFinder_score", "DoubletFinder_DropletType")
  doublets$DoubletFinder_DropletType <-
    tolower(doublets$DoubletFinder_DropletType)

  pk_plot <- ggplot(bcmvn, aes(x = pK, y = BCmetric, color = Selected)) +
    geom_point() +
    # rotate x axis labels
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

  list(doublets = doublets, pk_plot = pk_plot)
}
|
|
74
|
+
|
|
75
|
+
# Run scDblFinder on the counts layer of the global Seurat object `sobj`.
#
# All entries of the global `envs$scDblFinder` are forwarded to
# scDblFinder() through do_call(), except `ncores`, which only decides
# whether a multicore BiocParallel backend is requested. When
# `envs$scDblFinder$ncores` is NULL, `envs$ncores` is used instead. The
# `sce`, `BPPARAM` (when more than one core) and `returnType` arguments are
# always set here and override any user-provided values.
#
# Returns: list(doublets = <table with columns scDblFinder_score and
# scDblFinder_DropletType>), restricted to rows whose `type` is "real".
.run_scdblfinder <- function() {
  library(scDblFinder)

  # Work on a local argument list instead of mutating a copy of `envs`.
  args <- envs$scDblFinder
  ncores <- args$ncores
  if (is.null(ncores)) {
    ncores <- envs$ncores
  }
  args$ncores <- NULL # not an scDblFinder() argument

  args$sce <- GetAssayData(sobj, layer = "counts")
  # Guard against both core settings being NULL, which would otherwise make
  # the comparison zero-length and abort the `if`.
  if (!is.null(ncores) && ncores > 1) {
    args$BPPARAM <- BiocParallel::MulticoreParam(ncores, RNGseed = 8525)
  }
  args$returnType <- "table"

  doublets <- do_call(scDblFinder, args)
  # Keep only rows flagged as "real"
  # NOTE(review): presumably this drops the artificial doublets - confirm.
  doublets <- doublets[doublets$type == "real", , drop = FALSE]
  doublets <- doublets[, c("score", "class"), drop = FALSE]
  colnames(doublets) <- c("scDblFinder_score", "scDblFinder_DropletType")

  list(doublets = doublets)
}
|
|
95
|
+
|
|
96
|
+
# Run the requested doublet detector, reusing cached results when available.
#
# Args:
#   detector: "DoubletFinder" or "scDblFinder"; anything else is an error.
#
# The cache is looked up with get_cached() under the detector's settings
# signature in the global `cache_dir`. On a miss the detector is run and its
# result is stored with save_to_cache().
#
# Returns: the detector's result list (fresh or cached).
run_dd <- function(detector) {
  log_info("Running {detector} ...")

  handlers <- switch(detector,
    DoubletFinder = list(
      signature = .get_envs_cached_doubletfinder,
      run = .run_doubletfinder
    ),
    scDblFinder = list(
      signature = .get_envs_cached_scdblfinder,
      run = .run_scdblfinder
    ),
    stop("Unknown doublet detector: ", detector)
  )

  cached <- get_cached(handlers$signature(), detector, cache_dir)
  if (is.null(cached$data)) {
    results <- handlers$run()

    cached$data <- results
    save_to_cache(cached, detector, cache_dir)
  } else {
    log_info("- Loading cached results ...")
    results <- cached$data
  }

  results
}
|
|
121
|
+
|
|
122
|
+
# Write the per-cell doublet calls and a classification summary to the
# global `joboutdir`, then log how many doublets were found.
#
# Args:
#   dd:       result list from the detector; `dd$doublets` must contain a
#             column named "<detector>_DropletType".
#   detector: detector name, used as file-name prefix and column prefix.
#
# Files written (tab-separated, no row names, unquoted):
#   <detector>_doublets_singlets.txt  the full `dd$doublets` table
#   <detector>_summary.txt            counts per classification
save_dd <- function(dd, detector) {
  doublets <- dd$doublets
  write.table(
    doublets,
    file.path(joboutdir, paste0(detector, "_doublets_singlets.txt")),
    row.names = FALSE,
    quote = FALSE,
    sep = "\t"
  )

  summary <- as.data.frame(table(doublets[[paste0(detector, "_DropletType")]]))
  colnames(summary) <- c("Classification", "Droplet_N")
  write.table(
    summary,
    file.path(joboutdir, paste0(detector, "_summary.txt")),
    row.names = FALSE,
    quote = FALSE,
    sep = "\t"
  )

  # sum() yields 0 when there is no "doublet" row; a bare subset would be
  # zero-length and make the interpolated log message collapse to nothing.
  n_doublet <- sum(summary$Droplet_N[summary$Classification == "doublet"])
  log_info("- {n_doublet}/{sum(summary$Droplet_N)} doublets detected.")
}
|
|
145
|
+
|
|
146
|
+
# Attach the detector's per-cell results to a Seurat object as metadata.
#
# Args:
#   sobj: the Seurat object to annotate.
#   dd:   result list from the detector; `dd$doublets` is converted to a
#         data frame before being added.
#
# Returns: whatever AddMetaData() returns for `sobj` and that data frame.
add_dd_to_seurat <- function(sobj, dd) {
  doublet_meta <- as.data.frame(dd$doublets)
  AddMetaData(sobj, metadata = doublet_meta)
}
|
|
149
|
+
|
|
150
|
+
# Save the diagnostic plots for a doublet-detection run into the global
# `plotsdir`.
#
# Args:
#   sobj:     Seurat object carrying the "<detector>_DropletType" metadata.
#   dd:       result list from the detector; `dd$pk_plot` is used only for
#             DoubletFinder.
#   detector: detector name, used as file-name and column prefix.
#
# Files written:
#   DoubletFinder_pK_BCmetric.png  (DoubletFinder only)
#   <detector>_dimplot.png         dimension-reduction plot grouped by
#                                  droplet type
plot_dd <- function(sobj, dd, detector) {
  if (detector == "DoubletFinder") {
    log_debug("- Plotting pK vs BCmetric ...")
    ggsave(
      filename = file.path(plotsdir, "DoubletFinder_pK_BCmetric.png"),
      plot = dd$pk_plot
    )
  }

  log_info("- Plotting dimension reduction ...")
  droplet_col <- paste0(detector, "_DropletType")
  droplet_colors <- c("#333333", "#FF3333")
  dim_plot <- DimPlot(
    sobj,
    group.by = droplet_col,
    order = "doublet",
    cols = droplet_colors,
    pt.size = 0.8,
    alpha = 0.5
  )
  ggsave(
    filename = file.path(plotsdir, paste0(detector, "_dimplot.png")),
    plot = dim_plot
  )
}
|
|
162
|
+
|
|
163
|
+
# Keep only the cells that the detector classified as singlets.
#
# Args:
#   sobj:     Seurat object to subset.
#   dd:       result list from the detector; row names of `dd$doublets` are
#             used as the cell identifiers.
#   detector: detector name, used to pick the "<detector>_DropletType" column.
#
# Returns: `sobj` subset to the rows of `dd$doublets` labelled "singlet".
filter_dd <- function(sobj, dd, detector) {
  droplet_type <- dd$doublets[[paste0(detector, "_DropletType")]]
  singlets <- dd$doublets[droplet_type == "singlet", , drop = FALSE]
  subset(sobj, cells = rownames(singlets))
}
|
|
170
|
+
|
|
171
|
+
# Register the doublet-detection results with the report.
#
# Args:
#   detector: detector name; selects the summary table file and which plots
#             are listed.
#
# Adds two report sections under the heading "<detector> Results":
#   - a description plus the "<detector>_summary.txt" table from the global
#     `joboutdir`
#   - a table of images pointing at the plot files in the global `plotsdir`
report_dd <- function(detector) {
  add_report(
    list(
      kind = "descr",
      content = "The table contains the number of cells classified as singlets and doublets."
    ),
    list(
      kind = "table",
      data = list(path = file.path(joboutdir, paste0(detector, "_summary.txt")))
    ),
    h1 = paste0(detector, " Results"),
    h2 = paste0("The ", detector, " Summary")
  )

  if (detector == "DoubletFinder") {
    add_report(
      # The file name must match the one plot_dd() writes
      # ("DoubletFinder_pK_BCmetric.png"), otherwise the image link is broken.
      list(
        name = "pK vs BCmetric",
        src = file.path(plotsdir, "DoubletFinder_pK_BCmetric.png")
      ),
      list(
        name = "Dimension Reduction Plot",
        src = file.path(plotsdir, "DoubletFinder_dimplot.png")
      ),
      ui = "table_of_images",
      h1 = "DoubletFinder Results",
      h2 = "Plots"
    )
  } else {
    add_report(
      list(
        name = "Dimension Reduction Plot",
        src = file.path(plotsdir, "scDblFinder_dimplot.png")
      ),
      ui = "table_of_images",
      h1 = "scDblFinder Results",
      h2 = "Plots"
    )
  }
}
|