biopipen 0.29.2__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (105) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +2 -0
  3. biopipen/core/filters.py +21 -0
  4. biopipen/ns/plot.py +55 -0
  5. biopipen/ns/scrna.py +49 -13
  6. biopipen/ns/web.py +87 -5
  7. biopipen/scripts/bam/CNAClinic.R +2 -1
  8. biopipen/scripts/cellranger/CellRangerCount.py +3 -3
  9. biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
  10. biopipen/scripts/cnv/AneuploidyScore.R +1 -1
  11. biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
  12. biopipen/scripts/delim/RowsBinder.R +1 -1
  13. biopipen/scripts/delim/SampleInfo.R +3 -2
  14. biopipen/scripts/gene/GeneNameConversion.R +2 -2
  15. biopipen/scripts/gsea/Enrichr.R +3 -3
  16. biopipen/scripts/gsea/FGSEA.R +2 -2
  17. biopipen/scripts/gsea/GSEA.R +2 -2
  18. biopipen/scripts/gsea/PreRank.R +2 -2
  19. biopipen/scripts/plot/Heatmap.R +3 -3
  20. biopipen/scripts/plot/Manhattan.R +2 -1
  21. biopipen/scripts/plot/QQPlot.R +1 -1
  22. biopipen/scripts/plot/ROC.R +1 -1
  23. biopipen/scripts/plot/Scatter.R +112 -0
  24. biopipen/scripts/plot/VennDiagram.R +3 -3
  25. biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
  26. biopipen/scripts/rnaseq/Simulation.R +1 -1
  27. biopipen/scripts/rnaseq/UnitConversion.R +2 -1
  28. biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
  29. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
  31. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
  32. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
  33. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
  34. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
  35. biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
  36. biopipen/scripts/scrna/CellsDistribution.R +3 -2
  37. biopipen/scripts/scrna/DimPlots.R +1 -1
  38. biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
  39. biopipen/scripts/scrna/MarkersFinder.R +5 -5
  40. biopipen/scripts/scrna/MetaMarkers.R +4 -4
  41. biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
  42. biopipen/scripts/scrna/RadarPlots.R +1 -1
  43. biopipen/scripts/scrna/ScFGSEA.R +4 -3
  44. biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
  45. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
  46. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
  47. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
  48. biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
  49. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
  50. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -3
  51. biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
  52. biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
  53. biopipen/scripts/scrna/SeuratClustering.R +10 -170
  54. biopipen/scripts/scrna/SeuratMap2Ref.R +65 -31
  55. biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
  56. biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
  57. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
  58. biopipen/scripts/scrna/SeuratPreparing.R +22 -562
  59. biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
  60. biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
  61. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
  62. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
  63. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
  64. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
  65. biopipen/scripts/snp/MatrixEQTL.R +1 -1
  66. biopipen/scripts/snp/PlinkCallRate.R +2 -2
  67. biopipen/scripts/snp/PlinkFreq.R +2 -2
  68. biopipen/scripts/snp/PlinkHWE.R +2 -2
  69. biopipen/scripts/snp/PlinkHet.R +2 -2
  70. biopipen/scripts/snp/PlinkIBD.R +2 -2
  71. biopipen/scripts/stats/ChowTest.R +1 -1
  72. biopipen/scripts/stats/DiffCoexpr.R +1 -1
  73. biopipen/scripts/stats/LiquidAssoc.R +1 -1
  74. biopipen/scripts/stats/Mediation.R +11 -9
  75. biopipen/scripts/stats/MetaPvalue.R +4 -1
  76. biopipen/scripts/stats/MetaPvalue1.R +4 -1
  77. biopipen/scripts/tcr/Attach2Seurat.R +1 -1
  78. biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
  79. biopipen/scripts/tcr/CloneResidency.R +2 -2
  80. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  81. biopipen/scripts/tcr/Immunarch-basic.R +0 -4
  82. biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
  83. biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
  84. biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
  85. biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
  86. biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
  87. biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
  88. biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
  89. biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
  90. biopipen/scripts/tcr/Immunarch.R +43 -11
  91. biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
  92. biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
  93. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  94. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  95. biopipen/scripts/tcr/TCRClustering.R +2 -2
  96. biopipen/scripts/tcr/TESSA.R +2 -2
  97. biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
  98. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  99. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  100. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  101. biopipen/scripts/web/gcloud_common.py +49 -0
  102. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/METADATA +1 -1
  103. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/RECORD +105 -96
  104. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/WHEEL +0 -0
  105. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,201 @@
1
# Build the settings snapshot that is handed to the cache lookup for a
# DoubletFinder run.
#
# Starts from a copy of `envs` (read from the enclosing environment) and
# removes the top-level `ncores` and `doublet_detector` entries, the whole
# `scDblFinder` section, and `DoubletFinder$ncores`.
#
# Returns the trimmed copy. Assuming `envs` is a plain list, the original
# `envs` is left untouched because only the local copy is modified.
.get_envs_cached_doubletfinder <- function() {
    cache_envs <- envs
    for (key in c("ncores", "doublet_detector", "scDblFinder")) {
        cache_envs[[key]] <- NULL
    }
    cache_envs$DoubletFinder$ncores <- NULL
    cache_envs
}
9
+
10
# Build the settings snapshot that is handed to the cache lookup for an
# scDblFinder run.
#
# Starts from a copy of `envs` (read from the enclosing environment) and
# removes the top-level `ncores` and `doublet_detector` entries, the whole
# `DoubletFinder` section, and `scDblFinder$ncores`.
#
# Returns the trimmed copy. Assuming `envs` is a plain list, the original
# `envs` is left untouched because only the local copy is modified.
.get_envs_cached_scdblfinder <- function() {
    cache_envs <- envs
    for (key in c("ncores", "doublet_detector", "DoubletFinder")) {
        cache_envs[[key]] <- NULL
    }
    cache_envs$scDblFinder$ncores <- NULL
    cache_envs
}
18
+
19
# Run the DoubletFinder workflow on the Seurat object `sobj`.
#
# Reads `sobj` and `envs` from the enclosing environment (neither is a
# parameter). Settings used: `envs$DoubletFinder$PCs`, `$pN`, `$doublets`,
# `$ncores` (falling back to `envs$ncores` when unset) and `envs$use_sct`.
#
# Returns a list with:
#   doublets - the pANN and DF.classifications metadata columns for the
#              chosen pN/pK, renamed to DoubletFinder_score and
#              DoubletFinder_DropletType (classification lower-cased)
#   pk_plot  - ggplot of BCmetric against pK, coloured by the selected pK
.run_doubletfinder <- function() {
    library(DoubletFinder)
    log_info("- Preparing Seurat object ...")

    # Assuming `envs` is a plain list, this only changes a function-local copy.
    if (is.null(envs$DoubletFinder$ncores)) {
        envs$DoubletFinder$ncores <- envs$ncores
    }

    # More controls from envs?
    sobj <- FindNeighbors(sobj, dims = 1:envs$DoubletFinder$PCs)
    sobj <- FindClusters(sobj)

    log_info("- pK Identification ...")
    sweep.res.list <- paramSweep(
        sobj,
        PCs = 1:envs$DoubletFinder$PCs,
        sct = envs$use_sct,
        num.cores = envs$DoubletFinder$ncores
    )
    sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
    bcmvn <- find.pK(sweep.stats)

    # The pK with the largest BCmetric is the one used below.
    best_pK <- bcmvn$pK[which.max(bcmvn$BCmetric)[1]]
    bcmvn$Selected <- bcmvn$pK == best_pK

    # Go through character so a factor pK yields its label, not its level code.
    pK <- as.numeric(as.character(best_pK))
    pN <- envs$DoubletFinder$pN
    log_info("- Homotypic Doublet Proportion Estimate ...")
    homotypic.prop <- modelHomotypic(Idents(sobj))
    # `envs$DoubletFinder$doublets` is applied as a fraction of the cell count.
    nExp_poi <- round(nrow(sobj@meta.data) * envs$DoubletFinder$doublets)
    nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

    log_info("- Running DoubletFinder ...")
    sobj <- doubletFinder(
        sobj,
        PCs = 1:envs$DoubletFinder$PCs,
        pN = pN,
        pK = pK,
        nExp = nExp_poi.adj,
        reuse.pANN = FALSE,
        sct = envs$use_sct
    )

    # Locate the result columns by their pN/pK prefix. `fixed = TRUE` keeps
    # the dots in the prefix (e.g. "0.25") literal instead of regex wildcards.
    meta_cols <- colnames(sobj@meta.data)
    pANN_col <- paste0("pANN_", pN, "_", pK)
    pANN_col <- meta_cols[grepl(pANN_col, meta_cols, fixed = TRUE)]
    DF_col <- paste0("DF.classifications_", pN, "_", pK)
    DF_col <- meta_cols[grepl(DF_col, meta_cols, fixed = TRUE)]
    doublets <- sobj@meta.data[, c(pANN_col, DF_col), drop = FALSE]
    colnames(doublets) <- c("DoubletFinder_score", "DoubletFinder_DropletType")
    doublets$DoubletFinder_DropletType <- tolower(doublets$DoubletFinder_DropletType)

    pk_plot <- ggplot(bcmvn, aes(x = pK, y = BCmetric, color = Selected)) +
        geom_point() +
        # rotate x axis labels
        theme(axis.text.x = element_text(angle = 90, hjust = 1))
    list(doublets = doublets, pk_plot = pk_plot)
}
74
+
75
# Run scDblFinder on the counts of the Seurat object `sobj`.
#
# Reads `sobj` and `envs` from the enclosing environment (neither is a
# parameter). Every entry of `envs$scDblFinder` except `ncores` is forwarded
# to `scDblFinder()` through `do_call()`, together with `sce` (the "counts"
# layer), `returnType = "table"` and, when more than one core is requested,
# a `BiocParallel::MulticoreParam` as `BPPARAM`.
#
# Returns a list with one element, `doublets`: the rows whose `type` is
# "real", restricted to the `score` and `class` columns and renamed to
# scDblFinder_score / scDblFinder_DropletType.
.run_scdblfinder <- function() {
    library(scDblFinder)

    params <- envs$scDblFinder

    # The per-tool core count wins; otherwise use the top-level `envs$ncores`.
    n_workers <- params$ncores
    if (is.null(n_workers)) {
        n_workers <- envs$ncores
    }
    # `ncores` is only used here to size the backend, so it is not forwarded.
    params$ncores <- NULL

    params$sce <- GetAssayData(sobj, layer = "counts")
    if (n_workers > 1) {
        params$BPPARAM <- BiocParallel::MulticoreParam(n_workers, RNGseed = 8525)
    }
    params$returnType <- "table"

    droplets <- do_call(scDblFinder, params)
    is_real <- droplets$type == "real"
    droplets <- droplets[is_real, c("score", "class"), drop = FALSE]
    colnames(droplets) <- c("scDblFinder_score", "scDblFinder_DropletType")

    list(doublets = droplets)
}
95
+
96
# Run the requested doublet detector, reusing a cached result when present.
#
# detector: "DoubletFinder" or "scDblFinder"; any other value raises an
#           error.
#
# The detector's settings snapshot is passed to `get_cached()` along with the
# detector name and `cache_dir`. A NULL `data` field on the returned object is
# treated as a cache miss: the detector is run and its result is stored with
# `save_to_cache()`.
#
# Returns whatever the detector's run function returned (or the cached copy).
run_dd <- function(detector) {
    log_info("Running {detector} ...")

    handlers <- list(
        DoubletFinder = list(
            settings = .get_envs_cached_doubletfinder,
            run = .run_doubletfinder
        ),
        scDblFinder = list(
            settings = .get_envs_cached_scdblfinder,
            run = .run_scdblfinder
        )
    )
    if (!detector %in% names(handlers)) {
        stop("Unknown doublet detector: ", detector)
    }
    handler <- handlers[[detector]]

    cached <- get_cached(handler$settings(), detector, cache_dir)
    if (is.null(cached$data)) {
        results <- handler$run()

        cached$data <- results
        save_to_cache(cached, detector, cache_dir)
    } else {
        log_info("- Loading cached results ...")
        results <- cached$data
    }

    results
}
121
+
122
# Write the per-droplet calls and a per-class count table for a detector.
#
# dd:       list with a `doublets` table that has a
#           "<detector>_DropletType" column
# detector: detector name, used as the prefix of the column and file names
#
# Files written under `joboutdir` (read from the enclosing environment),
# tab-separated, unquoted and without row names:
#   <detector>_doublets_singlets.txt - the `doublets` table
#   <detector>_summary.txt           - Classification / Droplet_N counts
#
# Finally logs how many droplets were classified as "doublet".
save_dd <- function(dd, detector) {
    doublets <- dd$doublets
    write.table(
        doublets,
        file.path(joboutdir, paste0(detector, "_doublets_singlets.txt")),
        row.names = FALSE,
        quote = FALSE,
        sep = "\t"
    )

    summary <- as.data.frame(table(doublets[[paste0(detector, "_DropletType")]]))
    colnames(summary) <- c("Classification", "Droplet_N")
    write.table(
        summary,
        file.path(joboutdir, paste0(detector, "_summary.txt")),
        row.names = FALSE,
        quote = FALSE,
        sep = "\t"
    )

    # sum() so that "no droplet classified as doublet" is reported as 0; a
    # bare subset would be a zero-length vector and break the log message.
    n_doublet <- sum(summary$Droplet_N[summary$Classification == "doublet"])
    log_info("- {n_doublet}/{sum(summary$Droplet_N)} doublets detected.")
}
145
+
146
# Attach the doublet-detection table to a Seurat object as metadata.
#
# sobj: the Seurat object to annotate
# dd:   list whose `doublets` element is coerced to a data frame and passed
#       to `AddMetaData()`
#
# Returns the value of `AddMetaData()`.
add_dd_to_seurat <- function(sobj, dd) {
    droplet_meta <- as.data.frame(dd$doublets)
    AddMetaData(sobj, metadata = droplet_meta)
}
149
+
150
# Save the doublet-detection plots for a detector under `plotsdir`
# (read from the enclosing environment).
#
# sobj:     Seurat object that has a "<detector>_DropletType" metadata column
# dd:       detector result; `dd$pk_plot` is used for DoubletFinder only
# detector: detector name, used as the prefix of the column and file names
#
# Files written:
#   DoubletFinder_pK_BCmetric.png - only when detector is "DoubletFinder"
#   <detector>_dimplot.png        - DimPlot grouped by droplet type
plot_dd <- function(sobj, dd, detector) {
    if (detector == "DoubletFinder") {
        log_debug("- Plotting pK vs BCmetric ...")
        pk_png <- file.path(plotsdir, "DoubletFinder_pK_BCmetric.png")
        ggsave(dd$pk_plot, filename = pk_png)
    }

    log_info("- Plotting dimension reduction ...")
    droplet_col <- paste0(detector, "_DropletType")
    embedding <- DimPlot(
        sobj,
        group.by = droplet_col,
        order = "doublet",
        cols = c("#333333", "#FF3333"),
        pt.size = 0.8,
        alpha = 0.5
    )
    dim_png <- file.path(plotsdir, paste0(detector, "_dimplot.png"))
    ggsave(embedding, filename = dim_png)
}
162
+
163
# Keep only the cells that the detector classified as "singlet".
#
# sobj:     object to subset (passed to `subset()` with a `cells` argument)
# dd:       list whose `doublets` table has row names identifying cells and a
#           "<detector>_DropletType" column
# detector: detector name, used as the prefix of the column name
#
# Returns the result of `subset()` restricted to the singlet row names.
filter_dd <- function(sobj, dd, detector) {
    type_col <- paste0(detector, "_DropletType")
    is_singlet <- dd$doublets[[type_col]] == "singlet"
    singlets <- dd$doublets[is_singlet, , drop = FALSE]
    subset(sobj, cells = rownames(singlets))
}
170
+
171
# Register the report entries for a doublet detector via `add_report()`.
#
# detector: detector name; "DoubletFinder" gets the pK plot in addition to
#           the dimension reduction plot, any other value is reported as
#           scDblFinder.
#
# Two sections are added: the summary table at
# <joboutdir>/<detector>_summary.txt and a table of images pointing at the
# PNG files under `plotsdir`. `joboutdir` and `plotsdir` are read from the
# enclosing environment.
report_dd <- function(detector) {
    add_report(
        list(
            kind = "descr",
            content = "The table contains the number of cells classified as singlets and doublets."
        ),
        list(
            kind = "table",
            data = list(path = file.path(joboutdir, paste0(detector, "_summary.txt")))
        ),
        h1 = paste0(detector, " Results"),
        h2 = paste0("The ", detector, " Summary")
    )

    if (detector == "DoubletFinder") {
        add_report(
            # Must match the file name that plot_dd() writes for the pK plot.
            list(
                name = "pK vs BCmetric",
                src = file.path(plotsdir, "DoubletFinder_pK_BCmetric.png")
            ),
            list(
                name = "Dimension Reduction Plot",
                src = file.path(plotsdir, "DoubletFinder_dimplot.png")
            ),
            ui = "table_of_images",
            h1 = "DoubletFinder Results",
            h2 = "Plots"
        )
    } else {
        add_report(
            list(
                name = "Dimension Reduction Plot",
                src = file.path(plotsdir, "scDblFinder_dimplot.png")
            ),
            ui = "table_of_images",
            h1 = "scDblFinder Results",
            h2 = "Plots"
        )
    }
}