biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
biopipen/utils/plot.R
DELETED
|
@@ -1,209 +0,0 @@
|
|
|
1
|
-
library(ggplot2)
|
|
2
|
-
pdf(NULL) # preventing Rplots.pdf
|
|
3
|
-
|
|
4
|
-
# Draw a Venn diagram with `ggVennDiagram()`.
#
# Returns the plot object when `outfile` is NULL. Otherwise the plot is
# written to every path in `outfile`; the graphics device is chosen from the
# file extension.
plotVenn <- function(
    # A named list with elements,
    # e.g. list(A = paste0("R", 1:5), B = paste0("R", 3:7))
    data,
    # Arguments for `ggVennDiagram()`
    args = list(),
    # Extra ggplot components, given as objects or as strings of R code
    ggs = NULL,
    # Parameters for device (res, width, height) for `png()`
    devpars = list(res = 100, width = 800, height = 600),
    # The output file(s). If NULL, will return the plot object
    outfile = NULL
) {
  library(ggVennDiagram)

  args$x <- data
  p <- do.call(ggVennDiagram, args)
  if (!is.null(ggs)) {
    for (gg in ggs) {
      if (is.character(gg)) {
        # NOTE(review): strings are evaluated as R code in this frame;
        # only pass trusted input.
        p <- p + eval(parse(text = gg))
      } else {
        p <- p + gg
      }
    }
  }

  if (is.null(outfile)) {
    return(p)
  }

  for (outf in outfile) {
    # Use the real extension instead of the last three characters, so that
    # ".jpeg"/".tiff" work and ".jpg"/".tif" map to an existing device.
    ext <- tolower(tools::file_ext(outf))
    if (!nzchar(ext)) {
      stop("Cannot infer the graphics device from file name: ", outf)
    }
    if (ext == "pdf") {
      # pdf() sizes are in inches: pixels / resolution
      pdf(
        outf,
        width = devpars$width / devpars$res,
        height = devpars$height / devpars$res
      )
    } else {
      device <- switch(ext, jpg = "jpeg", tif = "tiff", ext)
      devpars$filename <- outf
      do.call(device, devpars)
    }
    # Close the device even if rendering fails
    tryCatch(print(p), finally = dev.off())
  }
}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Build a ggplot from `data` with a single `geom_<geom>()` layer.
#
# Returns the plot object when `outfile` is NULL. Otherwise the plot is
# written to every path in `outfile`; the graphics device is chosen from the
# file extension.
plotGG <- function(
    # A data frame (long format)
    data,
    # The geom name without the "geom_" prefix, e.g. "violin"
    geom,
    # Arguments for `geom_x()`
    args = list(),
    # Extra ggplot components, given as objects or as strings of R code
    ggs = NULL,
    # Parameters for device (res, width, height) for `png()`
    devpars = list(res = 100, width = 1000, height = 1000),
    # The output file(s). If NULL, will return the plot object
    outfile = NULL
) {

  p <- ggplot(data)
  p <- p + do.call(paste0("geom_", geom), args)
  if (!is.null(ggs)) {
    for (gg in ggs) {
      if (is.character(gg)) {
        # NOTE(review): strings are evaluated as R code in this frame;
        # only pass trusted input.
        p <- p + eval(parse(text = gg))
      } else {
        p <- p + gg
      }
    }
  }

  if (is.null(outfile)) {
    return(p)
  }

  for (outf in outfile) {
    # Use the real extension instead of the last three characters, so that
    # ".jpeg"/".tiff" work and ".jpg"/".tif" map to an existing device.
    ext <- tolower(tools::file_ext(outf))
    if (!nzchar(ext)) {
      stop("Cannot infer the graphics device from file name: ", outf)
    }
    if (ext == "pdf") {
      # pdf() sizes are in inches: pixels / resolution
      pdf(
        outf,
        width = devpars$width / devpars$res,
        height = devpars$height / devpars$res
      )
    } else {
      device <- switch(ext, jpg = "jpeg", tif = "tiff", ext)
      devpars$filename <- outf
      do.call(device, devpars)
    }
    # Close the device even if rendering fails
    tryCatch(print(p), finally = dev.off())
  }
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
# Violin plot: forwards every argument to `plotGG()` with the geom fixed to
# "violin".
plotViolin <- function(
    # A data frame (long format)
    data,
    # Arguments for `geom_violin()`
    args = list(),
    # Extra ggplot components in string
    ggs = NULL,
    # Parameters for device (res, width, height) for `png()`
    devpars = list(res = 100, width = 1000, height = 1000),
    # The output file. If NULL, will return the plot object
    outfile = NULL
) {
  plotGG(
    data = data,
    geom = "violin",
    args = args,
    ggs = ggs,
    devpars = devpars,
    outfile = outfile
  )
}
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
# Draw an UpSet plot with ggupset's `scale_x_upset()`.
#
# Returns the plot object when `outfile` is NULL. Otherwise the plot is
# written to every path in `outfile`; the graphics device is chosen from the
# file extension.
plotUpset <- function(
    # A named list with elements,
    # e.g. list(A = paste0("R", 1:5), B = paste0("R", 3:7))
    # Or a data frame
    # https://cran.r-project.org/web/packages/ggupset/readme/README.html
    data,
    # Arguments for `scale_x_upset()`
    args = list(),
    # Extra ggplot components, given as objects or as strings of R code
    ggs = "geom_bar(aes(x=V1))",
    # Parameters for device (res, width, height) for `png()`
    devpars = list(res = 100, width = 1000, height = 1000),
    # The output file(s). If NULL, will return the plot object
    outfile = NULL
) {
  library(ggupset)
  library(tidyr)
  library(dplyr)

  if (!is.data.frame(data) && is.list(data)) {
    # Turn the named list into a one-column data frame `V1`, where each row
    # holds the names of the sets that contain one element.
    all_elems <- unique(unlist(data))
    df <- data.frame(ALL_ELEMS = all_elems)
    data <- do.call(cbind, lapply(names(data), function(nd) {
      df[df$ALL_ELEMS %in% data[[nd]], nd] <- nd
      df
    })) %>% select(-ALL_ELEMS) %>% unite("V1", sep = "; ", na.rm = TRUE) %>%
      mutate(V1 = strsplit(V1, "; ", fixed = TRUE))
  }

  p <- ggplot(data)
  for (gg in ggs) {
    if (is.character(gg)) {
      # NOTE(review): strings are evaluated as R code in this frame;
      # only pass trusted input.
      p <- p + eval(parse(text = gg))
    } else {
      p <- p + gg
    }
  }
  p <- p + do.call(scale_x_upset, args)

  if (is.null(outfile)) {
    return(p)
  }

  for (outf in outfile) {
    # Use the real extension instead of the last three characters, so that
    # ".jpeg"/".tiff" work and ".jpg"/".tif" map to an existing device.
    ext <- tolower(tools::file_ext(outf))
    if (!nzchar(ext)) {
      stop("Cannot infer the graphics device from file name: ", outf)
    }
    if (ext == "pdf") {
      # pdf() sizes are in inches: pixels / resolution
      pdf(
        outf,
        width = devpars$width / devpars$res,
        height = devpars$height / devpars$res
      )
    } else {
      device <- switch(ext, jpg = "jpeg", tif = "tiff", ext)
      devpars$filename <- outf
      do.call(device, devpars)
    }
    # Close the device even if rendering fails
    tryCatch(print(p), finally = dev.off())
  }
}
|
|
171
|
-
|
|
172
|
-
# Build a heatmap with `ComplexHeatmap::Heatmap()`.
#
# Depending on `outfile`, the Heatmap object is returned (NULL), drawn on the
# current device ("draw"), or written to every given path, with the graphics
# device chosen from the file extension.
plotHeatmap <- function(
    # Data matrix
    data,
    # Arguments for `ComplexHeatmap::Heatmap()`
    args = list(),
    # Other arguments for `ComplexHeatmap::draw()`
    draw = list(),
    # Parameters for device (res, width, height) for `png()`.
    # If NULL, the device defaults are used.
    devpars = NULL,
    # The output file(s). If NULL, will return the plot object
    # If "draw", will call `ComplexHeatmap::draw()`
    outfile = NULL
) {
  library(ComplexHeatmap)

  args$matrix <- as.matrix(data)
  hm <- do.call(Heatmap, args)

  if (is.null(outfile)) {
    return(hm)
  }
  if (identical(outfile, "draw")) {
    return(do.call(ComplexHeatmap::draw, c(list(hm), draw)))
  }

  for (outf in outfile) {
    # Use the real extension instead of the last three characters, so that
    # ".jpeg"/".tiff" work and ".jpg"/".tif" map to an existing device.
    ext <- tolower(tools::file_ext(outf))
    if (!nzchar(ext)) {
      stop("Cannot infer the graphics device from file name: ", outf)
    }
    if (ext == "pdf") {
      # pdf() sizes are in inches: pixels / resolution. Only pass a size
      # when it can be computed, so the default `devpars = NULL` falls back
      # to the pdf() defaults instead of passing zero-length values.
      pdf_args <- list(file = outf)
      if (!is.null(devpars$res)) {
        if (!is.null(devpars$width)) {
          pdf_args$width <- devpars$width / devpars$res
        }
        if (!is.null(devpars$height)) {
          pdf_args$height <- devpars$height / devpars$res
        }
      }
      do.call(pdf, pdf_args)
    } else {
      device <- switch(ext, jpg = "jpeg", tif = "tiff", ext)
      devpars$filename <- outf
      do.call(device, devpars)
    }
    # Close the device even if drawing fails
    tryCatch(
      do.call(ComplexHeatmap::draw, c(list(hm), draw)),
      finally = dev.off()
    )
  }
}
|
biopipen/utils/repr.R
DELETED
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
library(rlang)
|
|
2
|
-
|
|
3
|
-
#' The string representation of an object
#'
#' S3 generic; the method is selected by the class of `x`.
#'
#' @param x An object
#' @param newline Whether to add newlines to the output for each element
#' @param ... Further arguments handed to the selected method
#' @return The string representation
#' @export
repr <- function(x, newline = FALSE, ...) {
  UseMethod("repr")
}
|
|
9
|
-
|
|
10
|
-
# Fallback method: call the object's own `.repr()` member if it has one,
# otherwise return "<class: expression>" built from the class of `x` and the
# expression that was passed in.
repr.default <- function(x, newline = FALSE, ...) {
  klass <- paste0(class(x), collapse = "/")
  # deparse() may split a long expression over several strings; join them so
  # the fallback is always a single string.
  expr_text <- paste(trimws(deparse(substitute(x))), collapse = " ")
  fallback <- paste0("<", klass, ": ", expr_text, ">")

  tryCatch(
    x$.repr(newline, ...),
    # Any failure (no `$`, no `.repr`, or `.repr` itself erroring) yields the
    # fallback string.
    error = function(e) {
      fallback
    }
  )
}
|
|
21
|
-
|
|
22
|
-
# Numeric method: a single value is rendered bare, anything else as a
# `c(...)` call, with one element per line when `newline` is TRUE.
repr.numeric <- function(x, newline = FALSE, ...) {
  if (length(x) == 1) {
    return(as.character(x))
  }
  if (newline) {
    items <- paste0(" ", x)
    return(paste0("c(\n", paste0(items, collapse = ",\n"), "\n)"))
  }
  paste0("c(", paste(x, collapse = ", "), ")")
}
|
|
35
|
-
|
|
36
|
-
# Character method: a single string is wrapped in double quotes; a vector is
# rendered as a `c(...)` call with single-quoted elements, one per line when
# `newline` is TRUE.
repr.character <- function(x, newline = FALSE, ...) {
  if (length(x) == 1) {
    return(paste0("\"", x, "\""))
  }
  quoted <- sQuote(x, q = FALSE)
  if (newline) {
    body <- paste0(paste0(" ", quoted), collapse = ",\n")
    return(paste0("c(\n", body, "\n)"))
  }
  paste0("c(", paste0(quoted, collapse = ", "), ")")
}
|
|
49
|
-
|
|
50
|
-
# Factor method: renders a `factor(<values>, levels = <levels>)` call, using
# `repr()` for both the values (as character) and the levels.
# The original had a second, unreachable `else if (!newline)` branch that
# duplicated the multi-line layout; it is removed here without changing the
# output.
repr.factor <- function(x, newline = FALSE, ...) {
  values <- repr(as.character(x), newline, ...)
  lvls <- repr(levels(x), newline, ...)
  if (!newline) {
    paste0("factor(", values, ", levels = ", lvls, ")")
  } else {
    paste0(
      "factor(\n",
      paste0(" ", values, ",\n"),
      " levels = ", lvls, "\n)"
    )
  }
}
|
|
69
|
-
|
|
70
|
-
# Logical method: a single value is rendered as "TRUE", "FALSE" or "NA";
# a vector as a `c(...)` call, one element per line when `newline` is TRUE.
repr.logical <- function(x, newline = FALSE, ...) {
  if (length(x) == 1) {
    # `if (NA)` is an error, so NA has to be handled before the TRUE/FALSE test
    if (is.na(x)) {
      "NA"
    } else if (x) {
      "TRUE"
    } else {
      "FALSE"
    }
  } else if (!newline) {
    paste0("c(", paste0(x, collapse = ","), ")")
  } else {
    paste0(
      "c(\n",
      paste0(lapply(x, function(y) paste0(" ", y)), collapse = ",\n"),
      "\n)"
    )
  }
}
|
|
83
|
-
|
|
84
|
-
# List method: renders a `list(...)` call. Named elements are written as
# `name = value` (the name is quoted by `bQuote()`); elements with an empty
# name are written as the bare value.
repr.list <- function(x, newline = FALSE, ...) {
  start <- if (newline) "list(\n" else "list("
  end <- if (newline) "\n)" else ")"
  sep <- if (newline) ",\n" else ", "
  prefix <- if (newline) " " else ""
  keys <- names(x)
  if (length(keys) > 0) {
    items <- lapply(seq_along(x), function(i) {
      key <- keys[i]
      # Look the value up by position: `x[[name]]` returns the first match
      # and is wrong when names are duplicated. `newline` and `...` are
      # forwarded, as the unnamed branch already does.
      value <- repr(x[[i]], newline = newline, ...)
      if (identical(key, "")) {
        paste0(prefix, value)
      } else {
        paste0(prefix, bQuote(key), " = ", value)
      }
    })
    paste0(start, paste0(items, collapse = sep), end)
  } else {
    paste0(
      start, paste0(lapply(x, repr, newline = newline, ...), collapse = sep), end
    )
  }
}
|
|
110
|
-
|
|
111
|
-
# NULL method: always the literal string "NULL"; every argument is ignored.
repr.NULL <- function(x, newline = FALSE, ...) "NULL"
|
|
114
|
-
|
|
115
|
-
# Formula method: the deparsed formula as a single string.
repr.formula <- function(x, newline = FALSE, ...) {
  # deparse() splits long formulas over several strings; join them so the
  # result is always a single string.
  paste(trimws(deparse(x)), collapse = " ")
}
|
|
118
|
-
|
|
119
|
-
# Data frame method: renders a `data.frame(...)` call with one
# `name = <column repr>` entry per line, regardless of `newline`; `newline`
# is only forwarded to the per-column `repr()` calls.
repr.data.frame <- function(x, newline = FALSE, ...) {
  render_column <- function(name) {
    paste0(" ", bQuote(name), " = ", repr(x[[name]], newline = newline, ...))
  }
  columns <- paste0(lapply(names(x), render_column), collapse = ",\n")
  paste0("data.frame(\n", columns, "\n)")
}
|
|
130
|
-
|
|
131
|
-
# Environment method: renders an `rlang::env(...)` call listing every binding
# returned by `ls(x)` as `name = <value repr>`.
repr.environment <- function(x, newline = FALSE, ...) {
  if (!is_environment(x)) {
    # in case .GlobalEnv is dispatched here
    return(NextMethod())
  }

  nl <- if (newline) "\n" else " "
  prefix <- if (newline) " " else ""
  render_binding <- function(name) {
    paste0(prefix, bQuote(name), " = ", repr(get(name, x), newline = newline, ...))
  }
  bindings <- paste0(lapply(ls(x), render_binding), collapse = paste0(",", nl))
  paste0("rlang::env(", nl, bindings, nl, ")")
}
|
biopipen/utils/rnaseq.R
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
# Normalise a unit: if any element of `unit` is exactly "count" the result is
# "count"; otherwise `unit` is returned unchanged.
.normUnit <- function(unit) {
  if ("count" %in% unit) "count" else unit
}
|
|
8
|
-
|
|
9
|
-
# Sum exon lengths per gene from a header-less table read with read.table().
# Returns a numeric vector of summed lengths named by gene.
glenFromGFFExons <- function(exonfile) {
  exons <- read.table(exonfile, header = FALSE, row.names = NULL)
  # V4: start, V5: end, V10: gene name
  per_gene <- aggregate(V5 - V4 + 1 ~ V10, exons, sum)
  setNames(per_gene[, -1, drop = TRUE], per_gene[, 1])
}
|
|
18
|
-
|
|
19
|
-
# Convert a count matrix (genes in rows, samples in columns) to TPM.
#
# `args$genelen` must hold gene lengths named by gene. Genes without a length
# are dropped, with a warning that lists them. Returns a matrix with the
# remaining genes as rows and the columns of `x`.
count2tpm <- function(x, args) {
  if (is.null(args$genelen)) {
    stop("Gene lengths are required to convert count to TPM.")
  }
  glengenes <- names(args$genelen)
  mygenes <- rownames(x)
  missing <- setdiff(mygenes, glengenes)
  # Only warn when something is actually missing, and join the gene names
  # with `collapse` (`sep` does not join the elements of one vector).
  if (length(missing) > 0) {
    warning(paste(length(missing), "gene cannot be found in gene length data"))
    warning(paste(missing, collapse = ", "))
  }

  genes <- intersect(mygenes, glengenes)
  x <- x[genes, , drop = FALSE]

  # see: https://gist.github.com/slowkow/c6ab0348747f86e2748b
  # and https://support.bioconductor.org/p/91218/
  # Divide each gene's counts by its length, then scale every sample so that
  # its column sums to one million.
  out <- x / unlist(args$genelen[genes])
  out <- t(t(out) * 1e6 / colSums(out))
  rownames(out) <- genes
  colnames(out) <- colnames(x)

  return(out)
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
# Convert `x` between units by looking up a function named
# "<inunit>2<outunit>" (after normalising both units with `.normUnit()`) and
# calling it with `x` and `args`.
unit_conversion <- function(x, inunit, outunit, args = list()) {
  converter_name <- paste0(.normUnit(inunit), "2", .normUnit(outunit))
  converter <- get(converter_name)
  converter(x, args)
}
|
biopipen/utils/single_cell.R
DELETED
|
@@ -1,207 +0,0 @@
|
|
|
1
|
-
suppressPackageStartupMessages(library(rlang))
|
|
2
|
-
suppressPackageStartupMessages(library(dplyr))
|
|
3
|
-
suppressPackageStartupMessages(library(tidyr))
|
|
4
|
-
try(suppressPackageStartupMessages(library(immunarch)))
|
|
5
|
-
|
|
6
|
-
#' Expand a Immunarch object into cell-level
|
|
7
|
-
#'
|
|
8
|
-
#' Here is how the data is expanded:
|
|
9
|
-
#' 1. Expand `$data` by Barcode (other columns are copied)
|
|
10
|
-
#' 2. Add sample to `Sample` column
|
|
11
|
-
#' 3. Add columns from `$meta`
|
|
12
|
-
#'
|
|
13
|
-
#' @param immdata Immunarch object
|
|
14
|
-
#' @return A data frame
|
|
15
|
-
#'
|
|
16
|
-
#' @examples
|
|
17
|
-
#' immunarch::immdata$data$MS1 |> head()
|
|
18
|
-
#' # Clones Proportion CDR3.nt CDR3.aa V.name D.name J.name V.end D.start D.end J.start VJ.ins VD.ins DJ.ins Sequence
|
|
19
|
-
#' # <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <int> <dbl> <dbl> <dbl> <lgl>
|
|
20
|
-
#' # 1 539 0.0634 TGTGCCAGCAGCTTACA… CASSLQ… TRBV7… TRBD2 TRBJ2… 14 18 26 29 -1 3 2 NA
|
|
21
|
-
#' # 2 320 0.0376 TGTGCCAGCAGCGTGTA… CASSVY… TRBV9 TRBD1 TRBJ2… 13 20 22 29 -1 6 6 NA
|
|
22
|
-
#' immunarch::immdata$meta |> head()
|
|
23
|
-
#' # Sample ID Sex Age Status Lane
|
|
24
|
-
#' # <chr> <chr> <chr> <dbl> <chr> <chr>
|
|
25
|
-
#' # 1 A2-i129 C1 M 11 C A
|
|
26
|
-
#' # 2 A2-i131 C2 M 9 C A
|
|
27
|
-
#' # 3 A2-i133 C4 M 16 C A
|
|
28
|
-
#' # 4 A2-i132 C3 F 6 C A
|
|
29
|
-
#' # 5 A4-i191 C8 F 22 C B
|
|
30
|
-
#' # 6 A4-i192 C9 F 24 C B
|
|
31
|
-
#'
|
|
32
|
-
#' @export
|
|
33
|
-
expand_immdata <- function(immdata, cell_id = "Barcode") {
  # Only the first sample is inspected for the cell id column
  first_sample <- immdata$data[[1]]
  if (!cell_id %in% colnames(first_sample)) {
    stop(paste0("cell_id '", cell_id, "' not found in data"))
  }

  # One sample -> one row per cell, tagged with the sample name and joined
  # with the sample-level metadata
  expand_one <- function(name) {
    # Split barcodes
    cells <- separate_rows(immdata$data[[name]], !!sym(cell_id), sep = ";")
    cells$Sample <- name
    left_join(cells, immdata$meta, by = "Sample", suffix = c("_data", ""))
  }

  do.call(rbind, lapply(names(immdata$data), expand_one))
}
|
|
46
|
-
|
|
47
|
-
#' Filter expanded immdata
|
|
48
|
-
#'
|
|
49
|
-
#' @param exdata Expanded immdata
|
|
50
|
-
#' @param filters Filters
|
|
51
|
-
#' @return Filtered data
|
|
52
|
-
#'
|
|
53
|
-
#' @export
|
|
54
|
-
filter_expanded_immdata <- function(exdata, filters, update_clones = FALSE) {
  # Nothing to filter on: hand the data back untouched
  if (length(filters) == 0) {
    return(exdata)
  }

  # `filters` is a string holding a dplyr filter expression
  kept <- dplyr::filter(exdata, !!parse_expr(filters))
  if (!update_clones) {
    return(kept)
  }

  # Recount clones per (Sample, CDR3.aa) among the remaining cells ...
  recounted <- kept %>%
    group_by(Sample, CDR3.aa) %>%
    mutate(Clones = n()) %>%
    ungroup()
  # ... and express them as a fraction of the cells left in each sample
  recounted %>%
    group_by(Sample) %>%
    mutate(Proportion = Clones / n()) %>%
    ungroup() %>%
    arrange(Sample, desc(Clones))
}
|
|
71
|
-
|
|
72
|
-
#' Convert expanded immdata to Immunarch object
|
|
73
|
-
#'
|
|
74
|
-
#' @param exdata Expanded immdata
|
|
75
|
-
#' @param metacols Columns to be added to `$meta`
|
|
76
|
-
#' @return Immunarch object
|
|
77
|
-
#'
|
|
78
|
-
#' @export
|
|
79
|
-
immdata_from_expanded <- function(
    exdata,
    metacols = NULL,
    cell_id = "Barcode",
    update_clones = TRUE
) {
  if (is.null(metacols)) {
    # Every column that is not one of these known clonotype columns is
    # treated as sample-level metadata
    clonotype_cols <- c(
      "Clones", "Proportion", "CDR3.nt", "CDR3.aa", "V.name", "D.name", "J.name",
      "V.end", "D.start", "D.end", "J.start", "VJ.ins", "VD.ins", "DJ.ins",
      "Sequence", "chain", "Barcode", "raw_clonotype_id", "ContigID", "C.name",
      "CDR1.nt", "CDR2.nt", "CDR1.aa", "CDR2.aa", "FR1.nt", "FR2.nt", "FR3.nt",
      "FR4.nt", "FR1.aa", "FR2.aa", "FR3.aa", "FR4.aa"
    )
    metacols <- setdiff(colnames(exdata), clonotype_cols)
  }

  # `$meta`: the metadata columns, one row per sample (first occurrence)
  meta <- exdata[, metacols, drop = FALSE]
  meta <- meta[!duplicated(meta$Sample), , drop = FALSE]
  out <- list(meta = meta)

  # Collapse the cells of one sample back to one row per CDR3.aa
  collapse_sample <- function(dat) {
    ncells <- nrow(dat)
    dat_cols <- setdiff(colnames(dat), c("Clones", "Proportion", cell_id))
    # Remaining columns keep the first value seen within each clonotype
    first_exprs <- parse_exprs(
      sapply(dat_cols, function(x) paste0('first(`', x, '`)'))
    )
    collapsed <- dat %>%
      group_by(CDR3.aa) %>%
      summarise(
        Clones = ifelse(update_clones, n(), first(Clones)),
        Proportion = ifelse(update_clones, n() / ncells, first(Proportion)),
        !!sym(cell_id) := paste0(!!sym(cell_id), collapse = ";"),
        !!!first_exprs,
        .groups = "drop"
      )
    arrange(collapsed, desc(Clones))
  }

  # `$data`: the non-metadata columns, split by sample
  non_meta <- exdata[, setdiff(colnames(exdata), metacols), drop = FALSE]
  out$data <- lapply(split(non_meta, exdata$Sample), collapse_sample)
  out
}
|
|
117
|
-
|
|
118
|
-
#' Convert Seurat object to Anndata
|
|
119
|
-
#'
|
|
120
|
-
#' @param sobjfile Seurat object file
|
|
121
|
-
#' @param outfile Output file
|
|
122
|
-
#' @param assay Assay to be used
|
|
123
|
-
#'
|
|
124
|
-
#' @export
|
|
125
|
-
seurat_to_anndata <- function(sobjfile, outfile, assay = NULL, log_info, tmpdir = NULL, log_indent = "") {
  # Arguments beyond the roxygen block above:
  #   log_info   - logging function called with a message template
  #   tmpdir     - directory for the intermediate .h5seurat file
  #                (defaults to the directory of `outfile`)
  #   log_indent - text referenced by the `{log_indent}` placeholder in the
  #                log templates
  library(Seurat)
  library(SeuratDisk)
  library(hdf5r)
  if (endsWith(sobjfile, ".rds") || endsWith(sobjfile, ".RDS")) {
    library(digest)

    # The intermediate file name carries a short md5 digest of the input path
    dig <- digest::digest(sobjfile, algo = "md5")
    dig <- substr(dig, 1, 8)
    assay_name <- if (is.null(assay)) "" else paste0("_", assay)
    tmpdir <- tmpdir %||% dirname(outfile)
    # recursive = TRUE so that a nested, not-yet-existing tmpdir also works
    dir.create(tmpdir, showWarnings = FALSE, recursive = TRUE)
    h5seurat_file <- file.path(
      tmpdir,
      paste0(
        tools::file_path_sans_ext(basename(outfile)),
        assay_name, ".", dig, ".h5seurat"
      )
    )
    # Drop a cached intermediate file that is older than the input
    if (file.exists(h5seurat_file) &&
        (file.mtime(h5seurat_file) < file.mtime(sobjfile))) {
      file.remove(h5seurat_file)
    }
    if (!file.exists(h5seurat_file)) {
      log_info("{log_indent}Reading RDS file ...")
      sobj <- readRDS(sobjfile)
      assay <- assay %||% DefaultAssay(sobj)
      # In order to convert to h5ad
      # https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
      sobj$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
      DefaultAssay(sobj) <- "RNAv3"
      sobj$RNA <- NULL
      sobj <- RenameAssays(sobj, RNAv3 = "RNA")

      log_info("{log_indent}Saving to H5Seurat file ...")
      SaveH5Seurat(sobj, h5seurat_file)
      rm(sobj)
      gc()
    } else {
      log_info("{log_indent}Using existing H5Seurat file ...")
    }
    # Continue from the H5Seurat file in BOTH cases. Previously this was only
    # done after a fresh conversion, so reusing a cached file failed the
    # format check below.
    sobjfile <- h5seurat_file
  }

  if (!endsWith(sobjfile, ".h5seurat")) {
    stop(paste0("Unknown input file format: ",
                tools::file_ext(sobjfile),
                ". Supported formats: .rds, .RDS, .h5seurat"))
  }

  log_info("{log_indent}Converting to Anndata ...")
  Convert(sobjfile, dest = outfile, assay = assay %||% "RNA", overwrite = TRUE)

  log_info("{log_indent}Fixing categorical data ...")
  # See: https://github.com/mojaveazure/seurat-disk/issues/183
  H5.create_reference <- function(self, ...) {
    space <- self$get_space()
    do.call("[", c(list(space), list(...)))
    ref_type <- hdf5r::h5const$H5R_OBJECT
    ref_obj <- hdf5r::H5R_OBJECT$new(1, self)
    res <- .Call("R_H5Rcreate", ref_obj$ref, self$id, ".", ref_type,
                 space$id, FALSE, PACKAGE = "hdf5r")
    if (res$return_val < 0) {
      stop("Error creating object reference")
    }
    ref_obj$ref <- res$ref
    return(ref_obj)
  }

  h5ad <- H5File$new(outfile, "r+")
  # Close the file handle if anything below fails, then re-raise the error
  tryCatch(
    {
      cats <- names(h5ad[["obs/__categories"]])
      for (cat in cats) {
        catname <- paste0("obs/__categories/", cat)
        obsname <- paste0("obs/", cat)
        ref <- H5.create_reference(h5ad[[catname]])
        h5ad[[obsname]]$create_attr(
          attr_name = "categories",
          robj = ref,
          space = H5S$new(type = "scalar")
        )
      }
    },
    error = function(e) {
      h5ad$close()
      stop(e)
    }
  )
  h5ad$close()
}
|
|
File without changes
|