biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
biopipen/utils/plot.R DELETED
@@ -1,209 +0,0 @@
1
- library(ggplot2)
2
- pdf(NULL) # preventing Rplots.pdf
3
-
4
- plotVenn = function(
5
- # A named list with elements,
6
- # e.g. list(A=paste0("R", 1:5), B=paste0("R": 3:7))
7
- data,
8
- # Arguments for `ggVennDiagram()`
9
- args = list(),
10
- # Extra ggplot components in string
11
- ggs = NULL,
12
- # Parameters for device (res, width, height) for `png()`
13
- devpars = list(res=100, width=800, height=600),
14
- # The output file. If NULL, will return the plot object
15
- outfile = NULL
16
- ) {
17
- library(ggVennDiagram)
18
-
19
- args$x = data
20
- p = do.call(ggVennDiagram, args)
21
- if (!is.null(ggs)) {
22
- for (gg in ggs) {
23
- if (is.character(gg)) {
24
- p = p + eval(parse(text=gg))
25
- } else {
26
- p = p + gg
27
- }
28
- }
29
- }
30
-
31
- if (is.null(outfile)) {
32
- return (p)
33
- } else {
34
- for (outf in outfile) {
35
- if (endsWith(outf, ".pdf")) {
36
- pdf(outf, width = devpars$width / devpars$res, height=devpars$height / devpars$res)
37
- print(p)
38
- dev.off()
39
- } else {
40
- fmt = substring(outf, nchar(outf) - 2)
41
- devpars$filename = outf
42
- do.call(fmt, devpars)
43
- print(p)
44
- dev.off()
45
- }
46
- }
47
- }
48
- }
49
-
50
-
51
- plotGG = function(
52
- # A data frame (long format)
53
- data,
54
- # the geom
55
- geom,
56
- # Arguments for `geom_x()`
57
- args = list(),
58
- # Extra ggplot components in string
59
- ggs = NULL,
60
- # Parameters for device (res, width, height) for `png()`
61
- devpars = list(res=100, width=1000, height=1000),
62
- # The output file. If NULL, will return the plot object
63
- outfile = NULL
64
- ) {
65
-
66
- p = ggplot(data)
67
- p = p + do.call(paste0("geom_", geom), args)
68
- if (!is.null(ggs)) {
69
- for (gg in ggs) {
70
- if (is.character(gg)) {
71
- p = p + eval(parse(text=gg))
72
- } else {
73
- p = p + gg
74
- }
75
- }
76
- }
77
-
78
- if (is.null(outfile)) {
79
- return (p)
80
- } else {
81
- for (outf in outfile) {
82
- if (endsWith(outf, ".pdf")) {
83
- pdf(outf, width = devpars$width / devpars$res, height=devpars$height / devpars$res)
84
- print(p)
85
- dev.off()
86
- } else {
87
- fmt = substring(outf, nchar(outf) - 2)
88
- devpars$filename = outf
89
- do.call(fmt, devpars)
90
- print(p)
91
- dev.off()
92
- }
93
- }
94
- }
95
- }
96
-
97
-
98
- plotViolin = function(
99
- # A data frame (long format)
100
- data,
101
- # Arguments for `geom_violin()`
102
- args = list(),
103
- # Extra ggplot components in string
104
- ggs = NULL,
105
- # Parameters for device (res, width, height) for `png()`
106
- devpars = list(res=100, width=1000, height=1000),
107
- # The output file. If NULL, will return the plot object
108
- outfile = NULL
109
- ) {
110
- plotGG(data, "violin", args, ggs, devpars, outfile)
111
- }
112
-
113
-
114
- plotUpset = function(
115
- # A named list with elements,
116
- # e.g. list(A=paste0("R", 1:5), B=paste0("R": 3:7))
117
- # Or a data frame
118
- # https://cran.r-project.org/web/packages/ggupset/readme/README.html
119
- data,
120
- # Arguments for `scale_x_upset()`
121
- args = list(),
122
- # Extra ggplot components in string
123
- ggs = "geom_bar(aes(x=V1))",
124
- # Parameters for device (res, width, height) for `png()`
125
- devpars = list(res=100, width=1000, height=1000),
126
- # The output file. If NULL, will return the plot object
127
- outfile = NULL
128
- ) {
129
- library(ggupset)
130
- library(tidyr)
131
- library(dplyr)
132
-
133
- if (!is.data.frame(data) && is.list(data)) {
134
- all_elems = unique(unlist(data))
135
- df = data.frame(ALL_ELEMS = all_elems)
136
- data = do.call(cbind, lapply(names(data), function(nd) {
137
- df[df$ALL_ELEMS %in% data[[nd]], nd] = nd
138
- df
139
- })) %>% select(-ALL_ELEMS) %>% unite("V1", sep="; ", na.rm = TRUE) %>%
140
- mutate(V1 = strsplit(V1, "; ", fixed=TRUE))
141
- }
142
-
143
- p = ggplot(data)
144
- for (gg in ggs) {
145
- if (is.character(gg)) {
146
- p = p + eval(parse(text=gg))
147
- } else {
148
- p = p + gg
149
- }
150
- }
151
- p = p + do.call(scale_x_upset, args)
152
-
153
- if (is.null(outfile)) {
154
- return (p)
155
- } else {
156
- for (outf in outfile) {
157
- if (endsWith(outf, ".pdf")) {
158
- pdf(outf, width = devpars$width / devpars$res, height=devpars$height / devpars$res)
159
- print(p)
160
- dev.off()
161
- } else {
162
- fmt = substring(outf, nchar(outf) - 2)
163
- devpars$filename = outf
164
- do.call(fmt, devpars)
165
- print(p)
166
- dev.off()
167
- }
168
- }
169
- }
170
- }
171
-
172
- plotHeatmap = function(
173
- # Data matrix
174
- data,
175
- # Arguments for `ComplexHeatmap::Heatmap()`
176
- args = list(),
177
- # Other arguments for `ComplexHeatmap::draw()`
178
- draw = list(),
179
- # Parameters for device (res, width, height) for `png()`
180
- devpars = NULL,
181
- # The output file. If NULL, will return the plot object
182
- # If "draw", will call `ComplexHeatmap::draw()`
183
- outfile = NULL
184
- ) {
185
- library(ComplexHeatmap)
186
-
187
- args$matrix = as.matrix(data)
188
- hm = do.call(Heatmap, args)
189
-
190
- if (is.null(outfile)) {
191
- return(hm)
192
- } else if (identical(outfile, "draw")) {
193
- do.call(ComplexHeatmap::draw, c(list(hm), draw))
194
- } else {
195
- for (outf in outfile) {
196
- if (endsWith(outf, ".pdf")) {
197
- pdf(outf, width = devpars$width / devpars$res, height=devpars$height / devpars$res)
198
- do.call(ComplexHeatmap::draw, c(list(hm), draw))
199
- dev.off()
200
- } else {
201
- fmt = substring(outf, nchar(outf) - 2)
202
- devpars$filename = outf
203
- do.call(fmt, devpars)
204
- do.call(ComplexHeatmap::draw, c(list(hm), draw))
205
- dev.off()
206
- }
207
- }
208
- }
209
- }
biopipen/utils/repr.R DELETED
@@ -1,146 +0,0 @@
1
- library(rlang)
2
-
3
- #' The string representation of an object
4
- #' @param x An object
5
- #' @param newline Whether to add newlines to the output for each element
6
- #' @return The string representation
7
- #' @export
8
- repr <- function(x, newline = FALSE, ...) UseMethod("repr")
9
-
10
- repr.default <- function(x, newline = FALSE, ...) {
11
- klass <- paste0(class(x), collapse = "/")
12
- fallback <- paste0("<", klass, ": ", deparse(substitute(x)), ">")
13
-
14
- tryCatch(
15
- x$.repr(newline, ...),
16
- error = function(e) {
17
- fallback
18
- }
19
- )
20
- }
21
-
22
- repr.numeric <- function(x, newline = FALSE, ...) {
23
- if (length(x) == 1) {
24
- as.character(x)
25
- } else if (!newline) {
26
- paste0("c(", paste(x, collapse = paste0(", ")), ")")
27
- } else {
28
- paste0(
29
- "c(\n",
30
- paste0(lapply(x, function(y) paste0(" ", y)), collapse = ",\n"),
31
- "\n)"
32
- )
33
- }
34
- }
35
-
36
- repr.character <- function(x, newline = FALSE, ...) {
37
- if (length(x) == 1) {
38
- paste0("\"", x, "\"")
39
- } else if (!newline) {
40
- paste0("c(", paste0(lapply(x, function(y) sQuote(y, q = FALSE)), collapse = ", "), ")")
41
- } else {
42
- paste0(
43
- "c(\n",
44
- paste0(lapply(x, function(y) paste0(" ", sQuote(y, q = FALSE))), collapse = ",\n"),
45
- "\n)"
46
- )
47
- }
48
- }
49
-
50
- repr.factor <- function(x, newline = FALSE, ...) {
51
- if (!newline) {
52
- paste0(
53
- "factor(", repr(as.character(x), newline, ...), ", levels = ", repr(levels(x), newline, ...), ")"
54
- )
55
- } else if (!newline) {
56
- paste0(
57
- "factor(\n",
58
- paste0(" ", repr(as.character(x), newline, ...), ",\n"),
59
- " levels = ", repr(levels(x), newline, ...), "\n)"
60
- )
61
- } else {
62
- paste0(
63
- "factor(\n",
64
- paste0(" ", repr(as.character(x), newline, ...), ",\n"),
65
- " levels = ", repr(levels(x), newline, ...), "\n)"
66
- )
67
- }
68
- }
69
-
70
- repr.logical <- function(x, newline = FALSE, ...) {
71
- if (length(x) == 1) {
72
- if (x) "TRUE" else "FALSE"
73
- } else if (!newline) {
74
- paste0("c(", paste0(x, collapse = ","), ")")
75
- } else {
76
- paste0(
77
- "c(\n",
78
- paste0(lapply(x, function(y) paste0(" ", y)), collapse = ",\n"),
79
- "\n)"
80
- )
81
- }
82
- }
83
-
84
- repr.list <- function(x, newline = FALSE, ...) {
85
- start <- if (newline) "list(\n" else "list("
86
- end <- if (newline) "\n)" else ")"
87
- sep <- if (newline) ",\n" else ", "
88
- prefix <- if (newline) " " else ""
89
- if (length(names(x)) > 0) {
90
- paste0(
91
- start,
92
- paste0(
93
- lapply(seq_along(x), function(i) {
94
- name <- names(x)[i]
95
- if (identical(name, "")) {
96
- paste0(prefix, repr(x[[i]]))
97
- } else {
98
- paste0(prefix, bQuote(name), " = ", repr(x[[name]]))
99
- }
100
- }), collapse = sep
101
- ),
102
- end
103
- )
104
- } else {
105
- paste0(
106
- start, paste0(lapply(x, repr, newline = newline, ...), collapse = sep), end
107
- )
108
- }
109
- }
110
-
111
- repr.NULL <- function(x, newline = FALSE, ...) {
112
- "NULL"
113
- }
114
-
115
- repr.formula <- function(x, newline = FALSE, ...) {
116
- deparse(x)
117
- }
118
-
119
- repr.data.frame <- function(x, newline = FALSE, ...) {
120
- paste0(
121
- "data.frame(\n",
122
- paste0(
123
- lapply(names(x), function(name) {
124
- paste0(" ", bQuote(name), " = ", repr(x[[name]], newline = newline, ...))
125
- }), collapse = ",\n"
126
- ),
127
- "\n)"
128
- )
129
- }
130
-
131
- repr.environment <- function(x, newline = FALSE, ...) {
132
- if (!is_environment(x)) {
133
- # in case .GlobalEnv is dispatched here
134
- NextMethod()
135
- } else {
136
- nl <- if (newline) "\n" else " "
137
- prefix <- if (newline) " " else ""
138
- paste0(
139
- "rlang::env(", nl, paste0(
140
- lapply(ls(x), function(name) {
141
- paste0(prefix, bQuote(name), " = ", repr(get(name, x), newline = newline, ...))
142
- }), collapse = paste0(",", nl)
143
- ), nl, ")"
144
- )
145
- }
146
- }
biopipen/utils/rnaseq.R DELETED
@@ -1,48 +0,0 @@
1
-
2
- .normUnit = function(unit) {
3
- if ("count" %in% unit) {
4
- return("count")
5
- }
6
- return(unit)
7
- }
8
-
9
- glenFromGFFExons = function(exonfile) {
10
- gff = read.table(exonfile, header = F, row.names = NULL)
11
- # V4: start, V5: end, V10: gene name
12
- glen = aggregate(V5-V4+1 ~ V10, gff, sum)
13
- genes = glen[,1]
14
- glen = glen[,-1,drop=TRUE]
15
- names(glen) = genes
16
- return(glen)
17
- }
18
-
19
- count2tpm = function(x, args) {
20
- if (is.null(args$genelen)) {
21
- stop("Gene lengths are required to convert count to TPM.")
22
- }
23
- glengenes = names(args$genelen)
24
- mygenes = rownames(x)
25
- missing = setdiff(mygenes, glengenes)
26
- warning(paste(length(missing), "gene cannot be found in gene length data"))
27
- warning(paste(missing, sep=", "))
28
-
29
- genes = intersect(mygenes, glengenes)
30
- x = x[genes, , drop=FALSE]
31
-
32
- # see: https://gist.github.com/slowkow/c6ab0348747f86e2748b
33
- # and https://support.bioconductor.org/p/91218/
34
- out = x / unlist(args$genelen[genes])
35
- out = t(t(out) * 1e6 / colSums(out))
36
- rownames(out) = genes
37
- colnames(out) = colnames(x)
38
-
39
- return(out)
40
- }
41
-
42
-
43
- unit_conversion = function(x, inunit, outunit, args=list()) {
44
- inunit = .normUnit(inunit)
45
- outunit = .normUnit(outunit)
46
- func = get(paste0(inunit, "2", outunit))
47
- func(x, args)
48
- }
biopipen/utils/single_cell.R DELETED
@@ -1,207 +0,0 @@
1
- suppressPackageStartupMessages(library(rlang))
2
- suppressPackageStartupMessages(library(dplyr))
3
- suppressPackageStartupMessages(library(tidyr))
4
- try(suppressPackageStartupMessages(library(immunarch)))
5
-
6
- #' Expand a Immunarch object into cell-level
7
- #'
8
- #' Here is how the data is expanded:
9
- #' 1. Expand `$data` by Barcode (other columns are copied)
10
- #' 2. Add sample to `Sample` column
11
- #' 3. Add columns from `$meta`
12
- #'
13
- #' @param immdata Immunarch object
14
- #' @return A data frame
15
- #'
16
- #' @example
17
- #' immunarch::immdata$data$MS1 |> head()
18
- #' # Clones Proportion CDR3.nt CDR3.aa V.name D.name J.name V.end D.start D.end J.start VJ.ins VD.ins DJ.ins Sequence
19
- #' # <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <int> <dbl> <dbl> <dbl> <lgl>
20
- #' # 1 539 0.0634 TGTGCCAGCAGCTTACA… CASSLQ… TRBV7… TRBD2 TRBJ2… 14 18 26 29 -1 3 2 NA
21
- #' # 2 320 0.0376 TGTGCCAGCAGCGTGTA… CASSVY… TRBV9 TRBD1 TRBJ2… 13 20 22 29 -1 6 6 NA
22
- #' immunarch::immdata$meta |> head()
23
- #' # Sample ID Sex Age Status Lane
24
- #' # <chr> <chr> <chr> <dbl> <chr> <chr>
25
- #' # 1 A2-i129 C1 M 11 C A
26
- #' # 2 A2-i131 C2 M 9 C A
27
- #' # 3 A2-i133 C4 M 16 C A
28
- #' # 4 A2-i132 C3 F 6 C A
29
- #' # 5 A4-i191 C8 F 22 C B
30
- #' # 6 A4-i192 C9 F 24 C B
31
- #'
32
- #' @export
33
- expand_immdata <- function(immdata, cell_id = "Barcode") {
34
- if (!cell_id %in% colnames(immdata$data[[1]])) {
35
- stop(paste0("cell_id '", cell_id, "' not found in data"))
36
- }
37
- do.call(rbind, lapply(names(immdata$data), function(name) {
38
- # Split barcodes
39
- dat <- immdata$data[[name]] %>% separate_rows(!!sym(cell_id), sep = ";")
40
- dat$Sample <- name
41
- dat <- dat %>% left_join(immdata$meta, by = "Sample", suffix = c("_data", ""))
42
-
43
- dat
44
- }))
45
- }
46
-
47
- #' Filter expanded immdata
48
- #'
49
- #' @param exdata Expanded immdata
50
- #' @param filters Filters
51
- #' @return Filtered data
52
- #'
53
- #' @export
54
- filter_expanded_immdata <- function(exdata, filters, update_clones = FALSE) {
55
- if (length(filters) == 0) {
56
- return(exdata)
57
- }
58
- out <- exdata %>% dplyr::filter(!!parse_expr(filters))
59
- if (update_clones) {
60
- out <- out %>%
61
- group_by(Sample, CDR3.aa) %>%
62
- mutate(Clones = n()) %>%
63
- ungroup() %>%
64
- group_by(Sample) %>%
65
- mutate(Proportion = Clones / n()) %>%
66
- ungroup() %>%
67
- arrange(Sample, desc(Clones))
68
- }
69
- out
70
- }
71
-
72
- #' Convert expanded immdata to Immunarch object
73
- #'
74
- #' @param exdata Expanded immdata
75
- #' @param metacols Columns to be added to `$meta`
76
- #' @return Immunarch object
77
- #'
78
- #' @export
79
- immdata_from_expanded <- function(
80
- exdata,
81
- metacols = NULL,
82
- cell_id = "Barcode",
83
- update_clones = TRUE
84
- ) {
85
- if (is.null(metacols)) {
86
- metacols = setdiff(colnames(exdata), c(
87
- "Clones", "Proportion", "CDR3.nt", "CDR3.aa", "V.name", "D.name", "J.name",
88
- "V.end", "D.start", "D.end", "J.start", "VJ.ins", "VD.ins", "DJ.ins",
89
- "Sequence", "chain", "Barcode", "raw_clonotype_id", "ContigID", "C.name",
90
- "CDR1.nt", "CDR2.nt", "CDR1.aa", "CDR2.aa", "FR1.nt", "FR2.nt", "FR3.nt",
91
- "FR4.nt", "FR1.aa", "FR2.aa", "FR3.aa", "FR4.aa"
92
- ))
93
- }
94
- out <- list(meta = exdata[, metacols, drop = FALSE])
95
- out$meta <- out$meta[!duplicated(out$meta$Sample), , drop = FALSE]
96
- out$data <- lapply(
97
- split(
98
- exdata[, setdiff(colnames(exdata), metacols), drop = FALSE],
99
- exdata$Sample
100
- ),
101
- function(dat) {
102
- ncells <- nrow(dat)
103
- dat_cols <- setdiff(colnames(dat), c("Clones", "Proportion", cell_id))
104
- dat %>% group_by(CDR3.aa) %>%
105
- summarise(
106
- Clones = ifelse(update_clones, n(), first(Clones)),
107
- Proportion = ifelse(update_clones, n() / ncells, first(Proportion)),
108
- !!sym(cell_id) := paste0(!!sym(cell_id), collapse = ";"),
109
- !!!parse_exprs(sapply(dat_cols, function(x) paste0('first(`', x, '`)'))),
110
- .groups = "drop"
111
- ) %>%
112
- arrange(desc(Clones))
113
- }
114
- )
115
- out
116
- }
117
-
118
- #' Convert Seurat object to Anndata
119
- #'
120
- #' @param sobjfile Seurat object file
121
- #' @param outfile Output file
122
- #' @param assay Assay to be used
123
- #'
124
- #' @export
125
- seurat_to_anndata <- function(sobjfile, outfile, assay = NULL, log_info, tmpdir = NULL, log_indent = "") {
126
- library(Seurat)
127
- library(SeuratDisk)
128
- library(hdf5r)
129
- if (endsWith(sobjfile, ".rds") || endsWith(sobjfile, ".RDS")) {
130
- library(digest)
131
-
132
- dig <- digest::digest(sobjfile, algo = "md5")
133
- dig <- substr(dig, 1, 8)
134
- assay_name <- ifelse(is.null(assay), "", paste0("_", assay))
135
- tmpdir <- tmpdir %||% dirname(outfile)
136
- dir.create(tmpdir, showWarnings = FALSE)
137
- h5seurat_file <- file.path(
138
- tmpdir,
139
- paste0(
140
- tools::file_path_sans_ext(basename(outfile)),
141
- assay_name, ".", dig, ".h5seurat"
142
- )
143
- )
144
- if (file.exists(h5seurat_file) &&
145
- (file.mtime(h5seurat_file) < file.mtime(sobjfile))) {
146
- file.remove(h5seurat_file)
147
- }
148
- if (!file.exists(h5seurat_file)) {
149
- log_info("{log_indent}Reading RDS file ...")
150
- sobj <- readRDS(sobjfile)
151
- assay <- assay %||% DefaultAssay(sobj)
152
- # In order to convert to h5ad
153
- # https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
154
- sobj$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
155
- DefaultAssay(sobj) <- "RNAv3"
156
- sobj$RNA <- NULL
157
- sobj <- RenameAssays(sobj, RNAv3 = "RNA")
158
-
159
- log_info("{log_indent}Saving to H5Seurat file ...")
160
- SaveH5Seurat(sobj, h5seurat_file)
161
- rm(sobj)
162
- gc()
163
- sobjfile <- h5seurat_file
164
- } else {
165
- log_info("{log_indent}Using existing H5Seurat file ...")
166
- }
167
- }
168
-
169
- if (!endsWith(sobjfile, ".h5seurat")) {
170
- stop(paste0("Unknown input file format: ",
171
- tools::file_ext(sobjfile),
172
- ". Supported formats: .rds, .RDS, .h5seurat"))
173
- }
174
-
175
- log_info("{log_indent}Converting to Anndata ...")
176
- Convert(sobjfile, dest = outfile, assay = assay %||% "RNA", overwrite = TRUE)
177
-
178
- log_info("{log_indent}Fixing categorical data ...")
179
- # See: https://github.com/mojaveazure/seurat-disk/issues/183
180
- H5.create_reference <- function(self, ...) {
181
- space <- self$get_space()
182
- do.call("[", c(list(space), list(...)))
183
- ref_type <- hdf5r::h5const$H5R_OBJECT
184
- ref_obj <- hdf5r::H5R_OBJECT$new(1, self)
185
- res <- .Call("R_H5Rcreate", ref_obj$ref, self$id, ".", ref_type,
186
- space$id, FALSE, PACKAGE = "hdf5r")
187
- if (res$return_val < 0) {
188
- stop("Error creating object reference")
189
- }
190
- ref_obj$ref <- res$ref
191
- return(ref_obj)
192
- }
193
-
194
- h5ad <- H5File$new(outfile, "r+")
195
- cats <- names(h5ad[["obs/__categories"]])
196
- for (cat in cats) {
197
- catname <- paste0("obs/__categories/", cat)
198
- obsname <- paste0("obs/", cat)
199
- ref <- H5.create_reference(h5ad[[catname]])
200
- h5ad[[obsname]]$create_attr(
201
- attr_name = "categories",
202
- robj = ref,
203
- space = H5S$new(type = "scalar")
204
- )
205
- }
206
- h5ad$close()
207
- }