biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +328 -292
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +481 -215
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +231 -76
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +6 -5
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/ScFGSEA.svelte +0 -16
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
biopipen/utils/gsea.R DELETED
@@ -1,329 +0,0 @@
1
- library(ggplot2)
2
- library(dplyr)
3
- library(tibble)
4
-
5
- if (!exists("slugify")) {
6
- slugify <- function(x, non_alphanum_replace="-", collapse_replace=TRUE, tolower=FALSE) {
7
- subs <- list(
8
- "š"="s", "œ"="oe", "ž"="z", "ß"="ss", "þ"="y", "à"="a", "á"="a", "â"="a",
9
- "ã"="a", "ä"="a", "å"="a", "æ"="ae", "ç"="c", "è"="e", "é"="e", "ê"="e",
10
- "ë"="e", "ì"="i", "í"="i", "î"="i", "ï"="i", "ð"="d", "ñ"="n", "ò"="o",
11
- "ó"="o", "ô"="o", "õ"="o", "ö"="o", "ø"="oe", "ù"="u", "ú"="u", "û"="u",
12
- "ü"="u", "ý"="y", "ÿ"="y", "ğ"="g", "ı"="i", "ij"="ij", "ľ"="l", "ň"="n",
13
- "ř"="r", "ş"="s", "ť"="t", "ų"="u", "ů"="u", "ý"="y", "ź"="z", "ż"="z",
14
- "ſ"="s", "α"="a", "β"="b", "γ"="g", "δ"="d", "ε"="e", "ζ"="z", "η"="h",
15
- "θ"="th", "ι"="i", "κ"="k", "λ"="l", "μ"="m", "ν"="n", "ξ"="x", "ο"="o",
16
- "π"="p", "ρ"="r", "σ"="s", "τ"="t", "υ"="u", "φ"="ph", "χ"="ch", "ψ"="ps",
17
- "ω"="o", "ά"="a", "έ"="e", "ή"="h", "ί"="i", "ό"="o", "ύ"="u", "ώ"="o",
18
- "ϐ"="b", "ϑ"="th", "ϒ"="y", "ϕ"="ph", "ϖ"="p", "Ϛ"="st", "ϛ"="st", "Ϝ"="f",
19
- "ϝ"="f", "Ϟ"="k", "ϟ"="k", "Ϡ"="k", "ϡ"="k", "ϰ"="k", "ϱ"="r", "ϲ"="s",
20
- "ϳ"="j", "ϴ"="th", "ϵ"="e", "϶"="p"
21
- )
22
- # replace latin and greek characters to the closest english character
23
- for (k in names(subs)) {
24
- x <- gsub(k, subs[[k]], x)
25
- }
26
- x <- gsub("[^[:alnum:]_]", non_alphanum_replace, x)
27
- if(collapse_replace) x <- gsub(
28
- paste0(gsub("([][{}()+*^$|\\\\?.])", "\\\\\\1", non_alphanum_replace), "+"),
29
- non_alphanum_replace,
30
- x
31
- )
32
- if(tolower) x <- tolower(x)
33
- x
34
- }
35
- }
36
-
37
- #' Download the GMT file and save it to cachedir
38
- #' Return the path to the GMT file
39
- #' We also check if the second column is shorter than the first column.
40
- #' If so, we switch the first and second columns.
41
- #' In case some providers provide the GMT file with the first and second columns switched.
42
- #' We also replace the "/" in the gene names with "-" if any. This is because the "/" is
43
- #' not allowed in a path, but GSEA uses the gene names as the file name.
44
- #'
45
- #' @param gmturl The URL or path of the GMT file
46
- #' @param cachedir The directory to save the GMT file
47
- #' @return The path to the GMT file
48
- localizeGmtfile <- function(gmturl, cachedir = tempdir()) {
49
- # Download the GMT file and save it to cachedir
50
- # Return the path to the GMT file
51
- in_gmtfile <- out_gmtfile <- file.path(cachedir, basename(gmturl))
52
- if (startsWith(gmturl, "http") || startsWith(gmturl, "ftp")) {
53
- download.file(gmturl, in_gmtfile)
54
- remote <- TRUE
55
- } else {
56
- in_gmtfile <- gmturl
57
- remote <- FALSE
58
- }
59
-
60
- items <- readLines(in_gmtfile)
61
- items <- items[!grepl("^#", items) & nchar(items) > 0]
62
- items <- lapply(strsplit(items, "\t"), function(x) c(x[1:2], paste0(x[3:length(x)], collapse = "\t")))
63
- items <- as.data.frame(t(as.data.frame(items)))
64
- rownames(items) <- NULL
65
- colnames(items) <- c("V1", "V2", "V3")
66
-
67
- if (ncol(items) < 3) {
68
- stop(paste0("Invalid GMT file: ", gmturl))
69
- }
70
- if (nrow(items) == 0) {
71
- stop(paste0("Empty GMT file: ", gmturl))
72
- }
73
-
74
- # Check if the second column is shorter than the first column
75
- nchars1 <- sum(nchar(items$V1))
76
- nchars2 <- sum(nchar(items$V2))
77
- prefix <- gsub("[0-9]+$", "", items$V2[1])
78
-
79
- if (is.character(items$V2) && # Only when V2 is character, as pathway names
80
- nchars2 < nchars1 && # Only when V2 is shorter than V1
81
- all(nchar(items$V2) > 0) && # Only when V2 is not empty
82
- !all(grepl("^[0-9]+$", items$V2)) && # Only when V2 is not all numbers
83
- (nchar(prefix) == 0 || !all(startsWith(items$V2, prefix))) # Only when they are not like hsa00001, hsa00002, etc.
84
- ) {
85
- warning(paste0(
86
- "The second column is shorter, switching the first and second columns in ",
87
- "GMT file ", gmturl
88
- ))
89
- items <- items[, c(2, 1, 3:ncol(items))]
90
- switched <- TRUE
91
- } else {
92
- switched <- FALSE
93
- }
94
-
95
- if (any(grepl("/", items$V1))) {
96
- items$V1 <- gsub("/", "-", items$V1)
97
- replaced <- TRUE
98
- } else {
99
- replaced <- FALSE
100
- }
101
-
102
- if (remote || switched || replaced) {
103
- write.table(
104
- items,
105
- out_gmtfile,
106
- row.names = FALSE,
107
- col.names = FALSE,
108
- sep = "\t",
109
- quote = FALSE
110
- )
111
- } else {
112
- out_gmtfile <- in_gmtfile
113
- }
114
-
115
- return(out_gmtfile)
116
- }
117
-
118
-
119
- prerank <- function(
120
- exprdata,
121
- pos,
122
- neg,
123
- classes, # must be in the order of colnames(exprdata)
124
- method = "signal_to_noise"
125
- ) {
126
- library(matrixStats)
127
- set.seed(8525)
128
- # See: https://gseapy.readthedocs.io/en/latest/_modules/gseapy/algorithm.html#ranking_metric
129
- expr_pos_mean = rowMeans(exprdata[, classes == pos, drop=F], na.rm=TRUE)
130
- expr_neg_mean = rowMeans(exprdata[, classes == neg, drop=F], na.rm=TRUE)
131
- expr_pos_std = rowSds(as.matrix(exprdata[, classes == pos, drop=F]), na.rm=TRUE, useNames = T)
132
- expr_neg_std = rowSds(as.matrix(exprdata[, classes == neg, drop=F]), na.rm=TRUE, useNames = T)
133
- rands = rnorm(length(expr_neg_std)) * 1e-6
134
-
135
- if (method %in% c("s2n", "signal_to_noise")) {
136
- out = (expr_pos_mean - expr_neg_mean) / (expr_pos_std + expr_neg_std + rands)
137
- } else if (method %in% c("abs_s2n", "abs_signal_to_noise")) {
138
- out = abs((expr_pos_mean - expr_neg_mean) / (expr_pos_std + expr_neg_std + rands))
139
- } else if (method == "t_test") {
140
- # ser = (df_mean[pos] - df_mean[neg])/ np.sqrt(df_std[pos]**2/len(df_std)+df_std[neg]**2/len(df_std) )
141
- out = (expr_pos_mean - expr_neg_mean) / sqrt(
142
- expr_pos_std ^ 2 / length(expr_pos_std) +
143
- expr_neg_std ^ 2 / length(expr_neg_std)
144
- )
145
- } else if (method == "ratio_of_classes") {
146
- out = expr_pos_mean / expr_neg_mean
147
- } else if (method == "diff_of_classes") {
148
- out = expr_pos_mean - expr_neg_mean
149
- } else if (method == "log2_ratio_of_classes") {
150
- out = log2(expr_pos_mean) - log2(expr_neg_mean)
151
- } else {
152
- stop(paste("Unknown method:", method))
153
- }
154
- # todo: log2fc * -log10(p)
155
- # see https://github.com/crazyhottommy/RNA-seq-analysis/blob/master/GSEA_explained.md#2-using-a-pre-ranked-gene-list
156
- out = as.data.frame(out) %>% rownames_to_column("Gene") %>% arrange(.[[2]])
157
- colnames(out)[2] = paste(pos, "vs", neg, sep="_")
158
- return(out)
159
- }
160
-
161
- runEnrichr = function(
162
- genes,
163
- dbs,
164
- outdir,
165
- showTerms = 20,
166
- numChar =40,
167
- orderBy = "P.value"
168
- ) {
169
- library(enrichR)
170
- setEnrichrSite("Enrichr") # Human genes
171
-
172
- enriched = enrichr(genes, dbs)
173
-
174
- for (db in dbs) {
175
- enr = enriched[[db]] %>% select(-c(Old.P.value, Old.Adjusted.P.value))
176
- outtable = file.path(outdir, paste0("Enrichr_", db, ".txt"))
177
- outfig = file.path(outdir, paste0("Enrichr_", db, ".png"))
178
- write.table(enr, outtable, row.names=T, col.names=F, sep="\t", quote=F)
179
-
180
- if (nrow(enr) == 0) {
181
- print(paste0("No enriched terms for ", db))
182
- next
183
- }
184
-
185
- png(outfig, res=100, height=1000, width=1400)
186
- print(
187
- plotEnrich(
188
- enriched[[db]],
189
- showTerms=showTerms,
190
- numChar=numChar,
191
- orderBy=orderBy
192
- )
193
- )
194
- dev.off()
195
- }
196
-
197
- }
198
-
199
- runFGSEA = function(
200
- ranks,
201
- gmtfile,
202
- top,
203
- outdir,
204
- envs = list(),
205
- plot = TRUE # only generate fgsea.txt?
206
- ) {
207
- library(data.table)
208
- library(fgsea)
209
- set.seed(8525)
210
-
211
- if (is.data.frame(ranks)) {
212
- ranks = setNames(ranks[[2]], ranks[[1]])
213
- } else if (is.list(ranks)) {
214
- ranks = unlist(ranks)
215
- }
216
-
217
- gmtfile = localizeGmtfile(gmtfile)
218
- envs$pathways = gmtPathways(gmtfile)
219
- envs$stats = ranks
220
- gsea_res = do.call(fgsea::fgsea, envs)
221
- gsea_res = gsea_res[order(pval), ]
222
-
223
- write.table(
224
- gsea_res %>%
225
- mutate(leadingEdge = sapply(leadingEdge, function(x) paste(x, collapse=",")),
226
- slug = sapply(pathway, slugify)),
227
- file = file.path(outdir, "fgsea.txt"),
228
- row.names = FALSE,
229
- col.names = TRUE,
230
- sep = "\t",
231
- quote = FALSE
232
- )
233
-
234
- if (!plot) {return (NULL)}
235
-
236
- if (top > 1) {
237
- topPathways = head(gsea_res, n=top)[, "pathway"]
238
- } else {
239
- topPathways = gsea_res[padj < top][, "pathway"]
240
- }
241
- topPathways = unlist(topPathways)
242
-
243
- p <- plotGseaTable(
244
- envs$pathways[topPathways],
245
- ranks,
246
- gsea_res,
247
- gseaParam = if (!is.null(envs$gseaParam)) envs$gseaParam else 1
248
- )
249
-
250
- tablefig = file.path(outdir, "gsea_table.png")
251
- png(tablefig, res=100, width=1000, height=200 + 40 * length(topPathways))
252
- print(p)
253
- dev.off()
254
-
255
- tablefig_pdf = file.path(outdir, "gsea_table.pdf")
256
- pdf(tablefig_pdf, width=10, height=2 + 0.4 * length(topPathways))
257
- print(p)
258
- dev.off()
259
-
260
- for (pathway in topPathways) {
261
- enrfig = file.path(outdir, paste0("fgsea_", slugify(pathway), ".png"))
262
- p <- plotEnrichment(
263
- envs$pathways[[pathway]],
264
- ranks,
265
- gseaParam = if (!is.null(envs$gseaParam)) envs$gseaParam else 1
266
- ) + labs(title = pathway)
267
-
268
- png(enrfig, res=100, width=1000, height=800)
269
- print(p)
270
- dev.off()
271
-
272
- enrfig_pdf = gsub(".png$", ".pdf", enrfig)
273
- pdf(enrfig_pdf, width=10, height=8)
274
- print(p)
275
- dev.off()
276
- }
277
- }
278
-
279
- runGSEA = function(
280
- indata, # expression data
281
- classes, # sample classes
282
- gmtfile, # the GMT file
283
- outdir,
284
- envs = list() # other arguments for GSEA()
285
- ) {
286
- library(GSEA)
287
- # reproducibility
288
- if (is.null(envs$random.seed)) {
289
- envs$random.seed <- 8525
290
- }
291
-
292
- # prepare gct file
293
- gctfile = file.path(outdir, "gsea.gct")
294
- con = file(gctfile, open='w')
295
- write("#1.2", con)
296
- write(paste(dim(indata), collapse = "\t"), con)
297
- close(con)
298
- indata = indata %>%
299
- as.data.frame() %>%
300
- mutate(Description = "na") %>%
301
- rownames_to_column("NAME") %>%
302
- select(NAME, Description, everything())
303
-
304
- write.table(
305
- indata,
306
- gctfile,
307
- row.names = F,
308
- col.names = T,
309
- sep="\t",
310
- quote=F,
311
- append = T
312
- )
313
-
314
- # prepare cls file
315
- clsfile = file.path(outdir, "gsea.cls")
316
- uniclasses = unique(classes)
317
- con = file(clsfile, open='w')
318
- write(paste(length(classes), length(uniclasses), '1'), con)
319
- write(paste('#', paste(uniclasses, collapse=" ")), con)
320
- write(paste(classes, collapse=" "), con)
321
- close(con)
322
-
323
- envs$input.ds = gctfile
324
- envs$input.cls = clsfile
325
- envs$gs.db = localizeGmtfile(gmtfile)
326
- envs$output.directory = outdir
327
-
328
- do.call(GSEA, envs)
329
- }
biopipen/utils/io.R DELETED
@@ -1,20 +0,0 @@
1
- read.table.opts = function(file, opts) {
2
- rncol = NULL
3
- if (!is.null(opts$row.names) && opts$row.names < 0) {
4
- rncol = -opts$row.names
5
- opts$row.names = NULL
6
- opts = c(opts, list(row.names=NULL))
7
- }
8
- if (endsWith(file, ".gz")) {
9
- opts$file = gzfile(file)
10
- } else {
11
- opts$file = file
12
- }
13
- out = do.call(read.table, opts)
14
- if (!is.null(rncol)) {
15
- rnames = make.unique(out[, rncol])
16
- out = out[, -rncol, drop=F]
17
- rownames(out) = rnames
18
- }
19
- return (out)
20
- }