@sjcrh/proteinpaint-server 2.113.0 → 2.115.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -6
- package/routes/correlationVolcano.js +4 -1
- package/routes/samplewsimages.js +23 -35
- package/routes/termdb.cluster.js +182 -74
- package/routes/termdb.config.js +2 -0
- package/routes/termdb.topMutatedGenes.js +51 -0
- package/routes/wsisamples.js +74 -0
- package/src/app.js +834 -780
- package/src/serverconfig.js +1 -1
- package/utils/edge.R +202 -202
- package/utils/regression.utils.R +2 -1
- package/routes/gdc.topMutatedGenes.js +0 -275
package/src/serverconfig.js
CHANGED
|
@@ -126,7 +126,7 @@ if (serverconfig.debugmode && !serverconfig.binpath.includes('sjcrh/')) {
|
|
|
126
126
|
const routeSetters = []
|
|
127
127
|
const defaultDir = path.join(serverconfig.binpath, 'src/test/routes')
|
|
128
128
|
// will add testing routes as needed and if found, such as in dev environment
|
|
129
|
-
const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js', '
|
|
129
|
+
const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js', 'coverage.js']
|
|
130
130
|
if (serverconfig.features.sse === undefined) serverconfig.features.sse = true
|
|
131
131
|
if (typeof serverconfig.features.sse !== 'boolean') {
|
|
132
132
|
throw `serverconfig.features.sse must be either undefined or boolean`
|
package/utils/edge.R
CHANGED
|
@@ -2,70 +2,70 @@
|
|
|
2
2
|
|
|
3
3
|
# Load required packages
|
|
4
4
|
suppressWarnings({
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
5
|
+
library(jsonlite)
|
|
6
|
+
library(rhdf5)
|
|
7
|
+
library(stringr)
|
|
8
|
+
library(readr)
|
|
9
|
+
suppressPackageStartupMessages(library(edgeR))
|
|
10
|
+
suppressPackageStartupMessages(library(dplyr))
|
|
11
11
|
})
|
|
12
12
|
|
|
13
13
|
# Filter based on CPM
|
|
14
14
|
filter_using_cpm <- function(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
selr <- rowSums(cpm(y$counts)>gene_cpm_cutoff)>=sample_cpm_cutoff
|
|
16
|
+
selc <- colSums(cpm(y$counts))>=count_cpm_cutoff
|
|
17
|
+
y <- y[selr, selc]
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
# Read JSON input from stdin
|
|
21
21
|
read_json_time <- system.time({
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
22
|
+
con <- file("stdin", "r")
|
|
23
|
+
json <- readLines(con, warn=FALSE)
|
|
24
|
+
close(con)
|
|
25
|
+
input <- fromJSON(json)
|
|
26
|
+
cases <- unlist(strsplit(input$case, ","))
|
|
27
|
+
controls <- unlist(strsplit(input$control, ","))
|
|
28
|
+
combined <- c("geneID", "geneSymbol", cases, controls)
|
|
29
29
|
})
|
|
30
30
|
#cat("Time to read JSON: ", as.difftime(read_json_time, units = "secs")[3], " seconds\n")
|
|
31
31
|
|
|
32
32
|
# Read counts data
|
|
33
33
|
read_counts_time <- system.time({
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
if (input$storage_type == "HDF5") {
|
|
35
|
+
geneIDs <- h5read(input$input_file, "gene_names")
|
|
36
|
+
geneSymbols <- h5read(input$input_file, "gene_symbols")
|
|
37
|
+
samples <- h5read(input$input_file, "samples")
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
# Find indices of case and control samples in the HDF5 file
|
|
40
|
+
case_indices <- match(cases, samples)
|
|
41
|
+
control_indices <- match(controls, samples)
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
48
|
-
if (any(is.na(control_indices))) {
|
|
49
|
-
missing_controls <- controls[is.na(control_indices)]
|
|
50
|
-
stop(paste(missing_controls, "not found"))
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
samples_indices <- c(case_indices, control_indices)
|
|
54
|
-
read_counts <- as.data.frame(t(h5read(input$input_file, "counts", index = list(samples_indices, 1:length(geneIDs)))))
|
|
55
|
-
colnames(read_counts) <- c(cases, controls)
|
|
56
|
-
} else if (input$storage_type == "text") {
|
|
57
|
-
suppressWarnings({
|
|
58
|
-
suppressMessages({
|
|
59
|
-
read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
|
|
60
|
-
})
|
|
61
|
-
})
|
|
62
|
-
geneIDs <- unlist(read_counts[1])
|
|
63
|
-
geneSymbols <- unlist(read_counts[2])
|
|
64
|
-
read_counts <- select(read_counts, -geneID)
|
|
65
|
-
read_counts <- select(read_counts, -geneSymbol)
|
|
66
|
-
} else {
|
|
67
|
-
stop("Unknown storage type")
|
|
43
|
+
# Check for missing samples
|
|
44
|
+
if (any(is.na(case_indices))) {
|
|
45
|
+
missing_cases <- cases[is.na(case_indices)]
|
|
46
|
+
stop(paste(missing_cases, "not found"))
|
|
68
47
|
}
|
|
48
|
+
if (any(is.na(control_indices))) {
|
|
49
|
+
missing_controls <- controls[is.na(control_indices)]
|
|
50
|
+
stop(paste(missing_controls, "not found"))
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
samples_indices <- c(case_indices, control_indices)
|
|
54
|
+
read_counts <- as.data.frame(t(h5read(input$input_file, "counts", index = list(samples_indices, 1:length(geneIDs)))))
|
|
55
|
+
colnames(read_counts) <- c(cases, controls)
|
|
56
|
+
} else if (input$storage_type == "text") {
|
|
57
|
+
suppressWarnings({
|
|
58
|
+
suppressMessages({
|
|
59
|
+
read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
|
|
60
|
+
})
|
|
61
|
+
})
|
|
62
|
+
geneIDs <- unlist(read_counts[1])
|
|
63
|
+
geneSymbols <- unlist(read_counts[2])
|
|
64
|
+
read_counts <- select(read_counts, -geneID)
|
|
65
|
+
read_counts <- select(read_counts, -geneSymbol)
|
|
66
|
+
} else {
|
|
67
|
+
stop("Unknown storage type")
|
|
68
|
+
}
|
|
69
69
|
})
|
|
70
70
|
#cat("Time to read counts data: ", as.difftime(read_counts_time, units = "secs")[3], " seconds\n")
|
|
71
71
|
|
|
@@ -75,210 +75,210 @@ gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
|
|
|
75
75
|
|
|
76
76
|
# Create DGEList object
|
|
77
77
|
dge_list_time <- system.time({
|
|
78
|
-
|
|
78
|
+
y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
|
|
79
79
|
})
|
|
80
80
|
#cat("Time to generate DGEList: ", as.difftime(dge_list_time, units = "secs")[3], " seconds\n")
|
|
81
81
|
|
|
82
82
|
# Filter and normalize counts
|
|
83
83
|
filter_time <- system.time({
|
|
84
|
-
|
|
84
|
+
keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
|
|
85
85
|
})
|
|
86
86
|
#cat("Time to filter by expression: ", as.difftime(filter_time, unit = "secs")[3], " seconds\n")
|
|
87
87
|
|
|
88
88
|
normalization_time <- system.time({
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
y <- y[keep, keep.lib.sizes = FALSE]
|
|
90
|
+
y <- normLibSizes(y) # Using TMM method for normalization
|
|
91
91
|
})
|
|
92
92
|
#cat("Normalization time: ", as.difftime(normalization_time, units = "secs")[3], " seconds\n")
|
|
93
93
|
|
|
94
94
|
# Cutoffs for cpm, will add them as UI options later
|
|
95
95
|
if (length(samples_indices) > 100) {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
96
|
+
gene_cpm_cutoff <- 15
|
|
97
|
+
sample_cpm_cutoff <- 30
|
|
98
|
+
count_cpm_cutoff <- 100000
|
|
99
99
|
} else {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
gene_cpm_cutoff <- 5
|
|
101
|
+
sample_cpm_cutoff <- 15
|
|
102
|
+
count_cpm_cutoff <- 100000
|
|
103
103
|
}
|
|
104
104
|
|
|
105
105
|
filter_using_cpm_time <- system.time({
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
y <- filter_using_cpm(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) # Filtering counts matrix based on gene_cpm_cutoff, sample_cpm_cutoff and count_cpm_cutoff
|
|
107
|
+
})
|
|
108
108
|
#cat("Filter using cpm time: ", as.difftime(filter_using_cpm_time, units = "secs")[3], " seconds\n")
|
|
109
109
|
|
|
110
110
|
# Saving MDS plot image
|
|
111
111
|
|
|
112
112
|
if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
113
|
+
mds_plot_time <- system.time({
|
|
114
|
+
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
115
|
+
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
116
|
+
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
117
|
+
mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
118
|
+
png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
119
|
+
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
120
|
+
mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
|
|
121
|
+
plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
|
|
122
|
+
# dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
123
|
+
})
|
|
124
|
+
#cat("mds plot time: ", as.difftime(mds_plot_time, units = "secs")[3], " seconds\n")
|
|
125
125
|
}
|
|
126
126
|
|
|
127
127
|
# Differential expression analysis
|
|
128
128
|
if (length(input$conf1) == 0) { # No adjustment of confounding factors
|
|
129
|
-
|
|
129
|
+
design <- model.matrix(~conditions) # Based on the protocol defined in section 1.4 of edgeR manual https://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
|
|
130
130
|
} else { # Adjusting for confounding factors
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
if (length(input$conf2) == 0) { # No adjustment of confounding factor 2
|
|
139
|
-
y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1)
|
|
140
|
-
model_gen_time <- system.time({
|
|
141
|
-
design <- model.matrix(~ conditions + conf1, data = y$samples)
|
|
142
|
-
})
|
|
143
|
-
#cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
|
|
144
|
-
} else {
|
|
145
|
-
# Check the type of confounding variable 2
|
|
146
|
-
if (input$conf2_mode == "continuous") { # If this is float, the input conf2 vector should be converted into a numeric vector
|
|
147
|
-
conf2 <- as.numeric(input$conf2)
|
|
148
|
-
} else { # When input$conf2_mode == "discrete" keep the vector as string.
|
|
149
|
-
conf2 <- as.factor(input$conf2)
|
|
150
|
-
}
|
|
151
|
-
y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1, conf2 = conf2)
|
|
152
|
-
model_gen_time <- system.time({
|
|
153
|
-
design <- model.matrix(~ conditions + conf1 + conf2, data = y$samples)
|
|
154
|
-
})
|
|
155
|
-
#cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
|
|
131
|
+
# Check the type of confounding variable
|
|
132
|
+
if (input$conf1_mode == "continuous") { # If this is float, the input conf1 vector should be converted into a numeric vector
|
|
133
|
+
conf1 <- as.numeric(input$conf1)
|
|
134
|
+
} else { # When input$conf1_mode == "discrete" keep the vector as string.
|
|
135
|
+
conf1 <- as.factor(input$conf1)
|
|
136
|
+
}
|
|
156
137
|
|
|
138
|
+
if (length(input$conf2) == 0) { # No adjustment of confounding factor 2
|
|
139
|
+
y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1)
|
|
140
|
+
model_gen_time <- system.time({
|
|
141
|
+
design <- model.matrix(~ conditions + conf1, data = y$samples)
|
|
142
|
+
})
|
|
143
|
+
#cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
|
|
144
|
+
} else {
|
|
145
|
+
# Check the type of confounding variable 2
|
|
146
|
+
if (input$conf2_mode == "continuous") { # If this is float, the input conf2 vector should be converted into a numeric vector
|
|
147
|
+
conf2 <- as.numeric(input$conf2)
|
|
148
|
+
} else { # When input$conf2_mode == "discrete" keep the vector as string.
|
|
149
|
+
conf2 <- as.factor(input$conf2)
|
|
157
150
|
}
|
|
151
|
+
y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1, conf2 = conf2)
|
|
152
|
+
model_gen_time <- system.time({
|
|
153
|
+
design <- model.matrix(~ conditions + conf1 + conf2, data = y$samples)
|
|
154
|
+
})
|
|
155
|
+
#cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
|
|
156
|
+
|
|
157
|
+
}
|
|
158
158
|
}
|
|
159
159
|
|
|
160
160
|
DE_method <- input$DE_method
|
|
161
161
|
if (DE_method == "edgeR") {
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
})
|
|
167
|
-
})
|
|
162
|
+
fit_time <- system.time({
|
|
163
|
+
suppressWarnings({
|
|
164
|
+
suppressMessages({
|
|
165
|
+
fit <- glmQLFit(y,design) # The glmQLFit() replaces glmFit() which implements the quasi-likelihood function. This is better able to account for overdispersion as it employs a more lenient approach where variance is not a fixed function of the mean.
|
|
168
166
|
})
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
167
|
+
})
|
|
168
|
+
})
|
|
169
|
+
#cat("QL fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
|
|
170
|
+
test_time <- system.time({
|
|
171
|
+
suppressWarnings({
|
|
172
|
+
suppressMessages({
|
|
173
|
+
et <- glmQLFTest(fit, coef = "conditionsDiseased")
|
|
176
174
|
})
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
175
|
+
})
|
|
176
|
+
})
|
|
177
|
+
#cat("QL test time: ", as.difftime(test_time, units = "secs")[3], " seconds\n")
|
|
178
|
+
|
|
179
|
+
# Saving QL fit image
|
|
180
|
+
ql_plot_time <- system.time({
|
|
181
|
+
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
182
|
+
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
183
|
+
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
184
|
+
fit_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
185
|
+
png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
186
|
+
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
187
|
+
plotQLDisp(fit) # Plot the edgeR fit
|
|
188
|
+
# dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
189
|
+
})
|
|
190
|
+
#cat("ql plot time: ", as.difftime(ql_plot_time, units = "secs")[3], " seconds\n")
|
|
191
|
+
logfc <- et$table$logFC
|
|
192
|
+
logcpm <- et$table$logCPM
|
|
193
|
+
pvalues <- et$table$PValue
|
|
194
|
+
genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
|
|
195
|
+
geneids <- unlist(genes_matrix[, 1])
|
|
196
|
+
genesymbols <- unlist(genes_matrix[, 2])
|
|
197
|
+
adjust_p_values <- p.adjust(pvalues, method = "fdr")
|
|
198
198
|
} else if (DE_method == "limma") {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
199
|
+
# Do voom transformation
|
|
200
|
+
voom_transformation_time <- system.time({
|
|
201
|
+
suppressWarnings({
|
|
202
|
+
suppressMessages({
|
|
203
|
+
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
204
|
+
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
205
|
+
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
206
|
+
fit_image_name <- paste0("limma_voom_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
207
|
+
png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
208
|
+
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
209
|
+
suppressWarnings({
|
|
210
|
+
suppressMessages({
|
|
211
|
+
y <- voom(y, design, plot = TRUE)
|
|
212
|
+
})
|
|
213
|
+
})
|
|
214
|
+
dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
215
|
+
})
|
|
216
|
+
})
|
|
217
|
+
})
|
|
218
|
+
#cat("voom transformation time: ", as.difftime(voom_transformation_time, units = "secs")[3], " seconds\n")
|
|
219
219
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
220
|
+
# Fit linear model
|
|
221
|
+
fit_time <- system.time({
|
|
222
|
+
suppressWarnings({
|
|
223
|
+
suppressMessages({
|
|
224
|
+
fit <- lmFit(y, design, plot = FALSE)
|
|
225
|
+
})
|
|
226
|
+
})
|
|
227
|
+
})
|
|
228
|
+
#cat("limma fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
|
|
229
229
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
230
|
+
# Saving mean-difference plot (aka MA plot)
|
|
231
|
+
#set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
232
|
+
#cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
233
|
+
#random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
234
|
+
#md_image_name <- paste0("limma_md_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
235
|
+
#png(filename = paste0(cachedir,"/",md_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
236
|
+
#par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
237
|
+
#plotMD(fit) # Plot the limma fit
|
|
238
|
+
## dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
239
239
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
240
|
+
# Empirical Bayes smoothing
|
|
241
|
+
empirical_smoothing_time <- system.time({
|
|
242
|
+
suppressWarnings({
|
|
243
|
+
suppressMessages({
|
|
244
|
+
tmp <- eBayes(fit)
|
|
245
|
+
})
|
|
246
|
+
})
|
|
247
|
+
})
|
|
248
|
+
#cat("Empirical smoothing time: ", as.difftime(empirical_smoothing_time, units = "secs")[3], " seconds\n")
|
|
249
249
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
250
|
+
# Time for selecting top genes
|
|
251
|
+
top_genes_selection_time <- system.time({
|
|
252
|
+
suppressWarnings({
|
|
253
|
+
suppressMessages({
|
|
254
|
+
top_table <- topTable(tmp, coef = "conditionsDiseased", number = Inf, adjust.method = "fdr") # The coeff needs to be specified in topTable() because it needs to know for which contrast the logFC needs to be calculated https://www.biostars.org/p/160465/
|
|
255
|
+
logfc <- top_table$logFC
|
|
256
|
+
pvalues <- top_table$P.Value
|
|
257
|
+
genes_matrix <- str_split_fixed(unlist(top_table$genes), "\t", 2)
|
|
258
|
+
geneids <- unlist(genes_matrix[, 1])
|
|
259
|
+
genesymbols <- unlist(genes_matrix[, 2])
|
|
260
|
+
adjust_p_values <- top_table$adj.P.Val
|
|
261
|
+
})
|
|
262
|
+
})
|
|
263
|
+
})
|
|
264
|
+
#cat("Time for selecting top genes: ", as.difftime(top_genes_selection_time, units = "secs")[3], " seconds\n")
|
|
265
265
|
} else { # Should not happen
|
|
266
|
-
|
|
266
|
+
stop(paste0("Unknown method:", DE_method))
|
|
267
267
|
}
|
|
268
268
|
|
|
269
269
|
final_data_generation_time <- system.time({
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
270
|
+
output <- data.frame(geneids, genesymbols, logfc, pvalues, adjust_p_values)
|
|
271
|
+
names(output)[1] <- "gene_name"
|
|
272
|
+
names(output)[2] <- "gene_symbol"
|
|
273
|
+
names(output)[3] <- "fold_change"
|
|
274
|
+
names(output)[4] <- "original_p_value"
|
|
275
|
+
names(output)[5] <- "adjusted_p_value"
|
|
276
276
|
})
|
|
277
277
|
final_output <- c()
|
|
278
278
|
final_output$gene_data <- output
|
|
279
279
|
final_output$edgeR_ql_image_name <- fit_image_name
|
|
280
280
|
if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
|
|
281
|
-
|
|
281
|
+
final_output$edgeR_mds_image_name <- mds_image_name
|
|
282
282
|
}
|
|
283
283
|
#cat("Time for generating final dataframe: ", as.difftime(final_data_generation_time, unit = "secs")[3], " seconds\n")
|
|
284
284
|
|
package/utils/regression.utils.R
CHANGED
|
@@ -24,7 +24,8 @@
|
|
|
24
24
|
#######
|
|
25
25
|
|
|
26
26
|
# prepare data table
|
|
27
|
-
prepareDataTable <- function(
|
|
27
|
+
prepareDataTable <- function(tempdat, independent) {
|
|
28
|
+
dat <- tempdat[,colnames(tempdat) != "__sample"] # remove sample column
|
|
28
29
|
for (r in 1:nrow(independent)) {
|
|
29
30
|
variable <- independent[r,]
|
|
30
31
|
id <- variable$id
|