@sjcrh/proteinpaint-server 2.112.0 → 2.113.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -4
- package/routes/termdb.DE.js +16 -12
- package/routes/termdb.cluster.js +14 -4
- package/routes/termdb.config.js +0 -3
- package/src/app.js +242 -132
- package/src/serverconfig.js +7 -5
- package/utils/edge.R +137 -80
package/src/serverconfig.js
CHANGED
|
@@ -9,7 +9,7 @@ import { fileURLToPath } from 'url'
|
|
|
9
9
|
|
|
10
10
|
// import.meta.dirname is undefined when using docker dev environment
|
|
11
11
|
// use __dirname and __filename global variable convention from commonjs
|
|
12
|
-
const __dirname = import.meta.dirname ||
|
|
12
|
+
const __dirname = import.meta.dirname || new URL('.', import.meta.url).pathname
|
|
13
13
|
const __filename = import.meta.filename || fileURLToPath(import.meta.url)
|
|
14
14
|
|
|
15
15
|
// do not assume that serverconfig.json is in the same dir as server.js
|
|
@@ -112,8 +112,7 @@ if (!serverconfig.binpath) {
|
|
|
112
112
|
} else {
|
|
113
113
|
if (fs.existsSync('./server')) serverconfig.binpath = fs.realpathSync('./server')
|
|
114
114
|
else if (fs.existsSync('./src')) serverconfig.binpath = fs.realpathSync('./src/..')
|
|
115
|
-
else if (__dirname.includes('/server/'))
|
|
116
|
-
serverconfig.binpath = __dirname.split('/server/')[0] + '/server'
|
|
115
|
+
else if (__dirname.includes('/server/')) serverconfig.binpath = __dirname.split('/server/')[0] + '/server'
|
|
117
116
|
else if (__dirname.includes('/proteinpaint')) serverconfig.binpath = __dirname
|
|
118
117
|
else throw 'unable to determine the serverconfig.binpath'
|
|
119
118
|
}
|
|
@@ -127,7 +126,7 @@ if (serverconfig.debugmode && !serverconfig.binpath.includes('sjcrh/')) {
|
|
|
127
126
|
const routeSetters = []
|
|
128
127
|
const defaultDir = path.join(serverconfig.binpath, 'src/test/routes')
|
|
129
128
|
// will add testing routes as needed and if found, such as in dev environment
|
|
130
|
-
const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js']
|
|
129
|
+
const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js', 'closeCoverage.js']
|
|
131
130
|
if (serverconfig.features.sse === undefined) serverconfig.features.sse = true
|
|
132
131
|
if (typeof serverconfig.features.sse !== 'boolean') {
|
|
133
132
|
throw `serverconfig.features.sse must be either undefined or boolean`
|
|
@@ -231,7 +230,10 @@ if (process.argv.find(a => a == 'validate')) {
|
|
|
231
230
|
serverconfig.features.stopGdcCacheAliquot = true
|
|
232
231
|
}
|
|
233
232
|
|
|
234
|
-
|
|
233
|
+
const publicDir = path.join(process.cwd(), './public')
|
|
234
|
+
if (!serverconfig.backend_only && fs.existsSync(publicDir)) serverconfig.publicDir = publicDir
|
|
235
|
+
|
|
236
|
+
if (serverconfig.publicDir) {
|
|
235
237
|
const defaultTarget = path.join(serverconfig.binpath, 'cards')
|
|
236
238
|
if (!serverconfig.cards) {
|
|
237
239
|
serverconfig.cards = {
|
package/utils/edge.R
CHANGED
|
@@ -27,7 +27,7 @@ read_json_time <- system.time({
|
|
|
27
27
|
controls <- unlist(strsplit(input$control, ","))
|
|
28
28
|
combined <- c("geneID", "geneSymbol", cases, controls)
|
|
29
29
|
})
|
|
30
|
-
#cat("Time to read JSON: ", read_json_time[3], " seconds\n")
|
|
30
|
+
#cat("Time to read JSON: ", as.difftime(read_json_time, units = "secs")[3], " seconds\n")
|
|
31
31
|
|
|
32
32
|
# Read counts data
|
|
33
33
|
read_counts_time <- system.time({
|
|
@@ -67,7 +67,7 @@ read_counts_time <- system.time({
|
|
|
67
67
|
stop("Unknown storage type")
|
|
68
68
|
}
|
|
69
69
|
})
|
|
70
|
-
#cat("Time to read counts data: ", read_counts_time[3], " seconds\n")
|
|
70
|
+
#cat("Time to read counts data: ", as.difftime(read_counts_time, units = "secs")[3], " seconds\n")
|
|
71
71
|
|
|
72
72
|
# Create conditions vector
|
|
73
73
|
conditions <- c(rep("Diseased", length(cases)), rep("Control", length(controls)))
|
|
@@ -77,19 +77,19 @@ gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
|
|
|
77
77
|
dge_list_time <- system.time({
|
|
78
78
|
y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
|
|
79
79
|
})
|
|
80
|
-
#cat("Time to generate DGEList: ", dge_list_time[3], " seconds\n")
|
|
80
|
+
#cat("Time to generate DGEList: ", as.difftime(dge_list_time, units = "secs")[3], " seconds\n")
|
|
81
81
|
|
|
82
82
|
# Filter and normalize counts
|
|
83
83
|
filter_time <- system.time({
|
|
84
84
|
keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
|
|
85
85
|
})
|
|
86
|
-
#cat("Time to filter by expression: ", filter_time[3], " seconds\n")
|
|
86
|
+
#cat("Time to filter by expression: ", as.difftime(filter_time, unit = "secs")[3], " seconds\n")
|
|
87
87
|
|
|
88
88
|
normalization_time <- system.time({
|
|
89
89
|
y <- y[keep, keep.lib.sizes = FALSE]
|
|
90
90
|
y <- normLibSizes(y) # Using TMM method for normalization
|
|
91
91
|
})
|
|
92
|
-
#cat("Normalization time: ", normalization_time[3], " seconds\n")
|
|
92
|
+
#cat("Normalization time: ", as.difftime(normalization_time, units = "secs")[3], " seconds\n")
|
|
93
93
|
|
|
94
94
|
# Cutoffs for cpm, will add them as UI options later
|
|
95
95
|
if (length(samples_indices) > 100) {
|
|
@@ -105,40 +105,28 @@ if (length(samples_indices) > 100) {
|
|
|
105
105
|
filter_using_cpm_time <- system.time({
|
|
106
106
|
y <- filter_using_cpm(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) # Filtering counts matrix based on gene_cpm_cutoff, sample_cpm_cutoff and count_cpm_cutoff
|
|
107
107
|
})
|
|
108
|
-
#cat("Filter using cpm time: ", filter_using_cpm_time[3], " seconds\n")
|
|
108
|
+
#cat("Filter using cpm time: ", as.difftime(filter_using_cpm_time, units = "secs")[3], " seconds\n")
|
|
109
109
|
|
|
110
110
|
# Saving MDS plot image
|
|
111
|
-
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
112
|
-
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
113
|
-
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
114
|
-
mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
115
|
-
png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
116
|
-
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
117
|
-
mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
|
|
118
|
-
plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
|
|
119
|
-
# dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
120
111
|
|
|
112
|
+
if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
|
|
113
|
+
mds_plot_time <- system.time({
|
|
114
|
+
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
115
|
+
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
116
|
+
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
117
|
+
mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
118
|
+
png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
119
|
+
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
120
|
+
mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
|
|
121
|
+
plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
|
|
122
|
+
# dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
123
|
+
})
|
|
124
|
+
#cat("mds plot time: ", as.difftime(mds_plot_time, units = "secs")[3], " seconds\n")
|
|
125
|
+
}
|
|
121
126
|
|
|
122
127
|
# Differential expression analysis
|
|
123
128
|
if (length(input$conf1) == 0) { # No adjustment of confounding factors
|
|
124
129
|
design <- model.matrix(~conditions) # Based on the protocol defined in section 1.4 of edgeR manual https://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
|
|
125
|
-
fit_time <- system.time({
|
|
126
|
-
suppressWarnings({
|
|
127
|
-
suppressMessages({
|
|
128
|
-
fit <- glmQLFit(y,design) # The glmQLFit() replaces glmFit() which implements the quasi-likelihood function. This is better able to account for overdispersion as it employs a more lenient approach where variance is not a fixed function of the mean.
|
|
129
|
-
})
|
|
130
|
-
})
|
|
131
|
-
})
|
|
132
|
-
#cat("QL fit time: ", fit_time[3], " seconds\n")
|
|
133
|
-
|
|
134
|
-
test_time <- system.time({
|
|
135
|
-
suppressWarnings({
|
|
136
|
-
suppressMessages({
|
|
137
|
-
et <- glmQLFTest(fit)
|
|
138
|
-
})
|
|
139
|
-
})
|
|
140
|
-
})
|
|
141
|
-
#cat("QL test time: ", test_time[3], " seconds\n")
|
|
142
130
|
} else { # Adjusting for confounding factors
|
|
143
131
|
# Check the type of confounding variable
|
|
144
132
|
if (input$conf1_mode == "continuous") { # If this is float, the input conf1 vector should be converted into a numeric vector
|
|
@@ -152,7 +140,7 @@ if (length(input$conf1) == 0) { # No adjustment of confounding factors
|
|
|
152
140
|
model_gen_time <- system.time({
|
|
153
141
|
design <- model.matrix(~ conditions + conf1, data = y$samples)
|
|
154
142
|
})
|
|
155
|
-
#cat("Time for making design matrix: ", model_gen_time[3], " seconds\n")
|
|
143
|
+
#cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
|
|
156
144
|
} else {
|
|
157
145
|
# Check the type of confounding variable 2
|
|
158
146
|
if (input$conf2_mode == "continuous") { # If this is float, the input conf2 vector should be converted into a numeric vector
|
|
@@ -164,46 +152,121 @@ if (length(input$conf1) == 0) { # No adjustment of confounding factors
|
|
|
164
152
|
model_gen_time <- system.time({
|
|
165
153
|
design <- model.matrix(~ conditions + conf1 + conf2, data = y$samples)
|
|
166
154
|
})
|
|
167
|
-
#cat("Time for making design matrix: ", model_gen_time[3], " seconds\n")
|
|
155
|
+
#cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
|
|
156
|
+
|
|
168
157
|
}
|
|
158
|
+
}
|
|
169
159
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
160
|
+
DE_method <- input$DE_method
|
|
161
|
+
if (DE_method == "edgeR") {
|
|
162
|
+
fit_time <- system.time({
|
|
163
|
+
suppressWarnings({
|
|
164
|
+
suppressMessages({
|
|
165
|
+
fit <- glmQLFit(y,design) # The glmQLFit() replaces glmFit() which implements the quasi-likelihood function. This is better able to account for overdispersion as it employs a more lenient approach where variance is not a fixed function of the mean.
|
|
166
|
+
})
|
|
167
|
+
})
|
|
168
|
+
})
|
|
169
|
+
#cat("QL fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
|
|
170
|
+
test_time <- system.time({
|
|
171
|
+
suppressWarnings({
|
|
172
|
+
suppressMessages({
|
|
173
|
+
et <- glmQLFTest(fit, coef = "conditionsDiseased")
|
|
174
|
+
})
|
|
175
|
+
})
|
|
176
|
+
})
|
|
177
|
+
#cat("QL test time: ", as.difftime(test_time, units = "secs")[3], " seconds\n")
|
|
178
|
+
|
|
179
|
+
# Saving QL fit image
|
|
180
|
+
ql_plot_time <- system.time({
|
|
181
|
+
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
182
|
+
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
183
|
+
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
184
|
+
fit_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
185
|
+
png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
186
|
+
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
187
|
+
plotQLDisp(fit) # Plot the edgeR fit
|
|
188
|
+
# dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
189
|
+
})
|
|
190
|
+
#cat("ql plot time: ", as.difftime(ql_plot_time, units = "secs")[3], " seconds\n")
|
|
191
|
+
logfc <- et$table$logFC
|
|
192
|
+
logcpm <- et$table$logCPM
|
|
193
|
+
pvalues <- et$table$PValue
|
|
194
|
+
genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
|
|
195
|
+
geneids <- unlist(genes_matrix[, 1])
|
|
196
|
+
genesymbols <- unlist(genes_matrix[, 2])
|
|
197
|
+
adjust_p_values <- p.adjust(pvalues, method = "fdr")
|
|
198
|
+
} else if (DE_method == "limma") {
|
|
199
|
+
# Do voom transformation
|
|
200
|
+
voom_transformation_time <- system.time({
|
|
201
|
+
suppressWarnings({
|
|
202
|
+
suppressMessages({
|
|
203
|
+
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
204
|
+
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
205
|
+
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
206
|
+
fit_image_name <- paste0("limma_voom_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
207
|
+
png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
208
|
+
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
209
|
+
suppressWarnings({
|
|
210
|
+
suppressMessages({
|
|
211
|
+
y <- voom(y, design, plot = TRUE)
|
|
212
|
+
})
|
|
213
|
+
})
|
|
214
|
+
dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
215
|
+
})
|
|
216
|
+
})
|
|
217
|
+
})
|
|
218
|
+
#cat("voom transformation time: ", as.difftime(voom_transformation_time, units = "secs")[3], " seconds\n")
|
|
219
|
+
|
|
220
|
+
# Fit linear model
|
|
221
|
+
fit_time <- system.time({
|
|
222
|
+
suppressWarnings({
|
|
223
|
+
suppressMessages({
|
|
224
|
+
fit <- lmFit(y, design, plot = FALSE)
|
|
225
|
+
})
|
|
226
|
+
})
|
|
227
|
+
})
|
|
228
|
+
#cat("limma fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
|
|
229
|
+
|
|
230
|
+
# Saving mean-difference plot (aka MA plot)
|
|
231
|
+
#set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
232
|
+
#cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
233
|
+
#random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
234
|
+
#md_image_name <- paste0("limma_md_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
235
|
+
#png(filename = paste0(cachedir,"/",md_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
236
|
+
#par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
237
|
+
#plotMD(fit) # Plot the limma fit
|
|
238
|
+
## dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
239
|
+
|
|
240
|
+
# Empirical Bayes smoothing
|
|
241
|
+
empirical_smoothing_time <- system.time({
|
|
242
|
+
suppressWarnings({
|
|
243
|
+
suppressMessages({
|
|
244
|
+
tmp <- eBayes(fit)
|
|
245
|
+
})
|
|
246
|
+
})
|
|
247
|
+
})
|
|
248
|
+
#cat("Empirical smoothing time: ", as.difftime(empirical_smoothing_time, units = "secs")[3], " seconds\n")
|
|
249
|
+
|
|
250
|
+
# Time for selecting top genes
|
|
251
|
+
top_genes_selection_time <- system.time({
|
|
252
|
+
suppressWarnings({
|
|
253
|
+
suppressMessages({
|
|
254
|
+
top_table <- topTable(tmp, coef = "conditionsDiseased", number = Inf, adjust.method = "fdr") # The coeff needs to be specified in topTable() because it needs to know for which contrast the logFC needs to be calculated https://www.biostars.org/p/160465/
|
|
255
|
+
logfc <- top_table$logFC
|
|
256
|
+
pvalues <- top_table$P.Value
|
|
257
|
+
genes_matrix <- str_split_fixed(unlist(top_table$genes), "\t", 2)
|
|
258
|
+
geneids <- unlist(genes_matrix[, 1])
|
|
259
|
+
genesymbols <- unlist(genes_matrix[, 2])
|
|
260
|
+
adjust_p_values <- top_table$adj.P.Val
|
|
261
|
+
})
|
|
262
|
+
})
|
|
263
|
+
})
|
|
264
|
+
#cat("Time for selecting top genes: ", as.difftime(top_genes_selection_time, units = "secs")[3], " seconds\n")
|
|
265
|
+
} else { # Should not happen
|
|
266
|
+
stop(paste0("Unknown method:", DE_method))
|
|
186
267
|
}
|
|
187
268
|
|
|
188
|
-
|
|
189
|
-
set.seed(as.integer(Sys.time())) # Set the seed according to current time
|
|
190
|
-
cachedir <- input$cachedir # Importing serverconfig.cachedir
|
|
191
|
-
random_number <- runif(1, min = 0, max = 1) # Generating random number
|
|
192
|
-
ql_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
|
|
193
|
-
png(filename = paste0(cachedir,"/",ql_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
|
|
194
|
-
par(oma = c(0, 0, 0, 0)) # Creating a margin
|
|
195
|
-
plotQLDisp(fit) # Plot the edgeR fit
|
|
196
|
-
# dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
|
|
197
|
-
|
|
198
|
-
# Multiple testing correction
|
|
199
|
-
multiple_testing_correction_time <- system.time({
|
|
200
|
-
logfc <- et$table$logFC
|
|
201
|
-
logcpm <- et$table$logCPM
|
|
202
|
-
pvalues <- et$table$PValue
|
|
203
|
-
genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
|
|
204
|
-
geneids <- unlist(genes_matrix[, 1])
|
|
205
|
-
genesymbols <- unlist(genes_matrix[, 2])
|
|
206
|
-
adjust_p_values <- p.adjust(pvalues, method = "fdr")
|
|
269
|
+
final_data_generation_time <- system.time({
|
|
207
270
|
output <- data.frame(geneids, genesymbols, logfc, -log10(pvalues), -log10(adjust_p_values))
|
|
208
271
|
names(output)[1] <- "gene_name"
|
|
209
272
|
names(output)[2] <- "gene_symbol"
|
|
@@ -213,17 +276,11 @@ multiple_testing_correction_time <- system.time({
|
|
|
213
276
|
})
|
|
214
277
|
final_output <- c()
|
|
215
278
|
final_output$gene_data <- output
|
|
216
|
-
final_output$edgeR_ql_image_name <-
|
|
217
|
-
|
|
218
|
-
|
|
279
|
+
final_output$edgeR_ql_image_name <- fit_image_name
|
|
280
|
+
if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
|
|
281
|
+
final_output$edgeR_mds_image_name <- mds_image_name
|
|
282
|
+
}
|
|
283
|
+
#cat("Time for generating final dataframe: ", as.difftime(final_data_generation_time, unit = "secs")[3], " seconds\n")
|
|
219
284
|
|
|
220
285
|
# Output results
|
|
221
286
|
toJSON(final_output)
|
|
222
|
-
|
|
223
|
-
#-----------------------------------#
|
|
224
|
-
# Will implement this later
|
|
225
|
-
filter_genes_by_group_variance <- function(read_counts, gene_id_symbols, num_variable_genes, cases, controls) {
|
|
226
|
-
# Divide the read counts into two groups
|
|
227
|
-
case_read_counts <- read_counts[, cases]
|
|
228
|
-
control_read_counts <- read_counts[, controls]
|
|
229
|
-
}
|