@sjcrh/proteinpaint-server 2.112.0 → 2.113.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ import { fileURLToPath } from 'url'
9
9
 
10
10
  // import.meta.dirname is undefined when using docker dev environment
11
11
  // use __dirname and __filename global variable convention from commonjs
12
- const __dirname = import.meta.dirname || (new URL('.', import.meta.url)).pathname
12
+ const __dirname = import.meta.dirname || new URL('.', import.meta.url).pathname
13
13
  const __filename = import.meta.filename || fileURLToPath(import.meta.url)
14
14
 
15
15
  // do not assume that serverconfig.json is in the same dir as server.js
@@ -112,8 +112,7 @@ if (!serverconfig.binpath) {
112
112
  } else {
113
113
  if (fs.existsSync('./server')) serverconfig.binpath = fs.realpathSync('./server')
114
114
  else if (fs.existsSync('./src')) serverconfig.binpath = fs.realpathSync('./src/..')
115
- else if (__dirname.includes('/server/'))
116
- serverconfig.binpath = __dirname.split('/server/')[0] + '/server'
115
+ else if (__dirname.includes('/server/')) serverconfig.binpath = __dirname.split('/server/')[0] + '/server'
117
116
  else if (__dirname.includes('/proteinpaint')) serverconfig.binpath = __dirname
118
117
  else throw 'unable to determine the serverconfig.binpath'
119
118
  }
@@ -127,7 +126,7 @@ if (serverconfig.debugmode && !serverconfig.binpath.includes('sjcrh/')) {
127
126
  const routeSetters = []
128
127
  const defaultDir = path.join(serverconfig.binpath, 'src/test/routes')
129
128
  // will add testing routes as needed and if found, such as in dev environment
130
- const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js']
129
+ const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js', 'closeCoverage.js']
131
130
  if (serverconfig.features.sse === undefined) serverconfig.features.sse = true
132
131
  if (typeof serverconfig.features.sse !== 'boolean') {
133
132
  throw `serverconfig.features.sse must be either undefined or boolean`
@@ -231,7 +230,10 @@ if (process.argv.find(a => a == 'validate')) {
231
230
  serverconfig.features.stopGdcCacheAliquot = true
232
231
  }
233
232
 
234
- if (!serverconfig.backend_only && fs.existsSync(path.join(process.cwd(), './public'))) {
233
+ const publicDir = path.join(process.cwd(), './public')
234
+ if (!serverconfig.backend_only && fs.existsSync(publicDir)) serverconfig.publicDir = publicDir
235
+
236
+ if (serverconfig.publicDir) {
235
237
  const defaultTarget = path.join(serverconfig.binpath, 'cards')
236
238
  if (!serverconfig.cards) {
237
239
  serverconfig.cards = {
package/utils/edge.R CHANGED
@@ -27,7 +27,7 @@ read_json_time <- system.time({
27
27
  controls <- unlist(strsplit(input$control, ","))
28
28
  combined <- c("geneID", "geneSymbol", cases, controls)
29
29
  })
30
- #cat("Time to read JSON: ", read_json_time[3], " seconds\n")
30
+ #cat("Time to read JSON: ", as.difftime(read_json_time, units = "secs")[3], " seconds\n")
31
31
 
32
32
  # Read counts data
33
33
  read_counts_time <- system.time({
@@ -67,7 +67,7 @@ read_counts_time <- system.time({
67
67
  stop("Unknown storage type")
68
68
  }
69
69
  })
70
- #cat("Time to read counts data: ", read_counts_time[3], " seconds\n")
70
+ #cat("Time to read counts data: ", as.difftime(read_counts_time, units = "secs")[3], " seconds\n")
71
71
 
72
72
  # Create conditions vector
73
73
  conditions <- c(rep("Diseased", length(cases)), rep("Control", length(controls)))
@@ -77,19 +77,19 @@ gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
77
77
  dge_list_time <- system.time({
78
78
  y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
79
79
  })
80
- #cat("Time to generate DGEList: ", dge_list_time[3], " seconds\n")
80
+ #cat("Time to generate DGEList: ", as.difftime(dge_list_time, units = "secs")[3], " seconds\n")
81
81
 
82
82
  # Filter and normalize counts
83
83
  filter_time <- system.time({
84
84
  keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
85
85
  })
86
- #cat("Time to filter by expression: ", filter_time[3], " seconds\n")
86
+ #cat("Time to filter by expression: ", as.difftime(filter_time, unit = "secs")[3], " seconds\n")
87
87
 
88
88
  normalization_time <- system.time({
89
89
  y <- y[keep, keep.lib.sizes = FALSE]
90
90
  y <- normLibSizes(y) # Using TMM method for normalization
91
91
  })
92
- #cat("Normalization time: ", normalization_time[3], " seconds\n")
92
+ #cat("Normalization time: ", as.difftime(normalization_time, units = "secs")[3], " seconds\n")
93
93
 
94
94
  # Cutoffs for cpm, will add them as UI options later
95
95
  if (length(samples_indices) > 100) {
@@ -105,40 +105,28 @@ if (length(samples_indices) > 100) {
105
105
  filter_using_cpm_time <- system.time({
106
106
  y <- filter_using_cpm(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) # Filtering counts matrix based on gene_cpm_cutoff, sample_cpm_cutoff and count_cpm_cutoff
107
107
  })
108
- #cat("Filter using cpm time: ", filter_using_cpm_time[3], " seconds\n")
108
+ #cat("Filter using cpm time: ", as.difftime(filter_using_cpm_time, units = "secs")[3], " seconds\n")
109
109
 
110
110
  # Saving MDS plot image
111
- set.seed(as.integer(Sys.time())) # Set the seed according to current time
112
- cachedir <- input$cachedir # Importing serverconfig.cachedir
113
- random_number <- runif(1, min = 0, max = 1) # Generating random number
114
- mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
115
- png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
116
- par(oma = c(0, 0, 0, 0)) # Creating a margin
117
- mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
118
- plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
119
- # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
120
111
 
112
+ if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
113
+ mds_plot_time <- system.time({
114
+ set.seed(as.integer(Sys.time())) # Set the seed according to current time
115
+ cachedir <- input$cachedir # Importing serverconfig.cachedir
116
+ random_number <- runif(1, min = 0, max = 1) # Generating random number
117
+ mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
118
+ png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
119
+ par(oma = c(0, 0, 0, 0)) # Creating a margin
120
+ mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
121
+ plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
122
+ # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
123
+ })
124
+ #cat("mds plot time: ", as.difftime(mds_plot_time, units = "secs")[3], " seconds\n")
125
+ }
121
126
 
122
127
  # Differential expression analysis
123
128
  if (length(input$conf1) == 0) { # No adjustment of confounding factors
124
129
  design <- model.matrix(~conditions) # Based on the protocol defined in section 1.4 of edgeR manual https://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
125
- fit_time <- system.time({
126
- suppressWarnings({
127
- suppressMessages({
128
- fit <- glmQLFit(y,design) # The glmQLFit() replaces glmFit() which implements the quasi-likelihood function. This is better able to account for overdispersion as it employs a more lenient approach where variance is not a fixed function of the mean.
129
- })
130
- })
131
- })
132
- #cat("QL fit time: ", fit_time[3], " seconds\n")
133
-
134
- test_time <- system.time({
135
- suppressWarnings({
136
- suppressMessages({
137
- et <- glmQLFTest(fit)
138
- })
139
- })
140
- })
141
- #cat("QL test time: ", test_time[3], " seconds\n")
142
130
  } else { # Adjusting for confounding factors
143
131
  # Check the type of confounding variable
144
132
  if (input$conf1_mode == "continuous") { # If this is float, the input conf1 vector should be converted into a numeric vector
@@ -152,7 +140,7 @@ if (length(input$conf1) == 0) { # No adjustment of confounding factors
152
140
  model_gen_time <- system.time({
153
141
  design <- model.matrix(~ conditions + conf1, data = y$samples)
154
142
  })
155
- #cat("Time for making design matrix: ", model_gen_time[3], " seconds\n")
143
+ #cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
156
144
  } else {
157
145
  # Check the type of confounding variable 2
158
146
  if (input$conf2_mode == "continuous") { # If this is float, the input conf2 vector should be converted into a numeric vector
@@ -164,46 +152,121 @@ if (length(input$conf1) == 0) { # No adjustment of confounding factors
164
152
  model_gen_time <- system.time({
165
153
  design <- model.matrix(~ conditions + conf1 + conf2, data = y$samples)
166
154
  })
167
- #cat("Time for making design matrix: ", model_gen_time[3], " seconds\n")
155
+ #cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
156
+
168
157
  }
158
+ }
169
159
 
170
- fit_time <- system.time({
171
- suppressWarnings({
172
- suppressMessages({
173
- fit <- glmQLFit(y,design)
174
- })
175
- })
176
- })
177
- #cat("QL fit time: ", fit_time[3], " seconds\n")
178
- test_time <- system.time({
179
- suppressWarnings({
180
- suppressMessages({
181
- et <- glmQLFTest(fit, coef = "conditionsDiseased")
182
- })
183
- })
184
- })
185
- #cat("QL test time: ", test_time[3], " seconds\n")
160
+ DE_method <- input$DE_method
161
+ if (DE_method == "edgeR") {
162
+ fit_time <- system.time({
163
+ suppressWarnings({
164
+ suppressMessages({
165
+ fit <- glmQLFit(y,design) # The glmQLFit() replaces glmFit() which implements the quasi-likelihood function. This is better able to account for overdispersion as it employs a more lenient approach where variance is not a fixed function of the mean.
166
+ })
167
+ })
168
+ })
169
+ #cat("QL fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
170
+ test_time <- system.time({
171
+ suppressWarnings({
172
+ suppressMessages({
173
+ et <- glmQLFTest(fit, coef = "conditionsDiseased")
174
+ })
175
+ })
176
+ })
177
+ #cat("QL test time: ", as.difftime(test_time, units = "secs")[3], " seconds\n")
178
+
179
+ # Saving QL fit image
180
+ ql_plot_time <- system.time({
181
+ set.seed(as.integer(Sys.time())) # Set the seed according to current time
182
+ cachedir <- input$cachedir # Importing serverconfig.cachedir
183
+ random_number <- runif(1, min = 0, max = 1) # Generating random number
184
+ fit_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
185
+ png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
186
+ par(oma = c(0, 0, 0, 0)) # Creating a margin
187
+ plotQLDisp(fit) # Plot the edgeR fit
188
+ # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
189
+ })
190
+ #cat("ql plot time: ", as.difftime(ql_plot_time, units = "secs")[3], " seconds\n")
191
+ logfc <- et$table$logFC
192
+ logcpm <- et$table$logCPM
193
+ pvalues <- et$table$PValue
194
+ genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
195
+ geneids <- unlist(genes_matrix[, 1])
196
+ genesymbols <- unlist(genes_matrix[, 2])
197
+ adjust_p_values <- p.adjust(pvalues, method = "fdr")
198
+ } else if (DE_method == "limma") {
199
+ # Do voom transformation
200
+ voom_transformation_time <- system.time({
201
+ suppressWarnings({
202
+ suppressMessages({
203
+ set.seed(as.integer(Sys.time())) # Set the seed according to current time
204
+ cachedir <- input$cachedir # Importing serverconfig.cachedir
205
+ random_number <- runif(1, min = 0, max = 1) # Generating random number
206
+ fit_image_name <- paste0("limma_voom_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
207
+ png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
208
+ par(oma = c(0, 0, 0, 0)) # Creating a margin
209
+ suppressWarnings({
210
+ suppressMessages({
211
+ y <- voom(y, design, plot = TRUE)
212
+ })
213
+ })
214
+ dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
215
+ })
216
+ })
217
+ })
218
+ #cat("voom transformation time: ", as.difftime(voom_transformation_time, units = "secs")[3], " seconds\n")
219
+
220
+ # Fit linear model
221
+ fit_time <- system.time({
222
+ suppressWarnings({
223
+ suppressMessages({
224
+ fit <- lmFit(y, design, plot = FALSE)
225
+ })
226
+ })
227
+ })
228
+ #cat("limma fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
229
+
230
+ # Saving mean-difference plot (aka MA plot)
231
+ #set.seed(as.integer(Sys.time())) # Set the seed according to current time
232
+ #cachedir <- input$cachedir # Importing serverconfig.cachedir
233
+ #random_number <- runif(1, min = 0, max = 1) # Generating random number
234
+ #md_image_name <- paste0("limma_md_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
235
+ #png(filename = paste0(cachedir,"/",md_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
236
+ #par(oma = c(0, 0, 0, 0)) # Creating a margin
237
+ #plotMD(fit) # Plot the limma fit
238
+ ## dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
239
+
240
+ # Empirical Bayes smoothing
241
+ empirical_smoothing_time <- system.time({
242
+ suppressWarnings({
243
+ suppressMessages({
244
+ tmp <- eBayes(fit)
245
+ })
246
+ })
247
+ })
248
+ #cat("Empirical smoothing time: ", as.difftime(empirical_smoothing_time, units = "secs")[3], " seconds\n")
249
+
250
+ # Time for selecting top genes
251
+ top_genes_selection_time <- system.time({
252
+ suppressWarnings({
253
+ suppressMessages({
254
+ top_table <- topTable(tmp, coef = "conditionsDiseased", number = Inf, adjust.method = "fdr") # The coeff needs to be specified in topTable() because it needs to know for which contrast the logFC needs to be calculated https://www.biostars.org/p/160465/
255
+ logfc <- top_table$logFC
256
+ pvalues <- top_table$P.Value
257
+ genes_matrix <- str_split_fixed(unlist(top_table$genes), "\t", 2)
258
+ geneids <- unlist(genes_matrix[, 1])
259
+ genesymbols <- unlist(genes_matrix[, 2])
260
+ adjust_p_values <- top_table$adj.P.Val
261
+ })
262
+ })
263
+ })
264
+ #cat("Time for selecting top genes: ", as.difftime(top_genes_selection_time, units = "secs")[3], " seconds\n")
265
+ } else { # Should not happen
266
+ stop(paste0("Unknown method:", DE_method))
186
267
  }
187
268
 
188
- # Saving QL fit image
189
- set.seed(as.integer(Sys.time())) # Set the seed according to current time
190
- cachedir <- input$cachedir # Importing serverconfig.cachedir
191
- random_number <- runif(1, min = 0, max = 1) # Generating random number
192
- ql_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
193
- png(filename = paste0(cachedir,"/",ql_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
194
- par(oma = c(0, 0, 0, 0)) # Creating a margin
195
- plotQLDisp(fit) # Plot the edgeR fit
196
- # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
197
-
198
- # Multiple testing correction
199
- multiple_testing_correction_time <- system.time({
200
- logfc <- et$table$logFC
201
- logcpm <- et$table$logCPM
202
- pvalues <- et$table$PValue
203
- genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
204
- geneids <- unlist(genes_matrix[, 1])
205
- genesymbols <- unlist(genes_matrix[, 2])
206
- adjust_p_values <- p.adjust(pvalues, method = "fdr")
269
+ final_data_generation_time <- system.time({
207
270
  output <- data.frame(geneids, genesymbols, logfc, -log10(pvalues), -log10(adjust_p_values))
208
271
  names(output)[1] <- "gene_name"
209
272
  names(output)[2] <- "gene_symbol"
@@ -213,17 +276,11 @@ multiple_testing_correction_time <- system.time({
213
276
  })
214
277
  final_output <- c()
215
278
  final_output$gene_data <- output
216
- final_output$edgeR_ql_image_name <- ql_image_name
217
- final_output$edgeR_mds_image_name <- mds_image_name
218
- #cat("Time for multiple testing correction: ", multiple_testing_correction_time[3], " seconds\n")
279
+ final_output$edgeR_ql_image_name <- fit_image_name
280
+ if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
281
+ final_output$edgeR_mds_image_name <- mds_image_name
282
+ }
283
+ #cat("Time for generating final dataframe: ", as.difftime(final_data_generation_time, unit = "secs")[3], " seconds\n")
219
284
 
220
285
  # Output results
221
286
  toJSON(final_output)
222
-
223
- #-----------------------------------#
224
- # Will implement this later
225
- filter_genes_by_group_variance <- function(read_counts, gene_id_symbols, num_variable_genes, cases, controls) {
226
- # Divide the read counts into two groups
227
- case_read_counts <- read_counts[, cases]
228
- control_read_counts <- read_counts[, controls]
229
- }