@sjcrh/proteinpaint-server 2.113.0 → 2.115.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -126,7 +126,7 @@ if (serverconfig.debugmode && !serverconfig.binpath.includes('sjcrh/')) {
126
126
  const routeSetters = []
127
127
  const defaultDir = path.join(serverconfig.binpath, 'src/test/routes')
128
128
  // will add testing routes as needed and if found, such as in dev environment
129
- const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js', 'closeCoverage.js']
129
+ const testRouteSetters = ['gdc.js', 'specs.js', 'readme.js', 'coverage.js']
130
130
  if (serverconfig.features.sse === undefined) serverconfig.features.sse = true
131
131
  if (typeof serverconfig.features.sse !== 'boolean') {
132
132
  throw `serverconfig.features.sse must be either undefined or boolean`
package/utils/edge.R CHANGED
@@ -2,70 +2,70 @@
2
2
 
3
3
  # Load required packages
4
4
  suppressWarnings({
5
- library(jsonlite)
6
- library(rhdf5)
7
- library(stringr)
8
- library(readr)
9
- suppressPackageStartupMessages(library(edgeR))
10
- suppressPackageStartupMessages(library(dplyr))
5
+ library(jsonlite)
6
+ library(rhdf5)
7
+ library(stringr)
8
+ library(readr)
9
+ suppressPackageStartupMessages(library(edgeR))
10
+ suppressPackageStartupMessages(library(dplyr))
11
11
  })
12
12
 
13
13
  # Filter based on CPM
14
14
  filter_using_cpm <- function(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) {
15
- selr <- rowSums(cpm(y$counts)>gene_cpm_cutoff)>=sample_cpm_cutoff
16
- selc <- colSums(cpm(y$counts))>=count_cpm_cutoff
17
- y <- y[selr, selc]
15
+ selr <- rowSums(cpm(y$counts)>gene_cpm_cutoff)>=sample_cpm_cutoff
16
+ selc <- colSums(cpm(y$counts))>=count_cpm_cutoff
17
+ y <- y[selr, selc]
18
18
  }
19
19
 
20
20
  # Read JSON input from stdin
21
21
  read_json_time <- system.time({
22
- con <- file("stdin", "r")
23
- json <- readLines(con, warn=FALSE)
24
- close(con)
25
- input <- fromJSON(json)
26
- cases <- unlist(strsplit(input$case, ","))
27
- controls <- unlist(strsplit(input$control, ","))
28
- combined <- c("geneID", "geneSymbol", cases, controls)
22
+ con <- file("stdin", "r")
23
+ json <- readLines(con, warn=FALSE)
24
+ close(con)
25
+ input <- fromJSON(json)
26
+ cases <- unlist(strsplit(input$case, ","))
27
+ controls <- unlist(strsplit(input$control, ","))
28
+ combined <- c("geneID", "geneSymbol", cases, controls)
29
29
  })
30
30
  #cat("Time to read JSON: ", as.difftime(read_json_time, units = "secs")[3], " seconds\n")
31
31
 
32
32
  # Read counts data
33
33
  read_counts_time <- system.time({
34
- if (input$storage_type == "HDF5") {
35
- geneIDs <- h5read(input$input_file, "gene_names")
36
- geneSymbols <- h5read(input$input_file, "gene_symbols")
37
- samples <- h5read(input$input_file, "samples")
34
+ if (input$storage_type == "HDF5") {
35
+ geneIDs <- h5read(input$input_file, "gene_names")
36
+ geneSymbols <- h5read(input$input_file, "gene_symbols")
37
+ samples <- h5read(input$input_file, "samples")
38
38
 
39
- # Find indices of case and control samples in the HDF5 file
40
- case_indices <- match(cases, samples)
41
- control_indices <- match(controls, samples)
39
+ # Find indices of case and control samples in the HDF5 file
40
+ case_indices <- match(cases, samples)
41
+ control_indices <- match(controls, samples)
42
42
 
43
- # Check for missing samples
44
- if (any(is.na(case_indices))) {
45
- missing_cases <- cases[is.na(case_indices)]
46
- stop(paste(missing_cases, "not found"))
47
- }
48
- if (any(is.na(control_indices))) {
49
- missing_controls <- controls[is.na(control_indices)]
50
- stop(paste(missing_controls, "not found"))
51
- }
52
-
53
- samples_indices <- c(case_indices, control_indices)
54
- read_counts <- as.data.frame(t(h5read(input$input_file, "counts", index = list(samples_indices, 1:length(geneIDs)))))
55
- colnames(read_counts) <- c(cases, controls)
56
- } else if (input$storage_type == "text") {
57
- suppressWarnings({
58
- suppressMessages({
59
- read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
60
- })
61
- })
62
- geneIDs <- unlist(read_counts[1])
63
- geneSymbols <- unlist(read_counts[2])
64
- read_counts <- select(read_counts, -geneID)
65
- read_counts <- select(read_counts, -geneSymbol)
66
- } else {
67
- stop("Unknown storage type")
43
+ # Check for missing samples
44
+ if (any(is.na(case_indices))) {
45
+ missing_cases <- cases[is.na(case_indices)]
46
+ stop(paste(missing_cases, "not found"))
68
47
  }
48
+ if (any(is.na(control_indices))) {
49
+ missing_controls <- controls[is.na(control_indices)]
50
+ stop(paste(missing_controls, "not found"))
51
+ }
52
+
53
+ samples_indices <- c(case_indices, control_indices)
54
+ read_counts <- as.data.frame(t(h5read(input$input_file, "counts", index = list(samples_indices, 1:length(geneIDs)))))
55
+ colnames(read_counts) <- c(cases, controls)
56
+ } else if (input$storage_type == "text") {
57
+ suppressWarnings({
58
+ suppressMessages({
59
+ read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
60
+ })
61
+ })
62
+ geneIDs <- unlist(read_counts[1])
63
+ geneSymbols <- unlist(read_counts[2])
64
+ read_counts <- select(read_counts, -geneID)
65
+ read_counts <- select(read_counts, -geneSymbol)
66
+ } else {
67
+ stop("Unknown storage type")
68
+ }
69
69
  })
70
70
  #cat("Time to read counts data: ", as.difftime(read_counts_time, units = "secs")[3], " seconds\n")
71
71
 
@@ -75,210 +75,210 @@ gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
75
75
 
76
76
  # Create DGEList object
77
77
  dge_list_time <- system.time({
78
- y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
78
+ y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
79
79
  })
80
80
  #cat("Time to generate DGEList: ", as.difftime(dge_list_time, units = "secs")[3], " seconds\n")
81
81
 
82
82
  # Filter and normalize counts
83
83
  filter_time <- system.time({
84
- keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
84
+ keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
85
85
  })
86
86
  #cat("Time to filter by expression: ", as.difftime(filter_time, unit = "secs")[3], " seconds\n")
87
87
 
88
88
  normalization_time <- system.time({
89
- y <- y[keep, keep.lib.sizes = FALSE]
90
- y <- normLibSizes(y) # Using TMM method for normalization
89
+ y <- y[keep, keep.lib.sizes = FALSE]
90
+ y <- normLibSizes(y) # Using TMM method for normalization
91
91
  })
92
92
  #cat("Normalization time: ", as.difftime(normalization_time, units = "secs")[3], " seconds\n")
93
93
 
94
94
  # Cutoffs for cpm, will add them as UI options later
95
95
  if (length(samples_indices) > 100) {
96
- gene_cpm_cutoff <- 15
97
- sample_cpm_cutoff <- 30
98
- count_cpm_cutoff <- 100000
96
+ gene_cpm_cutoff <- 15
97
+ sample_cpm_cutoff <- 30
98
+ count_cpm_cutoff <- 100000
99
99
  } else {
100
- gene_cpm_cutoff <- 5
101
- sample_cpm_cutoff <- 15
102
- count_cpm_cutoff <- 100000
100
+ gene_cpm_cutoff <- 5
101
+ sample_cpm_cutoff <- 15
102
+ count_cpm_cutoff <- 100000
103
103
  }
104
104
 
105
105
  filter_using_cpm_time <- system.time({
106
- y <- filter_using_cpm(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) # Filtering counts matrix based on gene_cpm_cutoff, sample_cpm_cutoff and count_cpm_cutoff
107
- })
106
+ y <- filter_using_cpm(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) # Filtering counts matrix based on gene_cpm_cutoff, sample_cpm_cutoff and count_cpm_cutoff
107
+ })
108
108
  #cat("Filter using cpm time: ", as.difftime(filter_using_cpm_time, units = "secs")[3], " seconds\n")
109
109
 
110
110
  # Saving MDS plot image
111
111
 
112
112
  if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
113
- mds_plot_time <- system.time({
114
- set.seed(as.integer(Sys.time())) # Set the seed according to current time
115
- cachedir <- input$cachedir # Importing serverconfig.cachedir
116
- random_number <- runif(1, min = 0, max = 1) # Generating random number
117
- mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
118
- png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
119
- par(oma = c(0, 0, 0, 0)) # Creating a margin
120
- mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
121
- plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
122
- # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
123
- })
124
- #cat("mds plot time: ", as.difftime(mds_plot_time, units = "secs")[3], " seconds\n")
113
+ mds_plot_time <- system.time({
114
+ set.seed(as.integer(Sys.time())) # Set the seed according to current time
115
+ cachedir <- input$cachedir # Importing serverconfig.cachedir
116
+ random_number <- runif(1, min = 0, max = 1) # Generating random number
117
+ mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
118
+ png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
119
+ par(oma = c(0, 0, 0, 0)) # Creating a margin
120
+ mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
121
+ plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
122
+ # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
123
+ })
124
+ #cat("mds plot time: ", as.difftime(mds_plot_time, units = "secs")[3], " seconds\n")
125
125
  }
126
126
 
127
127
  # Differential expression analysis
128
128
  if (length(input$conf1) == 0) { # No adjustment of confounding factors
129
- design <- model.matrix(~conditions) # Based on the protocol defined in section 1.4 of edgeR manual https://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
129
+ design <- model.matrix(~conditions) # Based on the protocol defined in section 1.4 of edgeR manual https://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
130
130
  } else { # Adjusting for confounding factors
131
- # Check the type of confounding variable
132
- if (input$conf1_mode == "continuous") { # If this is float, the input conf1 vector should be converted into a numeric vector
133
- conf1 <- as.numeric(input$conf1)
134
- } else { # When input$conf1_mode == "discrete" keep the vector as string.
135
- conf1 <- as.factor(input$conf1)
136
- }
137
-
138
- if (length(input$conf2) == 0) { # No adjustment of confounding factor 2
139
- y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1)
140
- model_gen_time <- system.time({
141
- design <- model.matrix(~ conditions + conf1, data = y$samples)
142
- })
143
- #cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
144
- } else {
145
- # Check the type of confounding variable 2
146
- if (input$conf2_mode == "continuous") { # If this is float, the input conf2 vector should be converted into a numeric vector
147
- conf2 <- as.numeric(input$conf2)
148
- } else { # When input$conf2_mode == "discrete" keep the vector as string.
149
- conf2 <- as.factor(input$conf2)
150
- }
151
- y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1, conf2 = conf2)
152
- model_gen_time <- system.time({
153
- design <- model.matrix(~ conditions + conf1 + conf2, data = y$samples)
154
- })
155
- #cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
131
+ # Check the type of confounding variable
132
+ if (input$conf1_mode == "continuous") { # If this is float, the input conf1 vector should be converted into a numeric vector
133
+ conf1 <- as.numeric(input$conf1)
134
+ } else { # When input$conf1_mode == "discrete" keep the vector as string.
135
+ conf1 <- as.factor(input$conf1)
136
+ }
156
137
 
138
+ if (length(input$conf2) == 0) { # No adjustment of confounding factor 2
139
+ y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1)
140
+ model_gen_time <- system.time({
141
+ design <- model.matrix(~ conditions + conf1, data = y$samples)
142
+ })
143
+ #cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
144
+ } else {
145
+ # Check the type of confounding variable 2
146
+ if (input$conf2_mode == "continuous") { # If this is float, the input conf2 vector should be converted into a numeric vector
147
+ conf2 <- as.numeric(input$conf2)
148
+ } else { # When input$conf2_mode == "discrete" keep the vector as string.
149
+ conf2 <- as.factor(input$conf2)
157
150
  }
151
+ y$samples <- data.frame(y$samples, conditions = conditions, conf1 = conf1, conf2 = conf2)
152
+ model_gen_time <- system.time({
153
+ design <- model.matrix(~ conditions + conf1 + conf2, data = y$samples)
154
+ })
155
+ #cat("Time for making design matrix: ", as.difftime(model_gen_time, units = "secs")[3], " seconds\n")
156
+
157
+ }
158
158
  }
159
159
 
160
160
  DE_method <- input$DE_method
161
161
  if (DE_method == "edgeR") {
162
- fit_time <- system.time({
163
- suppressWarnings({
164
- suppressMessages({
165
- fit <- glmQLFit(y,design) # The glmQLFit() replaces glmFit() which implements the quasi-likelihood function. This is better able to account for overdispersion as it employs a more lenient approach where variance is not a fixed function of the mean.
166
- })
167
- })
162
+ fit_time <- system.time({
163
+ suppressWarnings({
164
+ suppressMessages({
165
+ fit <- glmQLFit(y,design) # The glmQLFit() replaces glmFit() which implements the quasi-likelihood function. This is better able to account for overdispersion as it employs a more lenient approach where variance is not a fixed function of the mean.
168
166
  })
169
- #cat("QL fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
170
- test_time <- system.time({
171
- suppressWarnings({
172
- suppressMessages({
173
- et <- glmQLFTest(fit, coef = "conditionsDiseased")
174
- })
175
- })
167
+ })
168
+ })
169
+ #cat("QL fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
170
+ test_time <- system.time({
171
+ suppressWarnings({
172
+ suppressMessages({
173
+ et <- glmQLFTest(fit, coef = "conditionsDiseased")
176
174
  })
177
- #cat("QL test time: ", as.difftime(test_time, units = "secs")[3], " seconds\n")
178
-
179
- # Saving QL fit image
180
- ql_plot_time <- system.time({
181
- set.seed(as.integer(Sys.time())) # Set the seed according to current time
182
- cachedir <- input$cachedir # Importing serverconfig.cachedir
183
- random_number <- runif(1, min = 0, max = 1) # Generating random number
184
- fit_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
185
- png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
186
- par(oma = c(0, 0, 0, 0)) # Creating a margin
187
- plotQLDisp(fit) # Plot the edgeR fit
188
- # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
189
- })
190
- #cat("ql plot time: ", as.difftime(ql_plot_time, units = "secs")[3], " seconds\n")
191
- logfc <- et$table$logFC
192
- logcpm <- et$table$logCPM
193
- pvalues <- et$table$PValue
194
- genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
195
- geneids <- unlist(genes_matrix[, 1])
196
- genesymbols <- unlist(genes_matrix[, 2])
197
- adjust_p_values <- p.adjust(pvalues, method = "fdr")
175
+ })
176
+ })
177
+ #cat("QL test time: ", as.difftime(test_time, units = "secs")[3], " seconds\n")
178
+
179
+ # Saving QL fit image
180
+ ql_plot_time <- system.time({
181
+ set.seed(as.integer(Sys.time())) # Set the seed according to current time
182
+ cachedir <- input$cachedir # Importing serverconfig.cachedir
183
+ random_number <- runif(1, min = 0, max = 1) # Generating random number
184
+ fit_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
185
+ png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
186
+ par(oma = c(0, 0, 0, 0)) # Creating a margin
187
+ plotQLDisp(fit) # Plot the edgeR fit
188
+ # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
189
+ })
190
+ #cat("ql plot time: ", as.difftime(ql_plot_time, units = "secs")[3], " seconds\n")
191
+ logfc <- et$table$logFC
192
+ logcpm <- et$table$logCPM
193
+ pvalues <- et$table$PValue
194
+ genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
195
+ geneids <- unlist(genes_matrix[, 1])
196
+ genesymbols <- unlist(genes_matrix[, 2])
197
+ adjust_p_values <- p.adjust(pvalues, method = "fdr")
198
198
  } else if (DE_method == "limma") {
199
- # Do voom transformation
200
- voom_transformation_time <- system.time({
201
- suppressWarnings({
202
- suppressMessages({
203
- set.seed(as.integer(Sys.time())) # Set the seed according to current time
204
- cachedir <- input$cachedir # Importing serverconfig.cachedir
205
- random_number <- runif(1, min = 0, max = 1) # Generating random number
206
- fit_image_name <- paste0("limma_voom_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
207
- png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
208
- par(oma = c(0, 0, 0, 0)) # Creating a margin
209
- suppressWarnings({
210
- suppressMessages({
211
- y <- voom(y, design, plot = TRUE)
212
- })
213
- })
214
- dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
215
- })
216
- })
217
- })
218
- #cat("voom transformation time: ", as.difftime(voom_transformation_time, units = "secs")[3], " seconds\n")
199
+ # Do voom transformation
200
+ voom_transformation_time <- system.time({
201
+ suppressWarnings({
202
+ suppressMessages({
203
+ set.seed(as.integer(Sys.time())) # Set the seed according to current time
204
+ cachedir <- input$cachedir # Importing serverconfig.cachedir
205
+ random_number <- runif(1, min = 0, max = 1) # Generating random number
206
+ fit_image_name <- paste0("limma_voom_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
207
+ png(filename = paste0(cachedir,"/",fit_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
208
+ par(oma = c(0, 0, 0, 0)) # Creating a margin
209
+ suppressWarnings({
210
+ suppressMessages({
211
+ y <- voom(y, design, plot = TRUE)
212
+ })
213
+ })
214
+ dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
215
+ })
216
+ })
217
+ })
218
+ #cat("voom transformation time: ", as.difftime(voom_transformation_time, units = "secs")[3], " seconds\n")
219
219
 
220
- # Fit linear model
221
- fit_time <- system.time({
222
- suppressWarnings({
223
- suppressMessages({
224
- fit <- lmFit(y, design, plot = FALSE)
225
- })
226
- })
227
- })
228
- #cat("limma fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
220
+ # Fit linear model
221
+ fit_time <- system.time({
222
+ suppressWarnings({
223
+ suppressMessages({
224
+ fit <- lmFit(y, design, plot = FALSE)
225
+ })
226
+ })
227
+ })
228
+ #cat("limma fit time: ", as.difftime(fit_time, units = "secs")[3], " seconds\n")
229
229
 
230
- # Saving mean-difference plot (aka MA plot)
231
- #set.seed(as.integer(Sys.time())) # Set the seed according to current time
232
- #cachedir <- input$cachedir # Importing serverconfig.cachedir
233
- #random_number <- runif(1, min = 0, max = 1) # Generating random number
234
- #md_image_name <- paste0("limma_md_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
235
- #png(filename = paste0(cachedir,"/",md_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
236
- #par(oma = c(0, 0, 0, 0)) # Creating a margin
237
- #plotMD(fit) # Plot the limma fit
238
- ## dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
230
+ # Saving mean-difference plot (aka MA plot)
231
+ #set.seed(as.integer(Sys.time())) # Set the seed according to current time
232
+ #cachedir <- input$cachedir # Importing serverconfig.cachedir
233
+ #random_number <- runif(1, min = 0, max = 1) # Generating random number
234
+ #md_image_name <- paste0("limma_md_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
235
+ #png(filename = paste0(cachedir,"/",md_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
236
+ #par(oma = c(0, 0, 0, 0)) # Creating a margin
237
+ #plotMD(fit) # Plot the limma fit
238
+ ## dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
239
239
 
240
- # Empirical Bayes smoothing
241
- empirical_smoothing_time <- system.time({
242
- suppressWarnings({
243
- suppressMessages({
244
- tmp <- eBayes(fit)
245
- })
246
- })
247
- })
248
- #cat("Empirical smoothing time: ", as.difftime(empirical_smoothing_time, units = "secs")[3], " seconds\n")
240
+ # Empirical Bayes smoothing
241
+ empirical_smoothing_time <- system.time({
242
+ suppressWarnings({
243
+ suppressMessages({
244
+ tmp <- eBayes(fit)
245
+ })
246
+ })
247
+ })
248
+ #cat("Empirical smoothing time: ", as.difftime(empirical_smoothing_time, units = "secs")[3], " seconds\n")
249
249
 
250
- # Time for selecting top genes
251
- top_genes_selection_time <- system.time({
252
- suppressWarnings({
253
- suppressMessages({
254
- top_table <- topTable(tmp, coef = "conditionsDiseased", number = Inf, adjust.method = "fdr") # The coeff needs to be specified in topTable() because it needs to know for which contrast the logFC needs to be calculated https://www.biostars.org/p/160465/
255
- logfc <- top_table$logFC
256
- pvalues <- top_table$P.Value
257
- genes_matrix <- str_split_fixed(unlist(top_table$genes), "\t", 2)
258
- geneids <- unlist(genes_matrix[, 1])
259
- genesymbols <- unlist(genes_matrix[, 2])
260
- adjust_p_values <- top_table$adj.P.Val
261
- })
262
- })
263
- })
264
- #cat("Time for selecting top genes: ", as.difftime(top_genes_selection_time, units = "secs")[3], " seconds\n")
250
+ # Time for selecting top genes
251
+ top_genes_selection_time <- system.time({
252
+ suppressWarnings({
253
+ suppressMessages({
254
+ top_table <- topTable(tmp, coef = "conditionsDiseased", number = Inf, adjust.method = "fdr") # The coeff needs to be specified in topTable() because it needs to know for which contrast the logFC needs to be calculated https://www.biostars.org/p/160465/
255
+ logfc <- top_table$logFC
256
+ pvalues <- top_table$P.Value
257
+ genes_matrix <- str_split_fixed(unlist(top_table$genes), "\t", 2)
258
+ geneids <- unlist(genes_matrix[, 1])
259
+ genesymbols <- unlist(genes_matrix[, 2])
260
+ adjust_p_values <- top_table$adj.P.Val
261
+ })
262
+ })
263
+ })
264
+ #cat("Time for selecting top genes: ", as.difftime(top_genes_selection_time, units = "secs")[3], " seconds\n")
265
265
  } else { # Should not happen
266
- stop(paste0("Unknown method:", DE_method))
266
+ stop(paste0("Unknown method:", DE_method))
267
267
  }
268
268
 
269
269
  final_data_generation_time <- system.time({
270
- output <- data.frame(geneids, genesymbols, logfc, -log10(pvalues), -log10(adjust_p_values))
271
- names(output)[1] <- "gene_name"
272
- names(output)[2] <- "gene_symbol"
273
- names(output)[3] <- "fold_change"
274
- names(output)[4] <- "original_p_value"
275
- names(output)[5] <- "adjusted_p_value"
270
+ output <- data.frame(geneids, genesymbols, logfc, pvalues, adjust_p_values)
271
+ names(output)[1] <- "gene_name"
272
+ names(output)[2] <- "gene_symbol"
273
+ names(output)[3] <- "fold_change"
274
+ names(output)[4] <- "original_p_value"
275
+ names(output)[5] <- "adjusted_p_value"
276
276
  })
277
277
  final_output <- c()
278
278
  final_output$gene_data <- output
279
279
  final_output$edgeR_ql_image_name <- fit_image_name
280
280
  if (dim(read_counts)[1] * dim(read_counts)[2] < as.numeric(input$mds_cutoff)) { # If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive
281
- final_output$edgeR_mds_image_name <- mds_image_name
281
+ final_output$edgeR_mds_image_name <- mds_image_name
282
282
  }
283
283
  #cat("Time for generating final dataframe: ", as.difftime(final_data_generation_time, unit = "secs")[3], " seconds\n")
284
284
 
@@ -24,7 +24,8 @@
24
24
  #######
25
25
 
26
26
  # prepare data table
27
- prepareDataTable <- function(dat, independent) {
27
+ prepareDataTable <- function(tempdat, independent) {
28
+ dat <- tempdat[,colnames(tempdat) != "__sample"] # remove sample column
28
29
  for (r in 1:nrow(independent)) {
29
30
  variable <- independent[r,]
30
31
  id <- variable$id