@sjcrh/proteinpaint-server 2.96.0 → 2.96.2-0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +0 -2
- package/package.json +2 -2
- package/routes/termdb.DE.js +30 -14
- package/routes/termdb.topVariablyExpressedGenes.js +1 -1
- package/src/app.js +82 -41
- package/src/serverconfig.js +6 -4
- package/utils/edge.R +117 -132
package/src/serverconfig.js
CHANGED
|
@@ -115,8 +115,9 @@ if (!serverconfig.binpath) {
|
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
-
if (serverconfig.debugmode) {
|
|
119
|
-
// only apply optional routeSetters in debugmode
|
|
118
|
+
if (serverconfig.debugmode && !serverconfig.binpath.includes('sjcrh/')) {
|
|
119
|
+
// only apply optional routeSetters in debugmode and when the binpath
|
|
120
|
+
// indicates the server code is not installed as a node_module
|
|
120
121
|
const routeSetters = []
|
|
121
122
|
const defaultDir = path.join(serverconfig.binpath, 'src/test/routes')
|
|
122
123
|
// will add testing routes as needed and if found, such as in dev environment
|
|
@@ -146,8 +147,9 @@ if (serverconfig.debugmode) {
|
|
|
146
147
|
// since the serverconfig.binpath prefix may
|
|
147
148
|
// have been applied to locate optional routeSetter files
|
|
148
149
|
serverconfig.routeSetters = routeSetters
|
|
149
|
-
// server-sent events dir
|
|
150
|
-
|
|
150
|
+
// server-sent events dir, can manually set sseDir to false
|
|
151
|
+
// to prevent the default SSE setup in dev
|
|
152
|
+
if (serverconfig.sseDir !== false) serverconfig.sseDir = path.join(serverconfig.binpath, '../.sse')
|
|
151
153
|
}
|
|
152
154
|
|
|
153
155
|
if (serverconfig.allow_env_overrides) {
|
package/utils/edge.R
CHANGED
|
@@ -1,158 +1,143 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
# in_json: [string] input data in JSON format. Streamed through stdin.
|
|
4
|
-
# out_json: [string] clustering results in JSON format. Streamed to stdout.
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
# json='{"case":"SJMB066856,SJMB069601,SJMB030827,SJMB030838,SJMB031131,SJMB031227,SJMB077221,SJMB077223","control":"SJMB069596,SJMB069587,SJMB074736,SJMB030488,SJMB030825,SJMB031110,SJMB032998,SJMB033002","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | Rscript edge.R
|
|
8
|
-
|
|
9
|
-
# json='{"case":"SJMB030827,SJMB030838,SJMB064540,SJMB064538,SJMB064520,SJMB064535,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB064537,SJMB064510,SJMB064533,SJMB064534,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | Rscript edge.R
|
|
10
|
-
|
|
11
|
-
# Checking if all R packages are installed or not, if not installing each one of them
|
|
12
|
-
|
|
13
|
-
#jsonlite_path <- system.file(package='jsonlite')
|
|
14
|
-
#if (nchar(jsonlite_path) == 0) {
|
|
15
|
-
# install.packages("jsonlite", repos='https://cran.case.edu/')
|
|
16
|
-
#}
|
|
17
|
-
#
|
|
18
|
-
#edgeR_path <- system.file(package='edgeR')
|
|
19
|
-
#if (nchar(edgeR_path) == 0) {
|
|
20
|
-
# BiocManager::install("edgeR")
|
|
21
|
-
#}
|
|
22
|
-
#
|
|
23
|
-
#readr_path <- system.file(package='readr')
|
|
24
|
-
#if (nchar(readr_path) == 0) {
|
|
25
|
-
# install.packages("readr", repos='https://cran.case.edu/')
|
|
26
|
-
#}
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
library(jsonlite)
|
|
30
|
-
library(rhdf5)
|
|
31
|
-
library(stringr)
|
|
32
|
-
library(readr)
|
|
1
|
+
# Load required packages
|
|
33
2
|
suppressWarnings({
|
|
3
|
+
library(jsonlite)
|
|
4
|
+
library(rhdf5)
|
|
5
|
+
library(stringr)
|
|
6
|
+
library(readr)
|
|
34
7
|
suppressPackageStartupMessages(library(edgeR))
|
|
35
8
|
suppressPackageStartupMessages(library(dplyr))
|
|
36
9
|
})
|
|
37
10
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
11
|
+
# Read JSON input from stdin
|
|
12
|
+
read_json_time <- system.time({
|
|
13
|
+
con <- file("stdin", "r")
|
|
14
|
+
json <- readLines(con, warn=FALSE)
|
|
15
|
+
close(con)
|
|
16
|
+
input <- fromJSON(json)
|
|
17
|
+
|
|
18
|
+
cases <- unlist(strsplit(input$case, ","))
|
|
19
|
+
controls <- unlist(strsplit(input$control, ","))
|
|
20
|
+
combined <- c("geneID", "geneSymbol", cases, controls)
|
|
21
|
+
})
|
|
22
|
+
cat("Time to read JSON: ", read_json_time[3], " seconds\n")
|
|
44
23
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
combined <- c("geneID","geneSymbol",cases,controls)
|
|
48
|
-
#data %>% select(all_of(combined))
|
|
49
|
-
#read_file_time_start <- Sys.time()
|
|
50
|
-
if (exists(input$storage_type)==FALSE) {
|
|
24
|
+
# Read counts data
|
|
25
|
+
read_counts_time <- system.time({
|
|
51
26
|
if (input$storage_type == "HDF5") {
|
|
52
|
-
#print(h5ls(input$input_file))
|
|
53
27
|
geneIDs <- h5read(input$input_file, "gene_names")
|
|
54
28
|
geneSymbols <- h5read(input$input_file, "gene_symbols")
|
|
55
29
|
samples <- h5read(input$input_file, "samples")
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
}
|
|
30
|
+
|
|
31
|
+
# Find indices of case and control samples in the HDF5 file
|
|
32
|
+
case_indices <- match(cases, samples)
|
|
33
|
+
control_indices <- match(controls, samples)
|
|
34
|
+
|
|
35
|
+
# Check for missing samples
|
|
36
|
+
if (any(is.na(case_indices))) {
|
|
37
|
+
missing_cases <- cases[is.na(case_indices)]
|
|
38
|
+
stop(paste(missing_cases, "not found"))
|
|
66
39
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
if (length(sample_index) == 1) {
|
|
71
|
-
samples_indicies <- c(samples_indicies,sample_index)
|
|
72
|
-
} else {
|
|
73
|
-
print (paste(sample,"not found"))
|
|
74
|
-
quit(status = 1)
|
|
75
|
-
}
|
|
40
|
+
if (any(is.na(control_indices))) {
|
|
41
|
+
missing_controls <- controls[is.na(control_indices)]
|
|
42
|
+
stop(paste(missing_controls, "not found"))
|
|
76
43
|
}
|
|
77
|
-
|
|
78
|
-
|
|
44
|
+
|
|
45
|
+
samples_indices <- c(case_indices, control_indices)
|
|
46
|
+
read_counts <- t(h5read(input$input_file, "counts", index = list(samples_indices, 1:length(geneIDs))))
|
|
47
|
+
colnames(read_counts) <- c(cases, controls)
|
|
79
48
|
} else if (input$storage_type == "text") {
|
|
80
49
|
suppressWarnings({
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
50
|
+
suppressMessages({
|
|
51
|
+
read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
|
|
52
|
+
})
|
|
84
53
|
})
|
|
85
54
|
geneIDs <- unlist(read_counts[1])
|
|
86
55
|
geneSymbols <- unlist(read_counts[2])
|
|
87
56
|
read_counts <- select(read_counts, -geneID)
|
|
88
57
|
read_counts <- select(read_counts, -geneSymbol)
|
|
89
58
|
} else {
|
|
90
|
-
|
|
59
|
+
stop("Unknown storage type")
|
|
91
60
|
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
suppressMessages({
|
|
95
|
-
read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
|
|
96
|
-
})
|
|
97
|
-
})
|
|
98
|
-
geneIDs <- unlist(read_counts[1])
|
|
99
|
-
geneSymbols <- unlist(read_counts[2])
|
|
100
|
-
read_counts <- select(read_counts, -geneID)
|
|
101
|
-
read_counts <- select(read_counts, -geneSymbol)
|
|
102
|
-
}
|
|
61
|
+
})
|
|
62
|
+
cat("Time to read counts data: ", read_counts_time[3], " seconds\n")
|
|
103
63
|
|
|
104
|
-
#
|
|
105
|
-
|
|
64
|
+
# Create conditions vector
|
|
65
|
+
conditions <- c(rep("Diseased", length(cases)), rep("Control", length(controls)))
|
|
66
|
+
gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
|
|
106
67
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
tabs <- rep("\t",length(geneIDs))
|
|
111
|
-
gene_id_symbols <- paste0(geneIDs,tabs,geneSymbols)
|
|
112
|
-
y <- DGEList(counts = as.matrix(read_counts), group = conditions, genes = gene_id_symbols)
|
|
113
|
-
keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
|
|
114
|
-
y <- y[keep, keep.lib.sizes = FALSE]
|
|
115
|
-
y <- calcNormFactors(y, method = "TMM")
|
|
116
|
-
#print (y)
|
|
117
|
-
calculate_dispersion_time_start <- Sys.time()
|
|
118
|
-
suppressWarnings({
|
|
119
|
-
suppressMessages({
|
|
120
|
-
dge <- estimateDisp(y = y)
|
|
121
|
-
})
|
|
68
|
+
# Create DGEList object
|
|
69
|
+
dge_list_time <- system.time({
|
|
70
|
+
y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
|
|
122
71
|
})
|
|
123
|
-
|
|
124
|
-
print("Dispersion Time")
|
|
125
|
-
print (calculate_dispersion_time_stop - calculate_dispersion_time_start)
|
|
126
|
-
calculate_exact_test_time_start <- Sys.time()
|
|
127
|
-
et <- exactTest(object = dge)
|
|
128
|
-
calculate_exact_test_time_stop <- Sys.time()
|
|
129
|
-
print("Exact Time")
|
|
130
|
-
print(calculate_exact_test_time_stop - calculate_exact_test_time_start)
|
|
131
|
-
#print ("Time to calculate DE")
|
|
132
|
-
#print (calculate_DE_time_stop - calculate_DE_time_start)
|
|
133
|
-
#print (et)
|
|
134
|
-
logfc <- et$table$logFC
|
|
135
|
-
logcpm <- et$table$logCPM
|
|
136
|
-
pvalues <- et$table$PValue
|
|
137
|
-
genes_matrix <- str_split_fixed(unlist(et$genes),"\t",2)
|
|
138
|
-
geneids <- unlist(genes_matrix[,1])
|
|
139
|
-
genesymbols <- unlist(genes_matrix[,2])
|
|
140
|
-
adjust_p_values <- p.adjust(pvalues, method = "fdr")
|
|
72
|
+
cat("Time to generate DGEList: ", dge_list_time[3], " seconds\n")
|
|
141
73
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
#
|
|
74
|
+
# Filter and normalize counts
|
|
75
|
+
filter_time <- system.time({
|
|
76
|
+
keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
|
|
77
|
+
})
|
|
78
|
+
cat("Time to filter by expression: ", filter_time[3], " seconds\n")
|
|
79
|
+
|
|
80
|
+
normalization_time <- system.time({
|
|
81
|
+
y <- y[keep, keep.lib.sizes = FALSE]
|
|
82
|
+
y <- calcNormFactors(y, method = "TMM")
|
|
83
|
+
})
|
|
84
|
+
cat("Normalization time: ", normalization_time[3], " seconds\n")
|
|
85
|
+
|
|
86
|
+
# Differential expression analysis
|
|
87
|
+
if (length(input$conf1) == 0) { # No adjustment of confounding factors
|
|
88
|
+
dispersion_time <- system.time({
|
|
89
|
+
suppressWarnings({
|
|
90
|
+
suppressMessages({
|
|
91
|
+
y <- estimateDisp(y)
|
|
92
|
+
})
|
|
93
|
+
})
|
|
94
|
+
})
|
|
95
|
+
cat("Dispersion time: ", dispersion_time[3], " seconds\n")
|
|
96
|
+
|
|
97
|
+
exact_test_time <- system.time({
|
|
98
|
+
et <- exactTest(y)
|
|
99
|
+
})
|
|
100
|
+
cat("Exact test time: ", exact_test_time[3], " seconds\n")
|
|
101
|
+
} else { # Adjusting for confounding factors
|
|
102
|
+
y$samples <- data.frame(conditions = conditions, conf1 = input$conf1)
|
|
103
|
+
model_gen_time <- system.time({
|
|
104
|
+
design <- model.matrix(~ conf1 + conditions, data = y$samples)
|
|
105
|
+
})
|
|
106
|
+
cat("Time for making design matrix: ", model_gen_time[3], " seconds\n")
|
|
107
|
+
|
|
108
|
+
dispersion_time <- system.time({
|
|
109
|
+
y <- estimateDisp(y, design)
|
|
110
|
+
})
|
|
111
|
+
cat("Dispersion time: ", dispersion_time[3], " seconds\n")
|
|
112
|
+
|
|
113
|
+
fit_time <- system.time({
|
|
114
|
+
fit <- glmFit(y, design)
|
|
115
|
+
})
|
|
116
|
+
cat("Fit time: ", fit_time[3], " seconds\n")
|
|
117
|
+
|
|
118
|
+
test_statistics_time <- system.time({
|
|
119
|
+
et <- glmLRT(fit, coef = 2)
|
|
120
|
+
})
|
|
121
|
+
cat("Test statistics time: ", test_statistics_time[3], " seconds\n")
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
# Multiple testing correction
|
|
125
|
+
multiple_testing_correction_time <- system.time({
|
|
126
|
+
logfc <- et$table$logFC
|
|
127
|
+
logcpm <- et$table$logCPM
|
|
128
|
+
pvalues <- et$table$PValue
|
|
129
|
+
genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
|
|
130
|
+
geneids <- unlist(genes_matrix[, 1])
|
|
131
|
+
genesymbols <- unlist(genes_matrix[, 2])
|
|
132
|
+
adjust_p_values <- p.adjust(pvalues, method = "fdr")
|
|
133
|
+
output <- data.frame(geneids, genesymbols, logfc, -log10(pvalues), -log10(adjust_p_values))
|
|
134
|
+
names(output)[1] <- "gene_name"
|
|
135
|
+
names(output)[2] <- "gene_symbol"
|
|
136
|
+
names(output)[3] <- "fold_change"
|
|
137
|
+
names(output)[4] <- "original_p_value"
|
|
138
|
+
names(output)[5] <- "adjusted_p_value"
|
|
139
|
+
})
|
|
140
|
+
cat("Time for multiple testing correction: ", multiple_testing_correction_time[3], " seconds\n")
|
|
155
141
|
|
|
156
|
-
#
|
|
157
|
-
|
|
158
|
-
#print (top_degs)
|
|
142
|
+
# Output results
|
|
143
|
+
cat(paste0("adjusted_p_values:", toJSON(output)))
|