@sjcrh/proteinpaint-server 2.96.0 → 2.96.2-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -115,8 +115,9 @@ if (!serverconfig.binpath) {
115
115
  }
116
116
  }
117
117
 
118
- if (serverconfig.debugmode) {
119
- // only apply optional routeSetters in debugmode
118
+ if (serverconfig.debugmode && !serverconfig.binpath.includes('sjcrh/')) {
119
+ // only apply optional routeSetters in debugmode and when the binpath
120
+ // indicates the server code is not installed as a node_module
120
121
  const routeSetters = []
121
122
  const defaultDir = path.join(serverconfig.binpath, 'src/test/routes')
122
123
  // will add testing routes as needed and if found, such as in dev environment
@@ -146,8 +147,9 @@ if (serverconfig.debugmode) {
146
147
  // since the serverconfig.binpath prefix may
147
148
  // have been applied to locate optional routeSetter files
148
149
  serverconfig.routeSetters = routeSetters
149
- // server-sent events dir
150
- serverconfig.sseDir = path.join(serverconfig.binpath, '../.sse')
150
+ // server-sent events dir, can manually set sseDir to false
151
+ // to prevent the default SSE setup in dev
152
+ if (serverconfig.sseDir !== false) serverconfig.sseDir = path.join(serverconfig.binpath, '../.sse')
151
153
  }
152
154
 
153
155
  if (serverconfig.allow_env_overrides) {
package/utils/edge.R CHANGED
@@ -1,158 +1,143 @@
1
- # Usage: echo <in_json> | Rscript edge.R > <out_json>
2
-
3
- # in_json: [string] input data in JSON format. Streamed through stdin.
4
- # out_json: [string] clustering results in JSON format. Streamed to stdout.
5
-
6
-
7
- # json='{"case":"SJMB066856,SJMB069601,SJMB030827,SJMB030838,SJMB031131,SJMB031227,SJMB077221,SJMB077223","control":"SJMB069596,SJMB069587,SJMB074736,SJMB030488,SJMB030825,SJMB031110,SJMB032998,SJMB033002","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | Rscript edge.R
8
-
9
- # json='{"case":"SJMB030827,SJMB030838,SJMB064540,SJMB064538,SJMB064520,SJMB064535,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB064537,SJMB064510,SJMB064533,SJMB064534,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | Rscript edge.R
10
-
11
- # Checking if all R packages are installed or not, if not installing each one of them
12
-
13
- #jsonlite_path <- system.file(package='jsonlite')
14
- #if (nchar(jsonlite_path) == 0) {
15
- # install.packages("jsonlite", repos='https://cran.case.edu/')
16
- #}
17
- #
18
- #edgeR_path <- system.file(package='edgeR')
19
- #if (nchar(edgeR_path) == 0) {
20
- # BiocManager::install("edgeR")
21
- #}
22
- #
23
- #readr_path <- system.file(package='readr')
24
- #if (nchar(readr_path) == 0) {
25
- # install.packages("readr", repos='https://cran.case.edu/')
26
- #}
27
-
28
-
29
- library(jsonlite)
30
- library(rhdf5)
31
- library(stringr)
32
- library(readr)
1
+ # Load required packages
33
2
  suppressWarnings({
3
+ library(jsonlite)
4
+ library(rhdf5)
5
+ library(stringr)
6
+ library(readr)
34
7
  suppressPackageStartupMessages(library(edgeR))
35
8
  suppressPackageStartupMessages(library(dplyr))
36
9
  })
37
10
 
38
- con <- file("stdin", "r")
39
- json <- readLines(con, warn=FALSE)
40
- close(con)
41
- input <- fromJSON(json)
42
- #print (input)
43
- #print (input$output_path)
11
+ # Read JSON input from stdin
12
+ read_json_time <- system.time({
13
+ con <- file("stdin", "r")
14
+ json <- readLines(con, warn=FALSE)
15
+ close(con)
16
+ input <- fromJSON(json)
17
+
18
+ cases <- unlist(strsplit(input$case, ","))
19
+ controls <- unlist(strsplit(input$control, ","))
20
+ combined <- c("geneID", "geneSymbol", cases, controls)
21
+ })
22
+ cat("Time to read JSON: ", read_json_time[3], " seconds\n")
44
23
 
45
- cases <- unlist(strsplit(input$case, ","))
46
- controls <- unlist(strsplit(input$control, ","))
47
- combined <- c("geneID","geneSymbol",cases,controls)
48
- #data %>% select(all_of(combined))
49
- #read_file_time_start <- Sys.time()
50
- if (exists(input$storage_type)==FALSE) {
24
+ # Read counts data
25
+ read_counts_time <- system.time({
51
26
  if (input$storage_type == "HDF5") {
52
- #print(h5ls(input$input_file))
53
27
  geneIDs <- h5read(input$input_file, "gene_names")
54
28
  geneSymbols <- h5read(input$input_file, "gene_symbols")
55
29
  samples <- h5read(input$input_file, "samples")
56
-
57
- samples_indicies <- c()
58
- for (sample in cases) {
59
- sample_index <- which(samples == sample)
60
- if (length(sample_index) == 1) {
61
- samples_indicies <- c(samples_indicies,sample_index)
62
- } else {
63
- print (paste(sample,"not found"))
64
- quit(status = 1)
65
- }
30
+
31
+ # Find indices of case and control samples in the HDF5 file
32
+ case_indices <- match(cases, samples)
33
+ control_indices <- match(controls, samples)
34
+
35
+ # Check for missing samples
36
+ if (any(is.na(case_indices))) {
37
+ missing_cases <- cases[is.na(case_indices)]
38
+ stop(paste(missing_cases, "not found"))
66
39
  }
67
-
68
- for (sample in controls) {
69
- sample_index <- which(samples == sample)
70
- if (length(sample_index) == 1) {
71
- samples_indicies <- c(samples_indicies,sample_index)
72
- } else {
73
- print (paste(sample,"not found"))
74
- quit(status = 1)
75
- }
40
+ if (any(is.na(control_indices))) {
41
+ missing_controls <- controls[is.na(control_indices)]
42
+ stop(paste(missing_controls, "not found"))
76
43
  }
77
- read_counts <- t(h5read(input$input_file,"counts",index=list(samples_indicies, 1:length(geneIDs))))
78
-
44
+
45
+ samples_indices <- c(case_indices, control_indices)
46
+ read_counts <- t(h5read(input$input_file, "counts", index = list(samples_indices, 1:length(geneIDs))))
47
+ colnames(read_counts) <- c(cases, controls)
79
48
  } else if (input$storage_type == "text") {
80
49
  suppressWarnings({
81
- suppressMessages({
82
- read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
83
- })
50
+ suppressMessages({
51
+ read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
52
+ })
84
53
  })
85
54
  geneIDs <- unlist(read_counts[1])
86
55
  geneSymbols <- unlist(read_counts[2])
87
56
  read_counts <- select(read_counts, -geneID)
88
57
  read_counts <- select(read_counts, -geneSymbol)
89
58
  } else {
90
- print ("Unknown storage type")
59
+ stop("Unknown storage type")
91
60
  }
92
- } else { # If not defined, parse data from a text file
93
- suppressWarnings({
94
- suppressMessages({
95
- read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
96
- })
97
- })
98
- geneIDs <- unlist(read_counts[1])
99
- geneSymbols <- unlist(read_counts[2])
100
- read_counts <- select(read_counts, -geneID)
101
- read_counts <- select(read_counts, -geneSymbol)
102
- }
61
+ })
62
+ cat("Time to read counts data: ", read_counts_time[3], " seconds\n")
103
63
 
104
- #read_file_time_stop <- Sys.time()
105
- #print (read_file_time_stop - read_file_time_start)
64
+ # Create conditions vector
65
+ conditions <- c(rep("Diseased", length(cases)), rep("Control", length(controls)))
66
+ gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
106
67
 
107
- diseased <- rep("Diseased", length(cases))
108
- control <- rep("Control", length(controls))
109
- conditions <- c(diseased, control)
110
- tabs <- rep("\t",length(geneIDs))
111
- gene_id_symbols <- paste0(geneIDs,tabs,geneSymbols)
112
- y <- DGEList(counts = as.matrix(read_counts), group = conditions, genes = gene_id_symbols)
113
- keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
114
- y <- y[keep, keep.lib.sizes = FALSE]
115
- y <- calcNormFactors(y, method = "TMM")
116
- #print (y)
117
- calculate_dispersion_time_start <- Sys.time()
118
- suppressWarnings({
119
- suppressMessages({
120
- dge <- estimateDisp(y = y)
121
- })
68
+ # Create DGEList object
69
+ dge_list_time <- system.time({
70
+ y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
122
71
  })
123
- calculate_dispersion_time_stop <- Sys.time()
124
- print("Dispersion Time")
125
- print (calculate_dispersion_time_stop - calculate_dispersion_time_start)
126
- calculate_exact_test_time_start <- Sys.time()
127
- et <- exactTest(object = dge)
128
- calculate_exact_test_time_stop <- Sys.time()
129
- print("Exact Time")
130
- print(calculate_exact_test_time_stop - calculate_exact_test_time_start)
131
- #print ("Time to calculate DE")
132
- #print (calculate_DE_time_stop - calculate_DE_time_start)
133
- #print (et)
134
- logfc <- et$table$logFC
135
- logcpm <- et$table$logCPM
136
- pvalues <- et$table$PValue
137
- genes_matrix <- str_split_fixed(unlist(et$genes),"\t",2)
138
- geneids <- unlist(genes_matrix[,1])
139
- genesymbols <- unlist(genes_matrix[,2])
140
- adjust_p_values <- p.adjust(pvalues, method = "fdr")
72
+ cat("Time to generate DGEList: ", dge_list_time[3], " seconds\n")
141
73
 
142
- output <- data.frame(geneids,genesymbols,logfc,-log10(pvalues),-log10(adjust_p_values))
143
- names(output)[1] <- "gene_name"
144
- names(output)[2] <- "gene_symbol"
145
- names(output)[3] <- "fold_change"
146
- names(output)[4] <- "original_p_value"
147
- names(output)[5] <- "adjusted_p_value"
148
- #write_csv(output,"DE_output.txt")
149
- cat(paste0("adjusted_p_values:",toJSON(output)))
150
- #output_json <- toJSON(output)
151
- #print ("output_json")
152
- #output_file <- paste0(input$output_path,"/r_output.txt")
153
- #print (output_file)
154
- #cat(output_json, file = output_file)
74
+ # Filter and normalize counts
75
+ filter_time <- system.time({
76
+ keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
77
+ })
78
+ cat("Time to filter by expression: ", filter_time[3], " seconds\n")
79
+
80
+ normalization_time <- system.time({
81
+ y <- y[keep, keep.lib.sizes = FALSE]
82
+ y <- calcNormFactors(y, method = "TMM")
83
+ })
84
+ cat("Normalization time: ", normalization_time[3], " seconds\n")
85
+
86
+ # Differential expression analysis
87
+ if (length(input$conf1) == 0) { # No adjustment of confounding factors
88
+ dispersion_time <- system.time({
89
+ suppressWarnings({
90
+ suppressMessages({
91
+ y <- estimateDisp(y)
92
+ })
93
+ })
94
+ })
95
+ cat("Dispersion time: ", dispersion_time[3], " seconds\n")
96
+
97
+ exact_test_time <- system.time({
98
+ et <- exactTest(y)
99
+ })
100
+ cat("Exact test time: ", exact_test_time[3], " seconds\n")
101
+ } else { # Adjusting for confounding factors
102
+ y$samples <- data.frame(conditions = conditions, conf1 = input$conf1)
103
+ model_gen_time <- system.time({
104
+ design <- model.matrix(~ conf1 + conditions, data = y$samples)
105
+ })
106
+ cat("Time for making design matrix: ", model_gen_time[3], " seconds\n")
107
+
108
+ dispersion_time <- system.time({
109
+ y <- estimateDisp(y, design)
110
+ })
111
+ cat("Dispersion time: ", dispersion_time[3], " seconds\n")
112
+
113
+ fit_time <- system.time({
114
+ fit <- glmFit(y, design)
115
+ })
116
+ cat("Fit time: ", fit_time[3], " seconds\n")
117
+
118
+ test_statistics_time <- system.time({
119
+ et <- glmLRT(fit, coef = 2)
120
+ })
121
+ cat("Test statistics time: ", test_statistics_time[3], " seconds\n")
122
+ }
123
+
124
+ # Multiple testing correction
125
+ multiple_testing_correction_time <- system.time({
126
+ logfc <- et$table$logFC
127
+ logcpm <- et$table$logCPM
128
+ pvalues <- et$table$PValue
129
+ genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
130
+ geneids <- unlist(genes_matrix[, 1])
131
+ genesymbols <- unlist(genes_matrix[, 2])
132
+ adjust_p_values <- p.adjust(pvalues, method = "fdr")
133
+ output <- data.frame(geneids, genesymbols, logfc, -log10(pvalues), -log10(adjust_p_values))
134
+ names(output)[1] <- "gene_name"
135
+ names(output)[2] <- "gene_symbol"
136
+ names(output)[3] <- "fold_change"
137
+ names(output)[4] <- "original_p_value"
138
+ names(output)[5] <- "adjusted_p_value"
139
+ })
140
+ cat("Time for multiple testing correction: ", multiple_testing_correction_time[3], " seconds\n")
155
141
 
156
- #top_degs = topTags(object = et, n = "Inf")
157
- #print ("top_degs")
158
- #print (top_degs)
142
+ # Output results
143
+ cat(paste0("adjusted_p_values:", toJSON(output)))