npm - @sjcrh/proteinpaint-server - Versions diffs - 2.96.0 → 2.96.2-0 - Mend

@sjcrh/proteinpaint-server 2.96.0 → 2.96.2-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dataset/termdb.test.js +0 -2
package/package.json +2 -2
package/routes/termdb.DE.js +30 -14
package/routes/termdb.topVariablyExpressedGenes.js +1 -1
package/src/app.js +82 -41
package/src/serverconfig.js +6 -4
package/utils/edge.R +117 -132

package/src/serverconfig.js CHANGED Viewed

@@ -115,8 +115,9 @@ if (!serverconfig.binpath) {
 	}
 }
-if (serverconfig.debugmode) {
-	// only apply optional routeSetters in debugmode
+if (serverconfig.debugmode && !serverconfig.binpath.includes('sjcrh/')) {
+	// only apply optional routeSetters in debugmode and when the binpath
+	// indicates the server code is not installed as a node_module
 	const routeSetters = []
 	const defaultDir = path.join(serverconfig.binpath, 'src/test/routes')
 	// will add testing routes as needed and if found, such as in dev environment
@@ -146,8 +147,9 @@ if (serverconfig.debugmode) {
 	// since the serverconfig.binpath prefix may
 	// have been applied to locate optional routeSetter files
 	serverconfig.routeSetters = routeSetters
-	// server-sent events dir
-	serverconfig.sseDir = path.join(serverconfig.binpath, '../.sse')
+	// server-sent events dir, can manually set sseDir to false
+	// to prevent the default SSE setup in dev
+	if (serverconfig.sseDir !== false) serverconfig.sseDir = path.join(serverconfig.binpath, '../.sse')
 }
 if (serverconfig.allow_env_overrides) {

package/utils/edge.R CHANGED Viewed

@@ -1,158 +1,143 @@
-# Usage: echo <in_json> | Rscript edge.R > <out_json>
-#   in_json: [string] input data in JSON format. Streamed through stdin.
-#   out_json: [string] clustering results in JSON format. Streamed to stdout.
-# json='{"case":"SJMB066856,SJMB069601,SJMB030827,SJMB030838,SJMB031131,SJMB031227,SJMB077221,SJMB077223","control":"SJMB069596,SJMB069587,SJMB074736,SJMB030488,SJMB030825,SJMB031110,SJMB032998,SJMB033002","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | Rscript edge.R
-# json='{"case":"SJMB030827,SJMB030838,SJMB064540,SJMB064538,SJMB064520,SJMB064535,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB064537,SJMB064510,SJMB064533,SJMB064534,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | Rscript edge.R
-# Checking if all R packages are installed or not, if not installing each one of them
-#jsonlite_path <- system.file(package='jsonlite')
-#if (nchar(jsonlite_path) == 0) {
-#  install.packages("jsonlite", repos='https://cran.case.edu/')
-#}
-#
-#edgeR_path <- system.file(package='edgeR')
-#if (nchar(edgeR_path) == 0) {
-#  BiocManager::install("edgeR")
-#}
-#
-#readr_path <- system.file(package='readr')
-#if (nchar(readr_path) == 0) {
-#  install.packages("readr", repos='https://cran.case.edu/')
-#}
-library(jsonlite)
-library(rhdf5)
-library(stringr)
-library(readr)
+# Load required packages
 suppressWarnings({
+    library(jsonlite)
+    library(rhdf5)
+    library(stringr)
+    library(readr)
     suppressPackageStartupMessages(library(edgeR))
     suppressPackageStartupMessages(library(dplyr))
 })
-con <- file("stdin", "r")
-json <- readLines(con, warn=FALSE)
-close(con)
-input <- fromJSON(json)
-#print (input)
-#print (input$output_path)
+# Read JSON input from stdin
+read_json_time <- system.time({
+    con <- file("stdin", "r")
+    json <- readLines(con, warn=FALSE)
+    close(con)
+    input <- fromJSON(json)
+    cases <- unlist(strsplit(input$case, ","))
+    controls <- unlist(strsplit(input$control, ","))
+    combined <- c("geneID", "geneSymbol", cases, controls)
+})
+cat("Time to read JSON: ", read_json_time[3], " seconds\n")
-cases <- unlist(strsplit(input$case, ","))
-controls <- unlist(strsplit(input$control, ","))
-combined <- c("geneID","geneSymbol",cases,controls)
-#data %>% select(all_of(combined))
-#read_file_time_start <- Sys.time()
-if (exists(input$storage_type)==FALSE) {
+# Read counts data
+read_counts_time <- system.time({
     if (input$storage_type == "HDF5") {
-        #print(h5ls(input$input_file))
         geneIDs <- h5read(input$input_file, "gene_names")
         geneSymbols <- h5read(input$input_file, "gene_symbols")
         samples <- h5read(input$input_file, "samples")
-        samples_indicies <- c()
-        for (sample in cases) {
-            sample_index <- which(samples == sample)
-            if (length(sample_index) == 1) {
-                samples_indicies <- c(samples_indicies,sample_index)
-            } else {
-                print (paste(sample,"not found"))
-                quit(status = 1)
-            }
+        # Find indices of case and control samples in the HDF5 file
+        case_indices <- match(cases, samples)
+        control_indices <- match(controls, samples)
+        # Check for missing samples
+        if (any(is.na(case_indices))) {
+            missing_cases <- cases[is.na(case_indices)]
+            stop(paste(missing_cases, "not found"))
         }
-        for (sample in controls) {
-            sample_index <- which(samples == sample)
-            if (length(sample_index) == 1) {
-                samples_indicies <- c(samples_indicies,sample_index)
-            } else {
-                print (paste(sample,"not found"))
-                quit(status = 1)
-            }
+        if (any(is.na(control_indices))) {
+            missing_controls <- controls[is.na(control_indices)]
+            stop(paste(missing_controls, "not found"))
         }
-        read_counts <- t(h5read(input$input_file,"counts",index=list(samples_indicies, 1:length(geneIDs))))
+        samples_indices <- c(case_indices, control_indices)
+        read_counts <- t(h5read(input$input_file, "counts", index = list(samples_indices, 1:length(geneIDs))))
+        colnames(read_counts) <- c(cases, controls)
     } else if (input$storage_type == "text") {
         suppressWarnings({
-          suppressMessages({
-        read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
-          })
+            suppressMessages({
+                read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
+            })
         })
         geneIDs <- unlist(read_counts[1])
         geneSymbols <- unlist(read_counts[2])
         read_counts <- select(read_counts, -geneID)
         read_counts <- select(read_counts, -geneSymbol)
     } else {
-        print ("Unknown storage type")
+        stop("Unknown storage type")
     }
-} else { # If not defined, parse data from a text file
-    suppressWarnings({
-      suppressMessages({
-    read_counts <- read_tsv(input$input_file, col_names = TRUE, col_select = combined)
-      })
-    })
-    geneIDs <- unlist(read_counts[1])
-    geneSymbols <- unlist(read_counts[2])
-    read_counts <- select(read_counts, -geneID)
-    read_counts <- select(read_counts, -geneSymbol)
-}
+})
+cat("Time to read counts data: ", read_counts_time[3], " seconds\n")
-#read_file_time_stop <- Sys.time()
-#print (read_file_time_stop - read_file_time_start)
+# Create conditions vector
+conditions <- c(rep("Diseased", length(cases)), rep("Control", length(controls)))
+gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
-diseased <- rep("Diseased", length(cases))
-control <- rep("Control", length(controls))
-conditions <- c(diseased, control)
-tabs <- rep("\t",length(geneIDs))
-gene_id_symbols <- paste0(geneIDs,tabs,geneSymbols)
-y <- DGEList(counts = as.matrix(read_counts), group = conditions, genes = gene_id_symbols)
-keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
-y <- y[keep, keep.lib.sizes = FALSE]
-y <- calcNormFactors(y, method = "TMM")
-#print (y)
-calculate_dispersion_time_start <- Sys.time()
-suppressWarnings({
-  suppressMessages({
-      dge <- estimateDisp(y = y)
-  })
+# Create DGEList object
+dge_list_time <- system.time({
+    y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
 })
-calculate_dispersion_time_stop <- Sys.time()
-print("Dispersion Time")
-print (calculate_dispersion_time_stop - calculate_dispersion_time_start)
-calculate_exact_test_time_start <- Sys.time()
-et <- exactTest(object = dge)
-calculate_exact_test_time_stop <- Sys.time()
-print("Exact Time")
-print(calculate_exact_test_time_stop - calculate_exact_test_time_start)
-#print ("Time to calculate DE")
-#print (calculate_DE_time_stop - calculate_DE_time_start)
-#print (et)
-logfc <- et$table$logFC
-logcpm <- et$table$logCPM
-pvalues <- et$table$PValue
-genes_matrix <- str_split_fixed(unlist(et$genes),"\t",2)
-geneids <- unlist(genes_matrix[,1])
-genesymbols <- unlist(genes_matrix[,2])
-adjust_p_values <- p.adjust(pvalues, method = "fdr")
+cat("Time to generate DGEList: ", dge_list_time[3], " seconds\n")
-output <- data.frame(geneids,genesymbols,logfc,-log10(pvalues),-log10(adjust_p_values))
-names(output)[1] <- "gene_name"
-names(output)[2] <- "gene_symbol"
-names(output)[3] <- "fold_change"
-names(output)[4] <- "original_p_value"
-names(output)[5] <- "adjusted_p_value"
-#write_csv(output,"DE_output.txt")
-cat(paste0("adjusted_p_values:",toJSON(output)))
-#output_json <- toJSON(output)
-#print ("output_json")
-#output_file <- paste0(input$output_path,"/r_output.txt")
-#print (output_file)
-#cat(output_json, file = output_file)
+# Filter and normalize counts
+filter_time <- system.time({
+    keep <- filterByExpr(y, min.count = input$min_count, min.total.count = input$min_total_count)
+})
+cat("Time to filter by expression: ", filter_time[3], " seconds\n")
+normalization_time <- system.time({
+    y <- y[keep, keep.lib.sizes = FALSE]
+    y <- calcNormFactors(y, method = "TMM")
+})
+cat("Normalization time: ", normalization_time[3], " seconds\n")
+# Differential expression analysis
+if (length(input$conf1) == 0) { # No adjustment of confounding factors
+    dispersion_time <- system.time({
+        suppressWarnings({
+            suppressMessages({
+                y <- estimateDisp(y)
+            })
+        })
+    })
+    cat("Dispersion time: ", dispersion_time[3], " seconds\n")
+    exact_test_time <- system.time({
+        et <- exactTest(y)
+    })
+    cat("Exact test time: ", exact_test_time[3], " seconds\n")
+} else { # Adjusting for confounding factors
+    y$samples <- data.frame(conditions = conditions, conf1 = input$conf1)
+    model_gen_time <- system.time({
+        design <- model.matrix(~ conf1 + conditions, data = y$samples)
+    })
+    cat("Time for making design matrix: ", model_gen_time[3], " seconds\n")
+    dispersion_time <- system.time({
+        y <- estimateDisp(y, design)
+    })
+    cat("Dispersion time: ", dispersion_time[3], " seconds\n")
+    fit_time <- system.time({
+        fit <- glmFit(y, design)
+    })
+    cat("Fit time: ", fit_time[3], " seconds\n")
+    test_statistics_time <- system.time({
+        et <- glmLRT(fit, coef = 2)
+    })
+    cat("Test statistics time: ", test_statistics_time[3], " seconds\n")
+}
+# Multiple testing correction
+multiple_testing_correction_time <- system.time({
+    logfc <- et$table$logFC
+    logcpm <- et$table$logCPM
+    pvalues <- et$table$PValue
+    genes_matrix <- str_split_fixed(unlist(et$genes), "\t", 2)
+    geneids <- unlist(genes_matrix[, 1])
+    genesymbols <- unlist(genes_matrix[, 2])
+    adjust_p_values <- p.adjust(pvalues, method = "fdr")
+    output <- data.frame(geneids, genesymbols, logfc, -log10(pvalues), -log10(adjust_p_values))
+    names(output)[1] <- "gene_name"
+    names(output)[2] <- "gene_symbol"
+    names(output)[3] <- "fold_change"
+    names(output)[4] <- "original_p_value"
+    names(output)[5] <- "adjusted_p_value"
+})
+cat("Time for multiple testing correction: ", multiple_testing_correction_time[3], " seconds\n")
-#top_degs = topTags(object = et, n = "Inf")
-#print ("top_degs")
-#print (top_degs)
+# Output results
+cat(paste0("adjusted_p_values:", toJSON(output)))