npm - @sjcrh/proteinpaint-server - Versions diffs - 2.110.0 → 2.111.0 - Mend

@sjcrh/proteinpaint-server 2.110.0 → 2.111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +4 -4
package/routes/correlationVolcano.js +11 -1
package/routes/termdb.DE.js +19 -11
package/routes/termdb.violin.js +45 -7
package/src/app.js +4762 -4707
package/utils/density.R +29 -0
package/utils/edge.R +25 -36

package/utils/density.R ADDED Viewed

@@ -0,0 +1,29 @@
+library(jsonlite)
+# This script reads a JSON string from stdin, calculates the density of each plot and prints the densities as a JSON string.
+# The input JSON string is a dictionary where each field maps to an array of numbers.
+# The output JSON string is a dictionary with the density for each plot. The density is represented like {x: [x density values], y: [y density values]}
+# In order to test it you can run this from the command line replacing the arrays with your own:
+# echo '{"plotA": [1.2, 2, 3], "plotB": [4.5, 5, 6]}' | Rscript ./density.R
+con <- file("stdin", "r")
+# warn = FALSE: input piped without a trailing newline would otherwise raise an
+# "incomplete final line" warning, which is noise on stderr for the calling process
+json <- readLines(con, warn = FALSE)
+close(con)
+data <- fromJSON(json)
+densities <- list()
+for (plot_name in names(data)) {
+    values <- data[[plot_name]]
+    # If the plot has 5 or fewer values, or all the values are the same, return a flat line:
+    # x is the input values unchanged and y is 0 at every one of them
+    if (length(values) <= 5 || length(unique(values)) == 1) {
+        densities[[plot_name]] <- list(x = values, y = rep(0, length(values)))
+        next
+    }
+    # Evaluate the kernel density estimate only over the observed range of the values
+    den <- density(x = values, from = min(values), to = max(values))
+    # Each entry is an object with two keys, x and y, that are number arrays
+    densities[[plot_name]] <- list(x = den$x, y = den$y)
+}
+toJSON(densities, digits = NA, na = "string") # auto-printed at top level; gives a json like { plotA: {x:[...], y: [...]}}

package/utils/edge.R CHANGED Viewed

@@ -10,24 +10,11 @@ suppressWarnings({
     suppressPackageStartupMessages(library(dplyr))
 })
-filter_genes_by_global_variance <- function(read_counts, gene_id_symbols, num_variable_genes) {
-   # Calculate the standard deviation of each row
-   row_sd <- apply(read_counts, 1, sd)
-   # Add the standard deviation as a new column to the dataframe
-   read_counts$Row_SD <- row_sd
-   # Add the gene_id_symbols as a new column to the dataframe
-   read_counts$gene_id_symbols <- gene_id_symbols
-   # Sort the dataframe based on the standard deviation column
-   read_counts <- read_counts[order(read_counts$Row_SD, decreasing = TRUE), ]
-   # Select top 3000 rows
-   read_counts <- head(read_counts,num_variable_genes) # Currently hardcoded 3000 genes
-   # Get gene id symbols corresponding to the reordered read count matrix
-   gene_id_symbols <- read_counts$gene_id_symbols
-   # Remove column Row_SD from read_counts dataframe
-   read_counts <- read_counts[, !names(read_counts) %in% "Row_SD"]
-   # Remove column gene_id_symbols from read_counts dataframe
-   read_counts <- read_counts[, !names(read_counts) %in% "gene_id_symbols"]
-   return(list(read_counts = read_counts, gene_id_symbols = gene_id_symbols))
+# Filter a DGEList-like object based on counts-per-million (CPM).
+#   y                 : object with a $counts matrix that supports y[rows, cols] subsetting
+#   gene_cpm_cutoff   : a gene counts as expressed in a sample when its CPM is above this value
+#   sample_cpm_cutoff : minimum number of samples in which a gene must be expressed to be kept
+#   count_cpm_cutoff  : minimum column sum of the CPM matrix for a sample to be kept
+# Returns y restricted to the retained genes (rows) and samples (columns).
+filter_using_cpm <- function(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff) {
+   counts_cpm <- cpm(y$counts) # computed once and reused for both the row and the column filter
+   selr <- rowSums(counts_cpm > gene_cpm_cutoff) >= sample_cpm_cutoff
+   # NOTE(review): cpm() on a raw count matrix presumably scales every column by its own total,
+   # so these column sums would all be about 1e6 and this filter may never drop a sample.
+   # Confirm whether colSums(y$counts) (raw library size) was intended.
+   selc <- colSums(counts_cpm) >= count_cpm_cutoff
+   y[selr, selc]
 }
 # Read JSON input from stdin
@@ -86,21 +73,6 @@ read_counts_time <- system.time({
 # One group label per sample: "Diseased" repeated for every case, followed by "Control" for every control
 # (assumes the columns of read_counts are ordered cases first, then controls - TODO confirm)
 conditions <- c(rep("Diseased", length(cases)), rep("Control", length(controls)))
 # Gene annotation strings of the form "<gene ID><TAB><gene symbol>"
 gene_id_symbols <- paste0(geneIDs, "\t", geneSymbols)
-filter_genes_time <- system.time({
-if (length(input$VarGenes) != 0) { # Filter out variable genes for DE analysis
-   filtered_read_counts <- filter_genes_by_global_variance(read_counts, gene_id_symbols, input$VarGenes)
-   read_counts <- filtered_read_counts$read_counts
-   gene_id_symbols <- filtered_read_counts$gene_id_symbols
-   #### Will implement filtering by per group variance later
-   #filtered_read_counts <- filter_genes_by_group_variance(read_counts, gene_id_symbols, num_variable_genes, cases, controls)
-   #read_counts <- filtered_read_counts$read_counts
-   #gene_id_symbols <- filtered_read_counts$gene_id_symbols
-}
-})
-#cat("Time to filter genes: ", filter_genes_time[3], " seconds\n")
 # Create DGEList object
 dge_list_time <- system.time({
     y <- DGEList(counts = read_counts, group = conditions, genes = gene_id_symbols)
@@ -119,14 +91,31 @@ normalization_time <- system.time({
 })
 #cat("Normalization time: ", normalization_time[3], " seconds\n")
+# CPM filtering thresholds (to be exposed as UI options later).
+# More than 100 samples gets the stricter gene/sample cutoffs; the count cutoff is the same either way.
+gene_cpm_cutoff <- if (length(samples_indices) > 100) 15 else 5
+sample_cpm_cutoff <- if (length(samples_indices) > 100) 30 else 15
+count_cpm_cutoff <- 100000
+# Apply the CPM filter to the DGEList in place and record how long it took
+filter_using_cpm_time <- system.time({
+    y <- filter_using_cpm(y, gene_cpm_cutoff, sample_cpm_cutoff, count_cpm_cutoff)
+})
+#cat("Filter using cpm time: ", filter_using_cpm_time[3], " seconds\n")
 # Saving MDS plot image
 # NOTE(review): as.integer(Sys.time()) has one-second resolution, so two requests seeded within the
 # same second would draw the same random_number and therefore build the same image file name,
 # which is the collision the random name is meant to prevent - confirm and consider tempfile().
 set.seed(as.integer(Sys.time())) # Set the seed according to current time
 cachedir <- input$cachedir # Importing serverconfig.cachedir
 random_number <- runif(1, min = 0, max = 1) # Generating random number
 mds_image_name <- paste0("edgeR_mds_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
 png(filename = paste0(cachedir,"/",mds_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
-par(oma = c(1, 1, 1, 1)) # Creating a margin
-plotMDS(y) # Plot the edgeR MDS plot
+par(oma = c(0, 0, 0, 0)) # Outer margins set to zero on all four sides
+mds_conditions <- c(rep("T", length(cases)), rep("C", length(controls))) # Case samples are labelled "T" and control samples are labelled "C". Single-letter labelling added because otherwise labels get overwritten on each other.
+plotMDS(y, labels = mds_conditions) # Plot the edgeR MDS plot
 # NOTE(review): with dev.off() commented out, the png device opened above is never explicitly closed here.
 # The "null device" text is presumably the auto-printed return value of dev.off(); invisible(dev.off()) may avoid it - confirm.
 # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later
@@ -202,7 +191,7 @@ cachedir <- input$cachedir # Importing serverconfig.cachedir
 # Saving the quasi-likelihood dispersion plot image under a randomly named file in cachedir
 random_number <- runif(1, min = 0, max = 1) # Generating random number
 ql_image_name <- paste0("edgeR_ql_temp_",random_number,".png") # Generating random image name so that simultaneous server side requests do NOT generate the same edgeR file name
 png(filename = paste0(cachedir,"/",ql_image_name), width = 1000, height = 1000, res = 200) # Opening a png device
-par(oma = c(1, 1, 1, 1)) # Creating a margin
+par(oma = c(0, 0, 0, 0)) # Outer margins set to zero on all four sides
 plotQLDisp(fit) # Plot the edgeR fit
 # NOTE(review): with dev.off() commented out, the png device opened above is never explicitly closed here.
 # The "null device" text is presumably the auto-printed return value of dev.off(); invisible(dev.off()) may avoid it - confirm.
 # dev.off() # Gives a null device message which breaks JSON. Commenting it out for now, will investigate it later