@sjcrh/proteinpaint-server 2.77.1 → 2.78.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/termdb.singlecellSamples.js +78 -13
- package/src/app.js +324 -167
- package/utils/regression.R +15 -2
- package/utils/regression.utils.R +97 -43
package/utils/regression.R
CHANGED
|
@@ -109,7 +109,7 @@ benchmark[["prepareDataTable"]] <- unbox(paste(round(as.numeric(dtime), 4), attr
|
|
|
109
109
|
##################
|
|
110
110
|
|
|
111
111
|
stime <- Sys.time()
|
|
112
|
-
formulas <- buildFormulas(input$outcome, input$independent)
|
|
112
|
+
formulas <- buildFormulas(input$outcome, input$independent, input$neuroOnc)
|
|
113
113
|
etime <- Sys.time()
|
|
114
114
|
dtime <- etime - stime
|
|
115
115
|
benchmark[["buildFormulas"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
|
|
@@ -132,11 +132,24 @@ etime <- Sys.time()
|
|
|
132
132
|
dtime <- etime - stime
|
|
133
133
|
benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
|
|
134
134
|
|
|
135
|
+
|
|
136
|
+
##################
|
|
137
|
+
# PARSE RESULTS #
|
|
138
|
+
##################
|
|
139
|
+
|
|
140
|
+
if (isTRUE(input$neuroOnc) && nrow(input$independent) > 1) {
|
|
141
|
+
# neuro-oncology dataset using multiple covariates
|
|
142
|
+
# parse the results from univariate and multivariate analyses
|
|
143
|
+
# TODO: this function will not work with snplocus regression because it
|
|
144
|
+
# will combine results from multiple analyses into a single set of results
|
|
145
|
+
reg_results <- parseUniMultiResults(reg_results, input$regressionType)
|
|
146
|
+
}
|
|
147
|
+
|
|
135
148
|
out <- list(data = reg_results, benchmark = benchmark)
|
|
136
149
|
|
|
137
150
|
|
|
138
151
|
##################
|
|
139
|
-
#
|
|
152
|
+
# OUTPUT RESULTS #
|
|
140
153
|
##################
|
|
141
154
|
|
|
142
155
|
# Export results as json to stdout
|
package/utils/regression.utils.R
CHANGED
|
@@ -54,7 +54,7 @@ cubic_spline <- function(values, knots) {
|
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
# build formulas
|
|
57
|
-
buildFormulas <- function(outcome, independent) {
|
|
57
|
+
buildFormulas <- function(outcome, independent, neuroOnc) {
|
|
58
58
|
# first, format variables for building formulas
|
|
59
59
|
|
|
60
60
|
# declare new objects
|
|
@@ -117,6 +117,8 @@ buildFormulas <- function(outcome, independent) {
|
|
|
117
117
|
}
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
if (isTRUE(neuroOnc) && length(formula_interaction) > 0) stop ("interactions not supported in neuro-onc datasets")
|
|
121
|
+
|
|
120
122
|
# combine variables into formula(s)
|
|
121
123
|
# if snplocus snps are present, then prepare a
|
|
122
124
|
# separate formula for each snplocus snp
|
|
@@ -150,11 +152,22 @@ buildFormulas <- function(outcome, independent) {
|
|
|
150
152
|
}
|
|
151
153
|
} else {
|
|
152
154
|
# no snplocus snps
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
formulas[[1]]
|
|
155
|
+
if (isTRUE(neuroOnc) && length(formula_independent) > 1) {
|
|
156
|
+
# neuro-onc dataset using multiple covariates
|
|
157
|
+
# build multivariate and univariate formulas
|
|
158
|
+
formula <- as.formula(paste(formula_outcome, paste(formula_independent, collapse = "+"), sep = "~"))
|
|
159
|
+
formulas[[1]] <- list("id" = "", "type" = "multivariate", "formula" = formula)
|
|
160
|
+
for (var in formula_independent) {
|
|
161
|
+
formula <- as.formula(paste(formula_outcome, var, sep = "~"))
|
|
162
|
+
formulas[[length(formulas) + 1]] <- list("id" = "", "type" = "univariate", "formula" = formula)
|
|
163
|
+
}
|
|
164
|
+
} else {
|
|
165
|
+
# use single formula for all variables
|
|
166
|
+
formula <- as.formula(paste(formula_outcome, paste(c(formula_independent, formula_interaction), collapse = "+"), sep = "~"))
|
|
167
|
+
formulas[[1]] <- list("id" = "", "formula" = formula)
|
|
168
|
+
if (nrow(splineVariables) > 0) {
|
|
169
|
+
formulas[[1]][["splineVariables"]] = splineVariables
|
|
170
|
+
}
|
|
158
171
|
}
|
|
159
172
|
}
|
|
160
173
|
return(formulas)
|
|
@@ -400,7 +413,6 @@ coxRegression <- function(formula, dat) {
|
|
|
400
413
|
|
|
401
414
|
# run regression analysis
|
|
402
415
|
runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
|
|
403
|
-
id <- formula$id
|
|
404
416
|
# remove samples with NA values in any variable in the formula
|
|
405
417
|
# NOTE: even though regression functions (e.g. lm, glm, etc.)
|
|
406
418
|
# perform this filtration by default, this filtration
|
|
@@ -441,7 +453,8 @@ runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
|
|
|
441
453
|
if (length(warns) > 0) results[["warnings"]] <- warns
|
|
442
454
|
results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat, neuroOnc)
|
|
443
455
|
results$type3 <- formatType3(results$type3)
|
|
444
|
-
out <- list("id" = unbox(id), "data" = results[names(results) != "res"])
|
|
456
|
+
out <- list("id" = unbox(formula$id), "data" = results[names(results) != "res"])
|
|
457
|
+
if (isTRUE(neuroOnc)) out$type <- unbox(formula$type)
|
|
445
458
|
return(out)
|
|
446
459
|
}
|
|
447
460
|
|
|
@@ -613,8 +626,10 @@ build_coef_table <- function(res_summ) {
|
|
|
613
626
|
|
|
614
627
|
# reformat the coefficients table
|
|
615
628
|
formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc) {
|
|
616
|
-
# round
|
|
617
|
-
|
|
629
|
+
# round columns to 2 decimal places
|
|
630
|
+
# round p-value column to 3 significant digits
|
|
631
|
+
coefficients_table[,-ncol(coefficients_table)] <- round(coefficients_table[,-ncol(coefficients_table)], 2)
|
|
632
|
+
coefficients_table[,ncol(coefficients_table)] <- signif(coefficients_table[,ncol(coefficients_table)], 3)
|
|
618
633
|
# add variable and category columns
|
|
619
634
|
if (regtype == "cox") {
|
|
620
635
|
vCol <- vector(mode = "character")
|
|
@@ -668,43 +683,50 @@ formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc)
|
|
|
668
683
|
}
|
|
669
684
|
}
|
|
670
685
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
686
|
+
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
|
|
687
|
+
|
|
688
|
+
if (isTRUE(neuroOnc)) {
|
|
689
|
+
# neuro-oncology dataset
|
|
690
|
+
# extract columns of interest
|
|
691
|
+
if (regtype == "linear") {
|
|
692
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
|
|
693
|
+
} else if (regtype == "logistic") {
|
|
694
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
695
|
+
} else if (regtype == "cox") {
|
|
696
|
+
# cox regression
|
|
697
|
+
# report sample size and event counts of coefficients
|
|
698
|
+
sCol <- vector(mode = "character")
|
|
699
|
+
eCol <- vector(mode = "character")
|
|
700
|
+
for (i in 1:length(vCol)) {
|
|
701
|
+
v <- vCol[i]
|
|
702
|
+
c <- cCol[i]
|
|
703
|
+
if (v %in% names(res$xlevels)) {
|
|
704
|
+
# categorical variable
|
|
705
|
+
# determine sample size and event count of both ref and non-ref categories
|
|
706
|
+
# values will be stored in separate columns in the format "ref/nonref"
|
|
707
|
+
ref <- res$xlevels[[v]][1]
|
|
708
|
+
m <- table(dat[,"outcome_event"], dat[,v])
|
|
709
|
+
samplesize_ref <- sum(m[,ref])
|
|
710
|
+
samplesize_c <- sum(m[,c])
|
|
711
|
+
sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
|
|
712
|
+
eventcnt_ref <- m["1",ref]
|
|
713
|
+
eventcnt_c <- m["1",c]
|
|
714
|
+
eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
|
|
715
|
+
} else {
|
|
716
|
+
# continuous variable
|
|
717
|
+
# set sample size and event count to NA
|
|
718
|
+
sCol <- c(sCol, NA)
|
|
719
|
+
eCol <- c(eCol, NA)
|
|
720
|
+
}
|
|
701
721
|
}
|
|
722
|
+
coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
|
|
723
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
724
|
+
} else {
|
|
725
|
+
stop("regression type is not recognized")
|
|
702
726
|
}
|
|
703
|
-
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol, coefficients_table)
|
|
704
|
-
} else {
|
|
705
|
-
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
|
|
706
727
|
}
|
|
707
728
|
|
|
729
|
+
colnames(coefficients_table)[ncol(coefficients_table)] <- "P" # p-value column
|
|
708
730
|
coefficients_table <- list("header" = colnames(coefficients_table), "rows" = coefficients_table)
|
|
709
731
|
return(coefficients_table)
|
|
710
732
|
}
|
|
@@ -715,4 +737,36 @@ formatType3 <- function(type3_table) {
|
|
|
715
737
|
type3_table <- cbind("Variable" = sub("cubic_spline\\(", "", sub(", c\\(.*", "", row.names(type3_table))), type3_table)
|
|
716
738
|
type3_table <- list("header" = colnames(type3_table), "rows" = type3_table)
|
|
717
739
|
return(type3_table)
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
# parse results from univariate and multivariate analyses
|
|
743
|
+
parseUniMultiResults <- function(reg_results, regtype) {
|
|
744
|
+
# get coefficients from the univariate and multivariate analyses
|
|
745
|
+
multiCoefficients <- NULL
|
|
746
|
+
uniCoefficients <- NULL
|
|
747
|
+
for (res in reg_results) {
|
|
748
|
+
coefs <- res$data$coefficients$rows
|
|
749
|
+
# remove intercept row because cannot merge together intercepts
|
|
750
|
+
# from different univariate analyses
|
|
751
|
+
coefs <- coefs[row.names(coefs) != "(Intercept)", ,drop = F]
|
|
752
|
+
if (res$type == "multivariate") {
|
|
753
|
+
multiCoefficients <- coefs
|
|
754
|
+
} else if (res$type == "univariate") {
|
|
755
|
+
if (is.null(uniCoefficients)) {
|
|
756
|
+
uniCoefficients <- coefs
|
|
757
|
+
} else {
|
|
758
|
+
uniCoefficients <- rbind(uniCoefficients, coefs)
|
|
759
|
+
}
|
|
760
|
+
} else {
|
|
761
|
+
stop ("results type not recognized")
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
# prepare separate univariate and multivariate coefficients tables
|
|
765
|
+
uniCoefficients_table <- list("header" = colnames(uniCoefficients), "rows" = uniCoefficients)
|
|
766
|
+
multiCoefficients_table <- list("header" = colnames(multiCoefficients), "rows" = multiCoefficients)
|
|
767
|
+
# return parsed results containing the separate coefficients tables
|
|
768
|
+
reg_results_parsed <- list()
|
|
769
|
+
reg_results_parsed[[1]] <- list("id" = res$id, "data" = list("sampleSize" = res$data$sampleSize, "coefficients_uni" = uniCoefficients_table, "coefficients_multi" = multiCoefficients_table))
|
|
770
|
+
if (regtype == "cox") reg_results_parsed[[1]]$data$eventCnt = res$data$eventCnt
|
|
771
|
+
return(reg_results_parsed)
|
|
718
772
|
}
|