@sjcrh/proteinpaint-server 2.77.1 → 2.79.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/termdb.config.js +4 -1
- package/routes/termdb.singlecellSamples.js +78 -13
- package/src/app.js +410 -213
- package/utils/regression.R +15 -2
- package/utils/regression.utils.R +96 -43
package/utils/regression.R
CHANGED
|
@@ -109,7 +109,7 @@ benchmark[["prepareDataTable"]] <- unbox(paste(round(as.numeric(dtime), 4), attr
|
|
|
109
109
|
##################
|
|
110
110
|
|
|
111
111
|
stime <- Sys.time()
|
|
112
|
-
formulas <- buildFormulas(input$outcome, input$independent)
|
|
112
|
+
formulas <- buildFormulas(input$outcome, input$independent, input$includeUnivariate)
|
|
113
113
|
etime <- Sys.time()
|
|
114
114
|
dtime <- etime - stime
|
|
115
115
|
benchmark[["buildFormulas"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
|
|
@@ -132,11 +132,24 @@ etime <- Sys.time()
|
|
|
132
132
|
dtime <- etime - stime
|
|
133
133
|
benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
|
|
134
134
|
|
|
135
|
+
|
|
136
|
+
##################
|
|
137
|
+
# PARSE RESULTS #
|
|
138
|
+
##################
|
|
139
|
+
|
|
140
|
+
if (isTRUE(input$includeUnivariate)) {
|
|
141
|
+
# univariate analysis included along with multivariate analysis
|
|
142
|
+
# parse the results
|
|
143
|
+
# TODO: this function will not work with snplocus regression because it
|
|
144
|
+
# will combine results from multiple analyses into a single set of results
|
|
145
|
+
reg_results <- parseUniMultiResults(reg_results, input$regressionType)
|
|
146
|
+
}
|
|
147
|
+
|
|
135
148
|
out <- list(data = reg_results, benchmark = benchmark)
|
|
136
149
|
|
|
137
150
|
|
|
138
151
|
##################
|
|
139
|
-
#
|
|
152
|
+
# OUTPUT RESULTS #
|
|
140
153
|
##################
|
|
141
154
|
|
|
142
155
|
# Export results as json to stdout
|
package/utils/regression.utils.R
CHANGED
|
@@ -54,7 +54,7 @@ cubic_spline <- function(values, knots) {
|
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
# build formulas
|
|
57
|
-
buildFormulas <- function(outcome, independent) {
|
|
57
|
+
buildFormulas <- function(outcome, independent, includeUnivariate) {
|
|
58
58
|
# first, format variables for building formulas
|
|
59
59
|
|
|
60
60
|
# declare new objects
|
|
@@ -150,11 +150,23 @@ buildFormulas <- function(outcome, independent) {
|
|
|
150
150
|
}
|
|
151
151
|
} else {
|
|
152
152
|
# no snplocus snps
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
153
|
+
if (isTRUE(includeUnivariate)) {
|
|
154
|
+
# include univariate formulas along with the multivariate formula
|
|
155
|
+
if (length(formula_independent) < 2) stop("must have multiple covariates to build multivariate and univariate formulas")
|
|
156
|
+
if (length(formula_interaction) > 0) stop("interactions not supported in univariate models")
|
|
157
|
+
formula <- as.formula(paste(formula_outcome, paste(formula_independent, collapse = "+"), sep = "~"))
|
|
158
|
+
formulas[[1]] <- list("id" = "", "type" = "multivariate", "formula" = formula)
|
|
159
|
+
for (var in formula_independent) {
|
|
160
|
+
formula <- as.formula(paste(formula_outcome, var, sep = "~"))
|
|
161
|
+
formulas[[length(formulas) + 1]] <- list("id" = "", "type" = "univariate", "formula" = formula)
|
|
162
|
+
}
|
|
163
|
+
} else {
|
|
164
|
+
# use single formula for all variables
|
|
165
|
+
formula <- as.formula(paste(formula_outcome, paste(c(formula_independent, formula_interaction), collapse = "+"), sep = "~"))
|
|
166
|
+
formulas[[1]] <- list("id" = "", "formula" = formula)
|
|
167
|
+
if (nrow(splineVariables) > 0) {
|
|
168
|
+
formulas[[1]][["splineVariables"]] = splineVariables
|
|
169
|
+
}
|
|
158
170
|
}
|
|
159
171
|
}
|
|
160
172
|
return(formulas)
|
|
@@ -400,7 +412,6 @@ coxRegression <- function(formula, dat) {
|
|
|
400
412
|
|
|
401
413
|
# run regression analysis
|
|
402
414
|
runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
|
|
403
|
-
id <- formula$id
|
|
404
415
|
# remove samples with NA values in any variable in the formula
|
|
405
416
|
# NOTE: even though regression functions (e.g. lm, glm, etc.)
|
|
406
417
|
# perform this filtration by default, this filtration
|
|
@@ -441,7 +452,8 @@ runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
|
|
|
441
452
|
if (length(warns) > 0) results[["warnings"]] <- warns
|
|
442
453
|
results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat, neuroOnc)
|
|
443
454
|
results$type3 <- formatType3(results$type3)
|
|
444
|
-
out <- list("id" = unbox(id), "data" = results[names(results) != "res"])
|
|
455
|
+
out <- list("id" = unbox(formula$id), "data" = results[names(results) != "res"])
|
|
456
|
+
if (isTRUE(neuroOnc)) out$type <- unbox(formula$type)
|
|
445
457
|
return(out)
|
|
446
458
|
}
|
|
447
459
|
|
|
@@ -613,8 +625,10 @@ build_coef_table <- function(res_summ) {
|
|
|
613
625
|
|
|
614
626
|
# reformat the coefficients table
|
|
615
627
|
formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc) {
|
|
616
|
-
# round
|
|
617
|
-
|
|
628
|
+
# round columns to 2 decimal places
|
|
629
|
+
# round p-value column to 3 significant digits
|
|
630
|
+
coefficients_table[,-ncol(coefficients_table)] <- round(coefficients_table[,-ncol(coefficients_table)], 2)
|
|
631
|
+
coefficients_table[,ncol(coefficients_table)] <- signif(coefficients_table[,ncol(coefficients_table)], 3)
|
|
618
632
|
# add variable and category columns
|
|
619
633
|
if (regtype == "cox") {
|
|
620
634
|
vCol <- vector(mode = "character")
|
|
@@ -668,43 +682,50 @@ formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc)
|
|
|
668
682
|
}
|
|
669
683
|
}
|
|
670
684
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
685
|
+
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
|
|
686
|
+
|
|
687
|
+
if (isTRUE(neuroOnc)) {
|
|
688
|
+
# neuro-oncology dataset
|
|
689
|
+
# extract columns of interest
|
|
690
|
+
if (regtype == "linear") {
|
|
691
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
|
|
692
|
+
} else if (regtype == "logistic") {
|
|
693
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
694
|
+
} else if (regtype == "cox") {
|
|
695
|
+
# cox regression
|
|
696
|
+
# report sample size and event counts of coefficients
|
|
697
|
+
sCol <- vector(mode = "character")
|
|
698
|
+
eCol <- vector(mode = "character")
|
|
699
|
+
for (i in 1:length(vCol)) {
|
|
700
|
+
v <- vCol[i]
|
|
701
|
+
c <- cCol[i]
|
|
702
|
+
if (v %in% names(res$xlevels)) {
|
|
703
|
+
# categorical variable
|
|
704
|
+
# determine sample size and event count of both ref and non-ref categories
|
|
705
|
+
# values will be stored in separate columns in the format "ref/nonref"
|
|
706
|
+
ref <- res$xlevels[[v]][1]
|
|
707
|
+
m <- table(dat[,"outcome_event"], dat[,v])
|
|
708
|
+
samplesize_ref <- sum(m[,ref])
|
|
709
|
+
samplesize_c <- sum(m[,c])
|
|
710
|
+
sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
|
|
711
|
+
eventcnt_ref <- m["1",ref]
|
|
712
|
+
eventcnt_c <- m["1",c]
|
|
713
|
+
eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
|
|
714
|
+
} else {
|
|
715
|
+
# continuous variable
|
|
716
|
+
# set sample size and event count to NA
|
|
717
|
+
sCol <- c(sCol, NA)
|
|
718
|
+
eCol <- c(eCol, NA)
|
|
719
|
+
}
|
|
701
720
|
}
|
|
721
|
+
coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
|
|
722
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
723
|
+
} else {
|
|
724
|
+
stop("regression type is not recognized")
|
|
702
725
|
}
|
|
703
|
-
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol, coefficients_table)
|
|
704
|
-
} else {
|
|
705
|
-
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
|
|
706
726
|
}
|
|
707
727
|
|
|
728
|
+
colnames(coefficients_table)[ncol(coefficients_table)] <- "P" # p-value column
|
|
708
729
|
coefficients_table <- list("header" = colnames(coefficients_table), "rows" = coefficients_table)
|
|
709
730
|
return(coefficients_table)
|
|
710
731
|
}
|
|
@@ -715,4 +736,36 @@ formatType3 <- function(type3_table) {
|
|
|
715
736
|
type3_table <- cbind("Variable" = sub("cubic_spline\\(", "", sub(", c\\(.*", "", row.names(type3_table))), type3_table)
|
|
716
737
|
type3_table <- list("header" = colnames(type3_table), "rows" = type3_table)
|
|
717
738
|
return(type3_table)
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
# parse results from univariate and multivariate analyses
|
|
742
|
+
parseUniMultiResults <- function(reg_results, regtype) {
|
|
743
|
+
# get coefficients from the univariate and multivariate analyses
|
|
744
|
+
multiCoefficients <- NULL
|
|
745
|
+
uniCoefficients <- NULL
|
|
746
|
+
for (res in reg_results) {
|
|
747
|
+
coefs <- res$data$coefficients$rows
|
|
748
|
+
# remove intercept row because cannot merge together intercepts
|
|
749
|
+
# from different univariate analyses
|
|
750
|
+
coefs <- coefs[row.names(coefs) != "(Intercept)", ,drop = F]
|
|
751
|
+
if (res$type == "multivariate") {
|
|
752
|
+
multiCoefficients <- coefs
|
|
753
|
+
} else if (res$type == "univariate") {
|
|
754
|
+
if (is.null(uniCoefficients)) {
|
|
755
|
+
uniCoefficients <- coefs
|
|
756
|
+
} else {
|
|
757
|
+
uniCoefficients <- rbind(uniCoefficients, coefs)
|
|
758
|
+
}
|
|
759
|
+
} else {
|
|
760
|
+
stop ("results type not recognized")
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
# prepare separate univariate and multivariate coefficients tables
|
|
764
|
+
uniCoefficients_table <- list("header" = colnames(uniCoefficients), "rows" = uniCoefficients)
|
|
765
|
+
multiCoefficients_table <- list("header" = colnames(multiCoefficients), "rows" = multiCoefficients)
|
|
766
|
+
# return parsed results containing the separate coefficients tables
|
|
767
|
+
reg_results_parsed <- list()
|
|
768
|
+
reg_results_parsed[[1]] <- list("id" = res$id, "data" = list("sampleSize" = res$data$sampleSize, "coefficients_uni" = uniCoefficients_table, "coefficients_multi" = multiCoefficients_table))
|
|
769
|
+
if (regtype == "cox") reg_results_parsed[[1]]$data$eventCnt = res$data$eventCnt
|
|
770
|
+
return(reg_results_parsed)
|
|
718
771
|
}
|