@sjcrh/proteinpaint-server 2.79.7 → 2.81.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +2 -1
- package/package.json +2 -2
- package/routes/genomes.js +2 -0
- package/src/app.js +121 -95
- package/utils/regression.R +3 -5
- package/utils/regression.utils.R +82 -57
package/utils/regression.R
CHANGED
|
@@ -127,7 +127,7 @@ benchmark[["buildFormulas"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dt
|
|
|
127
127
|
stime <- Sys.time()
|
|
128
128
|
cores <- detectCores()
|
|
129
129
|
if (is.na(cores)) stop("unable to detect number of cores")
|
|
130
|
-
reg_results <- mclapply(X = formulas, FUN = runRegression, regtype = input$regressionType, dat = dat, outcome = input$outcome,
|
|
130
|
+
reg_results <- mclapply(X = formulas, FUN = runRegression, regtype = input$regressionType, dat = dat, outcome = input$outcome, cachedir = input$cachedir, mc.cores = cores)
|
|
131
131
|
etime <- Sys.time()
|
|
132
132
|
dtime <- etime - stime
|
|
133
133
|
benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
|
|
@@ -138,10 +138,8 @@ benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dt
|
|
|
138
138
|
##################
|
|
139
139
|
|
|
140
140
|
if (isTRUE(input$includeUnivariate)) {
|
|
141
|
-
# univariate analysis included along with
|
|
142
|
-
# parse the results
|
|
143
|
-
# TODO: this function will not work with snplocus regression because it
|
|
144
|
-
# will combine results from multiple analyses into a single set of results
|
|
141
|
+
# univariate analysis included along with multivariable analysis
|
|
142
|
+
# parse the univariate/multivariable results
|
|
145
143
|
reg_results <- parseUniMultiResults(reg_results, input$regressionType)
|
|
146
144
|
}
|
|
147
145
|
|
package/utils/regression.utils.R
CHANGED
|
@@ -154,11 +154,23 @@ buildFormulas <- function(outcome, independent, includeUnivariate) {
|
|
|
154
154
|
# include univariate formulas along with the multivariate formula
|
|
155
155
|
if (length(formula_independent) < 2) stop("must have multiple covariates to build multivariate and univariate formulas")
|
|
156
156
|
if (length(formula_interaction) > 0) stop("interactions not supported in univariate models")
|
|
157
|
+
# multivariate formula
|
|
157
158
|
formula <- as.formula(paste(formula_outcome, paste(formula_independent, collapse = "+"), sep = "~"))
|
|
158
159
|
formulas[[1]] <- list("id" = "", "type" = "multivariate", "formula" = formula)
|
|
160
|
+
if (nrow(splineVariables) > 0) {
|
|
161
|
+
formulas[[1]][["splineVariables"]] = splineVariables
|
|
162
|
+
}
|
|
163
|
+
# univariate formulas
|
|
159
164
|
for (var in formula_independent) {
|
|
160
165
|
formula <- as.formula(paste(formula_outcome, var, sep = "~"))
|
|
161
|
-
|
|
166
|
+
entry <- length(formulas) + 1
|
|
167
|
+
formulas[[entry]] <- list("id" = "", "type" = "univariate", "formula" = formula)
|
|
168
|
+
if (startsWith(var, "cubic_spline")) {
|
|
169
|
+
id <- gsub("cubic_spline\\(", "", gsub("\\,.*", "", var))
|
|
170
|
+
splineVariable <- splineVariables[splineVariables$id == id, ,drop = F]
|
|
171
|
+
if (nrow(splineVariable) == 0) stop("expected spline variable")
|
|
172
|
+
formulas[[entry]][["splineVariables"]] = splineVariable
|
|
173
|
+
}
|
|
162
174
|
}
|
|
163
175
|
} else {
|
|
164
176
|
# use single formula for all variables
|
|
@@ -411,7 +423,7 @@ coxRegression <- function(formula, dat) {
|
|
|
411
423
|
}
|
|
412
424
|
|
|
413
425
|
# run regression analysis
|
|
414
|
-
runRegression <- function(formula, regtype, dat, outcome,
|
|
426
|
+
runRegression <- function(formula, regtype, dat, outcome, cachedir) {
|
|
415
427
|
# remove samples with NA values in any variable in the formula
|
|
416
428
|
# NOTE: even though regression functions (e.g. lm, glm, etc.)
|
|
417
429
|
# perform this filtration by default, this filtration
|
|
@@ -438,27 +450,27 @@ runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
|
|
|
438
450
|
}
|
|
439
451
|
if ("splineVariables" %in% names(formula)) {
|
|
440
452
|
# spline variables present
|
|
441
|
-
# plot cubic spline for each spline variable
|
|
442
|
-
|
|
443
|
-
# when snplocus terms are present)
|
|
453
|
+
# plot cubic spline for each spline variable (if applicable)
|
|
454
|
+
results$splinePlotFiles <- vector(mode = "character")
|
|
444
455
|
splineVariables <- formula$splineVariables
|
|
445
456
|
for (r in 1:nrow(splineVariables)) {
|
|
446
457
|
splineVariable <- splineVariables[r,]
|
|
447
|
-
if (
|
|
448
|
-
plot_spline(splineVariable, fdat, outcome, results$res, regtype)
|
|
458
|
+
if (isTRUE(splineVariable$spline$plot)) {
|
|
459
|
+
spline_plot_file <- plot_spline(splineVariable, fdat, outcome, results$res, regtype, formula$type, cachedir)
|
|
460
|
+
results$splinePlotFiles <- c(results$splinePlotFiles, spline_plot_file)
|
|
449
461
|
}
|
|
450
462
|
}
|
|
451
463
|
}
|
|
452
464
|
if (length(warns) > 0) results[["warnings"]] <- warns
|
|
453
|
-
results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat
|
|
465
|
+
results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat)
|
|
454
466
|
results$type3 <- formatType3(results$type3)
|
|
455
467
|
out <- list("id" = unbox(formula$id), "data" = results[names(results) != "res"])
|
|
456
|
-
if (
|
|
468
|
+
if (!is.null(formula$type)) out$type <- unbox(formula$type)
|
|
457
469
|
return(out)
|
|
458
470
|
}
|
|
459
471
|
|
|
460
|
-
# generate cubic spline plot
|
|
461
|
-
plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
|
|
472
|
+
# generate cubic spline plot
|
|
473
|
+
plot_spline <- function(splineVariable, dat, outcome, res, regtype, formulatype, cachedir) {
|
|
462
474
|
# prepare test data table for predicting model outcome values
|
|
463
475
|
# columns are all independent variables
|
|
464
476
|
# for the spline variable, use regularly spaced data; for continuous variables, use data median; for categorical variables, use reference category
|
|
@@ -503,9 +515,9 @@ plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
|
|
|
503
515
|
preddat_ci_adj <- preddat_ci + sum(apply(newdat2, 1, prod), na.rm = T)
|
|
504
516
|
|
|
505
517
|
# plot data
|
|
506
|
-
plotfile <-
|
|
518
|
+
plotfile <- paste0(cachedir, "splinePlot_", ifelse(is.null(formulatype), "", paste0(formulatype, "_")), createRandString(), ".png")
|
|
507
519
|
png(filename = plotfile, width = 950, height = 550, res = 200)
|
|
508
|
-
par(mar = c(3, 2.5, 1, 5), mgp = c(
|
|
520
|
+
par(mar = c(3, 2.5, 1, 5) + 0.1, mgp = c(0.5, 0.5, 0), xpd = T)
|
|
509
521
|
if (regtype == "linear" | regtype == "logistic") {
|
|
510
522
|
if (regtype == "linear") {
|
|
511
523
|
# for linear, plot predicted values
|
|
@@ -559,7 +571,12 @@ plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
|
|
|
559
571
|
stop("unrecognized regression type")
|
|
560
572
|
}
|
|
561
573
|
|
|
562
|
-
#
|
|
574
|
+
# titles
|
|
575
|
+
if (is.null(formulatype)) title <- NULL
|
|
576
|
+
else if (formulatype == "univariate") title <- "Univariate"
|
|
577
|
+
else if (formulatype == "multivariate") title <- "Multivariable-adjusted"
|
|
578
|
+
else stop("unexpected formula type")
|
|
579
|
+
title(main = title, cex.main = 0.6)
|
|
563
580
|
title(xlab = splineVariable$name,
|
|
564
581
|
ylab = ylab,
|
|
565
582
|
line = 1.5,
|
|
@@ -609,6 +626,15 @@ plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
|
|
|
609
626
|
border = c(NA, NA, NA)
|
|
610
627
|
)
|
|
611
628
|
dev.off()
|
|
629
|
+
return(plotfile)
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
createRandString <- function() {
|
|
633
|
+
digits = 0:9
|
|
634
|
+
v = c(sample(LETTERS, 4, replace = TRUE),
|
|
635
|
+
sample(digits, 4, replace = TRUE),
|
|
636
|
+
sample(LETTERS, 1, replace = TRUE))
|
|
637
|
+
return(paste0(v, collapse = ""))
|
|
612
638
|
}
|
|
613
639
|
|
|
614
640
|
# build coefficients table
|
|
@@ -624,11 +650,9 @@ build_coef_table <- function(res_summ) {
|
|
|
624
650
|
}
|
|
625
651
|
|
|
626
652
|
# reformat the coefficients table
|
|
627
|
-
formatCoefficients <- function(coefficients_table, res, regtype, dat
|
|
628
|
-
# round columns to
|
|
629
|
-
|
|
630
|
-
coefficients_table[,-ncol(coefficients_table)] <- round(coefficients_table[,-ncol(coefficients_table)], 2)
|
|
631
|
-
coefficients_table[,ncol(coefficients_table)] <- signif(coefficients_table[,ncol(coefficients_table)], 3)
|
|
653
|
+
formatCoefficients <- function(coefficients_table, res, regtype, dat) {
|
|
654
|
+
# round all columns to 4 significant digits
|
|
655
|
+
coefficients_table <- signif(coefficients_table, 4)
|
|
632
656
|
# add variable and category columns
|
|
633
657
|
if (regtype == "cox") {
|
|
634
658
|
vCol <- vector(mode = "character")
|
|
@@ -684,45 +708,42 @@ formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc)
|
|
|
684
708
|
|
|
685
709
|
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
|
|
686
710
|
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
sCol <- c(sCol, NA)
|
|
718
|
-
eCol <- c(eCol, NA)
|
|
719
|
-
}
|
|
711
|
+
# extract columns of interest
|
|
712
|
+
if (regtype == "linear") {
|
|
713
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
|
|
714
|
+
} else if (regtype == "logistic") {
|
|
715
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
716
|
+
} else if (regtype == "cox") {
|
|
717
|
+
# cox regression
|
|
718
|
+
# report sample size and event counts of coefficients
|
|
719
|
+
sCol <- vector(mode = "character")
|
|
720
|
+
eCol <- vector(mode = "character")
|
|
721
|
+
for (i in 1:length(vCol)) {
|
|
722
|
+
v <- vCol[i]
|
|
723
|
+
c <- cCol[i]
|
|
724
|
+
if (v %in% names(res$xlevels)) {
|
|
725
|
+
# categorical variable
|
|
726
|
+
# determine sample size and event count of both ref and non-ref categories
|
|
727
|
+
# values will be stored in separate columns in the format "ref/nonref"
|
|
728
|
+
ref <- res$xlevels[[v]][1]
|
|
729
|
+
m <- table(dat[,"outcome_event"], dat[,v])
|
|
730
|
+
samplesize_ref <- sum(m[,ref])
|
|
731
|
+
samplesize_c <- sum(m[,c])
|
|
732
|
+
sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
|
|
733
|
+
eventcnt_ref <- m["1",ref]
|
|
734
|
+
eventcnt_c <- m["1",c]
|
|
735
|
+
eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
|
|
736
|
+
} else {
|
|
737
|
+
# continuous or spline variable
|
|
738
|
+
# set sample size and event count to NA
|
|
739
|
+
sCol <- c(sCol, NA)
|
|
740
|
+
eCol <- c(eCol, NA)
|
|
720
741
|
}
|
|
721
|
-
coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
|
|
722
|
-
coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
723
|
-
} else {
|
|
724
|
-
stop("regression type is not recognized")
|
|
725
742
|
}
|
|
743
|
+
coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
|
|
744
|
+
coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
745
|
+
} else {
|
|
746
|
+
stop("regression type is not recognized")
|
|
726
747
|
}
|
|
727
748
|
|
|
728
749
|
colnames(coefficients_table)[ncol(coefficients_table)] <- "P" # p-value column
|
|
@@ -740,10 +761,11 @@ formatType3 <- function(type3_table) {
|
|
|
740
761
|
|
|
741
762
|
# parse results from univariate and multivariate analyses
|
|
742
763
|
parseUniMultiResults <- function(reg_results, regtype) {
|
|
743
|
-
# get coefficients from the univariate and multivariate analyses
|
|
744
764
|
multiCoefficients <- NULL
|
|
745
765
|
uniCoefficients <- NULL
|
|
766
|
+
splinePlotFiles <- NULL
|
|
746
767
|
for (res in reg_results) {
|
|
768
|
+
# parse coefficients
|
|
747
769
|
coefs <- res$data$coefficients$rows
|
|
748
770
|
# remove intercept row because cannot merge together intercepts
|
|
749
771
|
# from different univariate analyses
|
|
@@ -759,6 +781,8 @@ parseUniMultiResults <- function(reg_results, regtype) {
|
|
|
759
781
|
} else {
|
|
760
782
|
stop ("results type not recognized")
|
|
761
783
|
}
|
|
784
|
+
# parse spline plots
|
|
785
|
+
splinePlotFiles <- c(splinePlotFiles, res$data$splinePlotFiles)
|
|
762
786
|
}
|
|
763
787
|
# prepare separate univariate and multivariate coefficients tables
|
|
764
788
|
uniCoefficients_table <- list("header" = colnames(uniCoefficients), "rows" = uniCoefficients)
|
|
@@ -767,5 +791,6 @@ parseUniMultiResults <- function(reg_results, regtype) {
|
|
|
767
791
|
reg_results_parsed <- list()
|
|
768
792
|
reg_results_parsed[[1]] <- list("id" = res$id, "data" = list("sampleSize" = res$data$sampleSize, "coefficients_uni" = uniCoefficients_table, "coefficients_multi" = multiCoefficients_table))
|
|
769
793
|
if (regtype == "cox") reg_results_parsed[[1]]$data$eventCnt = res$data$eventCnt
|
|
794
|
+
if (!is.null(splinePlotFiles)) reg_results_parsed[[1]]$data$splinePlotFiles = splinePlotFiles
|
|
770
795
|
return(reg_results_parsed)
|
|
771
796
|
}
|