@sjcrh/proteinpaint-server 2.77.1 → 2.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -109,7 +109,7 @@ benchmark[["prepareDataTable"]] <- unbox(paste(round(as.numeric(dtime), 4), attr
109
109
  ##################
110
110
 
111
111
  stime <- Sys.time()
112
- formulas <- buildFormulas(input$outcome, input$independent)
112
+ formulas <- buildFormulas(input$outcome, input$independent, input$includeUnivariate)
113
113
  etime <- Sys.time()
114
114
  dtime <- etime - stime
115
115
  benchmark[["buildFormulas"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
@@ -132,11 +132,24 @@ etime <- Sys.time()
132
132
  dtime <- etime - stime
133
133
  benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
134
134
 
135
+
136
+ ##################
137
+ # PARSE RESULTS #
138
+ ##################
139
+
140
+ if (isTRUE(input$includeUnivariate)) {
141
+ # univariate analysis included along with multivariate analysis
142
+ # parse the results
143
+ # TODO: this function will not work with snplocus regression because it
144
+ # will combine results from multiple analyses into a single set of results
145
+ reg_results <- parseUniMultiResults(reg_results, input$regressionType)
146
+ }
147
+
135
148
  out <- list(data = reg_results, benchmark = benchmark)
136
149
 
137
150
 
138
151
  ##################
139
- # EXPORT RESULTS #
152
+ # OUTPUT RESULTS #
140
153
  ##################
141
154
 
142
155
  # Export results as json to stdout
@@ -54,7 +54,7 @@ cubic_spline <- function(values, knots) {
54
54
  }
55
55
 
56
56
  # build formulas
57
- buildFormulas <- function(outcome, independent) {
57
+ buildFormulas <- function(outcome, independent, includeUnivariate) {
58
58
  # first, format variables for building formulas
59
59
 
60
60
  # declare new objects
@@ -150,11 +150,23 @@ buildFormulas <- function(outcome, independent) {
150
150
  }
151
151
  } else {
152
152
  # no snplocus snps
153
- # use single formula for all variables
154
- formula <- as.formula(paste(formula_outcome, paste(c(formula_independent, formula_interaction), collapse = "+"), sep = "~"))
155
- formulas[[1]] <- list("id" = "", "formula" = formula)
156
- if (nrow(splineVariables) > 0) {
157
- formulas[[1]][["splineVariables"]] = splineVariables
153
+ if (isTRUE(includeUnivariate)) {
154
+ # include univariate formulas along with the multivariate formula
155
+ if (length(formula_independent) < 2) stop("must have multiple covariates to build multivariate and univariate formulas")
156
+ if (length(formula_interaction) > 0) stop("interactions not supported in univariate models")
157
+ formula <- as.formula(paste(formula_outcome, paste(formula_independent, collapse = "+"), sep = "~"))
158
+ formulas[[1]] <- list("id" = "", "type" = "multivariate", "formula" = formula)
159
+ for (var in formula_independent) {
160
+ formula <- as.formula(paste(formula_outcome, var, sep = "~"))
161
+ formulas[[length(formulas) + 1]] <- list("id" = "", "type" = "univariate", "formula" = formula)
162
+ }
163
+ } else {
164
+ # use single formula for all variables
165
+ formula <- as.formula(paste(formula_outcome, paste(c(formula_independent, formula_interaction), collapse = "+"), sep = "~"))
166
+ formulas[[1]] <- list("id" = "", "formula" = formula)
167
+ if (nrow(splineVariables) > 0) {
168
+ formulas[[1]][["splineVariables"]] = splineVariables
169
+ }
158
170
  }
159
171
  }
160
172
  return(formulas)
@@ -400,7 +412,6 @@ coxRegression <- function(formula, dat) {
400
412
 
401
413
  # run regression analysis
402
414
  runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
403
- id <- formula$id
404
415
  # remove samples with NA values in any variable in the formula
405
416
  # NOTE: even though regression functions (e.g. lm, glm, etc.)
406
417
  # perform this filtration by default, this filtration
@@ -441,7 +452,8 @@ runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
441
452
  if (length(warns) > 0) results[["warnings"]] <- warns
442
453
  results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat, neuroOnc)
443
454
  results$type3 <- formatType3(results$type3)
444
- out <- list("id" = unbox(id), "data" = results[names(results) != "res"])
455
+ out <- list("id" = unbox(formula$id), "data" = results[names(results) != "res"])
456
+ if (isTRUE(neuroOnc)) out$type <- unbox(formula$type)
445
457
  return(out)
446
458
  }
447
459
 
@@ -613,8 +625,10 @@ build_coef_table <- function(res_summ) {
613
625
 
614
626
  # reformat the coefficients table
615
627
  formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc) {
616
- # round all columns to 4 significant digits
617
- coefficients_table <- signif(coefficients_table, 4)
628
+ # round columns to 2 decimal places
629
+ # round p-value column to 3 significant digits
630
+ coefficients_table[,-ncol(coefficients_table)] <- round(coefficients_table[,-ncol(coefficients_table)], 2)
631
+ coefficients_table[,ncol(coefficients_table)] <- signif(coefficients_table[,ncol(coefficients_table)], 3)
618
632
  # add variable and category columns
619
633
  if (regtype == "cox") {
620
634
  vCol <- vector(mode = "character")
@@ -668,43 +682,50 @@ formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc)
668
682
  }
669
683
  }
670
684
 
671
- if (regtype == "cox" && !is.null(neuroOnc)) {
672
- # neuro-oncology dataset using cox regression
673
- # report sample size and event counts of coefficients
674
- sCol <- vector(mode = "character")
675
- eCol <- vector(mode = "character")
676
- for (i in 1:length(vCol)) {
677
- v <- vCol[i]
678
- c <- cCol[i]
679
- if (grepl(":", v, fixed = T)) {
680
- # interacting variables
681
- # not allowed in neuro-oncology dataset
682
- stop("interacting variables not supported")
683
- }
684
- if (v %in% names(res$xlevels)) {
685
- # categorical variable
686
- # determine sample size and event count of both ref and non-ref categories
687
- # values will be stored in separate columns in the format "ref/nonref"
688
- ref <- res$xlevels[[v]][1]
689
- m <- table(dat[,"outcome_event"], dat[,v])
690
- samplesize_ref <- sum(m[,ref])
691
- samplesize_c <- sum(m[,c])
692
- sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
693
- eventcnt_ref <- m["1",ref]
694
- eventcnt_c <- m["1",c]
695
- eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
696
- } else {
697
- # continuous variable
698
- # set sample size and event count to NA
699
- sCol <- c(sCol, NA)
700
- eCol <- c(eCol, NA)
685
+ coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
686
+
687
+ if (isTRUE(neuroOnc)) {
688
+ # neuro-oncology dataset
689
+ # extract columns of interest
690
+ if (regtype == "linear") {
691
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
692
+ } else if (regtype == "logistic") {
693
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
694
+ } else if (regtype == "cox") {
695
+ # cox regression
696
+ # report sample size and event counts of coefficients
697
+ sCol <- vector(mode = "character")
698
+ eCol <- vector(mode = "character")
699
+ for (i in 1:length(vCol)) {
700
+ v <- vCol[i]
701
+ c <- cCol[i]
702
+ if (v %in% names(res$xlevels)) {
703
+ # categorical variable
704
+ # determine sample size and event count of both ref and non-ref categories
705
+ # values will be stored in separate columns in the format "ref/nonref"
706
+ ref <- res$xlevels[[v]][1]
707
+ m <- table(dat[,"outcome_event"], dat[,v])
708
+ samplesize_ref <- sum(m[,ref])
709
+ samplesize_c <- sum(m[,c])
710
+ sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
711
+ eventcnt_ref <- m["1",ref]
712
+ eventcnt_c <- m["1",c]
713
+ eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
714
+ } else {
715
+ # continuous variable
716
+ # set sample size and event count to NA
717
+ sCol <- c(sCol, NA)
718
+ eCol <- c(eCol, NA)
719
+ }
701
720
  }
721
+ coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
722
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
723
+ } else {
724
+ stop("regression type is not recognized")
702
725
  }
703
- coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol, coefficients_table)
704
- } else {
705
- coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
706
726
  }
707
727
 
728
+ colnames(coefficients_table)[ncol(coefficients_table)] <- "P" # p-value column
708
729
  coefficients_table <- list("header" = colnames(coefficients_table), "rows" = coefficients_table)
709
730
  return(coefficients_table)
710
731
  }
@@ -715,4 +736,36 @@ formatType3 <- function(type3_table) {
715
736
  type3_table <- cbind("Variable" = sub("cubic_spline\\(", "", sub(", c\\(.*", "", row.names(type3_table))), type3_table)
716
737
  type3_table <- list("header" = colnames(type3_table), "rows" = type3_table)
717
738
  return(type3_table)
739
+ }
740
+
741
+ # parse results from univariate and multivariate analyses
742
+ parseUniMultiResults <- function(reg_results, regtype) {
743
+ # get coefficients from the univariate and multivariate analyses
744
+ multiCoefficients <- NULL
745
+ uniCoefficients <- NULL
746
+ for (res in reg_results) {
747
+ coefs <- res$data$coefficients$rows
748
+ # remove intercept row because cannot merge together intercepts
749
+ # from different univariate analyses
750
+ coefs <- coefs[row.names(coefs) != "(Intercept)", ,drop = F]
751
+ if (res$type == "multivariate") {
752
+ multiCoefficients <- coefs
753
+ } else if (res$type == "univariate") {
754
+ if (is.null(uniCoefficients)) {
755
+ uniCoefficients <- coefs
756
+ } else {
757
+ uniCoefficients <- rbind(uniCoefficients, coefs)
758
+ }
759
+ } else {
760
+ stop ("results type not recognized")
761
+ }
762
+ }
763
+ # prepare separate univariate and multivariate coefficients tables
764
+ uniCoefficients_table <- list("header" = colnames(uniCoefficients), "rows" = uniCoefficients)
765
+ multiCoefficients_table <- list("header" = colnames(multiCoefficients), "rows" = multiCoefficients)
766
+ # return parsed results containing the separate coefficients tables
767
+ reg_results_parsed <- list()
768
+ reg_results_parsed[[1]] <- list("id" = res$id, "data" = list("sampleSize" = res$data$sampleSize, "coefficients_uni" = uniCoefficients_table, "coefficients_multi" = multiCoefficients_table))
769
+ if (regtype == "cox") reg_results_parsed[[1]]$data$eventCnt = res$data$eventCnt
770
+ return(reg_results_parsed)
718
771
  }