@sjcrh/proteinpaint-server 2.77.1 → 2.78.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -109,7 +109,7 @@ benchmark[["prepareDataTable"]] <- unbox(paste(round(as.numeric(dtime), 4), attr
109
109
  ##################
110
110
 
111
111
  stime <- Sys.time()
112
- formulas <- buildFormulas(input$outcome, input$independent)
112
+ formulas <- buildFormulas(input$outcome, input$independent, input$neuroOnc)
113
113
  etime <- Sys.time()
114
114
  dtime <- etime - stime
115
115
  benchmark[["buildFormulas"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
@@ -132,11 +132,24 @@ etime <- Sys.time()
132
132
  dtime <- etime - stime
133
133
  benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
134
134
 
135
+
136
+ ##################
137
+ # PARSE RESULTS #
138
+ ##################
139
+
140
+ if (isTRUE(input$neuroOnc) && nrow(input$independent) > 1) {
141
+ # neuro-oncology dataset using multiple covariates
142
+ # parse the results from univariate and multivariate analyses
143
+ # TODO: this function will not work with snplocus regression because it
144
+ # will combine results from multiple analyses into a single set of results
145
+ reg_results <- parseUniMultiResults(reg_results, input$regressionType)
146
+ }
147
+
135
148
  out <- list(data = reg_results, benchmark = benchmark)
136
149
 
137
150
 
138
151
  ##################
139
- # EXPORT RESULTS #
152
+ # OUTPUT RESULTS #
140
153
  ##################
141
154
 
142
155
  # Export results as json to stdout
@@ -54,7 +54,7 @@ cubic_spline <- function(values, knots) {
54
54
  }
55
55
 
56
56
  # build formulas
57
- buildFormulas <- function(outcome, independent) {
57
+ buildFormulas <- function(outcome, independent, neuroOnc) {
58
58
  # first, format variables for building formulas
59
59
 
60
60
  # declare new objects
@@ -117,6 +117,8 @@ buildFormulas <- function(outcome, independent) {
117
117
  }
118
118
  }
119
119
 
120
+ if (isTRUE(neuroOnc) && length(formula_interaction) > 0) stop ("interactions not supported in neuro-onc datasets")
121
+
120
122
  # combine variables into formula(s)
121
123
  # if snplocus snps are present, then prepare a
122
124
  # separate formula for each snplocus snp
@@ -150,11 +152,22 @@ buildFormulas <- function(outcome, independent) {
150
152
  }
151
153
  } else {
152
154
  # no snplocus snps
153
- # use single formula for all variables
154
- formula <- as.formula(paste(formula_outcome, paste(c(formula_independent, formula_interaction), collapse = "+"), sep = "~"))
155
- formulas[[1]] <- list("id" = "", "formula" = formula)
156
- if (nrow(splineVariables) > 0) {
157
- formulas[[1]][["splineVariables"]] = splineVariables
155
+ if (isTRUE(neuroOnc) && length(formula_independent) > 1) {
156
+ # neuro-onc dataset using multiple covariates
157
+ # build multivariate and univariate formulas
158
+ formula <- as.formula(paste(formula_outcome, paste(formula_independent, collapse = "+"), sep = "~"))
159
+ formulas[[1]] <- list("id" = "", "type" = "multivariate", "formula" = formula)
160
+ for (var in formula_independent) {
161
+ formula <- as.formula(paste(formula_outcome, var, sep = "~"))
162
+ formulas[[length(formulas) + 1]] <- list("id" = "", "type" = "univariate", "formula" = formula)
163
+ }
164
+ } else {
165
+ # use single formula for all variables
166
+ formula <- as.formula(paste(formula_outcome, paste(c(formula_independent, formula_interaction), collapse = "+"), sep = "~"))
167
+ formulas[[1]] <- list("id" = "", "formula" = formula)
168
+ if (nrow(splineVariables) > 0) {
169
+ formulas[[1]][["splineVariables"]] = splineVariables
170
+ }
158
171
  }
159
172
  }
160
173
  return(formulas)
@@ -400,7 +413,6 @@ coxRegression <- function(formula, dat) {
400
413
 
401
414
  # run regression analysis
402
415
  runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
403
- id <- formula$id
404
416
  # remove samples with NA values in any variable in the formula
405
417
  # NOTE: even though regression functions (e.g. lm, glm, etc.)
406
418
  # perform this filtration by default, this filtration
@@ -441,7 +453,8 @@ runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
441
453
  if (length(warns) > 0) results[["warnings"]] <- warns
442
454
  results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat, neuroOnc)
443
455
  results$type3 <- formatType3(results$type3)
444
- out <- list("id" = unbox(id), "data" = results[names(results) != "res"])
456
+ out <- list("id" = unbox(formula$id), "data" = results[names(results) != "res"])
457
+ if (isTRUE(neuroOnc)) out$type <- unbox(formula$type)
445
458
  return(out)
446
459
  }
447
460
 
@@ -613,8 +626,10 @@ build_coef_table <- function(res_summ) {
613
626
 
614
627
  # reformat the coefficients table
615
628
  formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc) {
616
- # round all columns to 4 significant digits
617
- coefficients_table <- signif(coefficients_table, 4)
629
+ # round columns to 2 decimal places
630
+ # round p-value column to 3 significant digits
631
+ coefficients_table[,-ncol(coefficients_table)] <- round(coefficients_table[,-ncol(coefficients_table)], 2)
632
+ coefficients_table[,ncol(coefficients_table)] <- signif(coefficients_table[,ncol(coefficients_table)], 3)
618
633
  # add variable and category columns
619
634
  if (regtype == "cox") {
620
635
  vCol <- vector(mode = "character")
@@ -668,43 +683,50 @@ formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc)
668
683
  }
669
684
  }
670
685
 
671
- if (regtype == "cox" && !is.null(neuroOnc)) {
672
- # neuro-oncology dataset using cox regression
673
- # report sample size and event counts of coefficients
674
- sCol <- vector(mode = "character")
675
- eCol <- vector(mode = "character")
676
- for (i in 1:length(vCol)) {
677
- v <- vCol[i]
678
- c <- cCol[i]
679
- if (grepl(":", v, fixed = T)) {
680
- # interacting variables
681
- # not allowed in neuro-oncology dataset
682
- stop("interacting variables not supported")
683
- }
684
- if (v %in% names(res$xlevels)) {
685
- # categorical variable
686
- # determine sample size and event count of both ref and non-ref categories
687
- # values will be stored in separate columns in the format "ref/nonref"
688
- ref <- res$xlevels[[v]][1]
689
- m <- table(dat[,"outcome_event"], dat[,v])
690
- samplesize_ref <- sum(m[,ref])
691
- samplesize_c <- sum(m[,c])
692
- sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
693
- eventcnt_ref <- m["1",ref]
694
- eventcnt_c <- m["1",c]
695
- eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
696
- } else {
697
- # continuous variable
698
- # set sample size and event count to NA
699
- sCol <- c(sCol, NA)
700
- eCol <- c(eCol, NA)
686
+ coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
687
+
688
+ if (isTRUE(neuroOnc)) {
689
+ # neuro-oncology dataset
690
+ # extract columns of interest
691
+ if (regtype == "linear") {
692
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
693
+ } else if (regtype == "logistic") {
694
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
695
+ } else if (regtype == "cox") {
696
+ # cox regression
697
+ # report sample size and event counts of coefficients
698
+ sCol <- vector(mode = "character")
699
+ eCol <- vector(mode = "character")
700
+ for (i in 1:length(vCol)) {
701
+ v <- vCol[i]
702
+ c <- cCol[i]
703
+ if (v %in% names(res$xlevels)) {
704
+ # categorical variable
705
+ # determine sample size and event count of both ref and non-ref categories
706
+ # values will be stored in separate columns in the format "ref/nonref"
707
+ ref <- res$xlevels[[v]][1]
708
+ m <- table(dat[,"outcome_event"], dat[,v])
709
+ samplesize_ref <- sum(m[,ref])
710
+ samplesize_c <- sum(m[,c])
711
+ sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
712
+ eventcnt_ref <- m["1",ref]
713
+ eventcnt_c <- m["1",c]
714
+ eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
715
+ } else {
716
+ # continuous variable
717
+ # set sample size and event count to NA
718
+ sCol <- c(sCol, NA)
719
+ eCol <- c(eCol, NA)
720
+ }
701
721
  }
722
+ coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
723
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
724
+ } else {
725
+ stop("regression type is not recognized")
702
726
  }
703
- coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol, coefficients_table)
704
- } else {
705
- coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
706
727
  }
707
728
 
729
+ colnames(coefficients_table)[ncol(coefficients_table)] <- "P" # p-value column
708
730
  coefficients_table <- list("header" = colnames(coefficients_table), "rows" = coefficients_table)
709
731
  return(coefficients_table)
710
732
  }
@@ -715,4 +737,36 @@ formatType3 <- function(type3_table) {
715
737
  type3_table <- cbind("Variable" = sub("cubic_spline\\(", "", sub(", c\\(.*", "", row.names(type3_table))), type3_table)
716
738
  type3_table <- list("header" = colnames(type3_table), "rows" = type3_table)
717
739
  return(type3_table)
740
+ }
741
+
742
+ # parse results from univariate and multivariate analyses
743
+ parseUniMultiResults <- function(reg_results, regtype) {
744
+ # get coefficients from the univariate and multivariate analyses
745
+ multiCoefficients <- NULL
746
+ uniCoefficients <- NULL
747
+ for (res in reg_results) {
748
+ coefs <- res$data$coefficients$rows
749
+ # remove intercept row because cannot merge together intercepts
750
+ # from different univariate analyses
751
+ coefs <- coefs[row.names(coefs) != "(Intercept)", ,drop = F]
752
+ if (res$type == "multivariate") {
753
+ multiCoefficients <- coefs
754
+ } else if (res$type == "univariate") {
755
+ if (is.null(uniCoefficients)) {
756
+ uniCoefficients <- coefs
757
+ } else {
758
+ uniCoefficients <- rbind(uniCoefficients, coefs)
759
+ }
760
+ } else {
761
+ stop ("results type not recognized")
762
+ }
763
+ }
764
+ # prepare separate univariate and multivariate coefficients tables
765
+ uniCoefficients_table <- list("header" = colnames(uniCoefficients), "rows" = uniCoefficients)
766
+ multiCoefficients_table <- list("header" = colnames(multiCoefficients), "rows" = multiCoefficients)
767
+ # return parsed results containing the separate coefficients tables
768
+ reg_results_parsed <- list()
769
+ reg_results_parsed[[1]] <- list("id" = res$id, "data" = list("sampleSize" = res$data$sampleSize, "coefficients_uni" = uniCoefficients_table, "coefficients_multi" = multiCoefficients_table))
770
+ if (regtype == "cox") reg_results_parsed[[1]]$data$eventCnt = res$data$eventCnt
771
+ return(reg_results_parsed)
718
772
  }