@sjcrh/proteinpaint-server 2.79.7 → 2.81.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,7 +127,7 @@ benchmark[["buildFormulas"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dt
127
127
  stime <- Sys.time()
128
128
  cores <- detectCores()
129
129
  if (is.na(cores)) stop("unable to detect number of cores")
130
- reg_results <- mclapply(X = formulas, FUN = runRegression, regtype = input$regressionType, dat = dat, outcome = input$outcome, neuroOnc = input$neuroOnc, mc.cores = cores)
130
+ reg_results <- mclapply(X = formulas, FUN = runRegression, regtype = input$regressionType, dat = dat, outcome = input$outcome, cachedir = input$cachedir, mc.cores = cores)
131
131
  etime <- Sys.time()
132
132
  dtime <- etime - stime
133
133
  benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
@@ -138,10 +138,8 @@ benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dt
138
138
  ##################
139
139
 
140
140
  if (isTRUE(input$includeUnivariate)) {
141
- # univariate analysis included along with multivariate analysis
142
- # parse the results
143
- # TODO: this function will not work with snplocus regression because it
144
- # will combine results from multiple analyses into a single set of results
141
+ # univariate analysis included along with multivariable analysis
142
+ # parse the univariate/multivariable results
145
143
  reg_results <- parseUniMultiResults(reg_results, input$regressionType)
146
144
  }
147
145
 
@@ -154,11 +154,23 @@ buildFormulas <- function(outcome, independent, includeUnivariate) {
154
154
  # include univariate formulas along with the multivariate formula
155
155
  if (length(formula_independent) < 2) stop("must have multiple covariates to build multivariate and univariate formulas")
156
156
  if (length(formula_interaction) > 0) stop("interactions not supported in univariate models")
157
+ # multivariate formula
157
158
  formula <- as.formula(paste(formula_outcome, paste(formula_independent, collapse = "+"), sep = "~"))
158
159
  formulas[[1]] <- list("id" = "", "type" = "multivariate", "formula" = formula)
160
+ if (nrow(splineVariables) > 0) {
161
+ formulas[[1]][["splineVariables"]] = splineVariables
162
+ }
163
+ # univariate formulas
159
164
  for (var in formula_independent) {
160
165
  formula <- as.formula(paste(formula_outcome, var, sep = "~"))
161
- formulas[[length(formulas) + 1]] <- list("id" = "", "type" = "univariate", "formula" = formula)
166
+ entry <- length(formulas) + 1
167
+ formulas[[entry]] <- list("id" = "", "type" = "univariate", "formula" = formula)
168
+ if (startsWith(var, "cubic_spline")) {
169
+ id <- gsub("cubic_spline\\(", "", gsub("\\,.*", "", var))
170
+ splineVariable <- splineVariables[splineVariables$id == id, ,drop = F]
171
+ if (nrow(splineVariable) == 0) stop("expected spline variable")
172
+ formulas[[entry]][["splineVariables"]] = splineVariable
173
+ }
162
174
  }
163
175
  } else {
164
176
  # use single formula for all variables
@@ -411,7 +423,7 @@ coxRegression <- function(formula, dat) {
411
423
  }
412
424
 
413
425
  # run regression analysis
414
- runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
426
+ runRegression <- function(formula, regtype, dat, outcome, cachedir) {
415
427
  # remove samples with NA values in any variable in the formula
416
428
  # NOTE: even though regression functions (e.g. lm, glm, etc.)
417
429
  # perform this filtration by default, this filtration
@@ -438,27 +450,27 @@ runRegression <- function(formula, regtype, dat, outcome, neuroOnc) {
438
450
  }
439
451
  if ("splineVariables" %in% names(formula)) {
440
452
  # spline variables present
441
- # plot cubic spline for each spline variable
442
- # do not plot if variable is missing plot file (to hide plot
443
- # when snplocus terms are present)
453
+ # plot cubic spline for each spline variable (if applicable)
454
+ results$splinePlotFiles <- vector(mode = "character")
444
455
  splineVariables <- formula$splineVariables
445
456
  for (r in 1:nrow(splineVariables)) {
446
457
  splineVariable <- splineVariables[r,]
447
- if ("plotfile" %in% names(splineVariable$spline)) {
448
- plot_spline(splineVariable, fdat, outcome, results$res, regtype)
458
+ if (isTRUE(splineVariable$spline$plot)) {
459
+ spline_plot_file <- plot_spline(splineVariable, fdat, outcome, results$res, regtype, formula$type, cachedir)
460
+ results$splinePlotFiles <- c(results$splinePlotFiles, spline_plot_file)
449
461
  }
450
462
  }
451
463
  }
452
464
  if (length(warns) > 0) results[["warnings"]] <- warns
453
- results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat, neuroOnc)
465
+ results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat)
454
466
  results$type3 <- formatType3(results$type3)
455
467
  out <- list("id" = unbox(formula$id), "data" = results[names(results) != "res"])
456
- if (isTRUE(neuroOnc)) out$type <- unbox(formula$type)
468
+ if (!is.null(formula$type)) out$type <- unbox(formula$type)
457
469
  return(out)
458
470
  }
459
471
 
460
- # generate cubic spline plot spline
461
- plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
472
+ # generate cubic spline plot
473
+ plot_spline <- function(splineVariable, dat, outcome, res, regtype, formulatype, cachedir) {
462
474
  # prepare test data table for predicting model outcome values
463
475
  # columns are all independent variables
464
476
  # for the spline variable, use regularly spaced data; for continuous variables, use data median; for categorical variables, use reference category
@@ -503,9 +515,9 @@ plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
503
515
  preddat_ci_adj <- preddat_ci + sum(apply(newdat2, 1, prod), na.rm = T)
504
516
 
505
517
  # plot data
506
- plotfile <- splineVariable$spline$plotfile
518
+ plotfile <- paste0(cachedir, "splinePlot_", ifelse(is.null(formulatype), "", paste0(formulatype, "_")), createRandString(), ".png")
507
519
  png(filename = plotfile, width = 950, height = 550, res = 200)
508
- par(mar = c(3, 2.5, 1, 5), mgp = c(1, 0.5, 0), xpd = T)
520
+ par(mar = c(3, 2.5, 1, 5) + 0.1, mgp = c(0.5, 0.5, 0), xpd = T)
509
521
  if (regtype == "linear" | regtype == "logistic") {
510
522
  if (regtype == "linear") {
511
523
  # for linear, plot predicted values
@@ -559,7 +571,12 @@ plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
559
571
  stop("unrecognized regression type")
560
572
  }
561
573
 
562
- # axis titles
574
+ # titles
575
+ if (is.null(formulatype)) title <- NULL
576
+ else if (formulatype == "univariate") title <- "Univariate"
577
+ else if (formulatype == "multivariate") title <- "Multivariable-adjusted"
578
+ else stop("unexpected formula type")
579
+ title(main = title, cex.main = 0.6)
563
580
  title(xlab = splineVariable$name,
564
581
  ylab = ylab,
565
582
  line = 1.5,
@@ -609,6 +626,15 @@ plot_spline <- function(splineVariable, dat, outcome, res, regtype) {
609
626
  border = c(NA, NA, NA)
610
627
  )
611
628
  dev.off()
629
+ return(plotfile)
630
+ }
631
+
632
+ createRandString <- function() {
633
+ digits = 0:9
634
+ v = c(sample(LETTERS, 4, replace = TRUE),
635
+ sample(digits, 4, replace = TRUE),
636
+ sample(LETTERS, 1, replace = TRUE))
637
+ return(paste0(v, collapse = ""))
612
638
  }
613
639
 
614
640
  # build coefficients table
@@ -624,11 +650,9 @@ build_coef_table <- function(res_summ) {
624
650
  }
625
651
 
626
652
  # reformat the coefficients table
627
- formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc) {
628
- # round columns to 2 decimal places
629
- # round p-value column to 3 significant digits
630
- coefficients_table[,-ncol(coefficients_table)] <- round(coefficients_table[,-ncol(coefficients_table)], 2)
631
- coefficients_table[,ncol(coefficients_table)] <- signif(coefficients_table[,ncol(coefficients_table)], 3)
653
+ formatCoefficients <- function(coefficients_table, res, regtype, dat) {
654
+ # round all columns to 4 significant digits
655
+ coefficients_table <- signif(coefficients_table, 4)
632
656
  # add variable and category columns
633
657
  if (regtype == "cox") {
634
658
  vCol <- vector(mode = "character")
@@ -684,45 +708,42 @@ formatCoefficients <- function(coefficients_table, res, regtype, dat, neuroOnc)
684
708
 
685
709
  coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
686
710
 
687
- if (isTRUE(neuroOnc)) {
688
- # neuro-oncology dataset
689
- # extract columns of interest
690
- if (regtype == "linear") {
691
- coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
692
- } else if (regtype == "logistic") {
693
- coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
694
- } else if (regtype == "cox") {
695
- # cox regression
696
- # report sample size and event counts of coefficients
697
- sCol <- vector(mode = "character")
698
- eCol <- vector(mode = "character")
699
- for (i in 1:length(vCol)) {
700
- v <- vCol[i]
701
- c <- cCol[i]
702
- if (v %in% names(res$xlevels)) {
703
- # categorical variable
704
- # determine sample size and event count of both ref and non-ref categories
705
- # values will be stored in separate columns in the format "ref/nonref"
706
- ref <- res$xlevels[[v]][1]
707
- m <- table(dat[,"outcome_event"], dat[,v])
708
- samplesize_ref <- sum(m[,ref])
709
- samplesize_c <- sum(m[,c])
710
- sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
711
- eventcnt_ref <- m["1",ref]
712
- eventcnt_c <- m["1",c]
713
- eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
714
- } else {
715
- # continuous variable
716
- # set sample size and event count to NA
717
- sCol <- c(sCol, NA)
718
- eCol <- c(eCol, NA)
719
- }
711
+ # extract columns of interest
712
+ if (regtype == "linear") {
713
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
714
+ } else if (regtype == "logistic") {
715
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
716
+ } else if (regtype == "cox") {
717
+ # cox regression
718
+ # report sample size and event counts of coefficients
719
+ sCol <- vector(mode = "character")
720
+ eCol <- vector(mode = "character")
721
+ for (i in 1:length(vCol)) {
722
+ v <- vCol[i]
723
+ c <- cCol[i]
724
+ if (v %in% names(res$xlevels)) {
725
+ # categorical variable
726
+ # determine sample size and event count of both ref and non-ref categories
727
+ # values will be stored in separate columns in the format "ref/nonref"
728
+ ref <- res$xlevels[[v]][1]
729
+ m <- table(dat[,"outcome_event"], dat[,v])
730
+ samplesize_ref <- sum(m[,ref])
731
+ samplesize_c <- sum(m[,c])
732
+ sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
733
+ eventcnt_ref <- m["1",ref]
734
+ eventcnt_c <- m["1",c]
735
+ eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
736
+ } else {
737
+ # continuous or spline variable
738
+ # set sample size and event count to NA
739
+ sCol <- c(sCol, NA)
740
+ eCol <- c(eCol, NA)
720
741
  }
721
- coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
722
- coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
723
- } else {
724
- stop("regression type is not recognized")
725
742
  }
743
+ coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
744
+ coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
745
+ } else {
746
+ stop("regression type is not recognized")
726
747
  }
727
748
 
728
749
  colnames(coefficients_table)[ncol(coefficients_table)] <- "P" # p-value column
@@ -740,10 +761,11 @@ formatType3 <- function(type3_table) {
740
761
 
741
762
  # parse results from univariate and multivariate analyses
742
763
  parseUniMultiResults <- function(reg_results, regtype) {
743
- # get coefficients from the univariate and multivariate analyses
744
764
  multiCoefficients <- NULL
745
765
  uniCoefficients <- NULL
766
+ splinePlotFiles <- NULL
746
767
  for (res in reg_results) {
768
+ # parse coefficients
747
769
  coefs <- res$data$coefficients$rows
748
770
  # remove intercept row because cannot merge together intercepts
749
771
  # from different univariate analyses
@@ -759,6 +781,8 @@ parseUniMultiResults <- function(reg_results, regtype) {
759
781
  } else {
760
782
  stop ("results type not recognized")
761
783
  }
784
+ # parse spline plots
785
+ splinePlotFiles <- c(splinePlotFiles, res$data$splinePlotFiles)
762
786
  }
763
787
  # prepare separate univariate and multivariate coefficients tables
764
788
  uniCoefficients_table <- list("header" = colnames(uniCoefficients), "rows" = uniCoefficients)
@@ -767,5 +791,6 @@ parseUniMultiResults <- function(reg_results, regtype) {
767
791
  reg_results_parsed <- list()
768
792
  reg_results_parsed[[1]] <- list("id" = res$id, "data" = list("sampleSize" = res$data$sampleSize, "coefficients_uni" = uniCoefficients_table, "coefficients_multi" = multiCoefficients_table))
769
793
  if (regtype == "cox") reg_results_parsed[[1]]$data$eventCnt = res$data$eventCnt
794
+ if (!is.null(splinePlotFiles)) reg_results_parsed[[1]]$data$splinePlotFiles = splinePlotFiles
770
795
  return(reg_results_parsed)
771
796
  }