@sjcrh/proteinpaint-server 2.122.0 → 2.124.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +4 -11
- package/genome/hg38.js +13 -0
- package/genome/hg38.test.js +8 -0
- package/package.json +5 -7
- package/routes/burden.js +3 -4
- package/routes/correlationVolcano.js +2 -4
- package/routes/genesetOverrepresentation.js +1 -7
- package/routes/genomes.js +5 -1
- package/routes/samplewsimages.js +9 -0
- package/routes/termdb.DE.js +2 -4
- package/routes/termdb.cluster.js +2 -4
- package/routes/termdb.config.js +2 -5
- package/routes/termdb.rootterm.js +3 -3
- package/routes/termdb.termchildren.js +3 -3
- package/routes/termdb.violin.js +2 -5
- package/src/app.js +650 -575
- package/src/run_R.js +0 -66
- package/utils/binom.R +0 -17
- package/utils/burden-ci95.R +0 -134
- package/utils/burden-main.R +0 -46
- package/utils/corr.R +0 -38
- package/utils/cuminc.R +0 -279
- package/utils/density.R +0 -36
- package/utils/edge.R +0 -283
- package/utils/fdr.R +0 -9
- package/utils/fisher.2x3.R +0 -12
- package/utils/fisher.R +0 -9
- package/utils/getBurden.R +0 -371
- package/utils/getGeneFromMatrix.R +0 -40
- package/utils/hclust.R +0 -110
- package/utils/km.R +0 -13
- package/utils/lowess.R +0 -9
- package/utils/regression.R +0 -154
- package/utils/regression.utils.R +0 -804
- package/utils/survival.R +0 -92
- package/utils/wilcoxon.R +0 -73
package/utils/regression.utils.R
DELETED
|
@@ -1,804 +0,0 @@
|
|
|
1
|
-
########################
|
|
2
|
-
# REGRESSION UTILITIES #
|
|
3
|
-
########################
|
|
4
|
-
|
|
5
|
-
####################
|
|
6
|
-
# LIST OF FUNCTIONS
|
|
7
|
-
####################
|
|
8
|
-
|
|
9
|
-
# prepareDataTable
|
|
10
|
-
# cubic_spline
|
|
11
|
-
# buildFormulas
|
|
12
|
-
# linearRegression
|
|
13
|
-
# logisticRegression
|
|
14
|
-
# coxRegression
|
|
15
|
-
# runRegression
|
|
16
|
-
# plot_spline
|
|
17
|
-
# build_coef_table
|
|
18
|
-
# formatCoefficients
|
|
19
|
-
# formatType3
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
#######
|
|
23
|
-
# CODE
|
|
24
|
-
#######
|
|
25
|
-
|
|
26
|
-
# prepare data table
|
|
27
|
-
prepareDataTable <- function(tempdat, independent) {
|
|
28
|
-
dat <- tempdat[,colnames(tempdat) != "__sample"] # remove sample column
|
|
29
|
-
for (r in 1:nrow(independent)) {
|
|
30
|
-
variable <- independent[r,]
|
|
31
|
-
id <- variable$id
|
|
32
|
-
if (variable$rtype == "factor") {
|
|
33
|
-
# factor variable
|
|
34
|
-
# assign reference group
|
|
35
|
-
dat[,id] <- factor(dat[,id])
|
|
36
|
-
dat[,id] <- relevel(dat[,id], ref = variable$refGrp)
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
return(dat)
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
# compute cubic spline
|
|
43
|
-
# args: values = variable values; knots = knot values
|
|
44
|
-
cubic_spline <- function(values, knots) {
|
|
45
|
-
nknots <- length(knots) # there will be (nknots-1) cubic spline functions
|
|
46
|
-
f <- matrix(nrow = length(values), ncol = (nknots-1))
|
|
47
|
-
f[,1] <- values
|
|
48
|
-
for (j in 1:(nknots-2)) {
|
|
49
|
-
for (i in 1:length(values)) {
|
|
50
|
-
f[i,(j+1)] <- max(0,(values[i]-knots[j])^3)-max(0,(values[i]-knots[nknots-1])^3)*(knots[nknots]-knots[j])/(knots[nknots]-knots[nknots-1])+
|
|
51
|
-
max(0,(values[i]-knots[nknots])^3)*(knots[nknots-1]-knots[j])/(knots[nknots]-knots[nknots-1])
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
return(f)
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
# build formulas
|
|
58
|
-
buildFormulas <- function(outcome, independent, includeUnivariate) {
|
|
59
|
-
# first, format variables for building formulas
|
|
60
|
-
|
|
61
|
-
# declare new objects
|
|
62
|
-
formula_outcome <- vector(mode = "character") # outcome variables formatted for formula
|
|
63
|
-
formula_independent <- vector(mode = "character") # independent variables formatted for formula
|
|
64
|
-
formula_interaction <- vector(mode = "character") # interacting variables formatted for formula
|
|
65
|
-
snpLocusSnps <- independent[0,] # snplocus snps
|
|
66
|
-
splineVariables <- independent[0,] # spline variables
|
|
67
|
-
|
|
68
|
-
# outcome variable
|
|
69
|
-
if ("timeToEvent" %in% names(outcome)) {
|
|
70
|
-
# cox outcome variable
|
|
71
|
-
# time-to-event data
|
|
72
|
-
outcomeEventId <- outcome$timeToEvent$eventId
|
|
73
|
-
if (outcome$timeToEvent$timeScale == "time") {
|
|
74
|
-
outcomeTimeId <- outcome$timeToEvent$timeId
|
|
75
|
-
formula_outcome <- paste0("Surv(",outcomeTimeId,", ",outcomeEventId,")")
|
|
76
|
-
} else if (outcome$timeToEvent$timeScale == "age") {
|
|
77
|
-
outcomeAgeStartId <- outcome$timeToEvent$agestartId
|
|
78
|
-
outcomeAgeEndId <- outcome$timeToEvent$ageendId
|
|
79
|
-
formula_outcome <- paste0("Surv(",outcomeAgeStartId,", ",outcomeAgeEndId,", ",outcomeEventId,")")
|
|
80
|
-
} else {
|
|
81
|
-
stop ("unknown cox regression time scale")
|
|
82
|
-
}
|
|
83
|
-
} else {
|
|
84
|
-
# other outcome variable
|
|
85
|
-
formula_outcome <- outcome$id
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
# independent variables
|
|
89
|
-
for (r in 1:nrow(independent)) {
|
|
90
|
-
variable <- independent[r,]
|
|
91
|
-
if (variable$type == "spline") {
|
|
92
|
-
# cubic spline variable
|
|
93
|
-
# use call to cubic spline function in regression formula
|
|
94
|
-
splineCmd <- paste0("cubic_spline(", variable$id, ", ", paste0("c(", paste(variable$spline$knots[[1]],collapse = ","), ")"), ")")
|
|
95
|
-
formula_independent <- c(formula_independent, splineCmd)
|
|
96
|
-
splineVariables <- rbind(splineVariables, variable)
|
|
97
|
-
} else if (variable$type == "snplocus") {
|
|
98
|
-
# snplocus snp
|
|
99
|
-
# set aside so that separate formulas can be made for each snplocus snp
|
|
100
|
-
snpLocusSnps <- rbind(snpLocusSnps, variable)
|
|
101
|
-
next
|
|
102
|
-
} else {
|
|
103
|
-
# other independent variable
|
|
104
|
-
formula_independent <- c(formula_independent, variable$id)
|
|
105
|
-
}
|
|
106
|
-
if ("interactions" %in% names(variable) & length(variable$interactions[[1]]) > 0) {
|
|
107
|
-
# interactions
|
|
108
|
-
if (variable$type == "spline") stop("interactions not allowed with spline variable")
|
|
109
|
-
interactionIds <- variable$interactions[[1]]
|
|
110
|
-
for (intId in interactionIds) {
|
|
111
|
-
# get unique set of interactions
|
|
112
|
-
int1 <- paste(variable$id, intId, sep = ":")
|
|
113
|
-
int2 <- paste(intId, variable$id, sep = ":")
|
|
114
|
-
if (!(int1 %in% formula_interaction) & !(int2 %in% formula_interaction)) {
|
|
115
|
-
formula_interaction <- c(formula_interaction, int1)
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
# combine variables into formula(s)
|
|
122
|
-
# if snplocus snps are present, then prepare a
|
|
123
|
-
# separate formula for each snplocus snp
|
|
124
|
-
formulas <- list()
|
|
125
|
-
if (nrow(snpLocusSnps) > 0) {
|
|
126
|
-
# snplocus snps present
|
|
127
|
-
# build separate formula for each snplocus snp
|
|
128
|
-
for (r in 1:nrow(snpLocusSnps)) {
|
|
129
|
-
temp_formula_independent <- formula_independent
|
|
130
|
-
temp_formula_interaction <- formula_interaction
|
|
131
|
-
snp <- snpLocusSnps[r,]
|
|
132
|
-
temp_formula_independent <- c(temp_formula_independent, snp$id)
|
|
133
|
-
if ("interactions" %in% names(snp) & length(snp$interactions[[1]]) > 0) {
|
|
134
|
-
# interactions
|
|
135
|
-
interactionIds <- snp$interactions[[1]]
|
|
136
|
-
for (intId in interactionIds) {
|
|
137
|
-
# get unique set of interactions
|
|
138
|
-
int1 <- paste(snp$id, intId, sep = ":")
|
|
139
|
-
int2 <- paste(intId, snp$id, sep = ":")
|
|
140
|
-
if (!(int1 %in% temp_formula_interaction) & !(int2 %in% temp_formula_interaction)) {
|
|
141
|
-
temp_formula_interaction <- c(temp_formula_interaction, int1)
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
formula <- as.formula(paste(formula_outcome, paste(c(temp_formula_independent, temp_formula_interaction), collapse = "+"), sep = "~"))
|
|
146
|
-
entry <- length(formulas) + 1
|
|
147
|
-
formulas[[entry]] <- list("id" = snp$id, "formula" = formula)
|
|
148
|
-
if (nrow(splineVariables) > 0) {
|
|
149
|
-
formulas[[entry]][["splineVariables"]] = splineVariables
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
} else {
|
|
153
|
-
# no snplocus snps
|
|
154
|
-
if (isTRUE(includeUnivariate)) {
|
|
155
|
-
# include univariate formulas along with the multivariate formula
|
|
156
|
-
if (length(formula_independent) < 2) stop("must have multiple covariates to build multivariate and univariate formulas")
|
|
157
|
-
if (length(formula_interaction) > 0) stop("interactions not supported in univariate models")
|
|
158
|
-
# multivariate formula
|
|
159
|
-
formula <- as.formula(paste(formula_outcome, paste(formula_independent, collapse = "+"), sep = "~"))
|
|
160
|
-
formulas[[1]] <- list("id" = "", "type" = "multivariate", "formula" = formula)
|
|
161
|
-
if (nrow(splineVariables) > 0) {
|
|
162
|
-
formulas[[1]][["splineVariables"]] = splineVariables
|
|
163
|
-
}
|
|
164
|
-
# univariate formulas
|
|
165
|
-
for (var in formula_independent) {
|
|
166
|
-
formula <- as.formula(paste(formula_outcome, var, sep = "~"))
|
|
167
|
-
entry <- length(formulas) + 1
|
|
168
|
-
formulas[[entry]] <- list("id" = "", "type" = "univariate", "formula" = formula)
|
|
169
|
-
if (startsWith(var, "cubic_spline")) {
|
|
170
|
-
id <- gsub("cubic_spline\\(", "", gsub("\\,.*", "", var))
|
|
171
|
-
splineVariable <- splineVariables[splineVariables$id == id, ,drop = F]
|
|
172
|
-
if (nrow(splineVariable) == 0) stop("expected spline variable")
|
|
173
|
-
formulas[[entry]][["splineVariables"]] = splineVariable
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
} else {
|
|
177
|
-
# use single formula for all variables
|
|
178
|
-
formula <- as.formula(paste(formula_outcome, paste(c(formula_independent, formula_interaction), collapse = "+"), sep = "~"))
|
|
179
|
-
formulas[[1]] <- list("id" = "", "formula" = formula)
|
|
180
|
-
if (nrow(splineVariables) > 0) {
|
|
181
|
-
formulas[[1]][["splineVariables"]] = splineVariables
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
return(formulas)
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
# linear regression
|
|
189
|
-
linearRegression <- function(formula, dat) {
|
|
190
|
-
res <- lm(formula$formula, data = dat)
|
|
191
|
-
sampleSize <- res$df.residual + length(res$coefficients[!is.na(res$coefficients)])
|
|
192
|
-
res_summ <- summary(res)
|
|
193
|
-
|
|
194
|
-
# residuals table
|
|
195
|
-
residuals_table <- list("header" = c("Minimum","1st quartile","Median","3rd quartile","Maximum"), "rows" = unname(round(fivenum(res_summ$residuals),3)))
|
|
196
|
-
|
|
197
|
-
# coefficients table
|
|
198
|
-
coefficients_table <- build_coef_table(res_summ)
|
|
199
|
-
colnames(coefficients_table)[1] <- "Beta"
|
|
200
|
-
# compute confidence intervals of beta values
|
|
201
|
-
ci <- suppressMessages(confint(res))
|
|
202
|
-
colnames(ci) <- c("95% CI (low)","95% CI (high)")
|
|
203
|
-
coefficients_table <- cbind(coefficients_table, ci)
|
|
204
|
-
coefficients_table <- coefficients_table[,c(1,5,6,2,3,4)]
|
|
205
|
-
|
|
206
|
-
# type III statistics table
|
|
207
|
-
type3_table <- as.matrix(drop1(res, scope = ~., test = "F"))
|
|
208
|
-
# if there are interactions, then set the results
|
|
209
|
-
# of the main effects to "NA" because these type III
|
|
210
|
-
# stats cannot be accurately estimated
|
|
211
|
-
ints <- grep(":", row.names(type3_table), value = T, fixed = T)
|
|
212
|
-
intsIds <- unique(unlist(strsplit(ints, ":")))
|
|
213
|
-
type3_table[intsIds,] <- NA
|
|
214
|
-
# round values
|
|
215
|
-
type3_table[,c("Sum of Sq","RSS","AIC")] <- round(type3_table[,c("Sum of Sq","RSS","AIC")], 1)
|
|
216
|
-
type3_table[,"F value"] <- round(type3_table[,"F value"], 3)
|
|
217
|
-
type3_table[,"Pr(>F)"] <- signif(type3_table[,"Pr(>F)"], 4)
|
|
218
|
-
|
|
219
|
-
# total SNP effect table
|
|
220
|
-
# if a snplocus snp has an interaction, then compute
|
|
221
|
-
# the total effect of the snp on the model
|
|
222
|
-
# by determining the combined effect of removing the
|
|
223
|
-
# snp and all of its interactions from the model
|
|
224
|
-
totalSnpEffect_table <- NULL
|
|
225
|
-
if (formula$id != "" && length(ints) > 0 && any(grepl(formula$id, ints, fixed = T))) {
|
|
226
|
-
snp_vars <- grep(formula$id, row.names(type3_table), value = T, fixed = T)
|
|
227
|
-
formula_reduce <- update(formula$formula, paste0("~.",paste0("-", snp_vars, collapse = "")))
|
|
228
|
-
res_reduce <- lm(formula_reduce, data = dat)
|
|
229
|
-
totalSnpEffect_table <- as.matrix(anova(res, res_reduce, test = "F"))[2, c("Df","F","Pr(>F)"), drop = F]
|
|
230
|
-
row.names(totalSnpEffect_table) <- "Total"
|
|
231
|
-
totalSnpEffect_table[,"Df"] <- totalSnpEffect_table[,"Df"] * -1
|
|
232
|
-
totalSnpEffect_table[,"F"] <- round(totalSnpEffect_table[,"F"], 3)
|
|
233
|
-
totalSnpEffect_table[,"Pr(>F)"] <- signif(totalSnpEffect_table[,"Pr(>F)"], 4)
|
|
234
|
-
totalSnpEffect_table <- cbind("Variable" = row.names(totalSnpEffect_table), totalSnpEffect_table, "VariableIDs" = paste(snp_vars, collapse = ";"))
|
|
235
|
-
totalSnpEffect_table <- list("header" = colnames(totalSnpEffect_table), "rows" = totalSnpEffect_table)
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
# other summary stats table
|
|
239
|
-
other_table <- list(
|
|
240
|
-
"header" = c("Residual standard error", "Residual degrees of freedom", "R-squared", "Adjusted R-squared", "F-statistic", "P-value"),
|
|
241
|
-
"rows" = c(round(res_summ$sigma, 2), round(res$df.residual, 0), round(res_summ$r.squared, 5), round(res_summ$adj.r.squared, 5))
|
|
242
|
-
)
|
|
243
|
-
# check for F-statistic
|
|
244
|
-
# this statistic is not computed if variables have no variability
|
|
245
|
-
if ("fstatistic" %in% names(res_summ)) {
|
|
246
|
-
other_table[["rows"]] <- c(other_table[["rows"]], round(unname(res_summ$fstatistic[1]), 2), signif(unname(pf(res_summ$fstatistic[1], res_summ$fstatistic[2], res_summ$fstatistic[3], lower.tail = F)), 4))
|
|
247
|
-
} else {
|
|
248
|
-
other_table[["rows"]] <- c(other_table[["rows"]], NA, NA)
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
# export the results tables
|
|
252
|
-
out <- list("res" = res, "sampleSize" = unbox(sampleSize), "residuals" = residuals_table, "coefficients" = coefficients_table, "type3" = type3_table, "other" = other_table)
|
|
253
|
-
if (!is.null(totalSnpEffect_table)) out[["totalSnpEffect"]] <- totalSnpEffect_table
|
|
254
|
-
return(out)
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
# logistic regression
|
|
258
|
-
logisticRegression <- function(formula, dat) {
|
|
259
|
-
res <- glm(formula$formula, family = binomial(link='logit'), data = dat)
|
|
260
|
-
sampleSize <- res$df.residual + length(res$coefficients[!is.na(res$coefficients)])
|
|
261
|
-
res_summ <- summary(res)
|
|
262
|
-
|
|
263
|
-
# residuals table
|
|
264
|
-
residuals_table <- list("header" = c("Minimum","1st quartile","Median","3rd quartile","Maximum"), "rows" = unname(round(fivenum(res_summ$deviance.resid),3)))
|
|
265
|
-
|
|
266
|
-
# coefficients table
|
|
267
|
-
coefficients_table <- build_coef_table(res_summ)
|
|
268
|
-
colnames(coefficients_table)[1] <- "Log odds"
|
|
269
|
-
# compute odds ratio
|
|
270
|
-
coefficients_table <- cbind("Odds ratio" = exp(coef(res)), coefficients_table)
|
|
271
|
-
# compute confidence intervals of odds ratios
|
|
272
|
-
ci <- tryCatch(
|
|
273
|
-
exp(suppressMessages(confint(res))),
|
|
274
|
-
error = function(err) {
|
|
275
|
-
# computation of confidence intervals might fail
|
|
276
|
-
# because of small sample sizes of coefficients.
|
|
277
|
-
# use custom confidence interval computation instead
|
|
278
|
-
low <- coefficients_table[,"Log odds"] - (1.96 * coefficients_table[,"Std. Error"])
|
|
279
|
-
up <- coefficients_table[,"Log odds"] + (1.96 * coefficients_table[,"Std. Error"])
|
|
280
|
-
ci <- cbind(low,up)
|
|
281
|
-
row.names(ci) <- row.names(coefficients_table)
|
|
282
|
-
ci <- exp(ci)
|
|
283
|
-
return(ci)
|
|
284
|
-
}
|
|
285
|
-
)
|
|
286
|
-
colnames(ci) <- c("95% CI (low)","95% CI (high)")
|
|
287
|
-
coefficients_table <- cbind(coefficients_table, ci)
|
|
288
|
-
coefficients_table <- coefficients_table[,c(1,6,7,2,3,4,5)]
|
|
289
|
-
|
|
290
|
-
# type III statistics table
|
|
291
|
-
type3_table <- as.matrix(drop1(res, scope = ~., test = "LRT"))
|
|
292
|
-
# if there are interactions, then set the results
|
|
293
|
-
# of the main effects to "NA" because these type III
|
|
294
|
-
# stats cannot be accurately estimated
|
|
295
|
-
ints <- grep(":", row.names(type3_table), value = T, fixed = T)
|
|
296
|
-
intsIds <- unique(unlist(strsplit(ints, ":")))
|
|
297
|
-
type3_table[intsIds,] <- NA
|
|
298
|
-
# round values
|
|
299
|
-
type3_table[,c("Deviance","AIC")] <- round(type3_table[,c("Deviance","AIC")], 1)
|
|
300
|
-
type3_table[,"LRT"] <- round(type3_table[,"LRT"], 3)
|
|
301
|
-
type3_table[,"Pr(>Chi)"] <- signif(type3_table[,"Pr(>Chi)"], 4)
|
|
302
|
-
|
|
303
|
-
# total SNP effect table
|
|
304
|
-
# if a snplocus snp has an interaction, then compute
|
|
305
|
-
# the total effect of the snp on the model
|
|
306
|
-
# by determining the combined effect of removing the
|
|
307
|
-
# snp and all of its interactions from the model
|
|
308
|
-
totalSnpEffect_table <- NULL
|
|
309
|
-
if (formula$id != "" && length(ints) > 0 && any(grepl(formula$id, ints, fixed = T))) {
|
|
310
|
-
snp_vars <- grep(formula$id, row.names(type3_table), value = T, fixed = T)
|
|
311
|
-
formula_reduce <- update(formula$formula, paste0("~.",paste0("-", snp_vars, collapse = "")))
|
|
312
|
-
res_reduce <- glm(formula_reduce, family = binomial(link='logit'), data = dat)
|
|
313
|
-
totalSnpEffect_table <- as.matrix(lrtest(res, res_reduce))[2, 3:5, drop = F]
|
|
314
|
-
row.names(totalSnpEffect_table) <- "Total"
|
|
315
|
-
totalSnpEffect_table[,"Df"] <- totalSnpEffect_table[,"Df"] * -1
|
|
316
|
-
totalSnpEffect_table[,"Chisq"] <- round(totalSnpEffect_table[,"Chisq"], 3)
|
|
317
|
-
totalSnpEffect_table[,"Pr(>Chisq)"] <- signif(totalSnpEffect_table[,"Pr(>Chisq)"], 4)
|
|
318
|
-
totalSnpEffect_table <- cbind("Variable" = row.names(totalSnpEffect_table), totalSnpEffect_table, "VariableIDs" = paste(snp_vars, collapse = ";"))
|
|
319
|
-
totalSnpEffect_table <- list("header" = colnames(totalSnpEffect_table), "rows" = totalSnpEffect_table)
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
# other summary stats table
|
|
323
|
-
other_table <- list(
|
|
324
|
-
"header" = c("Dispersion parameter", "Null deviance", "Null deviance degrees of freedom", "Residual deviance", "Residual deviance degrees of freedom", "AIC"),
|
|
325
|
-
"rows" = round(c(res_summ$dispersion, res_summ$null.deviance, res_summ$df.null, res_summ$deviance, res_summ$df.residual, res_summ$aic), 1)
|
|
326
|
-
)
|
|
327
|
-
|
|
328
|
-
# export the results tables
|
|
329
|
-
out <- list("res" = res, "sampleSize" = unbox(sampleSize), "residuals" = residuals_table, "coefficients" = coefficients_table, "type3" = type3_table, "other" = other_table)
|
|
330
|
-
if (!is.null(totalSnpEffect_table)) out[["totalSnpEffect"]] <- totalSnpEffect_table
|
|
331
|
-
return(out)
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
# cox regression
|
|
335
|
-
coxRegression <- function(formula, dat) {
|
|
336
|
-
res <- coxph(formula$formula, data = dat, model = T)
|
|
337
|
-
|
|
338
|
-
# extract summary object
|
|
339
|
-
res_summ <- summary(res)
|
|
340
|
-
|
|
341
|
-
# numbers of samples and events
|
|
342
|
-
sampleSize <- res_summ$n
|
|
343
|
-
eventCnt <- res_summ$nevent
|
|
344
|
-
|
|
345
|
-
# coefficients table
|
|
346
|
-
coefficients_table <- res_summ$coefficients
|
|
347
|
-
# add in confidence intervals
|
|
348
|
-
confint_table <- res_summ$conf.int
|
|
349
|
-
coefficients_table <- cbind(coefficients_table, confint_table[, c("lower .95", "upper .95"), drop = F])
|
|
350
|
-
# rename and reorder columns
|
|
351
|
-
colnames(coefficients_table)[colnames(coefficients_table) == "coef"] <- "Beta"
|
|
352
|
-
colnames(coefficients_table)[colnames(coefficients_table) == "exp(coef)"] <- "HR"
|
|
353
|
-
colnames(coefficients_table)[colnames(coefficients_table) == "se(coef)"] <- "Std. Error"
|
|
354
|
-
colnames(coefficients_table)[colnames(coefficients_table) == "lower .95"] <- "95% CI (low)"
|
|
355
|
-
colnames(coefficients_table)[colnames(coefficients_table) == "upper .95"] <- "95% CI (high)"
|
|
356
|
-
coefficients_table <- coefficients_table[, c(2,6,7,1,3,4,5), drop = F]
|
|
357
|
-
|
|
358
|
-
# type III statistics table
|
|
359
|
-
# do not use drop1() to compute type III stats for cox regression
|
|
360
|
-
# because it will use the drop1.default() method and this will
|
|
361
|
-
# error out if samples are filtered from the data table
|
|
362
|
-
# TODO: may replace with car::Anova() when car package becomes available on servers
|
|
363
|
-
vlst <- attr(res$terms, "term.labels", exact = T)
|
|
364
|
-
type3_table <- matrix(data = NA, nrow = length(vlst), ncol = 5)
|
|
365
|
-
row.names(type3_table) <- vlst
|
|
366
|
-
for (i in 1:length(vlst)) {
|
|
367
|
-
v <- vlst[i]
|
|
368
|
-
formula_reduce <- update(formula$formula, paste0("~.-", v))
|
|
369
|
-
res_reduce <- coxph(formula_reduce, data = dat, model = T)
|
|
370
|
-
lt <- as.matrix(lrtest(res, res_reduce))
|
|
371
|
-
type3_table[i,] <- lt[2,]
|
|
372
|
-
}
|
|
373
|
-
colnames(type3_table) <- colnames(lt)
|
|
374
|
-
type3_table <- type3_table[,3:5,drop = F]
|
|
375
|
-
type3_table[,"Df"] <- type3_table[,"Df"] * -1 #convert Df diff to Df
|
|
376
|
-
# if there are interactions, then set the results
|
|
377
|
-
# of the main effects to "NA" because these type III
|
|
378
|
-
# stats cannot be accurately estimated
|
|
379
|
-
ints <- grep(":", row.names(type3_table), value = T, fixed = T)
|
|
380
|
-
intsIds <- unique(unlist(strsplit(ints, ":")))
|
|
381
|
-
type3_table[intsIds,] <- NA
|
|
382
|
-
# round values
|
|
383
|
-
type3_table[,"Chisq"] <- round(type3_table[,"Chisq"], 3)
|
|
384
|
-
type3_table[,"Pr(>Chisq)"] <- signif(type3_table[,"Pr(>Chisq)"], 4)
|
|
385
|
-
|
|
386
|
-
# total SNP effect table
|
|
387
|
-
# if a snplocus snp has an interaction, then compute
|
|
388
|
-
# the total effect of the snp on the model
|
|
389
|
-
# by determining the combined effect of removing the
|
|
390
|
-
# snp and all of its interactions from the model
|
|
391
|
-
totalSnpEffect_table <- NULL
|
|
392
|
-
if (formula$id != "" && length(ints) > 0 && any(grepl(formula$id, ints, fixed = T))) {
|
|
393
|
-
snp_vars <- grep(formula$id, row.names(type3_table), value = T, fixed = T)
|
|
394
|
-
formula_reduce <- update(formula$formula, paste0("~.",paste0("-", snp_vars, collapse = "")))
|
|
395
|
-
res_reduce <- coxph(formula_reduce, data = dat, model = T)
|
|
396
|
-
totalSnpEffect_table <- as.matrix(lrtest(res, res_reduce))[2, 3:5, drop = F]
|
|
397
|
-
row.names(totalSnpEffect_table) <- "Total"
|
|
398
|
-
totalSnpEffect_table[,"Df"] <- totalSnpEffect_table[,"Df"] * -1
|
|
399
|
-
totalSnpEffect_table[,"Chisq"] <- round(totalSnpEffect_table[,"Chisq"], 3)
|
|
400
|
-
totalSnpEffect_table[,"Pr(>Chisq)"] <- signif(totalSnpEffect_table[,"Pr(>Chisq)"], 4)
|
|
401
|
-
totalSnpEffect_table <- cbind("Variable" = row.names(totalSnpEffect_table), totalSnpEffect_table, "VariableIDs" = paste(snp_vars, collapse = ";"))
|
|
402
|
-
totalSnpEffect_table <- list("header" = colnames(totalSnpEffect_table), "rows" = totalSnpEffect_table)
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
# statistical tests table
|
|
406
|
-
tests_table <- rbind(res_summ$logtest, res_summ$waldtest, res_summ$sctest)
|
|
407
|
-
colnames(tests_table) <- c("Test statistic", "Df", "P-value")
|
|
408
|
-
tests_table[,"Test statistic"] <- round(tests_table[,"Test statistic"], 2)
|
|
409
|
-
tests_table[,"Df"] <- round(tests_table[,"Df"], 0)
|
|
410
|
-
tests_table[,"P-value"] <- signif(tests_table[,"P-value"], 4)
|
|
411
|
-
tests_table <- cbind("Test" = c("Likelihood ratio test", "Wald test", "Score (log rank) test"), tests_table)
|
|
412
|
-
tests_table <- list("header" = colnames(tests_table), "rows" = tests_table)
|
|
413
|
-
|
|
414
|
-
# other summary stats table
|
|
415
|
-
other_table <- list(
|
|
416
|
-
"header" = c("Concordance", "Concordance standard error"),
|
|
417
|
-
"rows" = c(unname(round(res_summ$concordance["C"], 3)), unname(round(res_summ$concordance["se(C)"], 3)))
|
|
418
|
-
)
|
|
419
|
-
|
|
420
|
-
# export the results tables
|
|
421
|
-
out <- list("res" = res, "sampleSize" = unbox(sampleSize), "eventCnt" = unbox(eventCnt),"coefficients" = coefficients_table, "type3" = type3_table, "tests" = tests_table, "other" = other_table)
|
|
422
|
-
if (!is.null(totalSnpEffect_table)) out[["totalSnpEffect"]] <- totalSnpEffect_table
|
|
423
|
-
return(out)
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
# run regression analysis
|
|
427
|
-
runRegression <- function(formula, regtype, dat, outcome, cachedir) {
|
|
428
|
-
# remove samples with NA values in any variable in the formula
|
|
429
|
-
# NOTE: even though regression functions (e.g. lm, glm, etc.)
|
|
430
|
-
# perform this filtration by default, this filtration
|
|
431
|
-
# should be done before any regression analysis because
|
|
432
|
-
# multiple regression models might need to be compared to one
|
|
433
|
-
# another (e.g. computation of total snp effect) and there might
|
|
434
|
-
# be samples that are NA for variables in one model but
|
|
435
|
-
# not in another model.
|
|
436
|
-
fdat <- dat[complete.cases(dat[,all.vars(formula$formula)]),]
|
|
437
|
-
warns <- vector(mode = "character")
|
|
438
|
-
handleWarns <- function(w) {
|
|
439
|
-
# handler for warning messages
|
|
440
|
-
warns <<- c(warns, conditionMessage(w))
|
|
441
|
-
invokeRestart("muffleWarning")
|
|
442
|
-
}
|
|
443
|
-
if (regtype == "linear") {
|
|
444
|
-
results <- withCallingHandlers(linearRegression(formula, fdat), warning = handleWarns)
|
|
445
|
-
} else if (regtype == "logistic") {
|
|
446
|
-
results <- withCallingHandlers(logisticRegression(formula, fdat), warning = handleWarns)
|
|
447
|
-
} else if (regtype == "cox") {
|
|
448
|
-
results <- withCallingHandlers(coxRegression(formula, fdat), warning = handleWarns)
|
|
449
|
-
} else {
|
|
450
|
-
stop("unknown regression type")
|
|
451
|
-
}
|
|
452
|
-
if ("splineVariables" %in% names(formula)) {
|
|
453
|
-
# spline variables present
|
|
454
|
-
# plot cubic spline for each spline variable (if applicable)
|
|
455
|
-
results$splinePlotFiles <- vector(mode = "character")
|
|
456
|
-
splineVariables <- formula$splineVariables
|
|
457
|
-
for (r in 1:nrow(splineVariables)) {
|
|
458
|
-
splineVariable <- splineVariables[r,]
|
|
459
|
-
if (isTRUE(splineVariable$spline$plot)) {
|
|
460
|
-
spline_plot_file <- plot_spline(splineVariable, fdat, outcome, results$res, regtype, formula$type, cachedir)
|
|
461
|
-
results$splinePlotFiles <- c(results$splinePlotFiles, spline_plot_file)
|
|
462
|
-
}
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
if (length(warns) > 0) results[["warnings"]] <- warns
|
|
466
|
-
results$coefficients <- formatCoefficients(results$coefficients, results$res, input$regressionType, fdat)
|
|
467
|
-
results$type3 <- formatType3(results$type3)
|
|
468
|
-
out <- list("id" = unbox(formula$id), "data" = results[names(results) != "res"])
|
|
469
|
-
if (!is.null(formula$type)) out$type <- unbox(formula$type)
|
|
470
|
-
return(out)
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
# generate cubic spline plot
|
|
474
|
-
plot_spline <- function(splineVariable, dat, outcome, res, regtype, formulatype, cachedir) {
|
|
475
|
-
# prepare test data table for predicting model outcome values
|
|
476
|
-
# columns are all independent variables
|
|
477
|
-
# for the spline variable, use regularly spaced data; for continuous variables, use data median; for categorical variables, use reference category
|
|
478
|
-
if (regtype == "cox") {
|
|
479
|
-
outcomeIds <- c(outcome$timeToEvent$timeId, outcome$timeToEvent$eventId)
|
|
480
|
-
} else {
|
|
481
|
-
outcomeIds <- outcome$id
|
|
482
|
-
}
|
|
483
|
-
independentIds <- colnames(dat)[!colnames(dat) %in% outcomeIds]
|
|
484
|
-
sampleSize <- ifelse(nrow(dat) > 1000, 1000, nrow(dat))
|
|
485
|
-
newdat <- dat[1:sampleSize, independentIds, drop = F]
|
|
486
|
-
|
|
487
|
-
# prepare test data table for adjusting model outcome values
|
|
488
|
-
# columns are the proportions and effect sizes of non-reference categorical coefficients
|
|
489
|
-
newdat2 <- matrix(data = NA, nrow = 0, ncol = 2, dimnames = list(c(),c("prop", "effectSize")))
|
|
490
|
-
|
|
491
|
-
# populate the test data tables
|
|
492
|
-
for (term in colnames(newdat)) {
|
|
493
|
-
if (term == splineVariable$id) {
|
|
494
|
-
newdat[,term] <- seq(from = min(dat[,term]), to = max(dat[,term]), length.out = sampleSize)
|
|
495
|
-
} else if (is.factor(dat[,term])) {
|
|
496
|
-
ref <- levels(dat[,term])[1]
|
|
497
|
-
newdat[,term] <- ref
|
|
498
|
-
props <- table(dat[,term], exclude = ref)/length(dat[,term])
|
|
499
|
-
for (category in names(props)) {
|
|
500
|
-
newdat2 <- rbind(newdat2, c(props[category], res$coefficients[paste0(term,category)]))
|
|
501
|
-
}
|
|
502
|
-
} else {
|
|
503
|
-
newdat[,term] <- median(dat[,term])
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
# test model
|
|
508
|
-
# predict outcome values of the model using the test data
|
|
509
|
-
preddat <- predict(res, newdata = newdat, se.fit = T)
|
|
510
|
-
# compute 95% confidence intervals
|
|
511
|
-
ci_upr <- preddat$fit + (1.96 * preddat$se.fit)
|
|
512
|
-
ci_lwr <- preddat$fit - (1.96 * preddat$se.fit)
|
|
513
|
-
preddat_ci <- cbind("fit" = preddat$fit, "lwr" = ci_lwr, "upr" = ci_upr)
|
|
514
|
-
# adjust the predicted values
|
|
515
|
-
# adjusted predicted values = predicted values + ((prop category A * coef category A) + (prop category B * coef category B) + etc.)
|
|
516
|
-
preddat_ci_adj <- preddat_ci + sum(apply(newdat2, 1, prod), na.rm = T)
|
|
517
|
-
|
|
518
|
-
# plot data
|
|
519
|
-
plotfile <- paste0(cachedir, "splinePlot_", ifelse(is.null(formulatype), "", paste0(formulatype, "_")), createRandString(), ".svg")
|
|
520
|
-
svg(filename = plotfile, width = 6.7, height = ifelse(is.null(formulatype),5.25,5.35), pointsize = 20)
|
|
521
|
-
par(mar = c(2, 2, ifelse(is.null(formulatype),0.7,1), 5) + 0.1, mgp = c(1, 1, 0))
|
|
522
|
-
if (regtype == "linear" | regtype == "logistic") {
|
|
523
|
-
if (regtype == "linear") {
|
|
524
|
-
# for linear, plot predicted values
|
|
525
|
-
pointtype <- 16
|
|
526
|
-
pointsize <- 0.3
|
|
527
|
-
pointalpha <- 0.8
|
|
528
|
-
ylab <- outcome$name
|
|
529
|
-
} else {
|
|
530
|
-
# for logistic, plot predicted probabilities
|
|
531
|
-
preddat_ci_adj <- 1/(1+exp(-preddat_ci_adj))
|
|
532
|
-
pointtype <- 124
|
|
533
|
-
pointsize <- 0.7
|
|
534
|
-
pointalpha <- 0.5
|
|
535
|
-
ylab <- paste0("Pr(", outcome$name, " ", outcome$categories$nonref, ")")
|
|
536
|
-
}
|
|
537
|
-
# use only finite predicted data
|
|
538
|
-
toKeep <- rowSums(!is.finite(preddat_ci_adj)) == 0
|
|
539
|
-
preddat_ci_adj <- preddat_ci_adj[toKeep,]
|
|
540
|
-
newdat <- newdat[toKeep,,drop = F]
|
|
541
|
-
# first plot actual (not predicted) data
|
|
542
|
-
# predicted data will be overlayed later
|
|
543
|
-
plot(dat[,splineVariable$id],
|
|
544
|
-
dat[,outcome$id],
|
|
545
|
-
ann = F,
|
|
546
|
-
xaxt = "n",
|
|
547
|
-
yaxt = "n",
|
|
548
|
-
type = "n"
|
|
549
|
-
)
|
|
550
|
-
points(dat[,splineVariable$id],
|
|
551
|
-
dat[,outcome$id],
|
|
552
|
-
pch = pointtype,
|
|
553
|
-
cex = pointsize,
|
|
554
|
-
col = adjustcolor("#ce768e", alpha.f = pointalpha)
|
|
555
|
-
)
|
|
556
|
-
} else if (regtype == "cox") {
|
|
557
|
-
# for cox, plot hazard ratios
|
|
558
|
-
preddat_ci_adj <- exp(preddat_ci_adj)
|
|
559
|
-
# use only finite predicted data
|
|
560
|
-
toKeep <- rowSums(!is.finite(preddat_ci_adj)) == 0
|
|
561
|
-
preddat_ci_adj <- preddat_ci_adj[toKeep,]
|
|
562
|
-
newdat <- newdat[toKeep,,drop = F]
|
|
563
|
-
ylab <- "Hazard Ratio"
|
|
564
|
-
# plot only predicted data
|
|
565
|
-
# do not also plot actual data (like for linear/logistic) because cox outcome data is time-to-event and will not be comparable to the predicted hazard ratios
|
|
566
|
-
plot(newdat[,splineVariable$id],
|
|
567
|
-
preddat_ci_adj[,"fit"],
|
|
568
|
-
ylim = c(min(preddat_ci_adj[,"lwr"]), max(preddat_ci_adj[,"upr"])),
|
|
569
|
-
ann = F,
|
|
570
|
-
xaxt = "n",
|
|
571
|
-
yaxt = "n",
|
|
572
|
-
type = "n"
|
|
573
|
-
)
|
|
574
|
-
} else {
|
|
575
|
-
stop("unrecognized regression type")
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
# axes
|
|
579
|
-
axis(1, cex.axis = 0.5, mgp = c(0, 0.2, 0))
|
|
580
|
-
axis(2, cex.axis = 0.5, mgp = c(0, 0.5, 0))
|
|
581
|
-
|
|
582
|
-
# titles
|
|
583
|
-
title(xlab = splineVariable$name, line = 1, cex.lab = 0.5)
|
|
584
|
-
title(ylab = ylab, line = 1.5, cex.lab = 0.5)
|
|
585
|
-
if (!is.null(formulatype)) {
|
|
586
|
-
if (formulatype == "univariate") plotTitle <- "Univariate"
|
|
587
|
-
else if (formulatype == "multivariate") plotTitle <- "Multivariable-adjusted"
|
|
588
|
-
else stop("unexpected formula type")
|
|
589
|
-
title(main = plotTitle, cex.main = 0.5, line = 0.45)
|
|
590
|
-
}
|
|
591
|
-
|
|
592
|
-
# knots
|
|
593
|
-
abline(v = splineVariable$spline$knots[[1]],
|
|
594
|
-
col = "grey60",
|
|
595
|
-
lty = 2,
|
|
596
|
-
lwd = 1,
|
|
597
|
-
xpd = F
|
|
598
|
-
)
|
|
599
|
-
|
|
600
|
-
# confidence intervals of regression line
|
|
601
|
-
polygon(x = c(newdat[,splineVariable$id], rev(newdat[,splineVariable$id])),
|
|
602
|
-
y = c(preddat_ci_adj[,"lwr"], rev(preddat_ci_adj[,"upr"])),
|
|
603
|
-
col = adjustcolor("grey", alpha.f = 0.8),
|
|
604
|
-
border = NA
|
|
605
|
-
)
|
|
606
|
-
|
|
607
|
-
# regression line
|
|
608
|
-
lines(newdat[,splineVariable$id],
|
|
609
|
-
preddat_ci_adj[,"fit"],
|
|
610
|
-
col = "blue",
|
|
611
|
-
lwd = 1.7
|
|
612
|
-
)
|
|
613
|
-
|
|
614
|
-
# legend for lines
|
|
615
|
-
legend("topright",
|
|
616
|
-
inset = c(-0.39, 0),
|
|
617
|
-
cex = 0.5,
|
|
618
|
-
legend = c("knots", "cubic spline fit", "95% CI"),
|
|
619
|
-
text.col = "white",
|
|
620
|
-
lty = c(2, 1, NA),
|
|
621
|
-
col = c("grey60", "blue", NA),
|
|
622
|
-
xpd = TRUE
|
|
623
|
-
)
|
|
624
|
-
|
|
625
|
-
# legend for ci
|
|
626
|
-
legend("topright",
|
|
627
|
-
inset = c(-0.39, 0),
|
|
628
|
-
cex = 0.5,
|
|
629
|
-
legend = c("knots", "cubic spline fit", "95% CI"),
|
|
630
|
-
text.col = "black",
|
|
631
|
-
bty = "n",
|
|
632
|
-
fill = c(NA, NA, adjustcolor("grey", alpha.f = 0.8)),
|
|
633
|
-
border = c(NA, NA, NA),
|
|
634
|
-
xpd = TRUE
|
|
635
|
-
)
|
|
636
|
-
dev.off()
|
|
637
|
-
return(plotfile)
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
createRandString <- function() {
|
|
641
|
-
digits = 0:9
|
|
642
|
-
v = c(sample(LETTERS, 4, replace = TRUE),
|
|
643
|
-
sample(digits, 4, replace = TRUE),
|
|
644
|
-
sample(LETTERS, 1, replace = TRUE))
|
|
645
|
-
return(paste0(v, collapse = ""))
|
|
646
|
-
}
|
|
647
|
-
|
|
648
|
-
# build coefficients table
|
|
649
|
-
build_coef_table <- function(res_summ) {
|
|
650
|
-
coefficients_table <- res_summ$coefficients
|
|
651
|
-
if (any(aliased <- res_summ$aliased)) {
|
|
652
|
-
# to keep coefficients with "NA" estimates in the table
|
|
653
|
-
cn <- names(aliased)
|
|
654
|
-
coefficients_table <- matrix(NA, length(aliased), 4, dimnames = list(cn, colnames(coefficients_table)))
|
|
655
|
-
coefficients_table[!aliased, ] <- res_summ$coefficients
|
|
656
|
-
}
|
|
657
|
-
return(coefficients_table)
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
# reformat the coefficients table
|
|
661
|
-
formatCoefficients <- function(coefficients_table, res, regtype, dat) {
|
|
662
|
-
# round all columns to 4 significant digits
|
|
663
|
-
coefficients_table <- signif(coefficients_table, 4)
|
|
664
|
-
# add variable and category columns
|
|
665
|
-
if (regtype == "cox") {
|
|
666
|
-
vCol <- vector(mode = "character")
|
|
667
|
-
cCol <- vector(mode = "character")
|
|
668
|
-
} else {
|
|
669
|
-
vCol <- c("Intercept")
|
|
670
|
-
cCol <- c("")
|
|
671
|
-
}
|
|
672
|
-
vlst <- attr(res$terms, "term.labels", exact = T)
|
|
673
|
-
for (v in vlst) {
|
|
674
|
-
if (grepl(":", v, fixed = T)) {
|
|
675
|
-
# interacting variables
|
|
676
|
-
v1 <- strsplit(v, split = ":", fixed = T)[[1]][1]
|
|
677
|
-
v2 <- strsplit(v, split = ":", fixed = T)[[1]][2]
|
|
678
|
-
clst1 <- ""
|
|
679
|
-
clst2 <- ""
|
|
680
|
-
if (v1 %in% names(res$xlevels)) {
|
|
681
|
-
clst1 <- res$xlevels[[v1]][-1] # extract categories (without reference category)
|
|
682
|
-
}
|
|
683
|
-
if (v2 %in% names(res$xlevels)) {
|
|
684
|
-
clst2 <- res$xlevels[[v2]][-1] # extract categories (without reference category)
|
|
685
|
-
}
|
|
686
|
-
for (c2 in clst2) {
|
|
687
|
-
for (c1 in clst1) {
|
|
688
|
-
cCol <- c(cCol, paste(c1, c2, sep = ":"))
|
|
689
|
-
vCol <- c(vCol, v)
|
|
690
|
-
}
|
|
691
|
-
}
|
|
692
|
-
} else {
|
|
693
|
-
# single variable
|
|
694
|
-
if (grepl("^cubic_spline", v)) {
|
|
695
|
-
# spline variable
|
|
696
|
-
vid <- sub("cubic_spline\\(", "", sub(", c\\(.*", "", v))
|
|
697
|
-
clst <- paste("spline function", 1:ncol(res$model[,v])) # determine the number of spline functions
|
|
698
|
-
for (c in clst) {
|
|
699
|
-
vCol <- c(vCol, vid)
|
|
700
|
-
cCol <- c(cCol, c)
|
|
701
|
-
}
|
|
702
|
-
} else if (v %in% names(res$xlevels)) {
|
|
703
|
-
# categorical variable
|
|
704
|
-
clst <- res$xlevels[[v]][-1] # extract non-ref categories
|
|
705
|
-
for (c in clst) {
|
|
706
|
-
vCol <- c(vCol, v)
|
|
707
|
-
cCol <- c(cCol, c)
|
|
708
|
-
}
|
|
709
|
-
} else {
|
|
710
|
-
# continuous variable
|
|
711
|
-
vCol <- c(vCol, v)
|
|
712
|
-
cCol <- c(cCol, "")
|
|
713
|
-
}
|
|
714
|
-
}
|
|
715
|
-
}
|
|
716
|
-
|
|
717
|
-
coefficients_table <- cbind("Variable" = vCol, "Category" = cCol, coefficients_table)
|
|
718
|
-
|
|
719
|
-
# extract columns of interest
|
|
720
|
-
if (regtype == "linear") {
|
|
721
|
-
coefficients_table <- coefficients_table[, c("Variable", "Category", "Beta", "95% CI (low)", "95% CI (high)", "Pr(>|t|)"), drop = F]
|
|
722
|
-
} else if (regtype == "logistic") {
|
|
723
|
-
coefficients_table <- coefficients_table[, c("Variable", "Category", "Odds ratio", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
724
|
-
} else if (regtype == "cox") {
|
|
725
|
-
# cox regression
|
|
726
|
-
# report sample size and event counts of coefficients
|
|
727
|
-
sCol <- vector(mode = "character")
|
|
728
|
-
eCol <- vector(mode = "character")
|
|
729
|
-
for (i in 1:length(vCol)) {
|
|
730
|
-
v <- vCol[i]
|
|
731
|
-
c <- cCol[i]
|
|
732
|
-
if (v %in% names(res$xlevels)) {
|
|
733
|
-
# categorical variable
|
|
734
|
-
# determine sample size and event count of both ref and non-ref categories
|
|
735
|
-
# values will be stored in separate columns in the format "ref/nonref"
|
|
736
|
-
ref <- res$xlevels[[v]][1]
|
|
737
|
-
m <- table(dat[,"outcome_event"], dat[,v])
|
|
738
|
-
samplesize_ref <- sum(m[,ref])
|
|
739
|
-
samplesize_c <- sum(m[,c])
|
|
740
|
-
sCol <- c(sCol, paste(samplesize_ref, samplesize_c, sep = "/"))
|
|
741
|
-
eventcnt_ref <- m["1",ref]
|
|
742
|
-
eventcnt_c <- m["1",c]
|
|
743
|
-
eCol <- c(eCol, paste(eventcnt_ref, eventcnt_c, sep = "/"))
|
|
744
|
-
} else {
|
|
745
|
-
# continuous or spline variable
|
|
746
|
-
# set sample size and event count to NA
|
|
747
|
-
sCol <- c(sCol, NA)
|
|
748
|
-
eCol <- c(eCol, NA)
|
|
749
|
-
}
|
|
750
|
-
}
|
|
751
|
-
coefficients_table <- cbind(coefficients_table, "Sample Size (ref/non-ref)" = sCol, "Events (ref/non-ref)" = eCol)
|
|
752
|
-
coefficients_table <- coefficients_table[, c("Variable", "Category", "Sample Size (ref/non-ref)", "Events (ref/non-ref)", "HR", "95% CI (low)", "95% CI (high)", "Pr(>|z|)"), drop = F]
|
|
753
|
-
} else {
|
|
754
|
-
stop("regression type is not recognized")
|
|
755
|
-
}
|
|
756
|
-
|
|
757
|
-
colnames(coefficients_table)[ncol(coefficients_table)] <- "P" # p-value column
|
|
758
|
-
coefficients_table <- list("header" = colnames(coefficients_table), "rows" = coefficients_table)
|
|
759
|
-
return(coefficients_table)
|
|
760
|
-
}
|
|
761
|
-
|
|
762
|
-
# reformat the type III statistics table
|
|
763
|
-
formatType3 <- function(type3_table) {
|
|
764
|
-
# add a variable column
|
|
765
|
-
type3_table <- cbind("Variable" = sub("cubic_spline\\(", "", sub(", c\\(.*", "", row.names(type3_table))), type3_table)
|
|
766
|
-
type3_table <- list("header" = colnames(type3_table), "rows" = type3_table)
|
|
767
|
-
return(type3_table)
|
|
768
|
-
}
|
|
769
|
-
|
|
770
|
-
# parse results from univariate and multivariate analyses
|
|
771
|
-
parseUniMultiResults <- function(reg_results, regtype) {
|
|
772
|
-
multiCoefficients <- NULL
|
|
773
|
-
uniCoefficients <- NULL
|
|
774
|
-
splinePlotFiles <- NULL
|
|
775
|
-
for (res in reg_results) {
|
|
776
|
-
# parse coefficients
|
|
777
|
-
coefs <- res$data$coefficients$rows
|
|
778
|
-
# remove intercept row because cannot merge together intercepts
|
|
779
|
-
# from different univariate analyses
|
|
780
|
-
coefs <- coefs[row.names(coefs) != "(Intercept)", ,drop = F]
|
|
781
|
-
if (res$type == "multivariate") {
|
|
782
|
-
multiCoefficients <- coefs
|
|
783
|
-
} else if (res$type == "univariate") {
|
|
784
|
-
if (is.null(uniCoefficients)) {
|
|
785
|
-
uniCoefficients <- coefs
|
|
786
|
-
} else {
|
|
787
|
-
uniCoefficients <- rbind(uniCoefficients, coefs)
|
|
788
|
-
}
|
|
789
|
-
} else {
|
|
790
|
-
stop ("results type not recognized")
|
|
791
|
-
}
|
|
792
|
-
# parse spline plots
|
|
793
|
-
splinePlotFiles <- c(splinePlotFiles, res$data$splinePlotFiles)
|
|
794
|
-
}
|
|
795
|
-
# prepare separate univariate and multivariate coefficients tables
|
|
796
|
-
uniCoefficients_table <- list("header" = colnames(uniCoefficients), "rows" = uniCoefficients)
|
|
797
|
-
multiCoefficients_table <- list("header" = colnames(multiCoefficients), "rows" = multiCoefficients)
|
|
798
|
-
# return parsed results containing the separate coefficients tables
|
|
799
|
-
reg_results_parsed <- list()
|
|
800
|
-
reg_results_parsed[[1]] <- list("id" = res$id, "data" = list("sampleSize" = res$data$sampleSize, "coefficients_uni" = uniCoefficients_table, "coefficients_multi" = multiCoefficients_table))
|
|
801
|
-
if (regtype == "cox") reg_results_parsed[[1]]$data$eventCnt = res$data$eventCnt
|
|
802
|
-
if (!is.null(splinePlotFiles)) reg_results_parsed[[1]]$data$splinePlotFiles = splinePlotFiles
|
|
803
|
-
return(reg_results_parsed)
|
|
804
|
-
}
|