@sjcrh/proteinpaint-server 2.122.0 → 2.123.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +1 -1
- package/package.json +4 -6
- package/routes/burden.js +3 -4
- package/routes/correlationVolcano.js +2 -4
- package/routes/samplewsimages.js +9 -0
- package/routes/termdb.DE.js +2 -4
- package/routes/termdb.cluster.js +2 -4
- package/routes/termdb.config.js +2 -3
- package/routes/termdb.rootterm.js +3 -3
- package/routes/termdb.termchildren.js +3 -3
- package/routes/termdb.violin.js +2 -5
- package/src/app.js +563 -496
- package/src/run_R.js +0 -66
- package/utils/binom.R +0 -17
- package/utils/burden-ci95.R +0 -134
- package/utils/burden-main.R +0 -46
- package/utils/corr.R +0 -38
- package/utils/cuminc.R +0 -279
- package/utils/density.R +0 -36
- package/utils/edge.R +0 -283
- package/utils/fdr.R +0 -9
- package/utils/fisher.2x3.R +0 -12
- package/utils/fisher.R +0 -9
- package/utils/getBurden.R +0 -371
- package/utils/getGeneFromMatrix.R +0 -40
- package/utils/hclust.R +0 -110
- package/utils/km.R +0 -13
- package/utils/lowess.R +0 -9
- package/utils/regression.R +0 -154
- package/utils/regression.utils.R +0 -804
- package/utils/survival.R +0 -92
- package/utils/wilcoxon.R +0 -73
package/src/run_R.js
DELETED
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
Module for running R
|
|
3
|
-
|
|
4
|
-
Arguments:
|
|
5
|
-
- <path>: [string] path to R script.
|
|
6
|
-
- <data>: [string] input data for R script.
|
|
7
|
-
- <args>: [array] arguments for R script.
|
|
8
|
-
|
|
9
|
-
Input data is streamed into the standard input of the R script.
|
|
10
|
-
Standard output of the R script is returned.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import fs from 'fs'
|
|
14
|
-
import serverconfig from './serverconfig.js'
|
|
15
|
-
import { spawn } from 'child_process'
|
|
16
|
-
import { Readable } from 'stream'
|
|
17
|
-
|
|
18
|
-
export default async function run_R(path, data, args) {
|
|
19
|
-
try {
|
|
20
|
-
await fs.promises.stat(path)
|
|
21
|
-
} catch (e) {
|
|
22
|
-
throw `${path} does not exist`
|
|
23
|
-
}
|
|
24
|
-
return new Promise((resolve, reject) => {
|
|
25
|
-
const _stdout = []
|
|
26
|
-
const _stderr = []
|
|
27
|
-
// spawn R child process
|
|
28
|
-
const sp = spawn(serverconfig.Rscript, args ? [path, ...args] : [path])
|
|
29
|
-
if (data) {
|
|
30
|
-
// stream input data into R
|
|
31
|
-
try {
|
|
32
|
-
const input = data.endsWith('\n') ? data : data + '\n' // R expects a final end-of-line marker
|
|
33
|
-
Readable.from(input).pipe(sp.stdin)
|
|
34
|
-
} catch (e) {
|
|
35
|
-
sp.kill()
|
|
36
|
-
let errmsg = e
|
|
37
|
-
const stderr = _stderr.join('').trim()
|
|
38
|
-
if (stderr) errmsg += `\nR stderr: ${stderr}`
|
|
39
|
-
reject(errmsg)
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
// store stdout and stderr from R
|
|
43
|
-
sp.stdout.on('data', data => _stdout.push(data))
|
|
44
|
-
sp.stderr.on('data', data => _stderr.push(data))
|
|
45
|
-
sp.on('error', err => reject(err))
|
|
46
|
-
// return stdout and stderr when R process closes
|
|
47
|
-
sp.on('close', code => {
|
|
48
|
-
const stdout = _stdout.join('').trim()
|
|
49
|
-
const stderr = _stderr.join('').trim()
|
|
50
|
-
if (code !== 0) {
|
|
51
|
-
// handle non-zero exit status
|
|
52
|
-
let errmsg = `R process exited with non-zero status code=${code}`
|
|
53
|
-
if (stdout) errmsg += `\nR stdout: ${stdout}`
|
|
54
|
-
if (stderr) errmsg += `\nR stderr: ${stderr}`
|
|
55
|
-
reject(errmsg)
|
|
56
|
-
}
|
|
57
|
-
if (stderr) {
|
|
58
|
-
// handle R stderr
|
|
59
|
-
const errmsg = `R process emitted standard error\nR stderr: ${stderr}`
|
|
60
|
-
reject(errmsg)
|
|
61
|
-
}
|
|
62
|
-
// return standard out from R
|
|
63
|
-
resolve(stdout)
|
|
64
|
-
})
|
|
65
|
-
})
|
|
66
|
-
}
|
package/utils/binom.R
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
argv <- commandArgs(TRUE)
|
|
2
|
-
|
|
3
|
-
infile <- argv[1]
|
|
4
|
-
outfile <- argv[2]
|
|
5
|
-
|
|
6
|
-
out <- NULL
|
|
7
|
-
dat <- read.table(infile,sep="\t",header=F,quote="")
|
|
8
|
-
|
|
9
|
-
for (i in 1:nrow(dat)) {
|
|
10
|
-
x <- binom.test(dat[i,10],dat[i,10]+dat[i,9],0.5)
|
|
11
|
-
y <- abs(dat[i,10]/(dat[i,10]+dat[i,9]) - 0.5)
|
|
12
|
-
out <- rbind(out,c(x$p.value,y))
|
|
13
|
-
}
|
|
14
|
-
#colnames(out) <- c("pvalue","delta.abs")
|
|
15
|
-
out <- cbind(dat,out)
|
|
16
|
-
|
|
17
|
-
write.table(out,file=outfile,sep="\t",quote=F,row.names=F,col.names=F)
|
package/utils/burden-ci95.R
DELETED
|
@@ -1,134 +0,0 @@
|
|
|
1
|
-
rm(list=ls())
|
|
2
|
-
|
|
3
|
-
suppressPackageStartupMessages({
|
|
4
|
-
library(dplyr) ### Qi changed to load plyr first, due to R message: If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
|
|
5
|
-
library(survival)
|
|
6
|
-
library(jsonlite)
|
|
7
|
-
library(parallel)
|
|
8
|
-
library(doParallel)
|
|
9
|
-
})
|
|
10
|
-
|
|
11
|
-
options(warn=-1)
|
|
12
|
-
|
|
13
|
-
# stream in json input data
|
|
14
|
-
con <- file("stdin", "r")
|
|
15
|
-
json <- readLines(con)
|
|
16
|
-
close(con)
|
|
17
|
-
input <- fromJSON(json)
|
|
18
|
-
# handle input arguments
|
|
19
|
-
args <- commandArgs(trailingOnly = T)
|
|
20
|
-
if (length(args) != 0) stop("Usage: echo <in_json> | Rscript burden.R > <out_json>")
|
|
21
|
-
|
|
22
|
-
# register the parallel backend (used by foreach() for parallelization)
|
|
23
|
-
availCores <- detectCores()
|
|
24
|
-
if (is.na(availCores)) stop("cannot detect number of available cores")
|
|
25
|
-
registerDoParallel(cores = availCores - 1) # use all available cores except one
|
|
26
|
-
|
|
27
|
-
chc_nums <- c(1:32)[-c(2,5,14,20,23,26)] # CHCs. 6 out of 32 CHCs not used.
|
|
28
|
-
|
|
29
|
-
#####################
|
|
30
|
-
# Functions for our method
|
|
31
|
-
# Ref: https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
|
|
32
|
-
#####################
|
|
33
|
-
# setwd("R:/Biostatistics/Biostatistics2/Qi/QiCommon/St Jude/Nature Review/CHCs/App/Rdata")
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
#################################
|
|
37
|
-
# Bootstrapping burden estimate #
|
|
38
|
-
#################################
|
|
39
|
-
|
|
40
|
-
# import get_burden() function
|
|
41
|
-
source(file.path(input$binpath, "utils/getBurden.R"))
|
|
42
|
-
|
|
43
|
-
# compute burden estimate for each bootstrap
|
|
44
|
-
# parallelize across bootstraps (not across chcs)
|
|
45
|
-
bootnums <- 20 # number of bootstraps
|
|
46
|
-
f <- input$datafiles
|
|
47
|
-
sampleData <- file.path(f$dir, f$files$sample) # dataframe with all the X's needed, and X's are updated by input values
|
|
48
|
-
outall <- foreach(i = 1:bootnums, .combine = rbind) %dopar% {
|
|
49
|
-
fitsData <- file.path(f$dir, f$boosubdir, paste0("boot",i), f$files$fit)
|
|
50
|
-
survData <- file.path(f$dir, f$boosubdir, paste0("boot",i), f$files$surv)
|
|
51
|
-
person_burden <- get_burden(fitsData, survData, sampleData, FALSE)
|
|
52
|
-
person_burden$boot <- i
|
|
53
|
-
person_burden
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
###########################
|
|
58
|
-
# 95% confidence interval #
|
|
59
|
-
###########################
|
|
60
|
-
|
|
61
|
-
# pr=5
|
|
62
|
-
# outall=NULL
|
|
63
|
-
# for(bootnum in 1:20){
|
|
64
|
-
# #bootnum=1 ##### loop this from 1 to 20.
|
|
65
|
-
# print(bootnum)
|
|
66
|
-
# setwd(paste("/Users/gmatt/data/tp/files/hg38/sjlife/burden/boot/boot",bootnum,sep=""))
|
|
67
|
-
# out1=read.csv(file=paste("./bootprimary",pr,".csv",sep=""))
|
|
68
|
-
# out1$boot=bootnum
|
|
69
|
-
# outall=rbind(outall,out1)
|
|
70
|
-
# }
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
### For each cell (each chc and age combination), there are 20 values. get the SD from the 20 bootstrapped burdens.
|
|
74
|
-
SDall=NULL
|
|
75
|
-
for(chc_num in chc_nums){
|
|
76
|
-
# chc_num=1
|
|
77
|
-
data=outall[outall$chc==chc_num,] ## data for this chc_num from the 20 bootstraps on each age point
|
|
78
|
-
data=data[,!colnames(data) %in% c("chc","boot")]
|
|
79
|
-
# for each column (each age point), get the SD
|
|
80
|
-
sd1=apply(data,2,sd)
|
|
81
|
-
sd1$chc=chc_num
|
|
82
|
-
SDall=rbind(SDall,sd1)
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
# #### burden for total of the 26 CHCs is the sum of the 26 burdens. So for each boot, take all the 26 and then sum up, to result in a column with 20 rows.
|
|
86
|
-
|
|
87
|
-
btotal=NULL
|
|
88
|
-
for(bootnum in 1:bootnums){
|
|
89
|
-
# bootnum=1
|
|
90
|
-
data=outall[outall$boot==bootnum,]
|
|
91
|
-
data=data[,!colnames(data) %in% c("chc","boot")]
|
|
92
|
-
total=apply(data,2,sum)
|
|
93
|
-
btotal=rbind(btotal,total)
|
|
94
|
-
}
|
|
95
|
-
### get SD for the total burdern from the 20 rows
|
|
96
|
-
sdtotal=apply(btotal,2,sd)
|
|
97
|
-
sdtotal=data.frame(t(sdtotal), check.names=F)
|
|
98
|
-
sdtotal$chc=0 ### indicating the total CHCs
|
|
99
|
-
|
|
100
|
-
sd=rbind(SDall,sdtotal)
|
|
101
|
-
sd=apply(sd,c(1,2),as.numeric)
|
|
102
|
-
|
|
103
|
-
##### read the burden from the original data. Use that and SD to get lower and upper bound.
|
|
104
|
-
# oburden=read.csv(paste("R:/Biostatistics/Biostatistics2/Qi/QiCommon/St Jude/Nature Review/CHCs/App/Rdata/primary",pr,".csv",sep=""))
|
|
105
|
-
oburden=input$burden
|
|
106
|
-
# oburden$boot=0
|
|
107
|
-
### total burden for the original data
|
|
108
|
-
total=apply(oburden[,!colnames(oburden) %in% c("chc","boot")],2,sum)
|
|
109
|
-
burdentotal=data.frame(t(total), check.names=F)
|
|
110
|
-
burdentotal$chc=0 ### indicating the total CHCs
|
|
111
|
-
oburden=rbind(oburden,burdentotal) #### burden for each chc with age in the columns. The last row is for the total burden.
|
|
112
|
-
oburden=data.frame(oburden, check.names=F)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
#### lower bound, the lowest is 0 burden.
|
|
116
|
-
low=oburden[,!colnames(oburden) %in% "chc"]-1.96*sd[,!colnames(oburden) %in% "chc"]
|
|
117
|
-
low[low<0]=0
|
|
118
|
-
low$chc=oburden$chc
|
|
119
|
-
#### The upper bound
|
|
120
|
-
up=oburden[,!colnames(oburden) %in% "chc"]+1.96*sd[,!colnames(oburden) %in% "chc"]
|
|
121
|
-
up$chc=oburden$chc
|
|
122
|
-
|
|
123
|
-
### Take primary=5 CNS as an example (with hight TXs in step 3). At age 50, the burden is 9.04 with 95% CI (7.92 to 10.16)
|
|
124
|
-
# oburden$X.50.51[oburden$chc==0]
|
|
125
|
-
# low$X.50.51[low$chc==0]
|
|
126
|
-
# up$X.50.51[up$chc==0]
|
|
127
|
-
|
|
128
|
-
# plot(c(20,95),c(0,15),type="n",xlab="Age",ylab="Burden",font=2)
|
|
129
|
-
# lines(seq(20,94,1),oburden[oburden$chc==0,!colnames(oburden) %in% "chc"],lty=1)
|
|
130
|
-
# lines(seq(20,94,1),low[low$chc==0,!colnames(low) %in% "chc"],lty=2)
|
|
131
|
-
# lines(seq(20,94,1),up[up$chc==0,!colnames(up) %in% "chc"],lty=2)
|
|
132
|
-
|
|
133
|
-
ci <- list(low = low, up = up, overall=burdentotal)
|
|
134
|
-
toJSON(ci, digits = NA, na = "string")
|
package/utils/burden-main.R
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
rm(list=ls())
|
|
2
|
-
|
|
3
|
-
suppressPackageStartupMessages({
|
|
4
|
-
library(dplyr) ### Qi changed to load plyr first, due to R message: If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
|
|
5
|
-
library(survival)
|
|
6
|
-
library(jsonlite)
|
|
7
|
-
library(parallel)
|
|
8
|
-
library(doParallel)
|
|
9
|
-
})
|
|
10
|
-
|
|
11
|
-
options(warn=-1)
|
|
12
|
-
|
|
13
|
-
# stream in json input data
|
|
14
|
-
con <- file("stdin", "r")
|
|
15
|
-
json <- readLines(con)
|
|
16
|
-
close(con)
|
|
17
|
-
input <- fromJSON(json)
|
|
18
|
-
# handle input arguments
|
|
19
|
-
args <- commandArgs(trailingOnly = T)
|
|
20
|
-
if (length(args) != 0) stop("Usage: echo <in_json> | Rscript burden.R > <out_json>")
|
|
21
|
-
|
|
22
|
-
# register the parallel backend (used by foreach() for parallelization)
|
|
23
|
-
availCores <- detectCores()
|
|
24
|
-
if (is.na(availCores)) stop("cannot detect number of available cores")
|
|
25
|
-
registerDoParallel(cores = availCores - 1) # use all available cores except one
|
|
26
|
-
|
|
27
|
-
chc_nums <- c(1:32)[-c(2,5,14,20,23,26)] # CHCs. 6 out of 32 CHCs not used.
|
|
28
|
-
|
|
29
|
-
#####################
|
|
30
|
-
# Functions for our method
|
|
31
|
-
# Ref: https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
|
|
32
|
-
#####################
|
|
33
|
-
# setwd("R:/Biostatistics/Biostatistics2/Qi/QiCommon/St Jude/Nature Review/CHCs/App/Rdata")
|
|
34
|
-
|
|
35
|
-
# import get_burden() function
|
|
36
|
-
source(file.path(input$binpath, "utils/getBurden.R"))
|
|
37
|
-
|
|
38
|
-
# compute main burden estimate
|
|
39
|
-
# parallelize across CHCs
|
|
40
|
-
f <- input$datafiles
|
|
41
|
-
fitsData <- file.path(f$dir, f$files$fit)
|
|
42
|
-
survData <- file.path(f$dir, f$files$surv)
|
|
43
|
-
sampleData <- file.path(f$dir, f$files$sample) # dataframe with all the X's needed, and X's are updated by input values
|
|
44
|
-
person_burden <- get_burden(fitsData, survData, sampleData, TRUE)
|
|
45
|
-
|
|
46
|
-
toJSON(person_burden, digits = NA, na = "string")
|
package/utils/corr.R
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# Test syntax: cat ~/sjpp/test.txt | time Rscript corr.R
|
|
2
|
-
|
|
3
|
-
# Load required packages
|
|
4
|
-
suppressWarnings({
|
|
5
|
-
library(jsonlite)
|
|
6
|
-
})
|
|
7
|
-
|
|
8
|
-
# Read JSON input from stdin
|
|
9
|
-
con <- file("stdin", "r")
|
|
10
|
-
json <- readLines(con, warn=FALSE)
|
|
11
|
-
close(con)
|
|
12
|
-
input <- fromJSON(json)
|
|
13
|
-
|
|
14
|
-
ids <- input$terms$id
|
|
15
|
-
v1 <- input$terms$v1
|
|
16
|
-
v2 <- input$terms$v2
|
|
17
|
-
|
|
18
|
-
coeffs <- c()
|
|
19
|
-
pvalues <- c()
|
|
20
|
-
sample_sizes <- c()
|
|
21
|
-
for (i in 1:length(v1)) {
|
|
22
|
-
suppressWarnings({
|
|
23
|
-
cor <- cor.test(as.numeric(unlist(v1[i])), as.numeric(unlist(v2[i])), method = input$method)
|
|
24
|
-
})
|
|
25
|
-
coeffs <- c(coeffs, cor$estimate)
|
|
26
|
-
pvalues <- c(pvalues, cor$p.value)
|
|
27
|
-
sample_sizes <- c(sample_sizes, length(as.numeric(unlist(v1[i]))))
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
# Adjusting for multiple testing correction
|
|
31
|
-
adjust_p_values <- p.adjust(pvalues, method = "fdr")
|
|
32
|
-
output <- data.frame(ids, coeffs, pvalues, adjust_p_values, sample_sizes)
|
|
33
|
-
names(output)[1] <- "id"
|
|
34
|
-
names(output)[2] <- "correlation"
|
|
35
|
-
names(output)[3] <- "original_p_value"
|
|
36
|
-
names(output)[4] <- "adjusted_p_value"
|
|
37
|
-
names(output)[5] <- "sample_size"
|
|
38
|
-
toJSON(output, digits = NA, na = "string") # Setting digits = NA makes toJSON() use the max precision. na='string' causes any "not a number" to be reported as string. This from ?toJSON() documentation
|
package/utils/cuminc.R
DELETED
|
@@ -1,279 +0,0 @@
|
|
|
1
|
-
#################################
|
|
2
|
-
# CUMULATIVE INCIDENCE ANALYSIS #
|
|
3
|
-
#################################
|
|
4
|
-
|
|
5
|
-
#########
|
|
6
|
-
# USAGE
|
|
7
|
-
#########
|
|
8
|
-
|
|
9
|
-
# Usage: echo <in_json> | Rscript cuminc.R > <out_json>
|
|
10
|
-
|
|
11
|
-
# in_json: [string] input data in JSON format. Streamed through stdin.
|
|
12
|
-
# out_json: [string] cumulative incidence results in JSON format. Streamed to stdout.
|
|
13
|
-
|
|
14
|
-
# Input JSON:
|
|
15
|
-
# {
|
|
16
|
-
# data:
|
|
17
|
-
# chartId: [
|
|
18
|
-
# {
|
|
19
|
-
# time: time to event
|
|
20
|
-
# event: event code (0 = censored, 1 = event, 2 = competing risk event)
|
|
21
|
-
# series: series ID
|
|
22
|
-
# }
|
|
23
|
-
# ],
|
|
24
|
-
# startTime: custom start time of cuminc curve
|
|
25
|
-
# }
|
|
26
|
-
#
|
|
27
|
-
# Output JSON:
|
|
28
|
-
# {
|
|
29
|
-
# chartId: {
|
|
30
|
-
# estimates: {
|
|
31
|
-
# seriesId: [
|
|
32
|
-
# {
|
|
33
|
-
# time: time when estimate is computed
|
|
34
|
-
# est: estimated cumulative incidence value
|
|
35
|
-
# var: variance of cumulative incidence value
|
|
36
|
-
# low: 95% confidence interval - lower bound
|
|
37
|
-
# up: 95% confidence intervals - upper bound
|
|
38
|
-
# nrisk: # at-risk samples at timepoint
|
|
39
|
-
# ncensor: # censored samples at timepoint
|
|
40
|
-
# }
|
|
41
|
-
# ]
|
|
42
|
-
# },
|
|
43
|
-
# tests: [
|
|
44
|
-
# {
|
|
45
|
-
# series1: first series of test,
|
|
46
|
-
# series2: second series of test,
|
|
47
|
-
# pvalue: p-value of test,
|
|
48
|
-
# permutation: logical for whether permutation test was used
|
|
49
|
-
# }
|
|
50
|
-
# ]
|
|
51
|
-
# }
|
|
52
|
-
# }
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
########
|
|
56
|
-
# CODE
|
|
57
|
-
########
|
|
58
|
-
|
|
59
|
-
library(parallel)
|
|
60
|
-
library(jsonlite)
|
|
61
|
-
suppressPackageStartupMessages(library(cmprsk))
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
#############
|
|
65
|
-
# FUNCTIONS #
|
|
66
|
-
#############
|
|
67
|
-
|
|
68
|
-
# function to run cumulative incidence analysis on data for a chart
|
|
69
|
-
run_cuminc <- function(chart, startTime) {
|
|
70
|
-
chart$event <- as.factor(chart$event)
|
|
71
|
-
chart$series <- as.factor(chart$series)
|
|
72
|
-
estimates <- list()
|
|
73
|
-
if (length(levels(chart$series)) == 1) {
|
|
74
|
-
# single series
|
|
75
|
-
res <- cuminc(ftime = chart$time, fstatus = chart$event, cencode = 0)
|
|
76
|
-
# extract results of series for event of interest (i.e. event=1)
|
|
77
|
-
seriesRes <- as.data.frame(res[["1 1"]])
|
|
78
|
-
# if a custom start time is given, then this start time
|
|
79
|
-
# should serve as the first time point of the curve
|
|
80
|
-
if (!is.na(startTime)) {
|
|
81
|
-
seriesRes$time[1] <- startTime
|
|
82
|
-
if (seriesRes$time[1] == seriesRes$time[2]) {
|
|
83
|
-
# event occurred at startTime
|
|
84
|
-
# so time point 2 can serve as time point 1
|
|
85
|
-
seriesRes <- seriesRes[-1,]
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
# compute confidence intervals
|
|
89
|
-
seriesRes <- compute_ci(seriesRes)
|
|
90
|
-
# compute counts of at-risk samples, events, and
|
|
91
|
-
# censored exits at each time point
|
|
92
|
-
seriesRes <- compute_counts(seriesRes, chart)
|
|
93
|
-
estimates[[levels(chart$series)]] <- seriesRes
|
|
94
|
-
out <- list("estimates" = estimates)
|
|
95
|
-
} else {
|
|
96
|
-
# multiple series
|
|
97
|
-
# compute cumulative incidence for each pairwise combination of series
|
|
98
|
-
pairs <- combn(levels(chart$series), 2)
|
|
99
|
-
# vectors for storing results of Gray's tests
|
|
100
|
-
series1s <- character(length = ncol(pairs))
|
|
101
|
-
series2s <- character(length = ncol(pairs))
|
|
102
|
-
pvalues <- double(length = ncol(pairs))
|
|
103
|
-
usedPermutation <- logical(length = ncol(pairs))
|
|
104
|
-
# compute cumulative incidence for each pair
|
|
105
|
-
for (i in 1:ncol(pairs)) {
|
|
106
|
-
pair <- pairs[,i]
|
|
107
|
-
series1s[i] <- pair[1]
|
|
108
|
-
series2s[i] <- pair[2]
|
|
109
|
-
pairDat <- chart[chart$series %in% pair,]
|
|
110
|
-
pairDat$series <- droplevels(pairDat$series)
|
|
111
|
-
res <- cuminc(ftime = pairDat$time, fstatus = pairDat$event, group = pairDat$series, cencode = 0)
|
|
112
|
-
|
|
113
|
-
# get curve estimates
|
|
114
|
-
for (series in pair) {
|
|
115
|
-
if (series %in% names(estimates)) next
|
|
116
|
-
# extract results of series for event of interest (i.e. event=1)
|
|
117
|
-
seriesRes <- as.data.frame(res[[paste(series,1)]])
|
|
118
|
-
# if a custom start time is given, then this start time
|
|
119
|
-
# should serve as the first time point of the curve
|
|
120
|
-
if (!is.na(startTime)) {
|
|
121
|
-
seriesRes$time[1] <- startTime
|
|
122
|
-
if (seriesRes$time[1] == seriesRes$time[2]) {
|
|
123
|
-
# event occurred at startTime
|
|
124
|
-
# so time point 2 can serve as time point 1
|
|
125
|
-
seriesRes <- seriesRes[-1,]
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
# compute confidence intervals
|
|
129
|
-
seriesRes <- compute_ci(seriesRes)
|
|
130
|
-
# compute counts of at-risk samples, events, and
|
|
131
|
-
# censored exits at each time point
|
|
132
|
-
seriesRes <- compute_counts(seriesRes, chart[chart$series == series,])
|
|
133
|
-
estimates[[series]] <- seriesRes
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
# Gray's test
|
|
137
|
-
# the cuminc() function performed Gray's test between the pair of curves
|
|
138
|
-
# before using the results of the test, first check if permutation test is needed
|
|
139
|
-
# build an event-series contingency table
|
|
140
|
-
# if expected count of any cell in table is <5, then permutation test is needed
|
|
141
|
-
tbl <- table(pairDat$event, pairDat$series)
|
|
142
|
-
tbl <- tbl[c("0","1"),] # remove event2 samples
|
|
143
|
-
rmarg <- rowSums(tbl)
|
|
144
|
-
cmarg <- colSums(tbl)
|
|
145
|
-
tmarg <- sum(tbl)
|
|
146
|
-
E <- (rmarg %o% cmarg)/tmarg
|
|
147
|
-
if (any(E < 5)) {
|
|
148
|
-
# expected count of at least one cell in table is <5
|
|
149
|
-
# perform permutation test
|
|
150
|
-
usedPermutation[i] <- TRUE
|
|
151
|
-
tsO <- res$Tests["1","stat"] # test statistic computed for event of interest based on original data
|
|
152
|
-
pvalue <- permutationTest(pairDat, tsO)
|
|
153
|
-
} else {
|
|
154
|
-
# expected counts of all cells in table are >=5
|
|
155
|
-
# permutation test is not needed
|
|
156
|
-
# use computed Gray's test p-value for event
|
|
157
|
-
# of interest
|
|
158
|
-
usedPermutation[i] <- FALSE
|
|
159
|
-
pvalue <- signif(res$Tests["1","pv"], 2)
|
|
160
|
-
}
|
|
161
|
-
if (pvalue == 0) pvalue <- "<1e-16" # see https://stacks.cdc.gov/view/cdc/22757/cdc_22757_DS11.pdf
|
|
162
|
-
pvalues[i] <- pvalue
|
|
163
|
-
}
|
|
164
|
-
tests <- data.frame("series1" = series1s, "series2" = series2s, "pvalue" = pvalues, "permutation" = usedPermutation, stringsAsFactors = F)
|
|
165
|
-
out <- list("estimates" = estimates, "tests" = tests)
|
|
166
|
-
}
|
|
167
|
-
return(out)
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
# function to perform permutation test
|
|
171
|
-
permutationTest <- function(dat, tsO) {
|
|
172
|
-
tsPs <- runPermutations(100, dat) # start with 100 permutations
|
|
173
|
-
pvalue <- getPermutePvalue(tsPs, tsO)
|
|
174
|
-
if (pvalue <= 0.2) {
|
|
175
|
-
# p-value is <=0.2, so additional permutations are
|
|
176
|
-
# needed to get more accurate p-value to determine
|
|
177
|
-
# if p-value is significant
|
|
178
|
-
tsPs <- c(tsPs, runPermutations(100, dat))
|
|
179
|
-
pvalue <- getPermutePvalue(tsPs, tsO)
|
|
180
|
-
if (pvalue <= 0.1) {
|
|
181
|
-
# additional permutations are needed
|
|
182
|
-
tsPs <- c(tsPs, runPermutations(300, dat))
|
|
183
|
-
pvalue <- getPermutePvalue(tsPs, tsO)
|
|
184
|
-
if (pvalue <= 0.05) {
|
|
185
|
-
# additional permutations are needed
|
|
186
|
-
tsPs <- c(tsPs, runPermutations(500, dat))
|
|
187
|
-
pvalue <- getPermutePvalue(tsPs, tsO)
|
|
188
|
-
# no need to run more than a total of 1000 permutations
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
return(pvalue)
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
# function to perform permutations
|
|
196
|
-
# for each permutation:
|
|
197
|
-
# - shuffle series assignments of samples
|
|
198
|
-
# - perform cumulative incidence analysis
|
|
199
|
-
# - compute the test statistic of Gray's test
|
|
200
|
-
# return all permuted test statistics
|
|
201
|
-
runPermutations <- function(M, dat) {
|
|
202
|
-
tsPs <- replicate(M, cuminc(ftime = dat$time, fstatus = dat$event, group = sample(dat$series), cencode = 0)$Tests["1","stat"], simplify = T)
|
|
203
|
-
return(tsPs)
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
# function to compute p-value for permutation test
|
|
207
|
-
# perform two-tailed test
|
|
208
|
-
getPermutePvalue <- function(tsPs, tsO) {
|
|
209
|
-
P_left <- sum(tsPs <= -abs(tsO))/(length(tsPs)+1)
|
|
210
|
-
P_right <- sum(tsPs >= abs(tsO))/(length(tsPs)+1)
|
|
211
|
-
pvalue <- signif(P_left + P_right, 2)
|
|
212
|
-
return(pvalue)
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
# function to compute 95% confidence intervals
|
|
216
|
-
compute_ci <- function(res) {
|
|
217
|
-
low <- res$est - (1.96 * sqrt(res$var))
|
|
218
|
-
low[low < 0] <- 0
|
|
219
|
-
up <- res$est + (1.96 * sqrt(res$var))
|
|
220
|
-
res["low"] <- low
|
|
221
|
-
res["up"] <- up
|
|
222
|
-
return(res)
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
# function to compute counts of at-risk samples, events, and censored exits at each time point
|
|
226
|
-
# res: series-specific res
|
|
227
|
-
# chart: series-specific chart
|
|
228
|
-
compute_counts <- function(res, chart) {
|
|
229
|
-
# compute at-risk counts
|
|
230
|
-
# these counts are the number of samples
|
|
231
|
-
# that have not experienced an event or
|
|
232
|
-
# have not been censored prior to each time point
|
|
233
|
-
res$nrisk <- apply(res, 1, function(timepoint) length(which(chart$time >= timepoint["time"])))
|
|
234
|
-
# compute number of events and censored exits during each time point
|
|
235
|
-
times <- unique(res$time)
|
|
236
|
-
chart <- cbind(chart, "bin" = findInterval(chart$time, times))
|
|
237
|
-
res <- cbind(res, "bin" = findInterval(res$time, times))
|
|
238
|
-
m <- table(chart$bin, chart$event)
|
|
239
|
-
m <- m[, c("0","1"), drop = F] # remove competing risk events
|
|
240
|
-
colnames(m) <- c("ncensor", "nevent")
|
|
241
|
-
m <- cbind(m, "bin" = as.numeric(row.names(m)))
|
|
242
|
-
res <- merge(res, m, by = "bin", all.x = T)
|
|
243
|
-
res$nevent[is.na(res$nevent)] <- 0
|
|
244
|
-
res$ncensor[is.na(res$ncensor)] <- 0
|
|
245
|
-
res <- res[,c(2:7,9,8)]
|
|
246
|
-
return(res)
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
################
|
|
251
|
-
# PREPARE DATA #
|
|
252
|
-
################
|
|
253
|
-
|
|
254
|
-
# stream in json input
|
|
255
|
-
con <- file("stdin", "r")
|
|
256
|
-
json <- readLines(con)
|
|
257
|
-
close(con)
|
|
258
|
-
input <- fromJSON(json)
|
|
259
|
-
|
|
260
|
-
dat <- input$data
|
|
261
|
-
startTime <- ifelse("startTime" %in% names(input), input$startTime, NA)
|
|
262
|
-
|
|
263
|
-
#save.image("~/test.RData")
|
|
264
|
-
#stop("stop here")
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
################
|
|
268
|
-
# RUN ANALYSIS #
|
|
269
|
-
################
|
|
270
|
-
|
|
271
|
-
# perform cumulative incidence analysis
|
|
272
|
-
# parallelize the analysis across charts/variants
|
|
273
|
-
availCores <- detectCores()
|
|
274
|
-
if (is.na(availCores)) stop("unable to detect number of available cores")
|
|
275
|
-
cores <- ifelse(length(dat) < availCores, length(dat), availCores)
|
|
276
|
-
ci_results <- mclapply(X = dat, FUN = run_cuminc, startTime = startTime, mc.cores = cores)
|
|
277
|
-
|
|
278
|
-
# output results in json format
|
|
279
|
-
toJSON(ci_results, digits = NA, na = "string")
|
package/utils/density.R
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
library(jsonlite)
|
|
2
|
-
# This script reads in a json string from stdin with the parameters,
|
|
3
|
-
# calculates the densities of each plot and returns the densities as a json string
|
|
4
|
-
# The input json string contains plot2Values, a dictionary where each field maps to an array of numbers and the
|
|
5
|
-
# global min and max values of the x axis.
|
|
6
|
-
# The output json string is a dictionary with the density for each plot. The density is represented
|
|
7
|
-
# by a an object where {x: [x density values], y: [y density values]}
|
|
8
|
-
# In order to test it you can run the following command from the command line replacing plot2Values and min and max
|
|
9
|
-
# with your values:
|
|
10
|
-
# echo '{plot2Values: {"plotA": [1.2, 2, 3], "plotB": [4.5, 5, 6]}, min: 1.2, max:6}' | Rscript ./density.R
|
|
11
|
-
|
|
12
|
-
con <- file("stdin", "r")
|
|
13
|
-
json <- readLines(con)
|
|
14
|
-
close(con)
|
|
15
|
-
data <- fromJSON(json)
|
|
16
|
-
plot2Values <- data$plot2Values
|
|
17
|
-
min <- data$min
|
|
18
|
-
max <- data$max
|
|
19
|
-
densities <- list()
|
|
20
|
-
for(plot in names(plot2Values)){
|
|
21
|
-
values = plot2Values[[plot]]
|
|
22
|
-
# If the plot has less than 5 values or all the values are the same, we will return a flat line
|
|
23
|
-
if(length(values) <= 5 | length(unique(values)) == 1){
|
|
24
|
-
y = rep(0, length(values))
|
|
25
|
-
densities[[plot]] <- list(x=values, y=y)
|
|
26
|
-
next
|
|
27
|
-
}
|
|
28
|
-
den = density(x = values, from=min, to=max)
|
|
29
|
-
x = den$x
|
|
30
|
-
y = den$y
|
|
31
|
-
result = list(x=x, y=y) #This is an object with two keys x and y that are number arrays
|
|
32
|
-
densities[[plot]] <- result
|
|
33
|
-
}
|
|
34
|
-
toJSON(densities, digits = NA, na = "string") # will return a json like { plotA: {x:[...], y: [...]}}
|
|
35
|
-
|
|
36
|
-
|