@sjcrh/proteinpaint-server 2.122.0 → 2.124.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/utils/survival.R DELETED
@@ -1,92 +0,0 @@
1
- #####################
2
- # Survival analysis #
3
- #####################
4
-
5
- #########
6
- # Usage #
7
- #########
8
-
9
- # Usage: echo <in_json> | Rscript survival.R > <out_json>
10
-
11
- # in_json: [string] input data in JSON format. Streamed through stdin.
12
- # out_json: [string] survival results in JSON format. Streamed to stdout.
13
-
14
- # Input JSON:
15
- # [
16
- # {
17
- # time: time to event
18
- # status: event code (0 = censored, 1 = dead)
19
- # series: series ID
20
- # }
21
- # ]
22
- #
23
- # Output JSON:
24
- # {
25
- # estimates: [{ series, time, surv, lower, upper, nevent, ncensor, nrisk }],
26
- # tests: [{ series1, series2, pvalue }]
27
- # }
28
-
29
-
30
- ########
31
- # Code #
32
- ########
33
-
34
- library(jsonlite)
35
- library(survival)
36
-
37
- # stream in json input
38
- con <- file("stdin", "r")
39
- json <- readLines(con)
40
- close(con)
41
- dat <- fromJSON(json)
42
-
43
- # perform survival analysis
44
- dat$series <- as.factor(dat$series)
45
- if (length(levels(dat$series)) == 1) {
46
- # single series
47
- results <- survfit(Surv(time, status) ~ 1, data = dat)
48
- # get survival estimates
49
- # prepend a starting prob=1 data point that survfit() does not include
50
- estimates <- data.frame("series" = levels(dat$series), "time" = c(0,results$time), "surv" = c(1,results$surv), "lower" = c(1,results$lower), "upper" = c(1,results$upper), "nevent" = c(0,results$n.event), "ncensor" = c(0,results$n.censor), "nrisk" = c(results$n.risk[1],results$n.risk), stringsAsFactors = F)
51
- } else {
52
- # multiple series
53
- # generate pairwise combinations of series
54
- pairs <- combn(levels(dat$series), 2)
55
- # initialize table for survival estimates
56
- estimates <- data.frame("series" = character(), "time" = double(), "surv" = double(), "lower" = double(), "upper" = double(), "nevent" = double(), "ncensor" = double(), "nrisk" = double(), stringsAsFactors = F)
57
- # initialize table for log-rank tests
58
- tests <- data.frame("series1" = character(length = ncol(pairs)), "series2" = character(length = ncol(pairs)), "pvalue" = double(length = ncol(pairs)), stringsAsFactors = F)
59
- # compute survival curves for each pairwise combination of series
60
- for (i in 1:ncol(pairs)) {
61
- pair <- pairs[,i]
62
- results <- survfit(Surv(time, status) ~ series, data = dat, subset = dat$series %in% pair)
63
- # get survival estimates for each series within pair
64
- # prepend a starting prob=1 data point that survfit() does not include
65
- for (series in pair) {
66
- if (series %in% estimates$series) next
67
- results_series <- results[paste0("series=",series)]
68
- estimates_series <- data.frame("series" = series, "time" = c(0,results_series$time), "surv" = c(1,results_series$surv), "lower" = c(1,results_series$lower), "upper" = c(1,results_series$upper), "nevent" = c(0,results_series$n.event), "ncensor" = c(0,results_series$n.censor), "nrisk" = c(results_series$n.risk[1],results_series$n.risk), stringsAsFactors = F)
69
- estimates <- rbind(estimates, estimates_series)
70
- }
71
- # test for difference between survival curves using log-rank test
72
- df <- dat[dat$series %in% pair, c("time", "status")]
73
- if (all(apply(df, 2, function(x) length(unique(x)) == 1))) {
74
- # survival curves are identical
75
- # do not compare curves using survdiff() because it will error out
76
- # set pvalue to 1
77
- pvalue <- 1
78
- } else {
79
- # suppress warnings to prevent warnings when test is done between
80
- # curves with no events
81
- test <- suppressWarnings(survdiff(Surv(time, status) ~ series, data = dat, subset = dat$series %in% pair))
82
- # compute p-value (see: https://www.emilyzabor.com/tutorials/survival_analysis_in_r_tutorial.html#Extracting_information_from_a_survdiff_object)
83
- pvalue <- 1 - pchisq(test$chisq, length(test$n) - 1)
84
- }
85
- tests[i,] <- c(pair, signif(pvalue, 2))
86
- }
87
- }
88
-
89
- # output results
90
- out <- list(estimates = estimates)
91
- if (length(levels(dat$series)) > 1) out[["tests"]] <- tests
92
- toJSON(out, digits = NA, na = "string")
package/utils/wilcoxon.R DELETED
@@ -1,73 +0,0 @@
1
- ##########################
2
- # Wilcoxon rank sum test #
3
- ##########################
4
-
5
- #########
6
- # Usage #
7
- #########
8
-
9
- # Usage: Rscript wilcoxon.R in.json > results
10
-
11
- # Input data is in JSON format and is read in from <in.json> file.
12
- # Results are written in JSON format to stdout.
13
-
14
- # Input JSON specifications:
15
- # [{
16
- # group1_id: group1 id,
17
- # group1_values: [] group1 data values,
18
- # group2_id: group2 id,
19
- # group2_values: [] group2 data values
20
- # }]
21
- #
22
- # Output JSON specifications:
23
- # [{
24
- # group1_id: group1 id,
25
- # group1_values: [] group1 data values,
26
- # group2_id: group2 id,
27
- # group2_values: [] group2 data values,
28
- # pvalue: p-value of test
29
- # }]
30
-
31
-
32
- ########
33
- # Code #
34
- ########
35
-
36
- library(jsonlite)
37
-
38
- # read in data
39
- args <- commandArgs(trailingOnly = T)
40
- if (length(args) != 1) stop("one argument required")
41
- infile <- args[1]
42
- dat <- fromJSON(infile)
43
-
44
- # function to compute wilcox p-value between two groups of values
45
- getPvalue <- function(x) {
46
- if (length(x$group1_values) == 0 || length(x$group2_values) == 0) {
47
- # all samples fall in one group
48
- # return NA p-value
49
- return(unbox("NA"))
50
- }
51
-
52
- # perform Wilcox test between groups
53
- # suppress warnings because a warning message will be
54
- # generated when sample size is small (<50) and ties
55
- # are present because an exact p-value cannot be computed
56
- # it is fine to ignore this message because a p-value will
57
- # still be computed using a normal approximation
58
- # NOTE: do not set exact=TRUE because this will use large
59
- # amounts of memory when sample sizes are large
60
- wt <- suppressWarnings(wilcox.test(x$group1_values, x$group2_values))
61
-
62
- # return p-value
63
- return(unbox(wt$p.value))
64
- }
65
-
66
- # compute Wilcox p-value for each data entry
67
- pvalues <- apply(dat, 1, getPvalue)
68
-
69
- # append p-values to data
70
- datWithPvalues <- cbind(dat, "pvalue" = pvalues, stringsAsFactors = F)
71
-
72
- # output data with p-values
73
- toJSON(datWithPvalues)