@sjcrh/proteinpaint-server 2.122.0 → 2.124.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,154 +0,0 @@
1
- #######################
2
- # REGRESSION ANALYSIS
3
- #######################
4
-
5
- ###########
6
- # USAGE
7
- ###########
8
-
9
- # Usage: echo <in_json> | Rscript regression.R > <out_json>
10
-
11
- # in_json: [string] input data in JSON format. Streamed through stdin.
12
- # out_json: [string] regression results in JSON format. Streamed to stdout.
13
-
14
- # Input JSON specifications:
15
- # {
16
- # "regressionType": regression type (linear/logistic/cox)
17
- # "binpath": server bin path
18
- # "data": [{}] per-sample data values
19
- # "outcome": {
20
- # "id": variable id
21
- # "name": variable name
22
- # "rtype": type of R variable ("numeric", "factor")
23
- # "timeToEvent": {} (only for cox outcome)
24
- # "timeScale": time/age
25
- # "timeId": id of time variable (for 'time' time scale)
26
- # "agestartId": id of age start variable (for 'age' time scale)
27
- # "ageendId": id of age end variable (for 'age' time scale)
28
- # "eventId": id of event variable
29
- # "categories": {} (only for logistic outcome)
30
- # "ref": reference category of outcome
31
- # "nonref": non-reference category of outcome
32
- # }
33
- # "independent": [
34
- # {
35
- # "id": variable id
36
- # "name": variable name
37
- # "type": type of independent variable ("snplst", "snplocus", "spline", "other")
38
- # "rtype": type of R variable ("numeric", "factor")
39
- # "refGrp": reference group
40
- # "interactions": [] ids of interacting variables (optional)
41
- # "spline": {} cubic spline settings (only for spline variable)
42
- # "knots": [] knot values
43
- # "plotfile": output png file of spline plot
44
- # }
45
- # ]
46
- # }
47
- #
48
- #
49
- # Output JSON specifications:
50
- # [{
51
- # data: {
52
- # "id": id of snplocus term (empty when no snplocus terms are present)
53
- # "data": {
54
- # "sampleSize": sample size of analysis,
55
- # "eventCnt": number of events (only for cox regression),
56
- # "residuals": { "header": [], "rows": [] },
57
- # "coefficients": { "header": [], "rows": [] },
58
- # "type3": { "header": [], "rows": [] },
59
- # "totalSnpEffect": { "header": [], "rows": [] } (only for snplocus interactions),
60
- # "tests": { "header": [], "rows": [] } (only for cox regression),
61
- # "other": { "header": [], "rows": [] },
62
- # "warnings": [] warning messages
63
- # },
64
- # }
65
- # benchmark: {} benchmarking results
66
- # }]
67
-
68
-
69
- ###########
70
- # CODE
71
- ###########
72
-
73
- suppressPackageStartupMessages({
74
- library(jsonlite)
75
- library(survival)
76
- library(parallel)
77
- library(lmtest)
78
- })
79
-
80
- benchmark <- list()
81
-
82
- ################
83
- # PREPARE DATA #
84
- ################
85
-
86
- # stream in json input
87
- stime <- Sys.time()
88
- con <- file("stdin", "r")
89
- json <- readLines(con)
90
- close(con)
91
- input <- fromJSON(json)
92
- etime <- Sys.time()
93
- dtime <- etime - stime
94
- benchmark[["read_json_input"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
95
-
96
- # import regression utilities
97
- source(paste0(input$binpath, "/utils/regression.utils.R"))
98
-
99
- # prepare data table
100
- stime <- Sys.time()
101
- dat <- prepareDataTable(input$data, input$independent)
102
- etime <- Sys.time()
103
- dtime <- etime - stime
104
- benchmark[["prepareDataTable"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
105
-
106
-
107
- ##################
108
- # BUILD FORMULAS #
109
- ##################
110
-
111
- stime <- Sys.time()
112
- formulas <- buildFormulas(input$outcome, input$independent, input$includeUnivariate)
113
- etime <- Sys.time()
114
- dtime <- etime - stime
115
- benchmark[["buildFormulas"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
116
-
117
-
118
- #save.image("~/test.RData")
119
- #stop("stop here")
120
-
121
- ##################
122
- # RUN REGRESSION #
123
- ##################
124
-
125
- # run a separate regression analysis for each formula
126
- # run the analyses in parallel using multiple cores
127
- stime <- Sys.time()
128
- cores <- detectCores()
129
- if (is.na(cores)) stop("unable to detect number of cores")
130
- reg_results <- mclapply(X = formulas, FUN = runRegression, regtype = input$regressionType, dat = dat, outcome = input$outcome, cachedir = input$cachedir, mc.cores = cores)
131
- etime <- Sys.time()
132
- dtime <- etime - stime
133
- benchmark[["runRegression"]] <- unbox(paste(round(as.numeric(dtime), 4), attr(dtime, "units")))
134
-
135
-
136
- ##################
137
- # PARSE RESULTS #
138
- ##################
139
-
140
- if (isTRUE(input$includeUnivariate)) {
141
- # univariate analysis included along with multivariable analysis
142
- # parse the univariate/multivariable results
143
- reg_results <- parseUniMultiResults(reg_results, input$regressionType)
144
- }
145
-
146
- out <- list(data = reg_results, benchmark = benchmark)
147
-
148
-
149
- ##################
150
- # OUTPUT RESULTS #
151
- ##################
152
-
153
- # Export results as json to stdout
154
- toJSON(out, digits = NA, na = "string")