@sjcrh/proteinpaint-server 2.27.1 → 2.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cards/hic.json +55 -13
- package/package.json +2 -2
- package/routes/termdb.getCategories.ts +80 -0
- package/routes/termdb.violin.ts +1 -1
- package/server.js +1 -1
- package/src/bedj.parseBed.js +3 -3
- package/src/checkReadingFrame.js +1 -1
- package/src/lines2R.js +7 -10
- package/src/mds3.gdc.filter.js +1 -1
- package/src/serverconfig.js +2 -0
- package/utils/burden.R +131 -104
- package/utils/fastclust.R +17 -8
package/src/bedj.parseBed.js
CHANGED
|
@@ -69,12 +69,12 @@ to parse line as gene file, require following:
|
|
|
69
69
|
|
|
70
70
|
*/
|
|
71
71
|
|
|
72
|
-
|
|
72
|
+
import checkReadingFrame from './checkReadingFrame'
|
|
73
73
|
|
|
74
74
|
//a valid exonFrames field can only contain members of validFrames, names -1, 0, 1, or 2
|
|
75
75
|
const validFrames = new Set(['-1', '0', '1', '2'])
|
|
76
76
|
|
|
77
|
-
|
|
77
|
+
export function parseBedLine(l, enst2desc) {
|
|
78
78
|
const chr = l[0],
|
|
79
79
|
chromstart = Number(l[2 - 1]),
|
|
80
80
|
chromstop = l[3 - 1],
|
|
@@ -264,7 +264,7 @@ exports.parseBedLine = function parseBedLine(l, enst2desc) {
|
|
|
264
264
|
}
|
|
265
265
|
if (!tmp3.some(i => !validFrames.has(i))) {
|
|
266
266
|
/* all fields are valid frames, reject values that are not -1, 0, 1, or 2 */
|
|
267
|
-
checkReadingFrame
|
|
267
|
+
checkReadingFrame(obj, exonframes)
|
|
268
268
|
}
|
|
269
269
|
return obj
|
|
270
270
|
}
|
package/src/checkReadingFrame.js
CHANGED
|
@@ -16,7 +16,7 @@ str:
|
|
|
16
16
|
if the first coding exon has a frame of 1/2 but not 0, the "startCodonFrame" attribute will be added to obj
|
|
17
17
|
so that it can be properly translatec
|
|
18
18
|
*/
|
|
19
|
-
|
|
19
|
+
export default function (obj, str) {
|
|
20
20
|
if (!obj.codingstart) {
|
|
21
21
|
// not coding
|
|
22
22
|
return
|
package/src/lines2R.js
CHANGED
|
@@ -9,13 +9,13 @@ Arguments:
|
|
|
9
9
|
Given an R script and a JavaScript array of input data lines, the data lines are streamed into the standard input of the R script. The standard output of the R script is then returned as a JavaScript array of output data lines.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
import fs from 'fs'
|
|
13
|
+
import path from 'path'
|
|
14
|
+
import serverconfig from './serverconfig'
|
|
15
|
+
import { spawn } from 'child_process'
|
|
16
|
+
import { Readable } from 'stream'
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
export default async function lines2R(Rscript, lines, args = []) {
|
|
19
19
|
try {
|
|
20
20
|
await fs.promises.stat(Rscript)
|
|
21
21
|
} catch (e) {
|
|
@@ -55,10 +55,7 @@ module.exports = async function lines2R(Rscript, lines, args = []) {
|
|
|
55
55
|
const errmsg = `R process emitted standard error\nR stderr: ${err}`
|
|
56
56
|
reject(errmsg)
|
|
57
57
|
}
|
|
58
|
-
const out = stdout
|
|
59
|
-
.join('')
|
|
60
|
-
.trim()
|
|
61
|
-
.split('\n')
|
|
58
|
+
const out = stdout.join('').trim().split('\n')
|
|
62
59
|
resolve(out)
|
|
63
60
|
})
|
|
64
61
|
})
|
package/src/mds3.gdc.filter.js
CHANGED
package/src/serverconfig.js
CHANGED
|
@@ -52,6 +52,8 @@ if (!serverconfig.bigBedInfo) serverconfig.bigBedInfo = 'bigBedInfo'
|
|
|
52
52
|
if (!serverconfig.bigBedNamedItems) serverconfig.bigBedNamedItems = 'bigBedNamedItems'
|
|
53
53
|
if (!serverconfig.clustalo) serverconfig.clustalo = 'clustalo'
|
|
54
54
|
if (!serverconfig.Rscript) serverconfig.Rscript = 'Rscript'
|
|
55
|
+
if (!serverconfig.gfServer) serverconfig.gfServer = 'gfServer'
|
|
56
|
+
if (!serverconfig.gfClient) serverconfig.gfClient = 'gfClient'
|
|
55
57
|
|
|
56
58
|
/******************
|
|
57
59
|
APPLY OVERRIDES
|
package/utils/burden.R
CHANGED
|
@@ -6,17 +6,23 @@ rm(list=ls())
|
|
|
6
6
|
suppressPackageStartupMessages(library(dplyr)) ### Qi changed to load plyr first, due to R message: If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
|
|
7
7
|
suppressPackageStartupMessages(library(survival))
|
|
8
8
|
library(jsonlite)
|
|
9
|
+
library(parallel)
|
|
9
10
|
|
|
10
11
|
options(warn=-1)
|
|
11
12
|
|
|
12
13
|
# Input from lines2R
|
|
13
14
|
args <- commandArgs(trailingOnly = T)
|
|
14
|
-
if (length(args) != 4) stop("Usage: Rscript burden.R in.json > results")
|
|
15
|
+
if (length(args) != 4) stop("Usage: Rscript burden.R in.json fitsData survData sampleData > results")
|
|
15
16
|
infile <- args[1]
|
|
16
17
|
fitsData <- args[2]
|
|
17
18
|
survData <- args[3]
|
|
18
19
|
sampleData <- args[4]
|
|
19
20
|
|
|
21
|
+
chc_nums <- c(1:32)[-c(2,5,14,20,23,26)] # CHCs. 6 out of 32 CHCs not used.
|
|
22
|
+
availCores <- detectCores()
|
|
23
|
+
if (is.na(availCores)) stop("cannot detect number of available cores")
|
|
24
|
+
cores <- ifelse(length(chc_nums) < availCores, length(chc_nums), availCores)
|
|
25
|
+
|
|
20
26
|
#####################
|
|
21
27
|
# Functions for our method
|
|
22
28
|
# Ref: https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
|
|
@@ -130,12 +136,11 @@ newdata_chc_sampled$hdmtxdose=input$hdmtx # hdmtxval
|
|
|
130
136
|
# 10="Retinoblastoma"
|
|
131
137
|
# 11="Germ cell tumor";
|
|
132
138
|
|
|
133
|
-
|
|
134
|
-
for(
|
|
135
|
-
|
|
136
|
-
newdata_chc_sampled = data.frame(newdata_chc_sampled,tmp_Nj)
|
|
139
|
+
results <- mclapply(X = chc_nums, FUN = function(chc_num) predict(cphfits2[[chc_num]], newdata = data.frame(newdata_chc_sampled,primary=pr),type='expected'), mc.cores = cores)
|
|
140
|
+
for(n in 1:length(results)){
|
|
141
|
+
newdata_chc_sampled = data.frame(newdata_chc_sampled,results[[n]])
|
|
137
142
|
}
|
|
138
|
-
names(newdata_chc_sampled)[25:50]=paste0("est_chc",
|
|
143
|
+
names(newdata_chc_sampled)[25:50]=paste0("est_chc",chc_nums)
|
|
139
144
|
newdata_chc_sampled = newdata_chc_sampled %>%
|
|
140
145
|
mutate(sumN_tmp = rowSums(dplyr::select(.,starts_with("est_chc"))))%>%
|
|
141
146
|
group_by(mrn) %>%
|
|
@@ -225,106 +230,128 @@ newdata_chc_sampled1=newdata_chc_sampled ## do this so each run on chc_num loops
|
|
|
225
230
|
##########################################################################
|
|
226
231
|
person_burden=NULL
|
|
227
232
|
|
|
228
|
-
|
|
229
|
-
# print(chc_num)
|
|
230
|
-
newdata_chc_sampled=newdata_chc_sampled1
|
|
231
|
-
|
|
232
|
-
# linear predictor
|
|
233
|
-
newdata_chc_sampled$exp_lp = predict(cphfits2[[chc_num]], newdata = data.frame(newdata_chc_sampled,primary=pr),type='risk',reference="zero")
|
|
234
|
-
|
|
235
|
-
# Baseline nelson-aalan est
|
|
236
|
-
# https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
|
|
237
|
-
j=chc_num
|
|
238
|
-
base = basehaz(cphfits2[[chc_num]],centered = F) # this is a cumulative hazard, so need to convert it into non-cumulative version
|
|
239
|
-
#centered, if TRUE return data from a predicted survival curve at the mean values of the covariates fit$mean, if FALSE return a prediction for all covariates equal to zero.
|
|
240
|
-
#request the hazard for that covariate combination from the survfit() function that is called by basehaz(). https://stats.stackexchange.com/questions/565210/about-getting-baseline-survival-probability-for-a-piecewise-cox-model-with-inter
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
### Max time in the data is 70.42. We need to estimate up to 90.
|
|
244
|
-
#Yutaka: I think we should smooth the cumulative hazard and then take the derivative to get the hazard.
|
|
245
|
-
#One thread I found on Web is: "As an approximation you can smooth the fitted baseline cumulative hazard (e.g. by package pspline) and ask for its derivative." Can you try using smooth.spline and smooth the cumulative hazard and then get the derivative? https://cran.r-project.org/web/packages/pspline/pspline.pdf
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
#### Qi added: base is for different DX. Now we run within each pr, so neeed cumulaive hazrd for that pr only
|
|
249
|
-
base=base[base$strata==paste("primary=",pr,sep=""),] #cumulative hazard
|
|
250
|
-
base=base[base$time<=agecut,] ### shouldn't we use the same age cutoff as the survival function splines? Yes, do so.
|
|
251
|
-
|
|
252
|
-
##### study the smooth parameter. I think spar=1 is the best one to use (most smoothest)
|
|
253
|
-
cumHspline=smooth.spline(base$time,base$hazard,spar=1)
|
|
254
|
-
predcumhz=predict(cumHspline,seq(0,95,1)) ### predicted cumulative hazard
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
##### In order to use the above way to get dN0, do Daisuke's original way using cumhz difference. But the difference is that: we fit cumhz with smooth.spline and can extend it to 90 years old.
|
|
258
|
-
base=data.frame(time=predcumhz$x,hazard=predcumhz$y) ##Daisuke used the cumHz, here we smoothed it and then use it.
|
|
259
|
-
#### fitted values had <0 values in age 0-8 or so. change to 0 cumulative hazard.
|
|
260
|
-
base$hazard[base$hazard<0]=0
|
|
261
|
-
base2 = base %>%
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
base2 = base2 %>%
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
base3 = base2 %>%
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
###############
|
|
275
|
-
# BCCT
|
|
276
|
-
###############
|
|
277
|
-
newdata_chc_sampled$time_cat = cut(newdata_chc_sampled$t.startage,breaks=seq(0,95,1),right = FALSE, include.lowest = TRUE)
|
|
278
|
-
|
|
279
|
-
#newdata_chc_sampled$time_cat = cut(newdata_chc_sampled$t.startage,breaks=seq(0,90,5),right = FALSE, include.lowest = TRUE) this won't work, because the input donors file had "t.startage" up to 55 only
|
|
280
|
-
|
|
281
|
-
newdata_chc_sampled = newdata_chc_sampled %>%
|
|
282
|
-
|
|
283
|
-
newdata_chc_sampled$dN0 = ifelse(is.na(newdata_chc_sampled$dN0),0,newdata_chc_sampled$dN0)
|
|
284
|
-
|
|
285
|
-
BCCT = newdata_chc_sampled %>%
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
for_web_BCCT = as.data.frame(tidyr::pivot_wider(BCCT,id_cols = mrn, names_from=time_cat,values_from=BCCT))
|
|
294
|
-
for_web_BCCT =for_web_BCCT[,-1]
|
|
295
|
-
|
|
296
|
-
#### for non-recurrent ones, maximum burden is 1 if the grouped conditions had only 1 condition. (11, 19,29) had only 1 conditons non-recurrent. (15,17,25) had 2 conditons. Take 25 as an example, it had obesity/underweight where underweight was so rare. So max 1 is still good.
|
|
297
|
-
#### non-recurrent CHCs are 11, 15, 17, 19, 25, 29. ==I think making it maximum 1 is not good always, becuase these are grouped conditions. For example, chc=10 contains 3 non-recurrent events, so one person could have each of these once, making it maximum 3 in this person for chc=10.
|
|
298
|
-
ncoltmp=75 ## from 20 to 94
|
|
299
|
-
if(chc_num %in% c(11, 15, 17, 19, 25, 29)){
|
|
300
|
-
for_web_BCCT2=apply(for_web_BCCT,c(1,2),function(x) min(x,1))
|
|
301
|
-
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
302
|
-
colnames(for_web_BCCT)=colnames(person_burden[1:ncoltmp])
|
|
303
|
-
}
|
|
304
|
-
#For example, chc=10 contains 3 non-recurrent events, so one person could have each of these once, making it maximum 3 in this person for chc=10.
|
|
305
|
-
if(chc_num %in% c(10)){
|
|
306
|
-
for_web_BCCT2=apply(for_web_BCCT,c(1,2),function(x) min(x,3))
|
|
307
|
-
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
308
|
-
colnames(for_web_BCCT)=colnames(person_burden[1:ncoltmp])
|
|
309
|
-
}
|
|
310
|
-
##### if female condition 6, then it is 0 for males.
|
|
311
|
-
if(chc_num %in% c(6) & sexval==1){
|
|
312
|
-
|
|
313
|
-
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
314
|
-
colnames(for_web_BCCT)=colnames(person_burden[1:75])
|
|
315
|
-
}
|
|
316
|
-
##### if male condition 7, then it is 0 for females.d
|
|
317
|
-
if(chc_num %in% c(7) & sexval==0){
|
|
318
|
-
|
|
319
|
-
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
320
|
-
colnames(for_web_BCCT)=colnames(person_burden[1:ncoltmp])
|
|
233
|
+
get_estimate <- function(chc_num) { #### Edgar, you may make this in separate runs to save time.
|
|
234
|
+
# print(chc_num)
|
|
235
|
+
newdata_chc_sampled=newdata_chc_sampled1
|
|
236
|
+
|
|
237
|
+
# linear predictor
|
|
238
|
+
newdata_chc_sampled$exp_lp = predict(cphfits2[[chc_num]], newdata = data.frame(newdata_chc_sampled,primary=pr),type='risk',reference="zero")
|
|
239
|
+
|
|
240
|
+
# Baseline nelson-aalan est
|
|
241
|
+
# https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
|
|
242
|
+
j=chc_num
|
|
243
|
+
base = basehaz(cphfits2[[chc_num]],centered = F) # this is a cumulative hazard, so need to convert it into non-cumulative version
|
|
244
|
+
#centered, if TRUE return data from a predicted survival curve at the mean values of the covariates fit$mean, if FALSE return a prediction for all covariates equal to zero.
|
|
245
|
+
#request the hazard for that covariate combination from the survfit() function that is called by basehaz(). https://stats.stackexchange.com/questions/565210/about-getting-baseline-survival-probability-for-a-piecewise-cox-model-with-inter
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
### Max time in the data is 70.42. We need to estimate up to 90.
|
|
249
|
+
#Yutaka: I think we should smooth the cumulative hazard and then take the derivative to get the hazard.
|
|
250
|
+
#One thread I found on Web is: "As an approximation you can smooth the fitted baseline cumulative hazard (e.g. by package pspline) and ask for its derivative." Can you try using smooth.spline and smooth the cumulative hazard and then get the derivative? https://cran.r-project.org/web/packages/pspline/pspline.pdf
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
#### Qi added: base is for different DX. Now we run within each pr, so neeed cumulaive hazrd for that pr only
|
|
254
|
+
base=base[base$strata==paste("primary=",pr,sep=""),] #cumulative hazard
|
|
255
|
+
base=base[base$time<=agecut,] ### shouldn't we use the same age cutoff as the survival function splines? Yes, do so.
|
|
256
|
+
|
|
257
|
+
##### study the smooth parameter. I think spar=1 is the best one to use (most smoothest)
|
|
258
|
+
cumHspline=smooth.spline(base$time,base$hazard,spar=1)
|
|
259
|
+
predcumhz=predict(cumHspline,seq(0,95,1)) ### predicted cumulative hazard
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
##### In order to use the above way to get dN0, do Daisuke's original way using cumhz difference. But the difference is that: we fit cumhz with smooth.spline and can extend it to 90 years old.
|
|
263
|
+
base=data.frame(time=predcumhz$x,hazard=predcumhz$y) ##Daisuke used the cumHz, here we smoothed it and then use it.
|
|
264
|
+
#### fitted values had <0 values in age 0-8 or so. change to 0 cumulative hazard.
|
|
265
|
+
base$hazard[base$hazard<0]=0
|
|
266
|
+
base2 = base %>%
|
|
267
|
+
mutate(hazard2 = hazard - c(0,hazard[-length(hazard)])) %>%
|
|
268
|
+
ungroup() %>% as.data.frame()
|
|
269
|
+
|
|
270
|
+
base2 = base2 %>%
|
|
271
|
+
mutate(time_cat = cut(time,breaks=seq(0,95,1),right = FALSE, include.lowest = TRUE)) %>%
|
|
272
|
+
ungroup()
|
|
273
|
+
|
|
274
|
+
base3 = base2 %>%
|
|
275
|
+
group_by(time_cat) %>%
|
|
276
|
+
dplyr::summarize(dN0 = sum(hazard2)) %>%
|
|
277
|
+
filter(!is.na(time_cat))
|
|
278
|
+
|
|
279
|
+
###############
|
|
280
|
+
# BCCT
|
|
281
|
+
###############
|
|
282
|
+
newdata_chc_sampled$time_cat = cut(newdata_chc_sampled$t.startage,breaks=seq(0,95,1),right = FALSE, include.lowest = TRUE)
|
|
283
|
+
|
|
284
|
+
#newdata_chc_sampled$time_cat = cut(newdata_chc_sampled$t.startage,breaks=seq(0,90,5),right = FALSE, include.lowest = TRUE) this won't work, because the input donors file had "t.startage" up to 55 only
|
|
285
|
+
|
|
286
|
+
newdata_chc_sampled = newdata_chc_sampled %>%
|
|
287
|
+
left_join(base3,by="time_cat")
|
|
288
|
+
newdata_chc_sampled$dN0 = ifelse(is.na(newdata_chc_sampled$dN0),0,newdata_chc_sampled$dN0)
|
|
289
|
+
|
|
290
|
+
BCCT = newdata_chc_sampled %>%
|
|
291
|
+
group_by(mrn) %>%
|
|
292
|
+
mutate(BCCT_tmp = exp_lp*survprob*dN0) %>%
|
|
293
|
+
mutate(BCCT = cumsum(BCCT_tmp)) %>%
|
|
294
|
+
filter(t.startage>=20) %>%
|
|
295
|
+
ungroup() %>%
|
|
296
|
+
as.data.frame()
|
|
297
|
+
|
|
298
|
+
for_web_BCCT = as.data.frame(tidyr::pivot_wider(BCCT,id_cols = mrn, names_from=time_cat,values_from=BCCT))
|
|
299
|
+
for_web_BCCT =for_web_BCCT[,-1]
|
|
300
|
+
|
|
301
|
+
#### for non-recurrent ones, maximum burden is 1 if the grouped conditions had only 1 condition. (11, 19,29) had only 1 conditons non-recurrent. (15,17,25) had 2 conditons. Take 25 as an example, it had obesity/underweight where underweight was so rare. So max 1 is still good.
|
|
302
|
+
#### non-recurrent CHCs are 11, 15, 17, 19, 25, 29. ==I think making it maximum 1 is not good always, becuase these are grouped conditions. For example, chc=10 contains 3 non-recurrent events, so one person could have each of these once, making it maximum 3 in this person for chc=10.
|
|
303
|
+
ncoltmp=75 ## from 20 to 94
|
|
304
|
+
if(chc_num %in% c(11, 15, 17, 19, 25, 29)){
|
|
305
|
+
for_web_BCCT2=apply(for_web_BCCT,c(1,2),function(x) min(x,1))
|
|
306
|
+
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
307
|
+
colnames(for_web_BCCT)=colnames(person_burden[1:ncoltmp])
|
|
308
|
+
}
|
|
309
|
+
#For example, chc=10 contains 3 non-recurrent events, so one person could have each of these once, making it maximum 3 in this person for chc=10.
|
|
310
|
+
if(chc_num %in% c(10)){
|
|
311
|
+
for_web_BCCT2=apply(for_web_BCCT,c(1,2),function(x) min(x,3))
|
|
312
|
+
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
313
|
+
colnames(for_web_BCCT)=colnames(person_burden[1:ncoltmp])
|
|
314
|
+
}
|
|
315
|
+
##### if female condition 6, then it is 0 for males.
|
|
316
|
+
if(chc_num %in% c(6) & sexval==1){
|
|
317
|
+
for_web_BCCT2=matrix(0,nrow=1,ncol=ncoltmp)
|
|
318
|
+
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
319
|
+
colnames(for_web_BCCT)=colnames(person_burden[1:75])
|
|
320
|
+
}
|
|
321
|
+
##### if male condition 7, then it is 0 for females.d
|
|
322
|
+
if(chc_num %in% c(7) & sexval==0){
|
|
323
|
+
for_web_BCCT2=matrix(0,nrow=1,ncol=ncoltmp)
|
|
324
|
+
for_web_BCCT=as.data.frame(for_web_BCCT2)
|
|
325
|
+
colnames(for_web_BCCT)=colnames(person_burden[1:ncoltmp])
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
for_web_BCCT$chc=chc_num
|
|
329
|
+
|
|
330
|
+
return(for_web_BCCT)
|
|
321
331
|
}
|
|
322
332
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
person_burden=rbind(person_burden,
|
|
326
|
-
|
|
327
|
-
|
|
333
|
+
# this serial loop works
|
|
334
|
+
# for(chc_num in chc_nums) {
|
|
335
|
+
# person_burden=rbind(person_burden, get_estimate(chc_num))
|
|
336
|
+
# }
|
|
337
|
+
|
|
338
|
+
# get estimates
|
|
339
|
+
# parallelize across chc_nums
|
|
340
|
+
results <- mclapply(X = chc_nums, FUN = get_estimate, mc.cores = cores)
|
|
341
|
+
|
|
342
|
+
# combine rows into person_burden data frame
|
|
343
|
+
for (n in 1:length(results)) {
|
|
344
|
+
row <- results[[n]]
|
|
345
|
+
if (!identical(names(row), names(results[[1]]))) {
|
|
346
|
+
# some rows may have empty column names because they
|
|
347
|
+
# used the columns names from the person_burden table, which
|
|
348
|
+
# is NULL when get_estimate() is run in parallel (see the
|
|
349
|
+
# if() statements in get_estimate())
|
|
350
|
+
# in this situation, use the column names from the first row
|
|
351
|
+
names(row) <- names(results[[1]])
|
|
352
|
+
}
|
|
353
|
+
person_burden <- rbind(person_burden, row)
|
|
354
|
+
}
|
|
328
355
|
|
|
329
356
|
# person_burden[,30:31]
|
|
330
357
|
# sum(person_burden[,31]) ## total burden at 50 years old. 8.971574 for this example.
|
package/utils/fastclust.R
CHANGED
|
@@ -49,6 +49,13 @@ if (length(input$valueIsTransformed) == 0 || input$valueIsTransformed == FALSE)
|
|
|
49
49
|
normalized_matrix <- input$matrix
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
+
rownames(normalized_matrix) <- input$row_names
|
|
53
|
+
colnames(normalized_matrix) <- input$col_names
|
|
54
|
+
normalized_matrix <- na.omit(normalized_matrix) # Removes rows with NA values
|
|
55
|
+
|
|
56
|
+
#print ("normalized_matrix")
|
|
57
|
+
#print (dim(normalized_matrix))
|
|
58
|
+
|
|
52
59
|
# For columns (i.e samples)
|
|
53
60
|
RowDist <- dist(normalized_matrix, method = "euclidean") # Transposing the matrix
|
|
54
61
|
|
|
@@ -103,14 +110,16 @@ print ("Done")
|
|
|
103
110
|
# Sorting the matrix
|
|
104
111
|
|
|
105
112
|
SortedMatrix <- normalized_matrix[RowDend$order, ColumnDend$order]
|
|
106
|
-
SortedRowNames <-
|
|
107
|
-
SortedColumnNames <-
|
|
108
|
-
|
|
109
|
-
m <- matrix(SortedMatrix,length(SortedRowNames),length(SortedColumnNames))
|
|
110
|
-
colnames(m) <- SortedColumnNames
|
|
111
|
-
rownames(m) <- SortedRowNames
|
|
112
|
-
cat("
|
|
113
|
-
cat("
|
|
113
|
+
SortedRowNames <- rownames(normalized_matrix)[RowDend$order]
|
|
114
|
+
SortedColumnNames <- colnames(normalized_matrix)[ColumnDend$order]
|
|
115
|
+
|
|
116
|
+
#m <- matrix(SortedMatrix,length(SortedRowNames),length(SortedColumnNames))
|
|
117
|
+
#colnames(m) <- SortedColumnNames
|
|
118
|
+
#rownames(m) <- SortedRowNames
|
|
119
|
+
cat("rowindexes",RowDend$order,"\n",sep="\t") # Prints out row indices
|
|
120
|
+
cat("colindexes",ColumnDend$order,"\n",sep="\t") # Prints out column indicies
|
|
121
|
+
cat("rownames",SortedRowNames,"\n",sep="\t") # Prints out row names
|
|
122
|
+
cat("colnames",SortedColumnNames,"\n",sep="\t") # Prints out column names
|
|
114
123
|
cat ("OutputMatrix",normalized_matrix,"\n",sep="\t") # This outputs the 2D array in 1D column-wise. This is later converted to 2D array in nodejs.
|
|
115
124
|
|
|
116
125
|
|