@sjcrh/proteinpaint-server 2.44.0 → 2.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/clinvar.hg19.js +53 -52
- package/dataset/clinvar.hg38.js +74 -73
- package/dataset/clinvar.js +164 -47
- package/dataset/termdb.test.js +257 -0
- package/genome/CriGri.js +1859 -27
- package/genome/cgc.js +743 -7
- package/genome/danRer10.js +1108 -46
- package/genome/dm3.js +71 -44
- package/genome/dm6.js +1926 -45
- package/genome/galGal5.js +23522 -46
- package/genome/galGal6.js +512 -46
- package/genome/hg19.js +293 -198
- package/genome/hg38.js +472 -105
- package/genome/hg38.test.js +406 -40
- package/genome/hgvirus.js +45 -20
- package/genome/mm10.js +135 -67
- package/genome/mm9.js +116 -79
- package/genome/rn6.js +1002 -47
- package/package.json +31 -35
- package/routes/_template_.js +30 -0
- package/routes/burden.js +149 -0
- package/routes/dataset.js +266 -0
- package/routes/dsdata.js +127 -0
- package/routes/gdc.maf.js +120 -0
- package/routes/gdc.mafBuild.js +106 -0
- package/routes/gdc.topMutatedGenes.js +465 -0
- package/routes/gene2canonicalisoform.js +41 -0
- package/routes/genelookup.js +52 -0
- package/routes/genomes.js +144 -0
- package/routes/healthcheck.js +30 -0
- package/routes/hicdata.js +98 -0
- package/routes/hicstat.js +55 -0
- package/routes/isoformlst.js +57 -0
- package/routes/ntseq.js +43 -0
- package/routes/pdomain.js +61 -0
- package/routes/snp.js +107 -0
- package/routes/termdb.categories.js +209 -0
- package/routes/termdb.cluster.js +228 -0
- package/routes/termdb.cohort.summary.js +38 -0
- package/routes/termdb.cohorts.js +49 -0
- package/routes/termdb.config.js +201 -0
- package/routes/termdb.getdescrstats.js +102 -0
- package/routes/termdb.getnumericcategories.js +92 -0
- package/routes/termdb.getpercentile.js +108 -0
- package/routes/termdb.getrootterm.js +65 -0
- package/routes/termdb.gettermchildren.js +67 -0
- package/routes/termdb.singleSampleMutation.js +80 -0
- package/routes/termdb.singlecellData.js +46 -0
- package/routes/termdb.singlecellSamples.js +160 -0
- package/routes/termdb.termsbyids.js +59 -0
- package/routes/termdb.topVariablyExpressedGenes.js +171 -0
- package/routes/termdb.violin.js +77 -0
- package/src/app.js +41498 -0
- package/src/serverconfig.js +14 -8
- package/start.js +3 -3
- package/routes/README.md +0 -84
- package/routes/burden.ts +0 -143
- package/routes/gdc.maf.ts +0 -195
- package/routes/gdc.mafBuild.ts +0 -114
- package/routes/gdc.topMutatedGenes.ts +0 -586
- package/routes/genelookup.ts +0 -50
- package/routes/healthcheck.ts +0 -29
- package/routes/hicdata.ts +0 -111
- package/routes/hicstat.ts +0 -55
- package/routes/termdb.categories.ts +0 -245
- package/routes/termdb.cluster.ts +0 -248
- package/routes/termdb.getdescrstats.ts +0 -102
- package/routes/termdb.getnumericcategories.ts +0 -99
- package/routes/termdb.getpercentile.ts +0 -118
- package/routes/termdb.getrootterm.ts +0 -73
- package/routes/termdb.gettermchildren.ts +0 -82
- package/routes/termdb.singleSampleMutation.ts +0 -87
- package/routes/termdb.singlecellData.ts +0 -49
- package/routes/termdb.singlecellSamples.ts +0 -175
- package/routes/termdb.termsbyids.ts +0 -63
- package/routes/termdb.topVariablyExpressedGenes.ts +0 -214
- package/routes/termdb.violin.ts +0 -77
- package/server.js +0 -2
- package/server.js.map +0 -1
- package/shared/common.js +0 -1080
- package/shared/termdb.initbinconfig.js +0 -96
- package/shared/vcf.js +0 -629
package/routes/hicdata.ts
DELETED
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
import { HicdataRequest, HicdataResponse, Item } from '#shared/types/routes/hicdata.ts'
|
|
2
|
-
import { fileurl } from '#src/utils.js'
|
|
3
|
-
import { spawn } from 'child_process'
|
|
4
|
-
import readline from 'readline'
|
|
5
|
-
import serverconfig from '#src/serverconfig.js'
|
|
6
|
-
|
|
7
|
-
/**
 * Route descriptor for the `hicdata` endpoint.
 *
 * GET is the primary method and names its request/response payload types by id;
 * POST is registered as an alternative to GET and shares the same handler factory.
 *
 * No request/response examples are registered here. A previously commented-out
 * sample used fields such as `dslabel` and `gettermbyid`, which the handler in
 * this file never reads, so it is not reproduced.
 */
export const api: any = {
	endpoint: 'hicdata',
	methods: {
		get: {
			init,
			request: { typeId: 'HicdataRequest' },
			response: { typeId: 'HicdataResponse' }
		},
		post: { alternativeFor: 'get', init }
	}
}
|
|
42
|
-
|
|
43
|
-
/**
 * Creates the request handler for the `hicdata` route.
 *
 * The handler passes `req.query` to `handle_hicdata()` and sends back whatever it
 * resolves with. Any failure is reported to the client as `{ error: <message> }`.
 *
 * @returns an async `(req, res)` handler that always answers through `res.send()`
 */
function init() {
	return async (req: any, res: any): Promise<void> => {
		try {
			const payload = await handle_hicdata(req.query as HicdataRequest)
			res.send(payload)
		} catch (e: any) {
			// handle_hicdata() rejects with `{ error: string }` objects rather than Error
			// instances; unwrap `.error` so the client receives a plain message instead
			// of a nested `{ error: { error: ... } }` payload.
			res.send({ error: e?.message || e?.error || e })
			if (e instanceof Error && e.stack) console.log(e)
		}
	}
}
|
|
56
|
-
|
|
57
|
-
/**
 * Runs the configured straw executable (`serverconfig.hicstraw`) for one query and
 * collects its tab-separated output (`pos1 \t pos2 \t value`) into `items`.
 *
 * @param q request parameters; reads `matrixType`, `nmeth`, `pos1`, `pos2`, `isfrag`,
 *          `resolution` and `mincutoff`, plus whatever `fileurl()` needs to locate the file
 * @returns a promise resolving to `{ items }`; it rejects with `{ error: string }` when the
 *          file name is illegal, the process cannot be started, the process writes to
 *          stderr, or any output line is malformed
 */
function handle_hicdata(q: HicdataRequest) {
	return new Promise((resolve, reject) => {
		const [e, file] = fileurl({ query: q })
		if (e) {
			// stop here: without the return the process below would still be spawned
			reject({ error: 'illegal file name' })
			return
		}

		/* The value passed from the client is not the proper straw parameter.
		'log(oe)' must be requested from straw as 'oe' and the log applied to each value.
		Use 'observed' as default if not provided.
		*/
		const isLogOE = q.matrixType == 'log(oe)'
		const matrixType = isLogOE ? 'oe' : q.matrixType ? q.matrixType : 'observed'

		const par = [matrixType, q.nmeth || 'NONE', file, q.pos1, q.pos2, q.isfrag ? 'FRAG' : 'BP', q.resolution]

		const ps = spawn(serverconfig.hicstraw, par)
		const rl = readline.createInterface({ input: ps.stdout })

		const items = [] as Item[]
		const erroutput = [] as string[]
		let linenot3fields = 0
		let fieldnotnumerical = 0

		rl.on('line', line => {
			// straw output: pos1 \t pos2 \t value
			const l = line.split('\t')
			if (l.length != 3) {
				linenot3fields++
				return
			}
			const n1 = Number.parseInt(l[0])
			const n2 = Number.parseInt(l[1])
			const raw = Number.parseFloat(l[2])
			const v = isLogOE ? Math.log(raw) : raw
			if (Number.isNaN(n1) || Number.isNaN(n2) || Number.isNaN(v)) {
				fieldnotnumerical++
				return
			}
			if (q.mincutoff != undefined && v <= q.mincutoff) {
				return
			}
			items.push([n1, n2, v] as Item)
		})

		// a spawn failure (e.g. missing executable) is emitted as 'error'; without a
		// listener it would be thrown as an uncaught exception
		ps.on('error', err => reject({ error: 'cannot run straw: ' + err.message }))
		ps.stderr.on('data', i => erroutput.push(i.toString()))
		ps.on('close', () => {
			// only the first reject/resolve call has an effect; return early for clarity
			const err = erroutput.join('')
			if (err) return reject({ error: err })

			if (linenot3fields) return reject({ error: linenot3fields + ' lines have other than 3 fields' })

			if (fieldnotnumerical)
				return reject({ error: fieldnotnumerical + ' lines have non-numerical values in any of the 3 fields' })

			resolve({ items })
		})
	})
}
|
package/routes/hicstat.ts
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import { fileurl, file_is_readable } from '#src/utils.js'
|
|
2
|
-
import { do_hicstat } from '#src/hicstat.ts'
|
|
3
|
-
import { HicstatRequestWithValidation } from '#shared/types/routes/hicstat.ts'
|
|
4
|
-
|
|
5
|
-
/**
 * Route descriptor for the `hicstat` endpoint.
 *
 * GET carries the request/response type ids and one example request that is
 * expected to answer with status 200; POST is an alternative to GET and uses the
 * same handler factory.
 */
export const api = {
	endpoint: 'hicstat',
	methods: {
		get: {
			init,
			request: { typeId: 'HicstatRequest' },
			response: { typeId: 'HicstatResponse' },
			examples: [
				{
					request: {
						body: {
							genome: 'hg19',
							file: 'proteinpaint_demo/hg19/hic/hic_demo.hic',
							embedder: 'localhost'
						}
					},
					response: { header: { status: 200 } }
				}
			]
		},
		post: { alternativeFor: 'get', init }
	}
}
|
|
37
|
-
|
|
38
|
-
/**
 * Creates the request handler for the `hicstat` route.
 *
 * The handler resolves the file or URL from the request with `fileurl()`, checks
 * that a local file is readable, runs `do_hicstat()` and replies with `{ out }`.
 * Any failure is sent back as `{ error }`; Error instances are also logged.
 */
function init() {
	const handler = async (req: HicstatRequestWithValidation, res: any): Promise<void> => {
		try {
			const [invalid, file, isurl] = fileurl(req)
			if (invalid) throw 'illegal file name'
			// only local files can be checked for readability
			if (!isurl) await file_is_readable(file)
			const out = await do_hicstat(file, isurl)
			res.send({ out })
		} catch (e: any) {
			res.send({ error: e?.message || e })
			if (e instanceof Error && e.stack) console.log(e)
		}
	}
	return handler
}
|
|
@@ -1,245 +0,0 @@
|
|
|
1
|
-
import { getcategoriesRequest, getcategoriesResponse } from '#shared/types/routes/termdb.categories.ts'
|
|
2
|
-
import { getOrderedLabels } from '#src/termdb.barchart.js'
|
|
3
|
-
import { getData } from '#src/termdb.matrix.js'
|
|
4
|
-
|
|
5
|
-
/**
 * Route descriptor for the `termdb/categories` endpoint.
 *
 * GET names the request/response payload types by id and carries one example
 * request (categories of term `diaggrp`, restricted by a cohort filter) that is
 * expected to answer with status 200. POST is an alternative to GET and uses the
 * same handler factory.
 */
export const api: any = {
	endpoint: 'termdb/categories',
	methods: {
		get: {
			init,
			request: { typeId: 'getcategoriesRequest' },
			response: { typeId: 'getcategoriesResponse' },
			examples: [
				{
					request: {
						body: {
							genome: 'hg38-test',
							dslabel: 'TermdbTest',
							embedder: 'localhost',
							getcategories: 1,
							tid: 'diaggrp',
							term1_q: {
								isAtomic: true,
								hiddenValues: {},
								type: 'values',
								groupsetting: { disabled: true },
								mode: 'discrete'
							},
							filter: {
								type: 'tvslst',
								in: true,
								join: '',
								lst: [
									{
										tag: 'cohortFilter',
										type: 'tvs',
										tvs: {
											term: {
												name: 'Cohort',
												type: 'categorical',
												values: { ABC: { label: 'ABC' }, XYZ: { label: 'XYZ' } },
												id: 'subcohort',
												isleaf: false,
												groupsetting: { disabled: true }
											},
											values: [{ key: 'ABC', label: 'ABC' }]
										}
									}
								]
							}
						}
					},
					response: { header: { status: 200 } }
				}
			]
		},
		post: { alternativeFor: 'get', init }
	}
}
|
|
68
|
-
|
|
69
|
-
/**
 * Creates the request handler for the `termdb/categories` route.
 *
 * The handler looks up the genome, dataset and termdb named in `req.query`, then
 * delegates to `trigger_getcategories()`, which writes the response itself.
 * Validation failures and any thrown error are sent back as `{ error }`.
 *
 * @param genomes map of genome name to genome object; each genome exposes `datasets`
 * @returns an async `(req, res)` handler
 */
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		const q = req.query as getcategoriesRequest
		try {
			const g = genomes[req.query.genome]
			if (!g) throw 'invalid genome name'
			const ds = g.datasets[req.query.dslabel]
			if (!ds) throw 'invalid dataset name'
			// a dataset without a cohort must produce the intended message, not a TypeError
			const tdb = ds.cohort?.termdb
			if (!tdb) throw 'invalid termdb object'

			await trigger_getcategories(q, res, tdb, ds, g) // as getcategoriesResponse
		} catch (e: any) {
			res.send({ error: e?.message || e })
			if (e instanceof Error && e.stack) console.log(e)
		}
	}
}
|
|
89
|
-
|
|
90
|
-
/**
 * Computes the categories of one term together with per-category sample counts
 * and sends `{ lst, orderedLabels }` through `res`.
 *
 * For `q.type == 'geneVariant'` each `lst` entry is `{ dt, classes }`, where
 * `classes` maps a mutation class to the number of samples carrying it (nested
 * under `byOrigin.<origin>` for data types that `ds.assayAvailability.byDt` marks
 * as `byOrigin`). For every other term each entry is `{ samplecount, key, label }`.
 *
 * @param q      request parameters; `tid` is required
 * @param res    response object; only `send()` is used
 * @param tdb    termdb object providing `q.termjsonByOneid()`
 * @param ds     dataset, passed through to `getData()`
 * @param genome genome object, passed through to `getData()`
 * @throws a string when `q.tid` is missing, or whatever `getData()` reports in `data.error`
 */
async function trigger_getcategories(
	q: { tid: string | number; type: string; filter: any; term1_q: any; currentGeneNames?: string[]; rglst?: any },
	res: { send: (arg0: { lst: any[]; orderedLabels?: any }) => void },
	tdb: { q: { termjsonByOneid: (arg0: any) => any } },
	ds: { assayAvailability: { byDt: { [s: string]: any } | ArrayLike<any> } },
	genome: any
) {
	if (!q.tid) throw '.tid missing'
	const isGeneVariant = q.type == 'geneVariant'
	const term = isGeneVariant ? { name: q.tid, type: 'geneVariant', isleaf: true } : tdb.q.termjsonByOneid(q.tid)

	const arg = {
		filter: q.filter,
		terms: isGeneVariant
			? [{ term: term, q: { isAtomic: true } }]
			: [{ id: q.tid, term, q: q.term1_q || getDefaultQ(term, q) }],
		currentGeneNames: q.currentGeneNames, // optional, from mds3 mayAddGetCategoryArgs()
		rglst: q.rglst // optional, from mds3 mayAddGetCategoryArgs()
	}

	const data = await getData(arg, ds, genome)
	if (data.error) throw data.error

	const termRefs = data.refs?.byTermId?.[q.tid]
	const lst = [] as any[]
	if (isGeneVariant) {
		const samples = data.samples as { [sampleId: string]: any }
		// k: dt, v: { class: count } or { byOrigin: { origin: { class: count } } }
		const dtClassMap = new Map()
		if (ds.assayAvailability?.byDt) {
			for (const [dtType, dtValue] of Object.entries(ds.assayAvailability.byDt)) {
				if (dtValue.byOrigin) {
					dtClassMap.set(parseInt(dtType, 10), { byOrigin: { germline: {}, somatic: {} } })
				}
			}
		}
		// keys already counted for the current sample, so a sample adds at most 1 per class
		const sampleCountedFor = new Set()
		for (const sampleData of Object.values(samples)) {
			/* values is an array of result entries, one or more entries for each dt. e.g.
			[
				{ dt: 1, class: 'Blank', _SAMPLEID_: 1, origin: 'germline' },
				{ dt: 1, class: 'WT', _SAMPLEID_: 1, origin: 'somatic' },
				{ dt: 2, class: 'Blank', _SAMPLEID_: 1 },
				{ dt: 4, class: 'WT', _SAMPLEID_: 1 }
			]
			*/
			const values = sampleData[q.tid]?.values
			if (!values) continue // sample has no data for this term
			sampleCountedFor.clear()
			for (const value of values) {
				if (!dtClassMap.has(value.dt)) {
					dtClassMap.set(value.dt, {})
				}
				const dtClasses = dtClassMap.get(value.dt)
				let counts
				let countedKey
				if (dtClasses.byOrigin) {
					// tolerate an origin other than the pre-seeded germline/somatic buckets
					if (!dtClasses.byOrigin[value.origin]) dtClasses.byOrigin[value.origin] = {}
					counts = dtClasses.byOrigin[value.origin]
					countedKey = `${value.dt} ${value.origin} ${value.class}`
				} else {
					counts = dtClasses
					countedKey = `${value.dt} ${value.class}`
				}
				if (sampleCountedFor.has(countedKey)) continue
				sampleCountedFor.add(countedKey)
				counts[value.class] = (counts[value.class] || 0) + 1
			}
		}
		for (const [dt, classes] of dtClassMap) {
			lst.push({
				dt,
				classes
			})
		}
	} else {
		// k: category key
		// v: number of samples
		const key2count = new Map()
		for (const sid in data.samples) {
			const v = data.samples[sid][q.tid]
			if (!v) continue
			if (!('key' in v)) continue
			key2count.set(v.key, 1 + (key2count.get(v.key) || 0))
		}
		for (const [key, count] of key2count) {
			lst.push({
				samplecount: count,
				key,
				// `?.label` after find(): a key without a matching event must fall through
				// to the term's own label instead of throwing
				label:
					termRefs?.events?.find((e: { event: any }) => e.event === key)?.label ||
					term?.values?.[key]?.label ||
					key
			})
		}
	}

	const orderedLabels = getOrderedLabels(term, termRefs?.bins || [], termRefs?.events, q.term1_q)
	if (orderedLabels.length) {
		lst.sort((a, b) => orderedLabels.indexOf(a.label) - orderedLabels.indexOf(b.label))
	}
	res.send({
		lst,
		orderedLabels
	} as getcategoriesResponse)
}
|
|
207
|
-
|
|
208
|
-
/**
 * Picks the default term-query (`q`) object for a term when the request did not
 * supply `term1_q`.
 *
 * - categorical, survival, geneVariant: an empty object
 * - integer, float: the term's `bins.default`
 * - condition: the condition-related settings copied from the request
 *
 * @param term term definition; `type` selects the branch
 * @param q    request parameters, only read for condition terms
 * @throws the string 'unknown term type' for any other `term.type`
 */
function getDefaultQ(
	term: { type: string; bins: { default: any } },
	q: {
		mode?: any
		breaks?: any
		bar_by_grade?: any
		bar_by_children?: any
		value_by_max_grade?: any
		value_by_most_recent?: any
		value_by_computable_grade?: any
		tid?: string | number
		type?: string
		filter?: any
		term1_q?: any
		currentGeneNames?: any
	}
) {
	switch (term.type) {
		case 'categorical':
		case 'survival':
		case 'geneVariant':
			return {}
		case 'integer':
		case 'float':
			return term.bins.default
		case 'condition': {
			/* Leave this here until the bug with term1_q not passing to getCategories is figured out.
			Deriving bar_by_children / value_by_computable_grade from term.subconditions is
			disabled because the tvs condition tests fail with it. */
			const { mode, breaks, bar_by_grade, bar_by_children } = q
			const { value_by_max_grade, value_by_most_recent, value_by_computable_grade } = q
			return {
				mode,
				breaks,
				bar_by_grade,
				bar_by_children,
				value_by_max_grade,
				value_by_most_recent,
				value_by_computable_grade
			}
		}
		default:
			throw 'unknown term type'
	}
}
|
package/routes/termdb.cluster.ts
DELETED
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
import path from 'path'
|
|
2
|
-
import fs from 'fs'
|
|
3
|
-
import lines2R from '#src/lines2R.js'
|
|
4
|
-
import {
|
|
5
|
-
TermdbClusterRequest,
|
|
6
|
-
TermdbClusterResponse,
|
|
7
|
-
Clustering,
|
|
8
|
-
ValidResponse,
|
|
9
|
-
SinglegeneResponse
|
|
10
|
-
} from '#shared/types/routes/termdb.cluster.ts'
|
|
11
|
-
import * as utils from '#src/utils.js'
|
|
12
|
-
import serverconfig from '#src/serverconfig.js'
|
|
13
|
-
import { GeneExpressionQuery, GeneExpressionQueryNative } from '#shared/types/dataset.ts'
|
|
14
|
-
import { gdc_validate_query_geneExpression } from '#src/mds3.gdc.js'
|
|
15
|
-
import { mayLimitSamples } from '#src/mds3.filter.js'
|
|
16
|
-
import { dtgeneexpression } from '#shared/common.js'
|
|
17
|
-
|
|
18
|
-
/**
 * Route descriptor for the `termdb/cluster` endpoint.
 *
 * A single handler factory is registered under `all`, with the request and
 * response payload types named by id.
 */
export const api = {
	endpoint: 'termdb/cluster',
	methods: {
		all: {
			init,
			request: { typeId: 'TermdbClusterRequest' },
			response: { typeId: 'TermdbClusterResponse' }
		}
	}
}
|
|
32
|
-
|
|
33
|
-
/**
 * Creates the request handler for the `termdb/cluster` route.
 *
 * The handler validates genome, dataset and data type from `req.query`, runs
 * `getResult()` for gene expression requests and always replies with a single
 * `res.send()`: either the result, or `{ status, error }` when anything throws
 * (status defaults to 400).
 *
 * @param genomes map of genome name to genome object; each genome exposes `datasets`
 */
function init({ genomes }) {
	// performs all validation and the actual work; throws on any problem
	async function compute(q: TermdbClusterRequest): Promise<TermdbClusterResponse> {
		const genome = genomes[q.genome]
		if (!genome) throw 'invalid genome name'
		const dataset = genome.datasets[q.dslabel]
		if (!dataset) throw 'invalid dataset name'
		if (dataset.__gdc && !dataset.__gdc.doneCaching)
			throw 'The server has not finished caching the case IDs: try again in ~2 minutes'
		if (q.dataType != dtgeneexpression) throw 'unknown q.dataType ' + q.dataType
		if (!dataset.queries?.geneExpression) throw 'no geneExpression data on this dataset'
		return (await getResult(q, dataset)) as TermdbClusterResponse
	}

	return async (req: any, res: any): Promise<void> => {
		let result
		try {
			result = await compute(req.query as TermdbClusterRequest)
		} catch (e: any) {
			if (e.stack) console.log(e.stack)
			result = {
				status: e.status || 400,
				error: e.message || e
			} as TermdbClusterResponse
		}
		res.send(result)
	}
}
|
|
60
|
-
|
|
61
|
-
/**
 * Fetches gene expression values through the dataset's `queries.geneExpression.get()`
 * and shapes the response.
 *
 * - no gene with data: throws the string 'no data'
 * - exactly one gene: returns `{ gene, data }` without clustering
 * - several genes: runs `doClustering()` and returns `{ clustering, byTermId, bySampleId }`
 *
 * @param q  cluster request, forwarded unchanged to the dataset getter and to clustering
 * @param ds dataset object providing `queries.geneExpression.get`
 */
async function getResult(q: TermdbClusterRequest, ds: any) {
	const fetched = await ds.queries.geneExpression.get(q)
	const { gene2sample2value, byTermId, bySampleId } = fetched
	const geneCount = gene2sample2value.size

	if (geneCount == 0) throw 'no data'

	if (geneCount == 1) {
		// only one gene: still return its data, the client may draw a violin plot later
		const [onlyGene] = Array.from(gene2sample2value.keys())
		return { gene: onlyGene, data: gene2sample2value.get(onlyGene) } as SinglegeneResponse
	}

	// multiple genes: cluster them, timing the run for debug output
	const startedAt = Date.now() // a Date object here leads to a tsc error on the subtraction
	const clustering: Clustering = await doClustering(gene2sample2value, q)
	if (serverconfig.debugmode) console.log('clustering done:', Date.now() - startedAt, 'ms')
	return { clustering, byTermId, bySampleId } as ValidResponse
}
|
|
76
|
-
|
|
77
|
-
/**
 * Hierarchically clusters a gene-by-sample expression matrix with the R script
 * `utils/hclust.R` and returns the dendrogram data plus the matrix reordered to
 * the clustering order.
 *
 * Steps: build a z-score-normalised matrix (rows = genes, columns = samples),
 * write it to a temporary JSON file in `serverconfig.cachedir`, run the R script
 * on it, then reorder rows and columns according to the R output.
 *
 * @param data map of gene name to `{ sampleId: value }`
 * @param q    cluster request; only `clusterMethod` is read
 * @returns `{ row, col, matrix }`, where `row`/`col` hold merge, height, order and inputOrder
 */
async function doClustering(data: any, q: TermdbClusterRequest) {
	// get set of unique sample names, to generate col_names dimension
	// NOTE(review): only the first gene's samples are used (the loop breaks after one
	// entry), so presumably every gene shares the same sample set; verify with callers
	const sampleSet = new Set()
	for (const o of data.values()) {
		// {sampleId: value}
		for (const s in o) sampleSet.add(s)
		break
	}

	const inputData = {
		matrix: [] as number[][],
		row_names: [] as string[], // genes
		col_names: [...sampleSet] as string[], // samples
		cluster_method: q.clusterMethod as string,
		plot_image: false // When true causes cluster.rs to plot the image into a png file (EXPERIMENTAL)
	}

	// compose "data{}" into a matrix; a sample missing for a gene counts as 0
	for (const [gene, o] of data) {
		inputData.row_names.push(gene)
		const row: number[] = []
		for (const s of inputData.col_names) {
			row.push(o[s] || 0)
		}
		inputData.matrix.push(getZscore(row))
	}

	const Rinputfile = path.join(serverconfig.cachedir, Math.random().toString() + '.json')
	let Routput
	try {
		await utils.write_file(Rinputfile, JSON.stringify(inputData))
		Routput = JSON.parse(await lines2R(path.join(serverconfig.binpath, 'utils/hclust.R'), [], [Rinputfile]))
	} finally {
		// always remove the temp file, also when R or JSON parsing fails; a cleanup
		// failure is ignored so it cannot mask the original error
		await fs.promises.unlink(Rinputfile).catch(() => undefined)
	}

	const row_names_index: number[] = Routput.RowOrder.map(row => inputData.row_names.indexOf(row.name)) // sorted rows. value is array index in input data
	const col_names_index: number[] = Routput.ColOrder.map(col => inputData.col_names.indexOf(col.name)) // sorted columns, value is array index from input array

	// generate sorted matrix based on row/col clustering order
	const output_matrix: number[][] = []
	for (const rowI of row_names_index) {
		const newRow: number[] = []
		for (const colI of col_names_index) {
			newRow.push(inputData.matrix[rowI][colI])
		}
		output_matrix.push(newRow)
	}

	return {
		row: {
			merge: Routput.RowMerge,
			height: Routput.RowHeight,
			order: Routput.RowOrder,
			inputOrder: inputData.row_names
		},
		col: {
			merge: Routput.ColumnMerge,
			height: Routput.ColumnHeight,
			order: Routput.ColOrder,
			inputOrder: inputData.col_names
		},
		matrix: output_matrix
	}
}
|
|
138
|
-
/**
 * Converts a list of numbers to z-scores using the population standard deviation
 * (dividing by the number of values).
 *
 * @param l input values
 * @returns a new array of `(v - mean) / sd`; when the standard deviation is 0 the
 *          input array itself is returned unchanged
 */
function getZscore(l: number[]) {
	const count = l.length

	let total = 0
	for (const v of l) total += v
	const mean: number = total / count

	let squaredDeviations = 0
	for (const v of l) squaredDeviations += Math.pow(v - mean, 2)
	const sd: number = Math.sqrt(squaredDeviations / count)

	// all values identical: nothing to scale, hand back the input as is
	if (sd == 0) return l

	const scores: number[] = []
	for (const v of l) scores.push((v - mean) / sd)
	return scores
}
|
|
147
|
-
|
|
148
|
-
/**
 * Validates the dataset's optional `queries.geneExpression` setting and delegates
 * to the validator matching its `src`.
 *
 * - no geneExpression query: nothing to do
 * - `src == 'gdcapi'`: `gdc_validate_query_geneExpression()` (adds `q.get()`)
 * - `src == 'native'`: `validateNative()`
 *
 * @param ds     dataset object
 * @param genome genome object, forwarded to the validators
 * @throws the string 'unknown queries.geneExpression.src' for any other `src`
 */
export async function validate_query_geneExpression(ds: any, genome: any) {
	const q = ds.queries.geneExpression as GeneExpressionQuery
	if (!q) return

	const isGdc = q.src == 'gdcapi'
	const isNative = q.src == 'native'
	if (!isGdc && !isNative) throw 'unknown queries.geneExpression.src'

	if (isGdc) {
		// q.get() added
		// NOTE(review): the call is not awaited; confirm the gdc validator is synchronous
		gdc_validate_query_geneExpression(ds, genome)
		return
	}

	await validateNative(q, ds, genome)
}
|
|
163
|
-
|
|
164
|
-
/**
 * Validates a native (tabix file based) gene expression query and attaches the
 * `q.get()` getter to it.
 *
 * Validation: resolves `q.file` under `serverconfig.tpmasterdir`, checks the tabix
 * file, detects whether chromosome names lack the "chr" prefix, and reads the
 * header line to fill `q.samples` with the sample ids of the matrix columns.
 *
 * `q.get(param)` returns `{ gene2sample2value, byTermId, bySampleId }` for the
 * genes in `param.genes`, restricted to the samples allowed by `mayLimitSamples()`.
 *
 * @param q      native gene expression query; mutated in place (`file`, `nochr`, `samples`, `get`)
 * @param ds     dataset; its `cohort.termdb.q` converts between sample names and ids
 * @param genome genome; its `genedb` resolves gene coordinates for genes lacking them
 * @throws a string when the header line is missing or malformed, or names an unknown sample
 */
async function validateNative(q: GeneExpressionQueryNative, ds: any, genome: any) {
	if (!q.file.startsWith(serverconfig.tpmasterdir)) q.file = path.join(serverconfig.tpmasterdir, q.file)
	await utils.validate_tabixfile(q.file)
	q.nochr = await utils.tabix_is_nochr(q.file, null, genome)
	q.samples = [] as number[]

	{
		// is a gene-by-sample matrix file
		const lines = await utils.get_header_tabix(q.file)
		if (!lines[0]) throw 'header line missing from ' + q.file
		const l = lines[0].split('\t')
		if (l.slice(0, 4).join('\t') != '#chr\tstart\tstop\tgene') throw 'header line has wrong content for columns 1-4'
		for (let i = 4; i < l.length; i++) {
			const id = ds.cohort.termdb.q.sampleName2id(l[i])
			if (id == undefined) throw 'queries.geneExpression: unknown sample from header: ' + l[i]
			q.samples.push(id)
		}
		console.log(q.samples.length, 'samples from geneExpression of', ds.label)
	}

	q.get = async (param: TermdbClusterRequest) => {
		const limitSamples = await mayLimitSamples(param, q.samples, ds)
		if (limitSamples?.size == 0) {
			// got 0 sample after filtering, must still return expected structure with no data.
			// Use a Map like the regular path below, so consumers see one consistent type.
			return { gene2sample2value: new Map(), byTermId: {}, bySampleId: {} }
		}

		// has at least 1 sample passing filter and with exp data
		// TODO what if there's just 1 sample not enough for clustering?
		const bySampleId = {}
		const samples = q.samples || []
		// label either the filtered samples or, without a filter, all samples with exp data
		for (const sid of limitSamples || samples) {
			bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) }
		}

		// only valid genes with data are added. invalid genes or genes missing from the data file are not added.
		// the genes returned by the backend are allowed to be fewer than supplied by the client
		const gene2sample2value = new Map() // k: gene symbol, v: { sampleId : value }

		for (const g of param.genes) {
			// FIXME newly added geneVariant terms from client to be changed to {gene} but not {name}
			if (!g.gene) continue

			if (!g.chr) {
				// quick fix: newly added gene from client will lack chr/start/stop
				const lst = genome.genedb.getjsonbyname.all(g.gene)
				if (lst.length == 0) continue
				// prefer the default gene model; fall back to the first one when none is
				// flagged as default (find() returns undefined in that case)
				const model = lst.find(i => i.isdefault) || lst[0]
				const j = JSON.parse(model.genemodel || lst[0].genemodel)
				g.start = j.start
				g.stop = j.stop
				g.chr = j.chr
			}

			const s2v = {}
			await utils.get_lines_bigfile({
				args: [q.file, (q.nochr ? g.chr?.replace('chr', '') : g.chr) + ':' + g.start + '-' + g.stop], // must do g.chr?.replace to avoid tsc error
				callback: line => {
					const l = line.split('\t')
					// case-insensitive match! FIXME if g.gene is alias won't work
					if (l[3].toLowerCase() != g.gene.toLowerCase()) return
					for (let i = 4; i < l.length; i++) {
						const sampleId = samples[i - 4]
						if (limitSamples && !limitSamples.has(sampleId)) continue // doing filtering and sample of current column is not used
						if (!l[i]) continue // blank string
						const v = Number(l[i])
						if (Number.isNaN(v)) throw 'exp value not number'
						s2v[sampleId] = v
					}
				}
			} as any)
			// Above!! add "as any" to suppress a npx tsc alert
			if (Object.keys(s2v).length) gene2sample2value.set(g.gene, s2v) // only add gene if has data
		}
		// pass blank byTermId to match with expected output structure
		const byTermId = {}
		return { gene2sample2value, byTermId, bySampleId }
	}
}
|