@sjcrh/proteinpaint-server 2.44.0 → 2.46.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/clinvar.hg19.js +53 -52
- package/dataset/clinvar.hg38.js +74 -73
- package/dataset/clinvar.js +164 -47
- package/dataset/termdb.test.js +257 -0
- package/genome/CriGri.js +1859 -27
- package/genome/cgc.js +743 -7
- package/genome/danRer10.js +1108 -46
- package/genome/dm3.js +71 -44
- package/genome/dm6.js +1926 -45
- package/genome/galGal5.js +23522 -46
- package/genome/galGal6.js +512 -46
- package/genome/hg19.js +293 -198
- package/genome/hg38.js +472 -105
- package/genome/hg38.test.js +406 -40
- package/genome/hgvirus.js +45 -20
- package/genome/mm10.js +135 -67
- package/genome/mm9.js +116 -79
- package/genome/rn6.js +1002 -47
- package/package.json +31 -35
- package/routes/_template_.js +30 -0
- package/routes/burden.js +149 -0
- package/routes/dataset.js +266 -0
- package/routes/dsdata.js +127 -0
- package/routes/gdc.maf.js +120 -0
- package/routes/gdc.mafBuild.js +106 -0
- package/routes/gdc.topMutatedGenes.js +465 -0
- package/routes/gene2canonicalisoform.js +41 -0
- package/routes/genelookup.js +52 -0
- package/routes/genomes.js +144 -0
- package/routes/healthcheck.js +30 -0
- package/routes/hicdata.js +98 -0
- package/routes/hicstat.js +55 -0
- package/routes/isoformlst.js +57 -0
- package/routes/ntseq.js +43 -0
- package/routes/pdomain.js +61 -0
- package/routes/snp.js +107 -0
- package/routes/termdb.categories.js +209 -0
- package/routes/termdb.cluster.js +228 -0
- package/routes/termdb.cohort.summary.js +38 -0
- package/routes/termdb.cohorts.js +49 -0
- package/routes/termdb.config.js +202 -0
- package/routes/termdb.getdescrstats.js +102 -0
- package/routes/termdb.getnumericcategories.js +92 -0
- package/routes/termdb.getpercentile.js +108 -0
- package/routes/termdb.getrootterm.js +65 -0
- package/routes/termdb.gettermchildren.js +67 -0
- package/routes/termdb.singleSampleMutation.js +80 -0
- package/routes/termdb.singlecellData.js +46 -0
- package/routes/termdb.singlecellSamples.js +160 -0
- package/routes/termdb.termsbyids.js +59 -0
- package/routes/termdb.topVariablyExpressedGenes.js +171 -0
- package/routes/termdb.violin.js +77 -0
- package/src/app.js +41500 -0
- package/src/serverconfig.js +14 -8
- package/start.js +3 -3
- package/routes/README.md +0 -84
- package/routes/burden.ts +0 -143
- package/routes/gdc.maf.ts +0 -195
- package/routes/gdc.mafBuild.ts +0 -114
- package/routes/gdc.topMutatedGenes.ts +0 -586
- package/routes/genelookup.ts +0 -50
- package/routes/healthcheck.ts +0 -29
- package/routes/hicdata.ts +0 -111
- package/routes/hicstat.ts +0 -55
- package/routes/termdb.categories.ts +0 -245
- package/routes/termdb.cluster.ts +0 -248
- package/routes/termdb.getdescrstats.ts +0 -102
- package/routes/termdb.getnumericcategories.ts +0 -99
- package/routes/termdb.getpercentile.ts +0 -118
- package/routes/termdb.getrootterm.ts +0 -73
- package/routes/termdb.gettermchildren.ts +0 -82
- package/routes/termdb.singleSampleMutation.ts +0 -87
- package/routes/termdb.singlecellData.ts +0 -49
- package/routes/termdb.singlecellSamples.ts +0 -175
- package/routes/termdb.termsbyids.ts +0 -63
- package/routes/termdb.topVariablyExpressedGenes.ts +0 -214
- package/routes/termdb.violin.ts +0 -77
- package/server.js +0 -2
- package/server.js.map +0 -1
- package/shared/common.js +0 -1080
- package/shared/termdb.initbinconfig.js +0 -96
- package/shared/vcf.js +0 -629
package/src/serverconfig.js
CHANGED
|
@@ -3,15 +3,15 @@
|
|
|
3
3
|
including generating and applying overrides as needed
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
import fs from 'fs'
|
|
7
|
+
import path from 'path'
|
|
8
8
|
|
|
9
9
|
// do not assume that serverconfig.json is in the same dir as server.js
|
|
10
10
|
// for example, when using proteinpaint as an npm module or binary
|
|
11
11
|
// or when calling a pp utility script from a tp data directory
|
|
12
12
|
const workdirconfig = process.cwd() ? process.cwd() + '/serverconfig.json' : ''
|
|
13
|
-
const serverdirconfig = path.join(
|
|
14
|
-
const pprootdirconfig = path.join(
|
|
13
|
+
const serverdirconfig = path.join(import.meta.dirname, '../serverconfig.json')
|
|
14
|
+
const pprootdirconfig = path.join(import.meta.dirname, '../../serverconfig.json')
|
|
15
15
|
// check which config file exists in order of usage priority
|
|
16
16
|
const serverconfigfile =
|
|
17
17
|
workdirconfig && fs.existsSync(workdirconfig)
|
|
@@ -26,6 +26,7 @@ const serverconfigfile =
|
|
|
26
26
|
GET SERVERCONFIG
|
|
27
27
|
********************/
|
|
28
28
|
let serverconfig
|
|
29
|
+
|
|
29
30
|
if (!serverconfigfile) {
|
|
30
31
|
throw 'missing serverconfig.json'
|
|
31
32
|
} else {
|
|
@@ -71,12 +72,16 @@ if (!serverconfig.binpath) {
|
|
|
71
72
|
} else {
|
|
72
73
|
const specfile = process.argv.find(n => n.includes('.spec.js'))
|
|
73
74
|
if (specfile) {
|
|
74
|
-
serverconfig.binpath = path.dirname(
|
|
75
|
+
serverconfig.binpath = path.dirname(import.meta.dirname)
|
|
76
|
+
} else if (import.meta.filename.includes('node_modules/@sjcrh/proteinpaint-server')) {
|
|
77
|
+
const p = import.meta.filename.split('/proteinpaint-server')[0]
|
|
78
|
+
serverconfig.binpath = `${p}/proteinpaint-server`
|
|
75
79
|
} else {
|
|
76
80
|
const jsfile = process.argv.find(
|
|
77
81
|
n =>
|
|
78
82
|
n.endsWith('/bin.js') ||
|
|
79
83
|
n.endsWith('/server.js') ||
|
|
84
|
+
n.endsWith('/start.js') ||
|
|
80
85
|
n.endsWith('/proteinpaint') ||
|
|
81
86
|
n.endsWith('/proteinpaint-server')
|
|
82
87
|
)
|
|
@@ -90,8 +95,9 @@ if (!serverconfig.binpath) {
|
|
|
90
95
|
} else {
|
|
91
96
|
if (fs.existsSync('./server')) serverconfig.binpath = fs.realpathSync('./server')
|
|
92
97
|
else if (fs.existsSync('./src')) serverconfig.binpath = fs.realpathSync('./src/..')
|
|
93
|
-
else if (
|
|
94
|
-
|
|
98
|
+
else if (import.meta.dirname.includes('/server/'))
|
|
99
|
+
serverconfig.binpath = import.meta.dirname.split('/server/')[0] + '/server'
|
|
100
|
+
else if (import.meta.dirname.includes('/proteinpaint')) serverconfig.binpath = import.meta.dirname
|
|
95
101
|
else throw 'unable to determine the serverconfig.binpath'
|
|
96
102
|
}
|
|
97
103
|
}
|
|
@@ -216,4 +222,4 @@ if (fs.existsSync('./package.json')) {
|
|
|
216
222
|
serverconfig.version = JSON.parse(pkg).version
|
|
217
223
|
}
|
|
218
224
|
|
|
219
|
-
|
|
225
|
+
export default serverconfig
|
package/start.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
console.log(
|
|
4
|
-
|
|
2
|
+
import { launch } from './src/app.js'
|
|
3
|
+
console.log(`starting the server ...`)
|
|
4
|
+
launch()
|
|
5
5
|
console.log('server code loaded ...')
|
package/routes/README.md
DELETED
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
# Server Routes
|
|
2
|
-
|
|
3
|
-
## Introduction
|
|
4
|
-
|
|
5
|
-
This directory contains files that specify server route APIs. By following these guidelines,
|
|
6
|
-
the auto-generation of server routes, tests, and API documentation will work as expected.
|
|
7
|
-
|
|
8
|
-
## Guidelines
|
|
9
|
-
|
|
10
|
-
### 1. Use Express to do most of the routing logic
|
|
11
|
-
|
|
12
|
-
- decentralize the route handling code into smaller, independent handler functions
|
|
13
|
-
- common request processing logic, like genome, dataset, termdb set-up should be imported
|
|
14
|
-
from a shared helper module that is common to a group of routes, or for more advanced cases,
|
|
15
|
-
moved to a [router-level middleware](https://expressjs.com/en/guide/using-middleware.html#middleware.router)
|
|
16
|
-
|
|
17
|
-
### 2. Export an `api` from the route file
|
|
18
|
-
|
|
19
|
-
Use the code from other files in this directory as examples
|
|
20
|
-
|
|
21
|
-
TODO: define the `api` type
|
|
22
|
-
|
|
23
|
-
```ts
|
|
24
|
-
// work-in-progress
|
|
25
|
-
type RouteApi {
|
|
26
|
-
[key as methods]: RouteApiMethod
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
type methods = 'get' | 'post'
|
|
30
|
-
|
|
31
|
-
type initArg = {
|
|
32
|
-
app?: any // Express app instance
|
|
33
|
-
genome: any // `Genome` from shared/types/genome.ts
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
/**
|
|
37
|
-
@param
|
|
38
|
-
*/
|
|
39
|
-
type RouteApiMethod = {
|
|
40
|
-
endpoint: string
|
|
41
|
-
init: (initArg) => void
|
|
42
|
-
request: {
|
|
43
|
-
typeId: string
|
|
44
|
-
body?: any // specific to the route
|
|
45
|
-
}
|
|
46
|
-
response: {
|
|
47
|
-
typeId: string
|
|
48
|
-
header?: {
|
|
49
|
-
status: number
|
|
50
|
-
}
|
|
51
|
-
body?: any // specific to the route
|
|
52
|
-
}
|
|
53
|
-
examples: RouteExample[]
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
type RouteExample = {
|
|
57
|
-
request: {
|
|
58
|
-
body?: any
|
|
59
|
-
}
|
|
60
|
-
response?: {
|
|
61
|
-
header: {
|
|
62
|
-
status: number
|
|
63
|
-
}
|
|
64
|
-
body?: any
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
```
|
|
68
|
-
### 3. Use the appropriate [HTTP response code](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status)
|
|
69
|
-
|
|
70
|
-
This is a best practice especially for error responses. Use `res.status(code)` to set the error code.
|
|
71
|
-
This convention helps with error troubleshooting. Examples:
|
|
72
|
-
- Status `400` 'Bad Request', something is wrong with the http request payload
|
|
73
|
-
- Status `401` 'Unauthorized', the user must authenticate. The `server/src/auth.js` sets this status code
|
|
74
|
-
- Status `403` 'Forbidden', the user is authenticated/signed-in, but is not permitted to access the requested data
|
|
75
|
-
- Status `404` 'Not Found' for genome, dataset, or other data that is not found
|
|
76
|
-
- Status `500` 'Server Error' for errors related to the server process or host machine, such as the GDC API
|
|
77
|
-
not being available. Do not use code=`500` for errors that are related to a specific request handler or data processing function.
|
|
78
|
-
|
|
79
|
-
### 4. Auto-generate
|
|
80
|
-
|
|
81
|
-
- the server code will detect the routes in `server/src/run.sh`
|
|
82
|
-
- `npm run doc` to see the documented routes in http://localhost:3000/server.html
|
|
83
|
-
- `./augen/readme.sh > public/docs/readme.json` for content in http://localhost:3000/readme.html
|
|
84
|
-
|
package/routes/burden.ts
DELETED
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
import { BurdenRequest, BurdenResponse } from '#shared/types/routes/burden.ts'
|
|
2
|
-
import lines2R from '#src/lines2R.js'
|
|
3
|
-
import path from 'path'
|
|
4
|
-
import serverconfig from '#src/serverconfig.js'
|
|
5
|
-
import { write_file } from '#src/utils.js'
|
|
6
|
-
|
|
7
|
-
export const api = {
|
|
8
|
-
endpoint: 'burden',
|
|
9
|
-
methods: {
|
|
10
|
-
get: {
|
|
11
|
-
init({ genomes }) {
|
|
12
|
-
return async (req: any, res: any): Promise<void> => {
|
|
13
|
-
try {
|
|
14
|
-
const genome = genomes[req.query.genome]
|
|
15
|
-
if (!genome) throw `invalid q.genome=${req.query.genome}`
|
|
16
|
-
const q = req.query as BurdenRequest
|
|
17
|
-
const ds = genome.datasets[q.dslabel]
|
|
18
|
-
if (!ds) throw `invalid q.genome=${req.query.dslabel}`
|
|
19
|
-
if (!ds.cohort.cumburden?.files) throw `missing ds.cohort.cumburden.files`
|
|
20
|
-
|
|
21
|
-
const estimates = await getBurdenEstimates(req, ds)
|
|
22
|
-
const { keys, rows } = formatPayload(estimates)
|
|
23
|
-
res.send({ status: 'ok', keys, rows } as BurdenResponse)
|
|
24
|
-
} catch (e: any) {
|
|
25
|
-
res.send({ status: 'error', error: e.message || e })
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
},
|
|
29
|
-
request: {
|
|
30
|
-
typeId: 'BurdenRequest'
|
|
31
|
-
},
|
|
32
|
-
response: {
|
|
33
|
-
typeId: 'BurdenResponse'
|
|
34
|
-
},
|
|
35
|
-
examples: [
|
|
36
|
-
{
|
|
37
|
-
request: {
|
|
38
|
-
body: {
|
|
39
|
-
genome: 'hg38',
|
|
40
|
-
// TODO: !!! use hg38-test and TermdbTest !!!
|
|
41
|
-
dslabel: 'SJLife',
|
|
42
|
-
diaggrp: 5,
|
|
43
|
-
sex: 1,
|
|
44
|
-
white: 1,
|
|
45
|
-
agedx: 1,
|
|
46
|
-
bleo: 0,
|
|
47
|
-
etop: 0,
|
|
48
|
-
cisp: 0,
|
|
49
|
-
carbo: 0,
|
|
50
|
-
steriod: 0,
|
|
51
|
-
vcr: 0,
|
|
52
|
-
hdmtx: 0,
|
|
53
|
-
itmt: 0,
|
|
54
|
-
ced: 0,
|
|
55
|
-
dox: 0,
|
|
56
|
-
heart: 0,
|
|
57
|
-
brain: 0,
|
|
58
|
-
abd: 0,
|
|
59
|
-
pelvis: 0,
|
|
60
|
-
chest: 0
|
|
61
|
-
}
|
|
62
|
-
},
|
|
63
|
-
response: {
|
|
64
|
-
header: { status: 200 }
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
]
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
async function getBurdenEstimates(
|
|
73
|
-
q: { query: { [x: string]: any } },
|
|
74
|
-
ds: { cohort: { cumburden: { files: { fit: any; surv: any; sample: any } } } }
|
|
75
|
-
) {
|
|
76
|
-
const infile = path.join(serverconfig.cachedir, Math.random().toString() + '.json')
|
|
77
|
-
for (const k in q.query) {
|
|
78
|
-
q.query[k] = Number(q.query[k])
|
|
79
|
-
}
|
|
80
|
-
const data = Object.assign({}, defaults, q.query)
|
|
81
|
-
//console.log(40, data, JSON.stringify(data))
|
|
82
|
-
await write_file(infile, JSON.stringify(data))
|
|
83
|
-
// TODO: use the dataset location
|
|
84
|
-
const { fit, surv, sample } = ds.cohort.cumburden.files
|
|
85
|
-
if (!fit || !surv || !sample) throw `missing one or more of ds.cohort.burden.files.{fit, surv, sample}`
|
|
86
|
-
const args = [
|
|
87
|
-
infile,
|
|
88
|
-
`${serverconfig.tpmasterdir}/${fit}`,
|
|
89
|
-
`${serverconfig.tpmasterdir}/${surv}`,
|
|
90
|
-
`${serverconfig.tpmasterdir}/${sample}`
|
|
91
|
-
]
|
|
92
|
-
const Routput = await lines2R(path.join(serverconfig.binpath, 'utils/burden.R'), [], args)
|
|
93
|
-
const estimates = JSON.parse(Routput[0])
|
|
94
|
-
return estimates
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
function formatPayload(estimates: object[]) {
|
|
98
|
-
const rawKeys = Object.keys(estimates[0])
|
|
99
|
-
const outKeys = [] as string[]
|
|
100
|
-
const keys = [] as string[]
|
|
101
|
-
for (const k of rawKeys) {
|
|
102
|
-
if (k == 'chc') {
|
|
103
|
-
keys.push(k)
|
|
104
|
-
outKeys.push(k)
|
|
105
|
-
} else {
|
|
106
|
-
const age = Number(k.slice(1).split(',')[0])
|
|
107
|
-
if (age <= 60 && age % 2 == 0) {
|
|
108
|
-
keys.push(k)
|
|
109
|
-
outKeys.push(`burden${age}`)
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
const rows = [] as number[][]
|
|
114
|
-
// v = an array of objects with age as keys as cumulative burden as value for a given CHC
|
|
115
|
-
for (const v of estimates) {
|
|
116
|
-
rows.push(keys.map(k => v[k]))
|
|
117
|
-
}
|
|
118
|
-
return { keys: outKeys, rows }
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
const defaults = Object.freeze({
|
|
122
|
-
diaggrp: 5,
|
|
123
|
-
sex: 0,
|
|
124
|
-
white: 1,
|
|
125
|
-
agedx: 1,
|
|
126
|
-
// chemotherapy
|
|
127
|
-
steriod: 0,
|
|
128
|
-
bleo: 0,
|
|
129
|
-
vcr: 0, //12, // Vincristine
|
|
130
|
-
etop: 0, //2500, // Etoposide
|
|
131
|
-
itmt: 0, // Intrathecal methothrexate_grp: 0,
|
|
132
|
-
ced: 0, //1.6, // Cyclophosphamide, 0.7692 mean 7692.
|
|
133
|
-
cisp: 0, //300, // Cisplatin
|
|
134
|
-
dox: 0, // Anthracycline, 3 mean 300 ml/m2
|
|
135
|
-
carbo: 0, // Carboplatin
|
|
136
|
-
hdmtx: 0, // High-Dose Methotrexate
|
|
137
|
-
// radiation
|
|
138
|
-
brain: 0, //5.4,
|
|
139
|
-
chest: 0, //2.4,
|
|
140
|
-
heart: 0,
|
|
141
|
-
pelvis: 0,
|
|
142
|
-
abd: 0 //2.4
|
|
143
|
-
})
|
package/routes/gdc.maf.ts
DELETED
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
import { GdcMafRequest, GdcMafResponse, File } from '#shared/types/routes/gdc.maf.ts'
|
|
2
|
-
import path from 'path'
|
|
3
|
-
import got from 'got'
|
|
4
|
-
import serverconfig from '#src/serverconfig.js'
|
|
5
|
-
|
|
6
|
-
/*
|
|
7
|
-
this route lists available gdc MAF files based on user's cohort filter
|
|
8
|
-
and return them to client to be shown in a table for selection
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
const maxFileNumber = 1000 // determines max number of files to return to client
|
|
12
|
-
// preliminary testing:
|
|
13
|
-
// 36s for 1000 (87Mb)
|
|
14
|
-
// 78s for 2000 (177Mb)
|
|
15
|
-
// if safe to increase to 2000, maybe fast when this runs in gdc env
|
|
16
|
-
|
|
17
|
-
const allowedWorkflowType = 'Aliquot Ensemble Somatic Variant Merging and Masking'
|
|
18
|
-
|
|
19
|
-
// change to 400 so it won't limit number of files; should keep this setting as a safeguard; also it's fast to check file size (.5s in gdc.mafBuild.ts)
|
|
20
|
-
export const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 400000000 // 400Mb
|
|
21
|
-
|
|
22
|
-
export const api = {
|
|
23
|
-
endpoint: 'gdc/maf',
|
|
24
|
-
methods: {
|
|
25
|
-
all: {
|
|
26
|
-
init,
|
|
27
|
-
request: {
|
|
28
|
-
typeId: 'GdcMafRequest'
|
|
29
|
-
},
|
|
30
|
-
response: {
|
|
31
|
-
typeId: 'GdcMafResponse'
|
|
32
|
-
// will combine this with type checker
|
|
33
|
-
//valid: (t) => {}
|
|
34
|
-
},
|
|
35
|
-
examples: [
|
|
36
|
-
{
|
|
37
|
-
request: {
|
|
38
|
-
body: {
|
|
39
|
-
experimentalStrategy: 'WXS',
|
|
40
|
-
embedder: 'localhost'
|
|
41
|
-
}
|
|
42
|
-
},
|
|
43
|
-
response: {
|
|
44
|
-
header: { status: 200 }
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
]
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
function init({ genomes }) {
|
|
53
|
-
return async (req: any, res: any): Promise<void> => {
|
|
54
|
-
try {
|
|
55
|
-
// g and ds are not used right now, but could be later
|
|
56
|
-
const g = genomes.hg38
|
|
57
|
-
if (!g) throw 'hg38 missing'
|
|
58
|
-
const ds = g.datasets.GDC
|
|
59
|
-
if (!ds) throw 'hg38 GDC missing'
|
|
60
|
-
|
|
61
|
-
const payload = await listMafFiles(req.query as GdcMafRequest, ds)
|
|
62
|
-
res.send(payload)
|
|
63
|
-
} catch (e: any) {
|
|
64
|
-
res.send({ status: 'error', error: e.message || e })
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
/*
|
|
70
|
-
req.query {
|
|
71
|
-
filter0 // optional gdc GFF cohort filter, invisible and read only
|
|
72
|
-
experimentalStrategy: WXS/Targeted Sequencing
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
ds {
|
|
76
|
-
__gdc {
|
|
77
|
-
gdcOpenProjects
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
*/
|
|
81
|
-
async function listMafFiles(q: GdcMafRequest, ds) {
|
|
82
|
-
const filters = {
|
|
83
|
-
op: 'and',
|
|
84
|
-
content: [
|
|
85
|
-
{ op: '=', content: { field: 'data_format', value: 'MAF' } },
|
|
86
|
-
{ op: '=', content: { field: 'experimental_strategy', value: q.experimentalStrategy } },
|
|
87
|
-
{ op: '=', content: { field: 'analysis.workflow_type', value: allowedWorkflowType } },
|
|
88
|
-
{ op: '=', content: { field: 'access', value: 'open' } } // delete if later to support controlled files
|
|
89
|
-
]
|
|
90
|
-
}
|
|
91
|
-
const case_filters: any = { op: 'and', content: [] }
|
|
92
|
-
if (q.filter0) {
|
|
93
|
-
case_filters.content.push(q.filter0)
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
const { host, headers } = ds.getHostHeaders(q)
|
|
97
|
-
|
|
98
|
-
const data = {
|
|
99
|
-
filters,
|
|
100
|
-
case_filters,
|
|
101
|
-
size: maxFileNumber,
|
|
102
|
-
fields: [
|
|
103
|
-
'id',
|
|
104
|
-
'file_size',
|
|
105
|
-
'cases.project.project_id', // for display only
|
|
106
|
-
'cases.submitter_id', // used when listing all cases & files
|
|
107
|
-
'cases.samples.sample_type'
|
|
108
|
-
// may add diagnosis and primary site
|
|
109
|
-
].join(',')
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
const response = await got.post(path.join(host.rest, 'files'), { headers, body: JSON.stringify(data) })
|
|
113
|
-
|
|
114
|
-
let re
|
|
115
|
-
try {
|
|
116
|
-
re = JSON.parse(response.body)
|
|
117
|
-
} catch (e) {
|
|
118
|
-
throw 'invalid JSON from ' + api.endpoint
|
|
119
|
-
}
|
|
120
|
-
if (!Number.isInteger(re.data?.pagination?.total)) throw 're.data.pagination.total is not int'
|
|
121
|
-
if (!Array.isArray(re.data?.hits)) throw 're.data.hits[] not array'
|
|
122
|
-
|
|
123
|
-
// flatten api return to table row objects
|
|
124
|
-
// it is possible to set a max size limit to limit the number of files passed to client
|
|
125
|
-
const files = [] as File[]
|
|
126
|
-
|
|
127
|
-
for (const h of re.data.hits) {
|
|
128
|
-
/*
|
|
129
|
-
{
|
|
130
|
-
"id": "39768777-fec5-4a79-9515-65712c002b19",
|
|
131
|
-
"cases": [
|
|
132
|
-
{
|
|
133
|
-
"submitter_id": "HTMCP-03-06-02104",
|
|
134
|
-
"project": {
|
|
135
|
-
"project_id":"xx"
|
|
136
|
-
},
|
|
137
|
-
"samples": [
|
|
138
|
-
{
|
|
139
|
-
"sample_type": "Blood Derived Normal"
|
|
140
|
-
},
|
|
141
|
-
{
|
|
142
|
-
"sample_type": "Primary Tumor"
|
|
143
|
-
}
|
|
144
|
-
]
|
|
145
|
-
}
|
|
146
|
-
],
|
|
147
|
-
"analysis": {
|
|
148
|
-
"workflow_type": "MuSE Annotation"
|
|
149
|
-
},
|
|
150
|
-
"experimental_strategy": "Targeted Sequencing",
|
|
151
|
-
"file_size": 146038
|
|
152
|
-
}
|
|
153
|
-
*/
|
|
154
|
-
|
|
155
|
-
const c = h.cases?.[0]
|
|
156
|
-
if (!c) throw 'h.cases[0] missing'
|
|
157
|
-
|
|
158
|
-
// only keep files from open access projects for now
|
|
159
|
-
/*
|
|
160
|
-
if (c.project?.project_id) {
|
|
161
|
-
if (ds.__gdc.gdcOpenProjects.has(c.project.project_id)) {
|
|
162
|
-
// open-access project, keep
|
|
163
|
-
} else {
|
|
164
|
-
// not open access
|
|
165
|
-
continue
|
|
166
|
-
}
|
|
167
|
-
} else {
|
|
168
|
-
throw 'h.cases[0].project.project_id missing'
|
|
169
|
-
}
|
|
170
|
-
*/
|
|
171
|
-
|
|
172
|
-
const file = {
|
|
173
|
-
id: h.id,
|
|
174
|
-
project_id: c.project.project_id,
|
|
175
|
-
file_size: h.file_size
|
|
176
|
-
} as File
|
|
177
|
-
|
|
178
|
-
file.case_submitter_id = c.submitter_id
|
|
179
|
-
if (c.samples) {
|
|
180
|
-
file.sample_types = c.samples.map((i: { sample_type: any }) => i.sample_type).sort()
|
|
181
|
-
// sort to show sample type names in consistent alphabetical order
|
|
182
|
-
// otherwise one file shows 'Blood, Primary' and another shows 'Primary, Blood'
|
|
183
|
-
// FIXME this includes samples not associated with current maf file
|
|
184
|
-
}
|
|
185
|
-
files.push(file)
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
const result = {
|
|
189
|
-
files,
|
|
190
|
-
filesTotal: re.data.pagination.total,
|
|
191
|
-
maxTotalSizeCompressed
|
|
192
|
-
} as GdcMafResponse
|
|
193
|
-
|
|
194
|
-
return result
|
|
195
|
-
}
|
package/routes/gdc.mafBuild.ts
DELETED
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
import got from 'got'
|
|
2
|
-
import path from 'path'
|
|
3
|
-
import fs from 'fs'
|
|
4
|
-
import { run_rust_stream } from '@sjcrh/proteinpaint-rust'
|
|
5
|
-
import serverconfig from '#src/serverconfig.js'
|
|
6
|
-
import Readable from 'stream'
|
|
7
|
-
import { GdcMafBuildRequest } from '#shared/types/routes/gdc.mafBuild.ts'
|
|
8
|
-
import { maxTotalSizeCompressed } from './gdc.maf.ts'
|
|
9
|
-
|
|
10
|
-
export const api = {
|
|
11
|
-
endpoint: 'gdc/mafBuild',
|
|
12
|
-
methods: {
|
|
13
|
-
all: {
|
|
14
|
-
init,
|
|
15
|
-
request: {
|
|
16
|
-
typeId: 'GdcMafBuildRequest'
|
|
17
|
-
},
|
|
18
|
-
response: {
|
|
19
|
-
typeId: null // 'GdcMafBuildResponse'
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
function init({ genomes }) {
|
|
26
|
-
return async (req: any, res: any): Promise<void> => {
|
|
27
|
-
try {
|
|
28
|
-
const g = genomes.hg38
|
|
29
|
-
if (!g) throw 'hg38 missing'
|
|
30
|
-
const ds = g.datasets.GDC
|
|
31
|
-
if (!ds) throw 'hg38 GDC missing'
|
|
32
|
-
await buildMaf(req.query as GdcMafBuildRequest, res, ds)
|
|
33
|
-
} catch (e: any) {
|
|
34
|
-
if (e.stack) console.log(e.stack)
|
|
35
|
-
res.send({ status: 'error', error: e.message || e })
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
/*
|
|
41
|
-
q{}
|
|
42
|
-
res{}
|
|
43
|
-
*/
|
|
44
|
-
async function buildMaf(q: GdcMafBuildRequest, res: any, ds) {
|
|
45
|
-
const t0 = Date.now()
|
|
46
|
-
const { host, headers } = ds.getHostHeaders(q)
|
|
47
|
-
const fileLst2 = (await getFileLstUnderSizeLimit(q.fileIdLst, host, headers)) as string[]
|
|
48
|
-
|
|
49
|
-
if (serverconfig.debugmode)
|
|
50
|
-
console.log(
|
|
51
|
-
`${fileLst2.length} out of ${q.fileIdLst.length} input MAF files accepted by size limit`,
|
|
52
|
-
Date.now() - t0
|
|
53
|
-
)
|
|
54
|
-
|
|
55
|
-
const arg = {
|
|
56
|
-
fileIdLst: fileLst2,
|
|
57
|
-
columns: q.columns,
|
|
58
|
-
host: path.join(host.rest, 'data') // must use the /data/ endpoint from current host
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
const rustStream = run_rust_stream('gdcmaf', JSON.stringify(arg))
|
|
62
|
-
res.setHeader('Content-Type', 'application/octet-stream')
|
|
63
|
-
res.setHeader('Content-Disposition', 'attachment; filename=cohort.maf.gz')
|
|
64
|
-
rustStream.pipe(res)
|
|
65
|
-
|
|
66
|
-
rustStream.on('end', () => {
|
|
67
|
-
// report amount of time taken to run rust
|
|
68
|
-
if (serverconfig.debugmode) console.log('rust gdcmaf', Date.now() - t0)
|
|
69
|
-
res.end()
|
|
70
|
-
})
|
|
71
|
-
|
|
72
|
-
rustStream.on('error', err => {
|
|
73
|
-
console.error(err)
|
|
74
|
-
res.statusCode = 500
|
|
75
|
-
res.end('Internal Server Error')
|
|
76
|
-
})
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
/*
|
|
80
|
-
query the api to get the size of each input maf file, and only process those files whose total size is under a set limit,
|
|
81
|
-
excess files are not processed in order not to crash server
|
|
82
|
-
must not rely on the file size sent by the client, as that can be spoofed and is never to be trusted
|
|
83
|
-
it's inexpensive to query api for this
|
|
84
|
-
*/
|
|
85
|
-
async function getFileLstUnderSizeLimit(lst: string[], host, headers) {
|
|
86
|
-
if (lst.length == 0) throw 'fileIdLst[] not array or blank'
|
|
87
|
-
const data = {
|
|
88
|
-
filters: {
|
|
89
|
-
op: 'in',
|
|
90
|
-
content: { field: 'file_id', value: lst }
|
|
91
|
-
},
|
|
92
|
-
size: 10000,
|
|
93
|
-
fields: 'file_size'
|
|
94
|
-
}
|
|
95
|
-
const response = await got.post(path.join(host.rest, 'files'), { headers, body: JSON.stringify(data) })
|
|
96
|
-
let re
|
|
97
|
-
try {
|
|
98
|
-
re = JSON.parse(response.body)
|
|
99
|
-
} catch (e) {
|
|
100
|
-
throw 'invalid json from getFileLstUnderSizeLimit'
|
|
101
|
-
}
|
|
102
|
-
if (!Array.isArray(re.data?.hits)) throw 're.data.hits[] not array'
|
|
103
|
-
const out = [] as string[]
|
|
104
|
-
let cumsize = 0
|
|
105
|
-
for (const h of re.data.hits) {
|
|
106
|
-
if (cumsize >= maxTotalSizeCompressed) break // maxed out
|
|
107
|
-
if (!h.id) throw '.id missing'
|
|
108
|
-
if (!Number.isInteger(h.file_size)) throw '.file_size not integer'
|
|
109
|
-
cumsize += h.file_size
|
|
110
|
-
out.push(h.id)
|
|
111
|
-
}
|
|
112
|
-
if (out.length == 0) throw 'no file available'
|
|
113
|
-
return out
|
|
114
|
-
}
|